xref: /openbmc/linux/fs/gfs2/rgrp.c (revision 455f9726)
1 /*
2  * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
3  * Copyright (C) 2004-2008 Red Hat, Inc.  All rights reserved.
4  *
5  * This copyrighted material is made available to anyone wishing to use,
6  * modify, copy, or redistribute it subject to the terms and conditions
7  * of the GNU General Public License version 2.
8  */
9 
10 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
11 
12 #include <linux/slab.h>
13 #include <linux/spinlock.h>
14 #include <linux/completion.h>
15 #include <linux/buffer_head.h>
16 #include <linux/fs.h>
17 #include <linux/gfs2_ondisk.h>
18 #include <linux/prefetch.h>
19 #include <linux/blkdev.h>
20 #include <linux/rbtree.h>
21 #include <linux/random.h>
22 
23 #include "gfs2.h"
24 #include "incore.h"
25 #include "glock.h"
26 #include "glops.h"
27 #include "lops.h"
28 #include "meta_io.h"
29 #include "quota.h"
30 #include "rgrp.h"
31 #include "super.h"
32 #include "trans.h"
33 #include "util.h"
34 #include "log.h"
35 #include "inode.h"
36 #include "trace_gfs2.h"
37 
38 #define BFITNOENT ((u32)~0)
39 #define NO_BLOCK ((u64)~0)
40 
41 #if BITS_PER_LONG == 32
42 #define LBITMASK   (0x55555555UL)
43 #define LBITSKIP55 (0x55555555UL)
44 #define LBITSKIP00 (0x00000000UL)
45 #else
46 #define LBITMASK   (0x5555555555555555UL)
47 #define LBITSKIP55 (0x5555555555555555UL)
48 #define LBITSKIP00 (0x0000000000000000UL)
49 #endif
50 
51 /*
52  * These routines are used by the resource group routines (rgrp.c)
53  * to keep track of block allocation.  Each block is represented by two
54  * bits.  So, each byte represents GFS2_NBBY (i.e. 4) blocks.
55  *
56  * 0 = Free
57  * 1 = Used (not metadata)
58  * 2 = Unlinked (still in use) inode
59  * 3 = Used (metadata)
60  */
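/*
 * Editor's illustration (example values, not from the original
 * comments): one bitmap byte packs four blocks, lowest-order bit pair
 * first.  The byte 0xc4 (binary 11 00 01 00) therefore describes:
 *
 *   bits 0-1 = 00 -> block n+0 free
 *   bits 2-3 = 01 -> block n+1 used (data)
 *   bits 4-5 = 00 -> block n+2 free
 *   bits 6-7 = 11 -> block n+3 used (metadata)
 *
 * gfs2_testbit() below performs exactly this per-block extraction.
 */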
61 
62 struct gfs2_extent {
63 	struct gfs2_rbm rbm;
64 	u32 len;
65 };
66 
67 static const char valid_change[16] = {
68 	        /* current */
69 	/* n */ 0, 1, 1, 1,
70 	/* e */ 1, 0, 0, 0,
71 	/* w */ 0, 0, 0, 1,
72 	        1, 0, 0, 0
73 };
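/*
 * Editor's note: valid_change[] is indexed in gfs2_setbit() as
 * valid_change[new_state * 4 + cur_state], so each row above is a new
 * state and each column a current state (the row labels spell "new").
 * E.g. row 0 says a transition to GFS2_BLKST_FREE is only valid when
 * the block is currently in one of the three non-free states.
 */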
74 
75 static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext,
76 			 const struct gfs2_inode *ip, bool nowrap,
77 			 const struct gfs2_alloc_parms *ap);
78 
79 
80 /**
81  * gfs2_setbit - Set a bit in the bitmaps
82  * @rbm: The position of the bit to set
83  * @do_clone: Also set the clone bitmap, if it exists
84  * @new_state: the new state of the block
85  *
86  */
87 
88 static inline void gfs2_setbit(const struct gfs2_rbm *rbm, bool do_clone,
89 			       unsigned char new_state)
90 {
91 	unsigned char *byte1, *byte2, *end, cur_state;
92 	struct gfs2_bitmap *bi = rbm_bi(rbm);
93 	unsigned int buflen = bi->bi_len;
94 	const unsigned int bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE;
95 
96 	byte1 = bi->bi_bh->b_data + bi->bi_offset + (rbm->offset / GFS2_NBBY);
97 	end = bi->bi_bh->b_data + bi->bi_offset + buflen;
98 
99 	BUG_ON(byte1 >= end);
100 
101 	cur_state = (*byte1 >> bit) & GFS2_BIT_MASK;
102 
103 	if (unlikely(!valid_change[new_state * 4 + cur_state])) {
104 		pr_warn("buf_blk = 0x%x old_state=%d, new_state=%d\n",
105 			rbm->offset, cur_state, new_state);
106 		pr_warn("rgrp=0x%llx bi_start=0x%x\n",
107 			(unsigned long long)rbm->rgd->rd_addr, bi->bi_start);
108 		pr_warn("bi_offset=0x%x bi_len=0x%x\n",
109 			bi->bi_offset, bi->bi_len);
110 		dump_stack();
111 		gfs2_consist_rgrpd(rbm->rgd);
112 		return;
113 	}
114 	*byte1 ^= (cur_state ^ new_state) << bit;
115 
116 	if (do_clone && bi->bi_clone) {
117 		byte2 = bi->bi_clone + bi->bi_offset + (rbm->offset / GFS2_NBBY);
118 		cur_state = (*byte2 >> bit) & GFS2_BIT_MASK;
119 		*byte2 ^= (cur_state ^ new_state) << bit;
120 	}
121 }
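/*
 * Editor's note on the update above: XOR-ing the byte with
 * (cur_state ^ new_state) << bit rewrites just the two bits of this
 * block's field, since cur ^ (cur ^ new) == new, while leaving the
 * other three bit pairs in the byte untouched.
 */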
122 
123 /**
124  * gfs2_testbit - test a bit in the bitmaps
125  * @rbm: The bit to test
126  *
127  * Returns: The two bit block state of the requested bit
128  */
129 
130 static inline u8 gfs2_testbit(const struct gfs2_rbm *rbm)
131 {
132 	struct gfs2_bitmap *bi = rbm_bi(rbm);
133 	const u8 *buffer = bi->bi_bh->b_data + bi->bi_offset;
134 	const u8 *byte;
135 	unsigned int bit;
136 
137 	byte = buffer + (rbm->offset / GFS2_NBBY);
138 	bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE;
139 
140 	return (*byte >> bit) & GFS2_BIT_MASK;
141 }
142 
143 /**
144  * gfs2_bit_search - search 64 bits of bitmap data for a given block state
145  * @ptr: Pointer to bitmap data
146  * @mask: Mask to use (normally 0x55555.... but adjusted for search start)
147  * @state: The state we are searching for
148  *
149  * We xor the bitmap data with a pattern which is the bitwise opposite
150  * of what we are looking for; this gives rise to a pattern of ones
151  * wherever there is a match. Since we have two bits per entry, we
152  * take this pattern, shift it down by one place and then AND it with
153  * the original. All the even bit positions (0,2,4, etc) then represent
154  * successful matches, so we mask with 0x55555..... to remove the unwanted
155  * odd bit positions.
156  *
157  * This allows searching of a whole u64 at once (32 blocks) with a
158  * single test (on 64 bit arches).
159  */
160 
161 static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state)
162 {
163 	u64 tmp;
164 	static const u64 search[] = {
165 		[0] = 0xffffffffffffffffULL,
166 		[1] = 0xaaaaaaaaaaaaaaaaULL,
167 		[2] = 0x5555555555555555ULL,
168 		[3] = 0x0000000000000000ULL,
169 	};
170 	tmp = le64_to_cpu(*ptr) ^ search[state];
171 	tmp &= (tmp >> 1);
172 	tmp &= mask;
173 	return tmp;
174 }
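/*
 * Worked example (editor's illustration, not from the original source):
 * searching data whose low byte is 0x44 (binary 01 00 01 00, i.e.
 * entries 1 and 3 are in state GFS2_BLKST_USED) for state == 1:
 *
 *   tmp = data ^ search[1]   -> low byte 0xee (11101110)
 *   tmp &= (tmp >> 1)        -> low byte 0x66 (01100110)
 *   tmp &= 0x55...           -> low byte 0x44 (01000100)
 *
 * The surviving even bits (positions 2 and 6) mark the matching
 * entries; __ffs64()/2 in the caller then yields entry 1 as the
 * first match.
 */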
175 
176 /**
177  * rs_cmp - multi-block reservation range compare
178  * @blk: absolute file system block number of the new reservation
179  * @len: number of blocks in the new reservation
180  * @rs: existing reservation to compare against
181  *
182  * Returns: 1 if the block range is beyond the reach of the reservation
183  *         -1 if the block range is before the start of the reservation
184  *          0 if the block range overlaps with the reservation
185  */
186 static inline int rs_cmp(u64 blk, u32 len, struct gfs2_blkreserv *rs)
187 {
188 	u64 startblk = gfs2_rbm_to_block(&rs->rs_rbm);
189 
190 	if (blk >= startblk + rs->rs_free)
191 		return 1;
192 	if (blk + len - 1 < startblk)
193 		return -1;
194 	return 0;
195 }
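/*
 * Example (editor's illustration): for a reservation whose rs_rbm maps
 * to block 1000 with rs_free == 8 (covering blocks 1000-1007),
 * rs_cmp(1008, 4, rs) returns 1, rs_cmp(990, 5, rs) returns -1, and
 * rs_cmp(1005, 4, rs) returns 0 since the range 1005-1008 overlaps
 * the tail of the reservation.
 */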
196 
197 /**
198  * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing
199  *       a block in a given allocation state.
200  * @buf: the buffer that holds the bitmaps
201  * @len: the length (in bytes) of the buffer
202  * @goal: start search at this block's bit-pair (within @buf)
203  * @state: GFS2_BLKST_XXX the state of the block we're looking for.
204  *
205  * Scope of @goal and returned block number is only within this bitmap buffer,
206  * not entire rgrp or filesystem.  @buf will be offset from the actual
207  * beginning of a bitmap block buffer, skipping any header structures, but
208  * headers are always a multiple of 64 bits long so that the buffer is
209  * always aligned to a 64 bit boundary.
210  *
211  * The size of the buffer is in bytes, but it is assumed that it is
212  * always ok to read a complete multiple of 64 bits at the end
213  * of the block in case the end is not aligned to a natural boundary.
214  *
215  * Returns: the block number (bitmap buffer scope) that was found
216  */
217 
218 static u32 gfs2_bitfit(const u8 *buf, const unsigned int len,
219 		       u32 goal, u8 state)
220 {
221 	u32 spoint = (goal << 1) & ((8*sizeof(u64)) - 1);
222 	const __le64 *ptr = ((__le64 *)buf) + (goal >> 5);
223 	const __le64 *end = (__le64 *)(buf + ALIGN(len, sizeof(u64)));
224 	u64 tmp;
225 	u64 mask = 0x5555555555555555ULL;
226 	u32 bit;
227 
228 	/* Mask off bits we don't care about at the start of the search */
229 	mask <<= spoint;
230 	tmp = gfs2_bit_search(ptr, mask, state);
231 	ptr++;
232 	while(tmp == 0 && ptr < end) {
233 		tmp = gfs2_bit_search(ptr, 0x5555555555555555ULL, state);
234 		ptr++;
235 	}
236 	/* Mask off any bits which are more than len bytes from the start */
237 	if (ptr == end && (len & (sizeof(u64) - 1)))
238 		tmp &= (((u64)~0) >> (64 - 8*(len & (sizeof(u64) - 1))));
239 	/* Didn't find anything, so return */
240 	if (tmp == 0)
241 		return BFITNOENT;
242 	ptr--;
243 	bit = __ffs64(tmp);
244 	bit /= 2;	/* two bits per entry in the bitmap */
245 	return (((const unsigned char *)ptr - buf) * GFS2_NBBY) + bit;
246 }
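#if 0	/* Editor's usage sketch (hypothetical values, not built): */
static u32 example_bitfit_usage(void)
{
	/* 8 bytes = 32 blocks; only block 1 is in state GFS2_BLKST_USED */
	static const u8 buf[8] = { 0x04, };

	/* returns 1: block 1 is the first used block at or after goal 0;
	   searching for a state that never occurs returns BFITNOENT */
	return gfs2_bitfit(buf, sizeof(buf), 0, GFS2_BLKST_USED);
}
#endif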
247 
248 /**
249  * gfs2_rbm_from_block - Set the rbm based upon rgd and block number
250  * @rbm: The rbm with rgd already set correctly
251  * @block: The block number (filesystem relative)
252  *
253  * This sets the bii and offset members of an rbm based on a
254  * resource group and a filesystem relative block number. The
255  * resource group must be set in the rbm on entry; the bii and
256  * offset members will be set by this function.
257  *
258  * Returns: 0 on success, or an error code
259  */
260 
261 static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block)
262 {
263 	u64 rblock = block - rbm->rgd->rd_data0;
264 
265 	if (WARN_ON_ONCE(rblock > UINT_MAX))
266 		return -EINVAL;
267 	if (block >= rbm->rgd->rd_data0 + rbm->rgd->rd_data)
268 		return -E2BIG;
269 
270 	rbm->bii = 0;
271 	rbm->offset = (u32)(rblock);
272 	/* Check if the block is within the first bitmap block */
273 	if (rbm->offset < rbm_bi(rbm)->bi_blocks)
274 		return 0;
275 
276 	/* Adjust for the size diff between gfs2_meta_header and gfs2_rgrp */
277 	rbm->offset += (sizeof(struct gfs2_rgrp) -
278 			sizeof(struct gfs2_meta_header)) * GFS2_NBBY;
279 	rbm->bii = rbm->offset / rbm->rgd->rd_sbd->sd_blocks_per_bitmap;
280 	rbm->offset -= rbm->bii * rbm->rgd->rd_sbd->sd_blocks_per_bitmap;
281 	return 0;
282 }
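/*
 * Editor's note on the arithmetic above (hypothetical numbers): the
 * first bitmap block loses more bytes to its gfs2_rgrp header than the
 * others lose to gfs2_meta_header, so rbm->offset is first padded by
 * that size difference (expressed in blocks) to pretend every bitmap
 * holds sd_blocks_per_bitmap blocks.  E.g. if sd_blocks_per_bitmap
 * were 16288 and rblock were 20000, the division would yield bii = 1,
 * with the remainder becoming the offset into bitmap 1.
 */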
283 
284 /**
285  * gfs2_rbm_incr - increment an rbm structure
286  * @rbm: The rbm with rgd already set correctly
287  *
288  * This function takes an existing rbm structure and increments it to the next
289  * viable block offset.
290  *
291  * Returns: If incrementing the offset would cause the rbm to go past the
292  *          end of the rgrp, true is returned, otherwise false.
293  *
294  */
295 
296 static bool gfs2_rbm_incr(struct gfs2_rbm *rbm)
297 {
298 	if (rbm->offset + 1 < rbm_bi(rbm)->bi_blocks) { /* in the same bitmap */
299 		rbm->offset++;
300 		return false;
301 	}
302 	if (rbm->bii == rbm->rgd->rd_length - 1) /* at the last bitmap */
303 		return true;
304 
305 	rbm->offset = 0;
306 	rbm->bii++;
307 	return false;
308 }
309 
310 /**
311  * gfs2_unaligned_extlen - Look for free blocks which are not byte aligned
312  * @rbm: Position to search (value/result)
313  * @n_unaligned: Number of unaligned blocks to check
314  * @len: Decremented for each block found (terminate on zero)
315  *
316  * Returns: true if a non-free block is encountered
317  */
318 
319 static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *len)
320 {
321 	u32 n;
322 	u8 res;
323 
324 	for (n = 0; n < n_unaligned; n++) {
325 		res = gfs2_testbit(rbm);
326 		if (res != GFS2_BLKST_FREE)
327 			return true;
328 		(*len)--;
329 		if (*len == 0)
330 			return true;
331 		if (gfs2_rbm_incr(rbm))
332 			return true;
333 	}
334 
335 	return false;
336 }
337 
338 /**
339  * gfs2_free_extlen - Return extent length of free blocks
340  * @rrbm: Starting position
341  * @len: Max length to check
342  *
343  * Starting at the block specified by the rbm, see how many free blocks
344  * there are, not reading more than len blocks ahead. This can be done
345  * using memchr_inv when the blocks are byte aligned, but has to be done
346  * on a block by block basis in case of unaligned blocks. Also this
347  * function can cope with bitmap boundaries (although it must stop on
348  * a resource group boundary)
349  *
350  * Returns: Number of free blocks in the extent
351  */
352 
353 static u32 gfs2_free_extlen(const struct gfs2_rbm *rrbm, u32 len)
354 {
355 	struct gfs2_rbm rbm = *rrbm;
356 	u32 n_unaligned = rbm.offset & 3;
357 	u32 size = len;
358 	u32 bytes;
359 	u32 chunk_size;
360 	u8 *ptr, *start, *end;
361 	u64 block;
362 	struct gfs2_bitmap *bi;
363 
364 	if (n_unaligned &&
365 	    gfs2_unaligned_extlen(&rbm, 4 - n_unaligned, &len))
366 		goto out;
367 
368 	n_unaligned = len & 3;
369 	/* Start is now byte aligned */
370 	while (len > 3) {
371 		bi = rbm_bi(&rbm);
372 		start = bi->bi_bh->b_data;
373 		if (bi->bi_clone)
374 			start = bi->bi_clone;
375 		end = start + bi->bi_bh->b_size;
376 		start += bi->bi_offset;
377 		BUG_ON(rbm.offset & 3);
378 		start += (rbm.offset / GFS2_NBBY);
379 		bytes = min_t(u32, len / GFS2_NBBY, (end - start));
380 		ptr = memchr_inv(start, 0, bytes);
381 		chunk_size = ((ptr == NULL) ? bytes : (ptr - start));
382 		chunk_size *= GFS2_NBBY;
383 		BUG_ON(len < chunk_size);
384 		len -= chunk_size;
385 		block = gfs2_rbm_to_block(&rbm);
386 		if (gfs2_rbm_from_block(&rbm, block + chunk_size)) {
387 			n_unaligned = 0;
388 			break;
389 		}
390 		if (ptr) {
391 			n_unaligned = 3;
392 			break;
393 		}
394 		n_unaligned = len & 3;
395 	}
396 
397 	/* Deal with any bits left over at the end */
398 	if (n_unaligned)
399 		gfs2_unaligned_extlen(&rbm, n_unaligned, &len);
400 out:
401 	return size - len;
402 }
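/*
 * Editor's note: the byte-at-a-time scan above works because
 * GFS2_BLKST_FREE is the all-zeroes bit pattern, so a 0x00 bitmap byte
 * is exactly four consecutive free blocks and memchr_inv() can find
 * the first byte containing any non-free block directly.
 */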
403 
404 /**
405  * gfs2_bitcount - count the number of bits in a certain state
406  * @rgd: the resource group descriptor
407  * @buffer: the buffer that holds the bitmaps
408  * @buflen: the length (in bytes) of the buffer
409  * @state: the state of the block we're looking for
410  *
411  * Returns: The number of bits
412  */
413 
414 static u32 gfs2_bitcount(struct gfs2_rgrpd *rgd, const u8 *buffer,
415 			 unsigned int buflen, u8 state)
416 {
417 	const u8 *byte = buffer;
418 	const u8 *end = buffer + buflen;
419 	const u8 state1 = state << 2;
420 	const u8 state2 = state << 4;
421 	const u8 state3 = state << 6;
422 	u32 count = 0;
423 
424 	for (; byte < end; byte++) {
425 		if (((*byte) & 0x03) == state)
426 			count++;
427 		if (((*byte) & 0x0C) == state1)
428 			count++;
429 		if (((*byte) & 0x30) == state2)
430 			count++;
431 		if (((*byte) & 0xC0) == state3)
432 			count++;
433 	}
434 
435 	return count;
436 }
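/*
 * Example (editor's illustration): for the byte 0x05 (binary
 * 00 00 01 01, blocks 0 and 1 used, blocks 2 and 3 free),
 * gfs2_bitcount() counts 2 with state == GFS2_BLKST_FREE and
 * 2 with state == GFS2_BLKST_USED.
 */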
437 
438 /**
439  * gfs2_rgrp_verify - Verify that a resource group is consistent
440  * @rgd: the rgrp
441  *
442  */
443 
444 void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd)
445 {
446 	struct gfs2_sbd *sdp = rgd->rd_sbd;
447 	struct gfs2_bitmap *bi = NULL;
448 	u32 length = rgd->rd_length;
449 	u32 count[4], tmp;
450 	int buf, x;
451 
452 	memset(count, 0, 4 * sizeof(u32));
453 
454 	/* Count # blocks in each of 4 possible allocation states */
455 	for (buf = 0; buf < length; buf++) {
456 		bi = rgd->rd_bits + buf;
457 		for (x = 0; x < 4; x++)
458 			count[x] += gfs2_bitcount(rgd,
459 						  bi->bi_bh->b_data +
460 						  bi->bi_offset,
461 						  bi->bi_len, x);
462 	}
463 
464 	if (count[0] != rgd->rd_free) {
465 		if (gfs2_consist_rgrpd(rgd))
466 			fs_err(sdp, "free data mismatch:  %u != %u\n",
467 			       count[0], rgd->rd_free);
468 		return;
469 	}
470 
471 	tmp = rgd->rd_data - rgd->rd_free - rgd->rd_dinodes;
472 	if (count[1] != tmp) {
473 		if (gfs2_consist_rgrpd(rgd))
474 			fs_err(sdp, "used data mismatch:  %u != %u\n",
475 			       count[1], tmp);
476 		return;
477 	}
478 
479 	if (count[2] + count[3] != rgd->rd_dinodes) {
480 		if (gfs2_consist_rgrpd(rgd))
481 			fs_err(sdp, "used metadata mismatch:  %u != %u\n",
482 			       count[2] + count[3], rgd->rd_dinodes);
483 		return;
484 	}
485 }
486 
487 static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block)
488 {
489 	u64 first = rgd->rd_data0;
490 	u64 last = first + rgd->rd_data;
491 	return first <= block && block < last;
492 }
493 
494 /**
495  * gfs2_blk2rgrpd - Find resource group for a given data/meta block number
496  * @sdp: The GFS2 superblock
497  * @blk: The data block number
498  * @exact: True if this needs to be an exact match
499  *
500  * Returns: The resource group, or NULL if not found
501  */
502 
503 struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk, bool exact)
504 {
505 	struct rb_node *n, *next;
506 	struct gfs2_rgrpd *cur;
507 
508 	spin_lock(&sdp->sd_rindex_spin);
509 	n = sdp->sd_rindex_tree.rb_node;
510 	while (n) {
511 		cur = rb_entry(n, struct gfs2_rgrpd, rd_node);
512 		next = NULL;
513 		if (blk < cur->rd_addr)
514 			next = n->rb_left;
515 		else if (blk >= cur->rd_data0 + cur->rd_data)
516 			next = n->rb_right;
517 		if (next == NULL) {
518 			spin_unlock(&sdp->sd_rindex_spin);
519 			if (exact) {
520 				if (blk < cur->rd_addr)
521 					return NULL;
522 				if (blk >= cur->rd_data0 + cur->rd_data)
523 					return NULL;
524 			}
525 			return cur;
526 		}
527 		n = next;
528 	}
529 	spin_unlock(&sdp->sd_rindex_spin);
530 
531 	return NULL;
532 }
533 
534 /**
535  * gfs2_rgrpd_get_first - get the first Resource Group in the filesystem
536  * @sdp: The GFS2 superblock
537  *
538  * Returns: The first rgrp in the filesystem
539  */
540 
541 struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp)
542 {
543 	const struct rb_node *n;
544 	struct gfs2_rgrpd *rgd;
545 
546 	spin_lock(&sdp->sd_rindex_spin);
547 	n = rb_first(&sdp->sd_rindex_tree);
548 	rgd = rb_entry(n, struct gfs2_rgrpd, rd_node);
549 	spin_unlock(&sdp->sd_rindex_spin);
550 
551 	return rgd;
552 }
553 
554 /**
555  * gfs2_rgrpd_get_next - get the next RG
556  * @rgd: the resource group descriptor
557  *
558  * Returns: The next rgrp
559  */
560 
561 struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd)
562 {
563 	struct gfs2_sbd *sdp = rgd->rd_sbd;
564 	const struct rb_node *n;
565 
566 	spin_lock(&sdp->sd_rindex_spin);
567 	n = rb_next(&rgd->rd_node);
568 	if (n == NULL)
569 		n = rb_first(&sdp->sd_rindex_tree);
570 
571 	if (unlikely(&rgd->rd_node == n)) {
572 		spin_unlock(&sdp->sd_rindex_spin);
573 		return NULL;
574 	}
575 	rgd = rb_entry(n, struct gfs2_rgrpd, rd_node);
576 	spin_unlock(&sdp->sd_rindex_spin);
577 	return rgd;
578 }
579 
580 void gfs2_free_clones(struct gfs2_rgrpd *rgd)
581 {
582 	int x;
583 
584 	for (x = 0; x < rgd->rd_length; x++) {
585 		struct gfs2_bitmap *bi = rgd->rd_bits + x;
586 		kfree(bi->bi_clone);
587 		bi->bi_clone = NULL;
588 	}
589 }
590 
591 /**
592  * gfs2_rs_alloc - make sure we have a reservation assigned to the inode
593  * @ip: the inode for this reservation
594  */
595 int gfs2_rs_alloc(struct gfs2_inode *ip)
596 {
597 	int error = 0;
598 
599 	down_write(&ip->i_rw_mutex);
600 	if (ip->i_res)
601 		goto out;
602 
603 	ip->i_res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS);
604 	if (!ip->i_res) {
605 		error = -ENOMEM;
606 		goto out;
607 	}
608 
609 	RB_CLEAR_NODE(&ip->i_res->rs_node);
610 out:
611 	up_write(&ip->i_rw_mutex);
612 	return error;
613 }
614 
615 static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs)
616 {
617 	gfs2_print_dbg(seq, "  B: n:%llu s:%llu b:%u f:%u\n",
618 		       (unsigned long long)rs->rs_inum,
619 		       (unsigned long long)gfs2_rbm_to_block(&rs->rs_rbm),
620 		       rs->rs_rbm.offset, rs->rs_free);
621 }
622 
623 /**
624  * __rs_deltree - remove a multi-block reservation from the rgd tree
625  * @rs: The reservation to remove
626  *
627  */
628 static void __rs_deltree(struct gfs2_blkreserv *rs)
629 {
630 	struct gfs2_rgrpd *rgd;
631 
632 	if (!gfs2_rs_active(rs))
633 		return;
634 
635 	rgd = rs->rs_rbm.rgd;
636 	trace_gfs2_rs(rs, TRACE_RS_TREEDEL);
637 	rb_erase(&rs->rs_node, &rgd->rd_rstree);
638 	RB_CLEAR_NODE(&rs->rs_node);
639 
640 	if (rs->rs_free) {
641 		struct gfs2_bitmap *bi = rbm_bi(&rs->rs_rbm);
642 
643 		/* return reserved blocks to the rgrp */
644 		BUG_ON(rs->rs_rbm.rgd->rd_reserved < rs->rs_free);
645 		rs->rs_rbm.rgd->rd_reserved -= rs->rs_free;
646 		/* The rgrp extent failure point is likely not to increase;
647 		   it will only do so if the freed blocks are somehow
648 		   contiguous with a span of free blocks that follows. Still,
649 		   it will force the number to be recalculated later. */
650 		rgd->rd_extfail_pt += rs->rs_free;
651 		rs->rs_free = 0;
652 		clear_bit(GBF_FULL, &bi->bi_flags);
653 	}
654 }
655 
656 /**
657  * gfs2_rs_deltree - remove a multi-block reservation from the rgd tree
658  * @rs: The reservation to remove
659  *
660  */
661 void gfs2_rs_deltree(struct gfs2_blkreserv *rs)
662 {
663 	struct gfs2_rgrpd *rgd;
664 
665 	rgd = rs->rs_rbm.rgd;
666 	if (rgd) {
667 		spin_lock(&rgd->rd_rsspin);
668 		__rs_deltree(rs);
669 		spin_unlock(&rgd->rd_rsspin);
670 	}
671 }
672 
673 /**
674  * gfs2_rs_delete - delete a multi-block reservation
675  * @ip: The inode for this reservation
676  * @wcount: The inode's write count, or NULL
677  *
678  */
679 void gfs2_rs_delete(struct gfs2_inode *ip, atomic_t *wcount)
680 {
681 	down_write(&ip->i_rw_mutex);
682 	if (ip->i_res && ((wcount == NULL) || (atomic_read(wcount) <= 1))) {
683 		gfs2_rs_deltree(ip->i_res);
684 		BUG_ON(ip->i_res->rs_free);
685 		kmem_cache_free(gfs2_rsrv_cachep, ip->i_res);
686 		ip->i_res = NULL;
687 	}
688 	up_write(&ip->i_rw_mutex);
689 }
690 
691 /**
692  * return_all_reservations - return all reserved blocks back to the rgrp.
693  * @rgd: the rgrp that needs its space back
694  *
695  * We previously reserved a bunch of blocks for allocation. Now we need to
696  * give them back. This leaves the reservation structures intact, but removes
697  * all of their corresponding "no-fly zones".
698  */
699 static void return_all_reservations(struct gfs2_rgrpd *rgd)
700 {
701 	struct rb_node *n;
702 	struct gfs2_blkreserv *rs;
703 
704 	spin_lock(&rgd->rd_rsspin);
705 	while ((n = rb_first(&rgd->rd_rstree))) {
706 		rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
707 		__rs_deltree(rs);
708 	}
709 	spin_unlock(&rgd->rd_rsspin);
710 }
711 
712 void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
713 {
714 	struct rb_node *n;
715 	struct gfs2_rgrpd *rgd;
716 	struct gfs2_glock *gl;
717 
718 	while ((n = rb_first(&sdp->sd_rindex_tree))) {
719 		rgd = rb_entry(n, struct gfs2_rgrpd, rd_node);
720 		gl = rgd->rd_gl;
721 
722 		rb_erase(n, &sdp->sd_rindex_tree);
723 
724 		if (gl) {
725 			spin_lock(&gl->gl_spin);
726 			gl->gl_object = NULL;
727 			spin_unlock(&gl->gl_spin);
728 			gfs2_glock_add_to_lru(gl);
729 			gfs2_glock_put(gl);
730 		}
731 
732 		gfs2_free_clones(rgd);
733 		kfree(rgd->rd_bits);
734 		return_all_reservations(rgd);
735 		kmem_cache_free(gfs2_rgrpd_cachep, rgd);
736 	}
737 }
738 
739 static void gfs2_rindex_print(const struct gfs2_rgrpd *rgd)
740 {
741 	pr_info("ri_addr = %llu\n", (unsigned long long)rgd->rd_addr);
742 	pr_info("ri_length = %u\n", rgd->rd_length);
743 	pr_info("ri_data0 = %llu\n", (unsigned long long)rgd->rd_data0);
744 	pr_info("ri_data = %u\n", rgd->rd_data);
745 	pr_info("ri_bitbytes = %u\n", rgd->rd_bitbytes);
746 }
747 
748 /**
749  * compute_bitstructs - Compute the bitmap sizes
750  * @rgd: The resource group descriptor
751  *
752  * Calculates bitmap descriptors, one for each block that contains bitmap data
753  *
754  * Returns: errno
755  */
756 
757 static int compute_bitstructs(struct gfs2_rgrpd *rgd)
758 {
759 	struct gfs2_sbd *sdp = rgd->rd_sbd;
760 	struct gfs2_bitmap *bi;
761 	u32 length = rgd->rd_length; /* # blocks in hdr & bitmap */
762 	u32 bytes_left, bytes;
763 	int x;
764 
765 	if (!length)
766 		return -EINVAL;
767 
768 	rgd->rd_bits = kcalloc(length, sizeof(struct gfs2_bitmap), GFP_NOFS);
769 	if (!rgd->rd_bits)
770 		return -ENOMEM;
771 
772 	bytes_left = rgd->rd_bitbytes;
773 
774 	for (x = 0; x < length; x++) {
775 		bi = rgd->rd_bits + x;
776 
777 		bi->bi_flags = 0;
778 		/* small rgrp; bitmap stored completely in header block */
779 		if (length == 1) {
780 			bytes = bytes_left;
781 			bi->bi_offset = sizeof(struct gfs2_rgrp);
782 			bi->bi_start = 0;
783 			bi->bi_len = bytes;
784 			bi->bi_blocks = bytes * GFS2_NBBY;
785 		/* header block */
786 		} else if (x == 0) {
787 			bytes = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_rgrp);
788 			bi->bi_offset = sizeof(struct gfs2_rgrp);
789 			bi->bi_start = 0;
790 			bi->bi_len = bytes;
791 			bi->bi_blocks = bytes * GFS2_NBBY;
792 		/* last block */
793 		} else if (x + 1 == length) {
794 			bytes = bytes_left;
795 			bi->bi_offset = sizeof(struct gfs2_meta_header);
796 			bi->bi_start = rgd->rd_bitbytes - bytes_left;
797 			bi->bi_len = bytes;
798 			bi->bi_blocks = bytes * GFS2_NBBY;
799 		/* other blocks */
800 		} else {
801 			bytes = sdp->sd_sb.sb_bsize -
802 				sizeof(struct gfs2_meta_header);
803 			bi->bi_offset = sizeof(struct gfs2_meta_header);
804 			bi->bi_start = rgd->rd_bitbytes - bytes_left;
805 			bi->bi_len = bytes;
806 			bi->bi_blocks = bytes * GFS2_NBBY;
807 		}
808 
809 		bytes_left -= bytes;
810 	}
811 
812 	if (bytes_left) {
813 		gfs2_consist_rgrpd(rgd);
814 		return -EIO;
815 	}
816 	bi = rgd->rd_bits + (length - 1);
817 	if ((bi->bi_start + bi->bi_len) * GFS2_NBBY != rgd->rd_data) {
818 		if (gfs2_consist_rgrpd(rgd)) {
819 			gfs2_rindex_print(rgd);
820 			fs_err(sdp, "start=%u len=%u offset=%u\n",
821 			       bi->bi_start, bi->bi_len, bi->bi_offset);
822 		}
823 		return -EIO;
824 	}
825 
826 	return 0;
827 }
828 
829 /**
830  * gfs2_ri_total - Total up the file system space, according to the rindex.
831  * @sdp: the filesystem
832  *
833  */
834 u64 gfs2_ri_total(struct gfs2_sbd *sdp)
835 {
836 	u64 total_data = 0;
837 	struct inode *inode = sdp->sd_rindex;
838 	struct gfs2_inode *ip = GFS2_I(inode);
839 	char buf[sizeof(struct gfs2_rindex)];
840 	int error, rgrps;
841 
842 	for (rgrps = 0;; rgrps++) {
843 		loff_t pos = rgrps * sizeof(struct gfs2_rindex);
844 
845 		if (pos + sizeof(struct gfs2_rindex) > i_size_read(inode))
846 			break;
847 		error = gfs2_internal_read(ip, buf, &pos,
848 					   sizeof(struct gfs2_rindex));
849 		if (error != sizeof(struct gfs2_rindex))
850 			break;
851 		total_data += be32_to_cpu(((struct gfs2_rindex *)buf)->ri_data);
852 	}
853 	return total_data;
854 }
855 
856 static int rgd_insert(struct gfs2_rgrpd *rgd)
857 {
858 	struct gfs2_sbd *sdp = rgd->rd_sbd;
859 	struct rb_node **newn = &sdp->sd_rindex_tree.rb_node, *parent = NULL;
860 
861 	/* Figure out where to put new node */
862 	while (*newn) {
863 		struct gfs2_rgrpd *cur = rb_entry(*newn, struct gfs2_rgrpd,
864 						  rd_node);
865 
866 		parent = *newn;
867 		if (rgd->rd_addr < cur->rd_addr)
868 			newn = &((*newn)->rb_left);
869 		else if (rgd->rd_addr > cur->rd_addr)
870 			newn = &((*newn)->rb_right);
871 		else
872 			return -EEXIST;
873 	}
874 
875 	rb_link_node(&rgd->rd_node, parent, newn);
876 	rb_insert_color(&rgd->rd_node, &sdp->sd_rindex_tree);
877 	sdp->sd_rgrps++;
878 	return 0;
879 }
880 
881 /**
882  * read_rindex_entry - Pull in a new resource index entry from the disk
883  * @ip: Pointer to the rindex inode
884  *
885  * Returns: 0 on success, > 0 on EOF, error code otherwise
886  */
887 
888 static int read_rindex_entry(struct gfs2_inode *ip)
889 {
890 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
891 	const unsigned bsize = sdp->sd_sb.sb_bsize;
892 	loff_t pos = sdp->sd_rgrps * sizeof(struct gfs2_rindex);
893 	struct gfs2_rindex buf;
894 	int error;
895 	struct gfs2_rgrpd *rgd;
896 
897 	if (pos >= i_size_read(&ip->i_inode))
898 		return 1;
899 
900 	error = gfs2_internal_read(ip, (char *)&buf, &pos,
901 				   sizeof(struct gfs2_rindex));
902 
903 	if (error != sizeof(struct gfs2_rindex))
904 		return (error == 0) ? 1 : error;
905 
906 	rgd = kmem_cache_zalloc(gfs2_rgrpd_cachep, GFP_NOFS);
907 	error = -ENOMEM;
908 	if (!rgd)
909 		return error;
910 
911 	rgd->rd_sbd = sdp;
912 	rgd->rd_addr = be64_to_cpu(buf.ri_addr);
913 	rgd->rd_length = be32_to_cpu(buf.ri_length);
914 	rgd->rd_data0 = be64_to_cpu(buf.ri_data0);
915 	rgd->rd_data = be32_to_cpu(buf.ri_data);
916 	rgd->rd_bitbytes = be32_to_cpu(buf.ri_bitbytes);
917 	spin_lock_init(&rgd->rd_rsspin);
918 
919 	error = compute_bitstructs(rgd);
920 	if (error)
921 		goto fail;
922 
923 	error = gfs2_glock_get(sdp, rgd->rd_addr,
924 			       &gfs2_rgrp_glops, CREATE, &rgd->rd_gl);
925 	if (error)
926 		goto fail;
927 
928 	rgd->rd_gl->gl_object = rgd;
929 	rgd->rd_gl->gl_vm.start = rgd->rd_addr * bsize;
930 	rgd->rd_gl->gl_vm.end = rgd->rd_gl->gl_vm.start + (rgd->rd_length * bsize) - 1;
931 	rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lksb.sb_lvbptr;
932 	rgd->rd_flags &= ~GFS2_RDF_UPTODATE;
933 	if (rgd->rd_data > sdp->sd_max_rg_data)
934 		sdp->sd_max_rg_data = rgd->rd_data;
935 	spin_lock(&sdp->sd_rindex_spin);
936 	error = rgd_insert(rgd);
937 	spin_unlock(&sdp->sd_rindex_spin);
938 	if (!error)
939 		return 0;
940 
941 	error = 0; /* someone else read in the rgrp; free it and ignore it */
942 	gfs2_glock_put(rgd->rd_gl);
943 
944 fail:
945 	kfree(rgd->rd_bits);
946 	kmem_cache_free(gfs2_rgrpd_cachep, rgd);
947 	return error;
948 }
949 
950 /**
951  * gfs2_ri_update - Pull in a new resource index from the disk
952  * @ip: pointer to the rindex inode
953  *
954  * Returns: 0 on successful update, error code otherwise
955  */
956 
957 static int gfs2_ri_update(struct gfs2_inode *ip)
958 {
959 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
960 	int error;
961 
962 	do {
963 		error = read_rindex_entry(ip);
964 	} while (error == 0);
965 
966 	if (error < 0)
967 		return error;
968 
969 	sdp->sd_rindex_uptodate = 1;
970 	return 0;
971 }
972 
973 /**
974  * gfs2_rindex_update - Update the rindex if required
975  * @sdp: The GFS2 superblock
976  *
977  * We grab a lock on the rindex inode to make sure that it doesn't
978  * change whilst we are performing an operation. We keep this lock
979  * for quite long periods of time compared to other locks. This
980  * doesn't matter, since it is shared and it is very, very rarely
981  * accessed in the exclusive mode (i.e. only when expanding the filesystem).
982  *
983  * This makes sure that we're using the latest copy of the resource index
984  * special file, which might have been updated if someone expanded the
985  * filesystem (via gfs2_grow utility), which adds new resource groups.
986  *
987  * Returns: 0 on success, error code otherwise
988  */
989 
990 int gfs2_rindex_update(struct gfs2_sbd *sdp)
991 {
992 	struct gfs2_inode *ip = GFS2_I(sdp->sd_rindex);
993 	struct gfs2_glock *gl = ip->i_gl;
994 	struct gfs2_holder ri_gh;
995 	int error = 0;
996 	int unlock_required = 0;
997 
998 	/* Read new copy from disk if we don't have the latest */
999 	if (!sdp->sd_rindex_uptodate) {
1000 		if (!gfs2_glock_is_locked_by_me(gl)) {
1001 			error = gfs2_glock_nq_init(gl, LM_ST_SHARED, 0, &ri_gh);
1002 			if (error)
1003 				return error;
1004 			unlock_required = 1;
1005 		}
1006 		if (!sdp->sd_rindex_uptodate)
1007 			error = gfs2_ri_update(ip);
1008 		if (unlock_required)
1009 			gfs2_glock_dq_uninit(&ri_gh);
1010 	}
1011 
1012 	return error;
1013 }
1014 
1015 static void gfs2_rgrp_in(struct gfs2_rgrpd *rgd, const void *buf)
1016 {
1017 	const struct gfs2_rgrp *str = buf;
1018 	u32 rg_flags;
1019 
1020 	rg_flags = be32_to_cpu(str->rg_flags);
1021 	rg_flags &= ~GFS2_RDF_MASK;
1022 	rgd->rd_flags &= GFS2_RDF_MASK;
1023 	rgd->rd_flags |= rg_flags;
1024 	rgd->rd_free = be32_to_cpu(str->rg_free);
1025 	rgd->rd_dinodes = be32_to_cpu(str->rg_dinodes);
1026 	rgd->rd_igeneration = be64_to_cpu(str->rg_igeneration);
1027 }
1028 
1029 static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf)
1030 {
1031 	struct gfs2_rgrp *str = buf;
1032 
1033 	str->rg_flags = cpu_to_be32(rgd->rd_flags & ~GFS2_RDF_MASK);
1034 	str->rg_free = cpu_to_be32(rgd->rd_free);
1035 	str->rg_dinodes = cpu_to_be32(rgd->rd_dinodes);
1036 	str->__pad = cpu_to_be32(0);
1037 	str->rg_igeneration = cpu_to_be64(rgd->rd_igeneration);
1038 	memset(&str->rg_reserved, 0, sizeof(str->rg_reserved));
1039 }
1040 
1041 static int gfs2_rgrp_lvb_valid(struct gfs2_rgrpd *rgd)
1042 {
1043 	struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl;
1044 	struct gfs2_rgrp *str = (struct gfs2_rgrp *)rgd->rd_bits[0].bi_bh->b_data;
1045 
1046 	if (rgl->rl_flags != str->rg_flags || rgl->rl_free != str->rg_free ||
1047 	    rgl->rl_dinodes != str->rg_dinodes ||
1048 	    rgl->rl_igeneration != str->rg_igeneration)
1049 		return 0;
1050 	return 1;
1051 }
1052 
1053 static void gfs2_rgrp_ondisk2lvb(struct gfs2_rgrp_lvb *rgl, const void *buf)
1054 {
1055 	const struct gfs2_rgrp *str = buf;
1056 
1057 	rgl->rl_magic = cpu_to_be32(GFS2_MAGIC);
1058 	rgl->rl_flags = str->rg_flags;
1059 	rgl->rl_free = str->rg_free;
1060 	rgl->rl_dinodes = str->rg_dinodes;
1061 	rgl->rl_igeneration = str->rg_igeneration;
1062 	rgl->__pad = 0UL;
1063 }
1064 
1065 static void update_rgrp_lvb_unlinked(struct gfs2_rgrpd *rgd, u32 change)
1066 {
1067 	struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl;
1068 	u32 unlinked = be32_to_cpu(rgl->rl_unlinked) + change;
1069 	rgl->rl_unlinked = cpu_to_be32(unlinked);
1070 }
1071 
1072 static u32 count_unlinked(struct gfs2_rgrpd *rgd)
1073 {
1074 	struct gfs2_bitmap *bi;
1075 	const u32 length = rgd->rd_length;
1076 	const u8 *buffer = NULL;
1077 	u32 i, goal, count = 0;
1078 
1079 	for (i = 0, bi = rgd->rd_bits; i < length; i++, bi++) {
1080 		goal = 0;
1081 		buffer = bi->bi_bh->b_data + bi->bi_offset;
1082 		WARN_ON(!buffer_uptodate(bi->bi_bh));
1083 		while (goal < bi->bi_len * GFS2_NBBY) {
1084 			goal = gfs2_bitfit(buffer, bi->bi_len, goal,
1085 					   GFS2_BLKST_UNLINKED);
1086 			if (goal == BFITNOENT)
1087 				break;
1088 			count++;
1089 			goal++;
1090 		}
1091 	}
1092 
1093 	return count;
1094 }
1095 
1096 
1097 /**
1098  * gfs2_rgrp_bh_get - Read in a RG's header and bitmaps
1099  * @rgd: the struct gfs2_rgrpd describing the RG to read in
1100  *
1101  * Read in all of a Resource Group's header and bitmap blocks.
1102  * Caller must eventually release the bitmaps via gfs2_rgrp_go_unlock().
1103  *
1104  * Returns: errno
1105  */
1106 
1107 static int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
1108 {
1109 	struct gfs2_sbd *sdp = rgd->rd_sbd;
1110 	struct gfs2_glock *gl = rgd->rd_gl;
1111 	unsigned int length = rgd->rd_length;
1112 	struct gfs2_bitmap *bi;
1113 	unsigned int x, y;
1114 	int error;
1115 
1116 	if (rgd->rd_bits[0].bi_bh != NULL)
1117 		return 0;
1118 
1119 	for (x = 0; x < length; x++) {
1120 		bi = rgd->rd_bits + x;
1121 		error = gfs2_meta_read(gl, rgd->rd_addr + x, 0, &bi->bi_bh);
1122 		if (error)
1123 			goto fail;
1124 	}
1125 
1126 	for (y = length; y--;) {
1127 		bi = rgd->rd_bits + y;
1128 		error = gfs2_meta_wait(sdp, bi->bi_bh);
1129 		if (error)
1130 			goto fail;
1131 		if (gfs2_metatype_check(sdp, bi->bi_bh, y ? GFS2_METATYPE_RB :
1132 					      GFS2_METATYPE_RG)) {
1133 			error = -EIO;
1134 			goto fail;
1135 		}
1136 	}
1137 
1138 	if (!(rgd->rd_flags & GFS2_RDF_UPTODATE)) {
1139 		for (x = 0; x < length; x++)
1140 			clear_bit(GBF_FULL, &rgd->rd_bits[x].bi_flags);
1141 		gfs2_rgrp_in(rgd, (rgd->rd_bits[0].bi_bh)->b_data);
1142 		rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK);
1143 		rgd->rd_free_clone = rgd->rd_free;
1144 		/* max out the rgrp allocation failure point */
1145 		rgd->rd_extfail_pt = rgd->rd_free;
1146 	}
1147 	if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) {
1148 		rgd->rd_rgl->rl_unlinked = cpu_to_be32(count_unlinked(rgd));
1149 		gfs2_rgrp_ondisk2lvb(rgd->rd_rgl,
1150 				     rgd->rd_bits[0].bi_bh->b_data);
1151 	}
1152 	else if (sdp->sd_args.ar_rgrplvb) {
1153 		if (!gfs2_rgrp_lvb_valid(rgd)) {
1154 			gfs2_consist_rgrpd(rgd);
1155 			error = -EIO;
1156 			goto fail;
1157 		}
1158 		if (rgd->rd_rgl->rl_unlinked == 0)
1159 			rgd->rd_flags &= ~GFS2_RDF_CHECK;
1160 	}
1161 	return 0;
1162 
1163 fail:
1164 	while (x--) {
1165 		bi = rgd->rd_bits + x;
1166 		brelse(bi->bi_bh);
1167 		bi->bi_bh = NULL;
1168 		gfs2_assert_warn(sdp, !bi->bi_clone);
1169 	}
1170 
1171 	return error;
1172 }
1173 
1174 static int update_rgrp_lvb(struct gfs2_rgrpd *rgd)
1175 {
1176 	u32 rl_flags;
1177 
1178 	if (rgd->rd_flags & GFS2_RDF_UPTODATE)
1179 		return 0;
1180 
1181 	if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic)
1182 		return gfs2_rgrp_bh_get(rgd);
1183 
1184 	rl_flags = be32_to_cpu(rgd->rd_rgl->rl_flags);
1185 	rl_flags &= ~GFS2_RDF_MASK;
1186 	rgd->rd_flags &= GFS2_RDF_MASK;
1187 	rgd->rd_flags |= (rl_flags | GFS2_RDF_UPTODATE | GFS2_RDF_CHECK);
1188 	if (rgd->rd_rgl->rl_unlinked == 0)
1189 		rgd->rd_flags &= ~GFS2_RDF_CHECK;
1190 	rgd->rd_free = be32_to_cpu(rgd->rd_rgl->rl_free);
1191 	rgd->rd_free_clone = rgd->rd_free;
1192 	rgd->rd_dinodes = be32_to_cpu(rgd->rd_rgl->rl_dinodes);
1193 	rgd->rd_igeneration = be64_to_cpu(rgd->rd_rgl->rl_igeneration);
1194 	return 0;
1195 }
1196 
1197 int gfs2_rgrp_go_lock(struct gfs2_holder *gh)
1198 {
1199 	struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object;
1200 	struct gfs2_sbd *sdp = rgd->rd_sbd;
1201 
1202 	if (gh->gh_flags & GL_SKIP && sdp->sd_args.ar_rgrplvb)
1203 		return 0;
1204 	return gfs2_rgrp_bh_get(rgd);
1205 }
1206 
1207 /**
1208  * gfs2_rgrp_go_unlock - Release RG bitmaps read in with gfs2_rgrp_bh_get()
1209  * @gh: The glock holder for the resource group
1210  *
1211  */
1212 
1213 void gfs2_rgrp_go_unlock(struct gfs2_holder *gh)
1214 {
1215 	struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object;
1216 	int x, length = rgd->rd_length;
1217 
1218 	for (x = 0; x < length; x++) {
1219 		struct gfs2_bitmap *bi = rgd->rd_bits + x;
1220 		if (bi->bi_bh) {
1221 			brelse(bi->bi_bh);
1222 			bi->bi_bh = NULL;
1223 		}
1224 	}
1225 
1226 }
1227 
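/**
 * gfs2_rgrp_send_discards - issue discard requests for freed/free blocks
 * @sdp: the filesystem
 * @offset: the first data block of the resource group (rd_data0)
 * @bh: buffer holding the current bitmap to compare against the clone,
 *      or NULL to consider every free block in @bi
 * @bi: the bitmap to scan
 * @minlen: do not discard extents shorter than this many blocks
 * @ptrimmed: if non-NULL, receives the number of blocks discarded
 *
 * Editor's note: this kernel-doc header is an editorial addition that
 * summarises the code below.
 *
 * Returns: 0 on success, -EIO on failure (discards are then disabled)
 */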
1228 int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
1229 			     struct buffer_head *bh,
1230 			     const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed)
1231 {
1232 	struct super_block *sb = sdp->sd_vfs;
1233 	u64 blk;
1234 	sector_t start = 0;
1235 	sector_t nr_blks = 0;
1236 	int rv;
1237 	unsigned int x;
1238 	u32 trimmed = 0;
1239 	u8 diff;
1240 
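	/*
	 * Editor's note on the bit trick below: (x | (x >> 1)) & 0x55 sets
	 * an even bit for each two-bit entry that is non-free, and the
	 * complemented form marks free entries.  With @bh supplied, diff
	 * therefore selects blocks that are free in @bh's bitmap but still
	 * in use in the clone, i.e. blocks freed since the clone was taken;
	 * with @bh == NULL it selects every currently free block.
	 */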
1241 	for (x = 0; x < bi->bi_len; x++) {
1242 		const u8 *clone = bi->bi_clone ? bi->bi_clone : bi->bi_bh->b_data;
1243 		clone += bi->bi_offset;
1244 		clone += x;
1245 		if (bh) {
1246 			const u8 *orig = bh->b_data + bi->bi_offset + x;
1247 			diff = ~(*orig | (*orig >> 1)) & (*clone | (*clone >> 1));
1248 		} else {
1249 			diff = ~(*clone | (*clone >> 1));
1250 		}
1251 		diff &= 0x55;
1252 		if (diff == 0)
1253 			continue;
1254 		blk = offset + ((bi->bi_start + x) * GFS2_NBBY);
1255 		while(diff) {
1256 			if (diff & 1) {
1257 				if (nr_blks == 0)
1258 					goto start_new_extent;
1259 				if ((start + nr_blks) != blk) {
1260 					if (nr_blks >= minlen) {
1261 						rv = sb_issue_discard(sb,
1262 							start, nr_blks,
1263 							GFP_NOFS, 0);
1264 						if (rv)
1265 							goto fail;
1266 						trimmed += nr_blks;
1267 					}
1268 					nr_blks = 0;
1269 start_new_extent:
1270 					start = blk;
1271 				}
1272 				nr_blks++;
1273 			}
1274 			diff >>= 2;
1275 			blk++;
1276 		}
1277 	}
1278 	if (nr_blks >= minlen) {
1279 		rv = sb_issue_discard(sb, start, nr_blks, GFP_NOFS, 0);
1280 		if (rv)
1281 			goto fail;
1282 		trimmed += nr_blks;
1283 	}
1284 	if (ptrimmed)
1285 		*ptrimmed = trimmed;
1286 	return 0;
1287 
1288 fail:
1289 	if (sdp->sd_args.ar_discard)
1290 		fs_warn(sdp, "error %d on discard request, turning discards off for this filesystem", rv);
1291 	sdp->sd_args.ar_discard = 0;
1292 	return -EIO;
1293 }
1294 
1295 /**
1296  * gfs2_fitrim - Generate discard requests for unused bits of the filesystem
1297  * @filp: Any file on the filesystem
1298  * @argp: Pointer to the arguments (also used to pass result)
1299  *
1300  * Returns: 0 on success, otherwise error code
1301  */
1302 
1303 int gfs2_fitrim(struct file *filp, void __user *argp)
1304 {
1305 	struct inode *inode = file_inode(filp);
1306 	struct gfs2_sbd *sdp = GFS2_SB(inode);
1307 	struct request_queue *q = bdev_get_queue(sdp->sd_vfs->s_bdev);
1308 	struct buffer_head *bh;
1309 	struct gfs2_rgrpd *rgd;
1310 	struct gfs2_rgrpd *rgd_end;
1311 	struct gfs2_holder gh;
1312 	struct fstrim_range r;
1313 	int ret = 0;
1314 	u64 amt;
1315 	u64 trimmed = 0;
1316 	u64 start, end, minlen;
1317 	unsigned int x;
1318 	unsigned bs_shift = sdp->sd_sb.sb_bsize_shift;
1319 
1320 	if (!capable(CAP_SYS_ADMIN))
1321 		return -EPERM;
1322 
1323 	if (!blk_queue_discard(q))
1324 		return -EOPNOTSUPP;
1325 
1326 	if (copy_from_user(&r, argp, sizeof(r)))
1327 		return -EFAULT;
1328 
1329 	ret = gfs2_rindex_update(sdp);
1330 	if (ret)
1331 		return ret;
1332 
1333 	start = r.start >> bs_shift;
1334 	end = start + (r.len >> bs_shift);
1335 	minlen = max_t(u64, r.minlen,
1336 		       q->limits.discard_granularity) >> bs_shift;
1337 
1338 	if (end <= start || minlen > sdp->sd_max_rg_data)
1339 		return -EINVAL;
1340 
1341 	rgd = gfs2_blk2rgrpd(sdp, start, 0);
1342 	rgd_end = gfs2_blk2rgrpd(sdp, end, 0);
1343 
	/* Editorial bug fix (assumed): gfs2_blk2rgrpd() can return NULL when
	   the requested range lies wholly outside the filesystem, which
	   would oops in the rgd_end dereference below */
	if (rgd == NULL || rgd_end == NULL)
		return -EINVAL;

1344 	if ((gfs2_rgrpd_get_first(sdp) == gfs2_rgrpd_get_next(rgd_end))
1345 	    && (start > rgd_end->rd_data0 + rgd_end->rd_data))
1346 		return -EINVAL; /* start is beyond the end of the fs */
1347 
1348 	while (1) {
1349 
1350 		ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &gh);
1351 		if (ret)
1352 			goto out;
1353 
1354 		if (!(rgd->rd_flags & GFS2_RGF_TRIMMED)) {
1355 			/* Trim each bitmap in the rgrp */
1356 			for (x = 0; x < rgd->rd_length; x++) {
1357 				struct gfs2_bitmap *bi = rgd->rd_bits + x;
1358 				ret = gfs2_rgrp_send_discards(sdp,
1359 						rgd->rd_data0, NULL, bi, minlen,
1360 						&amt);
1361 				if (ret) {
1362 					gfs2_glock_dq_uninit(&gh);
1363 					goto out;
1364 				}
1365 				trimmed += amt;
1366 			}
1367 
1368 			/* Mark rgrp as having been trimmed */
1369 			ret = gfs2_trans_begin(sdp, RES_RG_HDR, 0);
1370 			if (ret == 0) {
1371 				bh = rgd->rd_bits[0].bi_bh;
1372 				rgd->rd_flags |= GFS2_RGF_TRIMMED;
1373 				gfs2_trans_add_meta(rgd->rd_gl, bh);
1374 				gfs2_rgrp_out(rgd, bh->b_data);
1375 				gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, bh->b_data);
1376 				gfs2_trans_end(sdp);
1377 			}
1378 		}
1379 		gfs2_glock_dq_uninit(&gh);
1380 
1381 		if (rgd == rgd_end)
1382 			break;
1383 
1384 		rgd = gfs2_rgrpd_get_next(rgd);
1385 	}
1386 
1387 out:
1388 	r.len = trimmed << bs_shift;
1389 	if (copy_to_user(argp, &r, sizeof(r)))
1390 		return -EFAULT;
1391 
1392 	return ret;
1393 }
1394 
1395 /**
1396  * rs_insert - insert a new multi-block reservation into the rgrp's rb_tree
1397  * @ip: the inode structure
1398  *
1399  */
1400 static void rs_insert(struct gfs2_inode *ip)
1401 {
1402 	struct rb_node **newn, *parent = NULL;
1403 	int rc;
1404 	struct gfs2_blkreserv *rs = ip->i_res;
1405 	struct gfs2_rgrpd *rgd = rs->rs_rbm.rgd;
1406 	u64 fsblock = gfs2_rbm_to_block(&rs->rs_rbm);
1407 
1408 	BUG_ON(gfs2_rs_active(rs));
1409 
1410 	spin_lock(&rgd->rd_rsspin);
1411 	newn = &rgd->rd_rstree.rb_node;
1412 	while (*newn) {
1413 		struct gfs2_blkreserv *cur =
1414 			rb_entry(*newn, struct gfs2_blkreserv, rs_node);
1415 
1416 		parent = *newn;
1417 		rc = rs_cmp(fsblock, rs->rs_free, cur);
1418 		if (rc > 0)
1419 			newn = &((*newn)->rb_right);
1420 		else if (rc < 0)
1421 			newn = &((*newn)->rb_left);
1422 		else {
1423 			spin_unlock(&rgd->rd_rsspin);
1424 			WARN_ON(1);
1425 			return;
1426 		}
1427 	}
1428 
1429 	rb_link_node(&rs->rs_node, parent, newn);
1430 	rb_insert_color(&rs->rs_node, &rgd->rd_rstree);
1431 
1432 	/* Do our rgrp accounting for the reservation */
1433 	rgd->rd_reserved += rs->rs_free; /* blocks reserved */
1434 	spin_unlock(&rgd->rd_rsspin);
1435 	trace_gfs2_rs(rs, TRACE_RS_INSERT);
1436 }
1437 
1438 /**
1439  * rg_mblk_search - find a group of multiple free blocks to form a reservation
1440  * @rgd: the resource group descriptor
1441  * @ip: pointer to the inode for which we're reserving blocks
1442  * @ap: the allocation parameters
1443  *
1444  */
1445 
1446 static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip,
1447 			   const struct gfs2_alloc_parms *ap)
1448 {
1449 	struct gfs2_rbm rbm = { .rgd = rgd, };
1450 	u64 goal;
1451 	struct gfs2_blkreserv *rs = ip->i_res;
1452 	u32 extlen;
1453 	u32 free_blocks = rgd->rd_free_clone - rgd->rd_reserved;
1454 	int ret;
1455 	struct inode *inode = &ip->i_inode;
1456 
1457 	if (S_ISDIR(inode->i_mode))
1458 		extlen = 1;
1459 	else {
1460 		extlen = max_t(u32, atomic_read(&rs->rs_sizehint), ap->target);
1461 		extlen = clamp(extlen, RGRP_RSRV_MINBLKS, free_blocks);
1462 	}
1463 	if ((rgd->rd_free_clone < rgd->rd_reserved) || (free_blocks < extlen))
1464 		return;
1465 
1466 	/* Find bitmap block that contains bits for goal block */
1467 	if (rgrp_contains_block(rgd, ip->i_goal))
1468 		goal = ip->i_goal;
1469 	else
1470 		goal = rgd->rd_last_alloc + rgd->rd_data0;
1471 
1472 	if (WARN_ON(gfs2_rbm_from_block(&rbm, goal)))
1473 		return;
1474 
1475 	ret = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &extlen, ip, true, ap);
1476 	if (ret == 0) {
1477 		rs->rs_rbm = rbm;
1478 		rs->rs_free = extlen;
1479 		rs->rs_inum = ip->i_no_addr;
1480 		rs_insert(ip);
1481 	} else {
1482 		if (goal == rgd->rd_last_alloc + rgd->rd_data0)
1483 			rgd->rd_last_alloc = 0;
1484 	}
1485 }
1486 
1487 /**
1488  * gfs2_next_unreserved_block - Return next block that is not reserved
1489  * @rgd: The resource group
1490  * @block: The starting block
1491  * @length: The required length
1492  * @ip: Ignore any reservations for this inode
1493  *
1494  * If the block does not appear in any reservation, then return the
1495  * block number unchanged. If it does appear in the reservation, then
1496  * keep looking through the tree of reservations in order to find the
1497  * first block number which is not reserved.
1498  */
1499 
1500 static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block,
1501 				      u32 length,
1502 				      const struct gfs2_inode *ip)
1503 {
1504 	struct gfs2_blkreserv *rs;
1505 	struct rb_node *n;
1506 	int rc;
1507 
1508 	spin_lock(&rgd->rd_rsspin);
1509 	n = rgd->rd_rstree.rb_node;
1510 	while (n) {
1511 		rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
1512 		rc = rs_cmp(block, length, rs);
1513 		if (rc < 0)
1514 			n = n->rb_left;
1515 		else if (rc > 0)
1516 			n = n->rb_right;
1517 		else
1518 			break;
1519 	}
1520 
1521 	if (n) {
1522 		while ((rs_cmp(block, length, rs) == 0) && (ip->i_res != rs)) {
1523 			block = gfs2_rbm_to_block(&rs->rs_rbm) + rs->rs_free;
1524 			n = n->rb_right;
1525 			if (n == NULL)
1526 				break;
1527 			rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
1528 		}
1529 	}
1530 
1531 	spin_unlock(&rgd->rd_rsspin);
1532 	return block;
1533 }
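/*
 * Example (editor's illustration): if a reservation covering blocks
 * 1000-1007 exists and the caller asks about block == 1005 with
 * length == 2, the lookup finds the overlap and the function returns
 * 1008, the first block past the reservation.
 */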
1534 
1535 /**
1536  * gfs2_reservation_check_and_update - Check for reservations during block alloc
1537  * @rbm: The current position in the resource group
1538  * @ip: The inode for which we are searching for blocks
1539  * @minext: The minimum extent length
1540  * @maxext: A pointer to the maximum extent structure
1541  *
1542  * This checks the current position in the rgrp to see whether there is
1543  * a reservation covering this block. If not then this function is a
1544  * no-op. If there is, then the position is moved to the end of the
1545  * contiguous reservation(s) so that we are pointing at the first
1546  * non-reserved block.
1547  *
1548  * Returns: 0 if no reservation, 1 if @rbm has changed, otherwise an error
1549  */
1550 
1551 static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm,
1552 					     const struct gfs2_inode *ip,
1553 					     u32 minext,
1554 					     struct gfs2_extent *maxext)
1555 {
1556 	u64 block = gfs2_rbm_to_block(rbm);
1557 	u32 extlen = 1;
1558 	u64 nblock;
1559 	int ret;
1560 
1561 	/*
1562 	 * If we have a minimum extent length, then skip over any extent
1563 	 * which is less than the min extent length in size.
1564 	 */
1565 	if (minext) {
1566 		extlen = gfs2_free_extlen(rbm, minext);
1567 		if (extlen <= maxext->len)
1568 			goto fail;
1569 	}
1570 
1571 	/*
1572 	 * Check the extent which has been found against the reservations
1573 	 * and skip if parts of it are already reserved
1574 	 */
1575 	nblock = gfs2_next_unreserved_block(rbm->rgd, block, extlen, ip);
1576 	if (nblock == block) {
1577 		if (!minext || extlen >= minext)
1578 			return 0;
1579 
1580 		if (extlen > maxext->len) {
1581 			maxext->len = extlen;
1582 			maxext->rbm = *rbm;
1583 		}
1584 fail:
1585 		nblock = block + extlen;
1586 	}
1587 	ret = gfs2_rbm_from_block(rbm, nblock);
1588 	if (ret < 0)
1589 		return ret;
1590 	return 1;
1591 }
1592 
1593 /**
1594  * gfs2_rbm_find - Look for blocks of a particular state
1595  * @rbm: Value/result starting position and final position
1596  * @state: The state which we want to find
1597  * @minext: Pointer to the requested extent length (NULL for a single block)
1598  *          This is updated to be the actual reservation size.
1599  * @ip: If set, check for reservations
1600  * @nowrap: Stop looking at the end of the rgrp, rather than wrapping
1601  *          around until we've reached the starting point.
1602  * @ap: the allocation parameters
1603  *
1604  * Side effects:
1605  * - If looking for free blocks, we set GBF_FULL on each bitmap which
1606  *   has no free blocks in it.
1607  * - If looking for free blocks, we set rd_extfail_pt on each rgrp which
1608  *   has come up short on a free block search.
1609  *
1610  * Returns: 0 on success, -ENOSPC if there is no block of the requested state
1611  */
1612 
1613 static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext,
1614 			 const struct gfs2_inode *ip, bool nowrap,
1615 			 const struct gfs2_alloc_parms *ap)
1616 {
1617 	struct buffer_head *bh;
1618 	int initial_bii;
1619 	u32 initial_offset;
1620 	int first_bii = rbm->bii;
1621 	u32 first_offset = rbm->offset;
1622 	u32 offset;
1623 	u8 *buffer;
1624 	int n = 0;
1625 	int iters = rbm->rgd->rd_length;
1626 	int ret;
1627 	struct gfs2_bitmap *bi;
1628 	struct gfs2_extent maxext = { .rbm.rgd = rbm->rgd, };
1629 
1630 	/* If we are not starting at the beginning of a bitmap, then we
1631 	 * need to add one to the bitmap count to ensure that we search
1632 	 * the starting bitmap twice.
1633 	 */
1634 	if (rbm->offset != 0)
1635 		iters++;
1636 
1637 	while(1) {
1638 		bi = rbm_bi(rbm);
1639 		if (test_bit(GBF_FULL, &bi->bi_flags) &&
1640 		    (state == GFS2_BLKST_FREE))
1641 			goto next_bitmap;
1642 
1643 		bh = bi->bi_bh;
1644 		buffer = bh->b_data + bi->bi_offset;
1645 		WARN_ON(!buffer_uptodate(bh));
1646 		if (state != GFS2_BLKST_UNLINKED && bi->bi_clone)
1647 			buffer = bi->bi_clone + bi->bi_offset;
1648 		initial_offset = rbm->offset;
1649 		offset = gfs2_bitfit(buffer, bi->bi_len, rbm->offset, state);
1650 		if (offset == BFITNOENT)
1651 			goto bitmap_full;
1652 		rbm->offset = offset;
1653 		if (ip == NULL)
1654 			return 0;
1655 
1656 		initial_bii = rbm->bii;
1657 		ret = gfs2_reservation_check_and_update(rbm, ip,
1658 							minext ? *minext : 0,
1659 							&maxext);
1660 		if (ret == 0)
1661 			return 0;
1662 		if (ret > 0) {
1663 			n += (rbm->bii - initial_bii);
1664 			goto next_iter;
1665 		}
1666 		if (ret == -E2BIG) {
1667 			rbm->bii = 0;
1668 			rbm->offset = 0;
1669 			n += (rbm->bii - initial_bii);
1670 			goto res_covered_end_of_rgrp;
1671 		}
1672 		return ret;
1673 
1674 bitmap_full:	/* Mark bitmap as full and fall through */
1675 		if ((state == GFS2_BLKST_FREE) && initial_offset == 0) {
1676 			struct gfs2_bitmap *bi = rbm_bi(rbm);
1677 			set_bit(GBF_FULL, &bi->bi_flags);
1678 		}
1679 
1680 next_bitmap:	/* Find next bitmap in the rgrp */
1681 		rbm->offset = 0;
1682 		rbm->bii++;
1683 		if (rbm->bii == rbm->rgd->rd_length)
1684 			rbm->bii = 0;
1685 res_covered_end_of_rgrp:
1686 		if ((rbm->bii == 0) && nowrap)
1687 			break;
1688 		n++;
1689 next_iter:
1690 		if (n >= iters)
1691 			break;
1692 	}
1693 
1694 	if (minext == NULL || state != GFS2_BLKST_FREE)
1695 		return -ENOSPC;
1696 
1697 	/* If the extent was too small, and it's smaller than the smallest
1698 	   to have failed before, remember for future reference that it's
1699 	   useless to search this rgrp again for this amount or more. */
1700 	if ((first_offset == 0) && (first_bii == 0) &&
1701 	    (*minext < rbm->rgd->rd_extfail_pt))
1702 		rbm->rgd->rd_extfail_pt = *minext;
1703 
1704 	/* If the maximum extent we found is big enough to fulfill the
1705 	   minimum requirements, use it anyway. */
1706 	if (maxext.len) {
1707 		*rbm = maxext.rbm;
1708 		*minext = maxext.len;
1709 		return 0;
1710 	}
1711 
1712 	return -ENOSPC;
1713 }
1714 
1715 /**
1716  * try_rgrp_unlink - Look for any unlinked, allocated, but unused inodes
1717  * @rgd: The rgrp
1718  * @last_unlinked: block address of the last dinode we unlinked
1719  * @skip: block address we should explicitly not unlink
1720  *
1721  * Returns nothing: any unlinked dinodes found are queued to
1722  * gfs2_delete_workqueue for disposal rather than returned to the caller.
1723  */
1724 
1725 static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip)
1726 {
1727 	u64 block;
1728 	struct gfs2_sbd *sdp = rgd->rd_sbd;
1729 	struct gfs2_glock *gl;
1730 	struct gfs2_inode *ip;
1731 	int error;
1732 	int found = 0;
1733 	struct gfs2_rbm rbm = { .rgd = rgd, .bii = 0, .offset = 0 };
1734 
1735 	while (1) {
1736 		down_write(&sdp->sd_log_flush_lock);
1737 		error = gfs2_rbm_find(&rbm, GFS2_BLKST_UNLINKED, NULL, NULL,
1738 				      true, NULL);
1739 		up_write(&sdp->sd_log_flush_lock);
1740 		if (error == -ENOSPC)
1741 			break;
1742 		if (WARN_ON_ONCE(error))
1743 			break;
1744 
1745 		block = gfs2_rbm_to_block(&rbm);
1746 		if (gfs2_rbm_from_block(&rbm, block + 1))
1747 			break;
1748 		if (*last_unlinked != NO_BLOCK && block <= *last_unlinked)
1749 			continue;
1750 		if (block == skip)
1751 			continue;
1752 		*last_unlinked = block;
1753 
1754 		error = gfs2_glock_get(sdp, block, &gfs2_inode_glops, CREATE, &gl);
1755 		if (error)
1756 			continue;
1757 
1758 		/* If the inode is already in cache, we can ignore it here
1759 		 * because the existing inode disposal code will deal with
1760 		 * it when all refs have gone away. Accessing gl_object like
1761 		 * this is not safe in general. Here it is ok because we do
1762 		 * not dereference the pointer, and we only need an approx
1763 		 * answer to whether it is NULL or not.
1764 		 */
1765 		ip = gl->gl_object;
1766 
1767 		if (ip || queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0)
1768 			gfs2_glock_put(gl);
1769 		else
1770 			found++;
1771 
1772 		/* Limit reclaim to sensible number of tasks */
1773 		if (found > NR_CPUS)
1774 			return;
1775 	}
1776 
1777 	rgd->rd_flags &= ~GFS2_RDF_CHECK;
1778 	return;
1779 }
1780 
1781 /**
1782  * gfs2_rgrp_congested - Use stats to figure out whether an rgrp is congested
1783  * @rgd: The rgrp in question
1784  * @loops: An indication of how picky we can be (0=very, 1=less so)
1785  *
1786  * This function uses the recently added glock statistics in order to
1787  * figure out whether a particular resource group is suffering from
1788  * contention from multiple nodes. This is done purely on the basis
1789  * of timings, since this is the only data we have to work with and
1790  * our aim here is to reject a resource group which is highly contended
1791  * but (very important) not to do this too often in order to ensure that
1792  * we do not end up introducing fragmentation by changing resource
1793  * groups when not actually required.
1794  *
1795  * The calculation is fairly simple: we want to know whether the SRTTB
1796  * (i.e. smoothed round trip time for blocking operations) to acquire
1797  * the lock for this rgrp's glock is significantly greater than the
1798  * time taken for resource groups on average. We introduce a margin in
1799  * the form of the variable @var which is computed as the sum of the two
1800  * respective variances, and multiplied by a factor depending on @loops
1801  * and whether we have a lot of data to base the decision on. This is
1802  * then tested against the square difference of the means in order to
1803  * decide whether the result is statistically significant or not.
1804  *
1805  * Returns: A boolean verdict on the congestion status
1806  */
1807 
1808 static bool gfs2_rgrp_congested(const struct gfs2_rgrpd *rgd, int loops)
1809 {
1810 	const struct gfs2_glock *gl = rgd->rd_gl;
1811 	const struct gfs2_sbd *sdp = gl->gl_sbd;
1812 	struct gfs2_lkstats *st;
1813 	s64 r_dcount, l_dcount;
1814 	s64 r_srttb, l_srttb;
1815 	s64 srttb_diff;
1816 	s64 sqr_diff;
1817 	s64 var;
1818 
1819 	preempt_disable();
1820 	st = &this_cpu_ptr(sdp->sd_lkstats)->lkstats[LM_TYPE_RGRP];
1821 	r_srttb = st->stats[GFS2_LKS_SRTTB];
1822 	r_dcount = st->stats[GFS2_LKS_DCOUNT];
1823 	var = st->stats[GFS2_LKS_SRTTVARB] +
1824 	      gl->gl_stats.stats[GFS2_LKS_SRTTVARB];
1825 	preempt_enable();
1826 
1827 	l_srttb = gl->gl_stats.stats[GFS2_LKS_SRTTB];
1828 	l_dcount = gl->gl_stats.stats[GFS2_LKS_DCOUNT];
1829 
1830 	if ((l_dcount < 1) || (r_dcount < 1) || (r_srttb == 0))
1831 		return false;
1832 
1833 	srttb_diff = r_srttb - l_srttb;
1834 	sqr_diff = srttb_diff * srttb_diff;
1835 
1836 	var *= 2;
1837 	if (l_dcount < 8 || r_dcount < 8)
1838 		var *= 2;
1839 	if (loops == 1)
1840 		var *= 2;
1841 
1842 	return ((srttb_diff < 0) && (sqr_diff > var));
1843 }
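/*
 * Editor's illustration with made-up numbers: suppose the filesystem
 * average r_srttb is 1000ns and this rgrp's l_srttb is 3000ns, so
 * srttb_diff = -2000 and sqr_diff = 4,000,000.  With a summed variance
 * of 1,000,000, doubled once to var = 2,000,000 (plenty of samples,
 * loops == 0), sqr_diff > var and srttb_diff < 0, so the rgrp is
 * reported congested.
 */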
1844 
1845 /**
1846  * gfs2_rgrp_used_recently - check if an rgrp's glock was used recently
1847  * @rs: The block reservation with the rgrp to test
1848  * @msecs: The time limit in milliseconds
1849  *
1850  * Returns: True if the rgrp glock has been used within the time limit
1851  */
1852 static bool gfs2_rgrp_used_recently(const struct gfs2_blkreserv *rs,
1853 				    u64 msecs)
1854 {
1855 	u64 tdiff;
1856 
1857 	tdiff = ktime_to_ns(ktime_sub(ktime_get_real(),
1858 			    rs->rs_rbm.rgd->rd_gl->gl_dstamp));
1859 
1860 	return tdiff > (msecs * 1000 * 1000);
1861 }
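
/*
 * Note that "msecs * 1000 * 1000" converts the limit to nanoseconds to
 * match the ktime difference above; the caller below passes 1000, i.e.
 * "used within the last second".
 */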
1862 
1863 static u32 gfs2_orlov_skip(const struct gfs2_inode *ip)
1864 {
1865 	const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1866 	u32 skip;
1867 
1868 	get_random_bytes(&skip, sizeof(skip));
1869 	return skip % sdp->sd_rgrps;
1870 }
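
/*
 * This is the Orlov-style spreading used for directory allocations
 * (see the GFS2_AF_ORLOV check below): a random value reduced modulo
 * the rgrp count gives a roughly uniform starting offset, so sibling
 * directories tend to land in different resource groups.
 */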
1871 
1872 static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *begin)
1873 {
1874 	struct gfs2_rgrpd *rgd = *pos;
1875 	struct gfs2_sbd *sdp = rgd->rd_sbd;
1876 
1877 	rgd = gfs2_rgrpd_get_next(rgd);
1878 	if (rgd == NULL)
1879 		rgd = gfs2_rgrpd_get_first(sdp);
1880 	*pos = rgd;
1881 	if (rgd != begin) /* If we didn't wrap */
1882 		return true;
1883 	return false;
1884 }
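
/*
 * The walk above is circular: it advances to the next rgrp, wraps to
 * the first rgrp at the end of the list, and returns false exactly
 * once - when it arrives back at @begin - so callers visit each rgrp
 * at most once per pass.
 */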
1885 
1886 /**
1887  * gfs2_inplace_reserve - Reserve space in the filesystem
1888  * @ip: the inode to reserve space for
1889  * @ap: the allocation parameters
1890  *
1891  * Returns: errno
1892  */
1893 
1894 int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *ap)
1895 {
1896 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1897 	struct gfs2_rgrpd *begin = NULL;
1898 	struct gfs2_blkreserv *rs = ip->i_res;
1899 	int error = 0, rg_locked, flags = 0;
1900 	u64 last_unlinked = NO_BLOCK;
1901 	int loops = 0;
1902 	u32 skip = 0;
1903 
1904 	if (sdp->sd_args.ar_rgrplvb)
1905 		flags |= GL_SKIP;
1906 	if (gfs2_assert_warn(sdp, ap->target))
1907 		return -EINVAL;
1908 	if (gfs2_rs_active(rs)) {
1909 		begin = rs->rs_rbm.rgd;
1910 	} else if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) {
1911 		rs->rs_rbm.rgd = begin = ip->i_rgd;
1912 	} else {
1913 		rs->rs_rbm.rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1);
1914 	}
1915 	if (S_ISDIR(ip->i_inode.i_mode) && (ap->aflags & GFS2_AF_ORLOV))
1916 		skip = gfs2_orlov_skip(ip);
1917 	if (rs->rs_rbm.rgd == NULL)
1918 		return -EBADSLT;
1919 
1920 	while (loops < 3) {
1921 		rg_locked = 1;
1922 
1923 		if (!gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) {
1924 			rg_locked = 0;
1925 			if (skip && skip--)
1926 				goto next_rgrp;
1927 			if (!gfs2_rs_active(rs) && (loops < 2) &&
1928 			     gfs2_rgrp_used_recently(rs, 1000) &&
1929 			     gfs2_rgrp_congested(rs->rs_rbm.rgd, loops))
1930 				goto next_rgrp;
1931 			error = gfs2_glock_nq_init(rs->rs_rbm.rgd->rd_gl,
1932 						   LM_ST_EXCLUSIVE, flags,
1933 						   &rs->rs_rgd_gh);
1934 			if (unlikely(error))
1935 				return error;
1936 			if (!gfs2_rs_active(rs) && (loops < 2) &&
1937 			    gfs2_rgrp_congested(rs->rs_rbm.rgd, loops))
1938 				goto skip_rgrp;
1939 			if (sdp->sd_args.ar_rgrplvb) {
1940 				error = update_rgrp_lvb(rs->rs_rbm.rgd);
1941 				if (unlikely(error)) {
1942 					gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
1943 					return error;
1944 				}
1945 			}
1946 		}
1947 
1948 		/* Skip unusable resource groups */
1949 		if ((rs->rs_rbm.rgd->rd_flags & (GFS2_RGF_NOALLOC |
1950 						 GFS2_RDF_ERROR)) ||
1951 		    (ap->target > rs->rs_rbm.rgd->rd_extfail_pt))
1952 			goto skip_rgrp;
1953 
1954 		if (sdp->sd_args.ar_rgrplvb)
1955 			gfs2_rgrp_bh_get(rs->rs_rbm.rgd);
1956 
1957 		/* Get a reservation if we don't already have one */
1958 		if (!gfs2_rs_active(rs))
1959 			rg_mblk_search(rs->rs_rbm.rgd, ip, ap);
1960 
1961 		/* Skip rgrps when we can't get a reservation on first pass */
1962 		if (!gfs2_rs_active(rs) && (loops < 1))
1963 			goto check_rgrp;
1964 
1965 		/* If rgrp has enough free space, use it */
1966 		if (rs->rs_rbm.rgd->rd_free_clone >= ap->target) {
1967 			ip->i_rgd = rs->rs_rbm.rgd;
1968 			return 0;
1969 		}
1970 
1971 check_rgrp:
1972 		/* Check for unlinked inodes which can be reclaimed */
1973 		if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK)
1974 			try_rgrp_unlink(rs->rs_rbm.rgd, &last_unlinked,
1975 					ip->i_no_addr);
1976 skip_rgrp:
1977 		/* Drop reservation, if we couldn't use reserved rgrp */
1978 		if (gfs2_rs_active(rs))
1979 			gfs2_rs_deltree(rs);
1980 
1981 		/* Unlock rgrp if required */
1982 		if (!rg_locked)
1983 			gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
1984 next_rgrp:
1985 		/* Find the next rgrp, and continue looking */
1986 		if (gfs2_select_rgrp(&rs->rs_rbm.rgd, begin))
1987 			continue;
1988 		if (skip)
1989 			continue;
1990 
1991 		/* If we've scanned all the rgrps but found no free blocks,
1992 		 * then check for some less likely conditions before
1993 		 * trying again.
1994 		 */
1995 		loops++;
1996 		/* Check that fs hasn't grown if writing to rindex */
1997 		if (ip == GFS2_I(sdp->sd_rindex) && !sdp->sd_rindex_uptodate) {
1998 			error = gfs2_ri_update(ip);
1999 			if (error)
2000 				return error;
2001 		}
2002 		/* Flushing the log may release space */
2003 		if (loops == 2)
2004 			gfs2_log_flush(sdp, NULL, NORMAL_FLUSH);
2005 	}
2006 
2007 	return -ENOSPC;
2008 }
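
/*
 * To summarise the retry strategy above:
 *
 *	loops == 0:	only use an rgrp if a multi-block reservation
 *			can be made in it, and skip rgrps which look
 *			congested.
 *	loops == 1:	accept an rgrp without a reservation, but
 *			still skip congested rgrps.
 *	loops == 2:	after gfs2_log_flush() may have released more
 *			free space, take any rgrp with enough free
 *			blocks.
 */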
2009 
2010 /**
2011  * gfs2_inplace_release - release an inplace reservation
2012  * @ip: the inode the reservation was taken out on
2013  *
2014  * Release a reservation made by gfs2_inplace_reserve().
2015  */
2016 
2017 void gfs2_inplace_release(struct gfs2_inode *ip)
2018 {
2019 	struct gfs2_blkreserv *rs = ip->i_res;
2020 
2021 	if (rs->rs_rgd_gh.gh_gl)
2022 		gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
2023 }
2024 
2025 /**
2026  * gfs2_get_block_type - Check if a block in a RG is of the given type
2027  * @rgd: the resource group holding the block
2028  * @block: the block number
2029  *
2030  * Returns: The block type (GFS2_BLKST_*)
2031  */
2032 
2033 static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block)
2034 {
2035 	struct gfs2_rbm rbm = { .rgd = rgd, };
2036 	int ret;
2037 
2038 	ret = gfs2_rbm_from_block(&rbm, block);
2039 	WARN_ON_ONCE(ret != 0);
2040 
2041 	return gfs2_testbit(&rbm);
2042 }
2043 
2044 
2045 /**
2046  * gfs2_alloc_extent - allocate an extent from a given bitmap
2047  * @rbm: the resource group information
2048  * @dinode: TRUE if the first block we allocate is for a dinode
2049  * @n: The extent length (value/result)
2050  *
2051  * Add the bitmap buffer to the transaction.
2052  * Set the found bits to the new state to change the blocks' allocation state.
2053  */
2054 static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode,
2055 			     unsigned int *n)
2056 {
2057 	struct gfs2_rbm pos = { .rgd = rbm->rgd, };
2058 	const unsigned int elen = *n;
2059 	u64 block;
2060 	int ret;
2061 
2062 	*n = 1;
2063 	block = gfs2_rbm_to_block(rbm);
2064 	gfs2_trans_add_meta(rbm->rgd->rd_gl, rbm_bi(rbm)->bi_bh);
2065 	gfs2_setbit(rbm, true, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
2066 	block++;
2067 	while (*n < elen) {
2068 		ret = gfs2_rbm_from_block(&pos, block);
2069 		if (ret || gfs2_testbit(&pos) != GFS2_BLKST_FREE)
2070 			break;
2071 		gfs2_trans_add_meta(pos.rgd->rd_gl, rbm_bi(&pos)->bi_bh);
2072 		gfs2_setbit(&pos, true, GFS2_BLKST_USED);
2073 		(*n)++;
2074 		block++;
2075 	}
2076 }
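
/*
 * The loop above extends the allocation greedily, one block at a time,
 * stopping when gfs2_rbm_from_block() fails (we ran off the end of the
 * rgrp) or when the next block is not free.  This is why @n is a
 * value/result parameter: the caller asks for an extent of *n blocks
 * and is told how many contiguous blocks were actually marked.
 */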
2077 
2078 /**
2079  * rgblk_free - Change alloc state of given block(s)
2080  * @sdp: the filesystem
2081  * @bstart: the start of a run of blocks to free
2082  * @blen: the length of the block run (all must lie within ONE RG!)
2083  * @new_state: GFS2_BLKST_XXX the after-allocation block state
2084  *
2085  * Returns:  Resource group containing the block(s)
2086  */
2087 
2088 static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
2089 				     u32 blen, unsigned char new_state)
2090 {
2091 	struct gfs2_rbm rbm;
2092 	struct gfs2_bitmap *bi;
2093 
2094 	rbm.rgd = gfs2_blk2rgrpd(sdp, bstart, 1);
2095 	if (!rbm.rgd) {
2096 		if (gfs2_consist(sdp))
2097 			fs_err(sdp, "block = %llu\n", (unsigned long long)bstart);
2098 		return NULL;
2099 	}
2100 
2101 	while (blen--) {
2102 		gfs2_rbm_from_block(&rbm, bstart);
2103 		bi = rbm_bi(&rbm);
2104 		bstart++;
2105 		if (!bi->bi_clone) {
2106 			bi->bi_clone = kmalloc(bi->bi_bh->b_size,
2107 					       GFP_NOFS | __GFP_NOFAIL);
2108 			memcpy(bi->bi_clone + bi->bi_offset,
2109 			       bi->bi_bh->b_data + bi->bi_offset, bi->bi_len);
2110 		}
2111 		gfs2_trans_add_meta(rbm.rgd->rd_gl, bi->bi_bh);
2112 		gfs2_setbit(&rbm, false, new_state);
2113 	}
2114 
2115 	return rbm.rgd;
2116 }
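
/*
 * The bi_clone handling above gives copy-on-write semantics for the
 * bitmap: the clone preserves the pre-free state, and searches for
 * free blocks are made against the clone when it exists, so blocks
 * freed here cannot be reallocated until the transaction which freed
 * them has made it to the log and the clone is discarded.
 */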
2117 
2118 /**
2119  * gfs2_rgrp_dump - print out an rgrp
2120  * @seq: The iterator
2121  * @gl: The glock in question
2122  *
2123  */
2124 
2125 void gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl)
2126 {
2127 	struct gfs2_rgrpd *rgd = gl->gl_object;
2128 	struct gfs2_blkreserv *trs;
2129 	const struct rb_node *n;
2130 
2131 	if (rgd == NULL)
2132 		return;
2133 	gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u r:%u e:%u\n",
2134 		       (unsigned long long)rgd->rd_addr, rgd->rd_flags,
2135 		       rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes,
2136 		       rgd->rd_reserved, rgd->rd_extfail_pt);
2137 	spin_lock(&rgd->rd_rsspin);
2138 	for (n = rb_first(&rgd->rd_rstree); n; n = rb_next(&trs->rs_node)) {
2139 		trs = rb_entry(n, struct gfs2_blkreserv, rs_node);
2140 		dump_rs(seq, trs);
2141 	}
2142 	spin_unlock(&rgd->rd_rsspin);
2143 }
2144 
2145 static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd)
2146 {
2147 	struct gfs2_sbd *sdp = rgd->rd_sbd;
2148 	fs_warn(sdp, "rgrp %llu has an error, marking it readonly until umount\n",
2149 		(unsigned long long)rgd->rd_addr);
2150 	fs_warn(sdp, "umount on all nodes and run fsck.gfs2 to fix the error\n");
2151 	gfs2_rgrp_dump(NULL, rgd->rd_gl);
2152 	rgd->rd_flags |= GFS2_RDF_ERROR;
2153 }
2154 
2155 /**
2156  * gfs2_adjust_reservation - Adjust (or remove) a reservation after allocation
2157  * @ip: The inode we have just allocated blocks for
2158  * @rbm: The start of the allocated blocks
2159  * @len: The extent length
2160  *
2161  * Adjusts a reservation after an allocation has taken place. If the
2162  * reservation does not match the allocation, or if it is now empty,
2163  * then it is removed.
2164  */
2165 
2166 static void gfs2_adjust_reservation(struct gfs2_inode *ip,
2167 				    const struct gfs2_rbm *rbm, unsigned len)
2168 {
2169 	struct gfs2_blkreserv *rs = ip->i_res;
2170 	struct gfs2_rgrpd *rgd = rbm->rgd;
2171 	unsigned rlen;
2172 	u64 block;
2173 	int ret;
2174 
2175 	spin_lock(&rgd->rd_rsspin);
2176 	if (gfs2_rs_active(rs)) {
2177 		if (gfs2_rbm_eq(&rs->rs_rbm, rbm)) {
2178 			block = gfs2_rbm_to_block(rbm);
2179 			ret = gfs2_rbm_from_block(&rs->rs_rbm, block + len);
2180 			rlen = min(rs->rs_free, len);
2181 			rs->rs_free -= rlen;
2182 			rgd->rd_reserved -= rlen;
2183 			trace_gfs2_rs(rs, TRACE_RS_CLAIM);
2184 			if (rs->rs_free && !ret)
2185 				goto out;
2186 		}
2187 		__rs_deltree(rs);
2188 	}
2189 out:
2190 	spin_unlock(&rgd->rd_rsspin);
2191 }
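
/*
 * As an example of the logic above: if a reservation covers blocks
 * 100..131 and the allocation claimed blocks 100..103 (len == 4), the
 * reservation start moves to block 104 and rs_free/rd_reserved each
 * drop by 4.  If the allocation did not start exactly at the
 * reservation, or nothing remains of it, the whole reservation is
 * removed instead.
 */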
2192 
2193 /**
2194  * gfs2_set_alloc_start - Set starting point for block allocation
2195  * @rbm: The rbm which will be set to the required location
2196  * @ip: The gfs2 inode
2197  * @dinode: Flag to say if allocation includes a new inode
2198  *
2199  * This sets the starting point from the reservation if one is active;
2200  * otherwise it falls back to guessing a start point based on the
2201  * inode's goal block or the last allocation point in the rgrp.
2202  */
2203 
2204 static void gfs2_set_alloc_start(struct gfs2_rbm *rbm,
2205 				 const struct gfs2_inode *ip, bool dinode)
2206 {
2207 	u64 goal;
2208 
2209 	if (gfs2_rs_active(ip->i_res)) {
2210 		*rbm = ip->i_res->rs_rbm;
2211 		return;
2212 	}
2213 
2214 	if (!dinode && rgrp_contains_block(rbm->rgd, ip->i_goal))
2215 		goal = ip->i_goal;
2216 	else
2217 		goal = rbm->rgd->rd_last_alloc + rbm->rgd->rd_data0;
2218 
2219 	gfs2_rbm_from_block(rbm, goal);
2220 }
2221 
2222 /**
2223  * gfs2_alloc_blocks - Allocate one or more blocks of data and/or a dinode
2224  * @ip: the inode to allocate the block for
2225  * @bn: Used to return the starting block number
2226  * @nblocks: requested number of blocks/extent length (value/result)
2227  * @dinode: 1 if we're allocating a dinode block, else 0
2228  * @generation: the generation number of the inode
2229  *
2230  * Returns: 0 or error
2231  */
2232 
2233 int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
2234 		      bool dinode, u64 *generation)
2235 {
2236 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
2237 	struct buffer_head *dibh;
2238 	struct gfs2_rbm rbm = { .rgd = ip->i_rgd, };
2239 	unsigned int ndata;
2240 	u64 block; /* block, within the file system scope */
2241 	int error;
2242 
2243 	gfs2_set_alloc_start(&rbm, ip, dinode);
2244 	error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, ip, false, NULL);
2245 
2246 	if (error == -ENOSPC) {
2247 		gfs2_set_alloc_start(&rbm, ip, dinode);
2248 		error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, NULL, false,
2249 				      NULL);
2250 	}
2251 
2252 	/* Since all blocks are reserved in advance, this shouldn't happen */
2253 	if (error) {
2254 		fs_warn(sdp, "inum=%llu error=%d, nblocks=%u, full=%d fail_pt=%d\n",
2255 			(unsigned long long)ip->i_no_addr, error, *nblocks,
2256 			test_bit(GBF_FULL, &rbm.rgd->rd_bits->bi_flags),
2257 			rbm.rgd->rd_extfail_pt);
2258 		goto rgrp_error;
2259 	}
2260 
2261 	gfs2_alloc_extent(&rbm, dinode, nblocks);
2262 	block = gfs2_rbm_to_block(&rbm);
2263 	rbm.rgd->rd_last_alloc = block - rbm.rgd->rd_data0;
2264 	if (gfs2_rs_active(ip->i_res))
2265 		gfs2_adjust_reservation(ip, &rbm, *nblocks);
2266 	ndata = *nblocks;
2267 	if (dinode)
2268 		ndata--;
2269 
2270 	if (!dinode) {
2271 		ip->i_goal = block + ndata - 1;
2272 		error = gfs2_meta_inode_buffer(ip, &dibh);
2273 		if (error == 0) {
2274 			struct gfs2_dinode *di =
2275 				(struct gfs2_dinode *)dibh->b_data;
2276 			gfs2_trans_add_meta(ip->i_gl, dibh);
2277 			di->di_goal_meta = di->di_goal_data =
2278 				cpu_to_be64(ip->i_goal);
2279 			brelse(dibh);
2280 		}
2281 	}
2282 	if (rbm.rgd->rd_free < *nblocks) {
2283 		pr_warn("nblocks=%u\n", *nblocks);
2284 		goto rgrp_error;
2285 	}
2286 
2287 	rbm.rgd->rd_free -= *nblocks;
2288 	if (dinode) {
2289 		rbm.rgd->rd_dinodes++;
2290 		*generation = rbm.rgd->rd_igeneration++;
2291 		if (*generation == 0)
2292 			*generation = rbm.rgd->rd_igeneration++;
2293 	}
2294 
2295 	gfs2_trans_add_meta(rbm.rgd->rd_gl, rbm.rgd->rd_bits[0].bi_bh);
2296 	gfs2_rgrp_out(rbm.rgd, rbm.rgd->rd_bits[0].bi_bh->b_data);
2297 	gfs2_rgrp_ondisk2lvb(rbm.rgd->rd_rgl, rbm.rgd->rd_bits[0].bi_bh->b_data);
2298 
2299 	gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0);
2300 	if (dinode)
2301 		gfs2_trans_add_unrevoke(sdp, block, *nblocks);
2302 
2303 	gfs2_quota_change(ip, *nblocks, ip->i_inode.i_uid, ip->i_inode.i_gid);
2304 
2305 	rbm.rgd->rd_free_clone -= *nblocks;
2306 	trace_gfs2_block_alloc(ip, rbm.rgd, block, *nblocks,
2307 			       dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
2308 	*bn = block;
2309 	return 0;
2310 
2311 rgrp_error:
2312 	gfs2_rgrp_error(rbm.rgd);
2313 	return -EIO;
2314 }
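
/*
 * Note that the generation handling above deliberately skips zero: if
 * rd_igeneration wraps to 0 it is bumped once more, since zero appears
 * to be reserved as an "unset" generation value.
 */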
2315 
2316 /**
2317  * __gfs2_free_blocks - free a contiguous run of block(s)
2318  * @ip: the inode these blocks are being freed from
2319  * @bstart: first block of a run of contiguous blocks
2320  * @blen: the length of the block run
2321  * @meta: 1 if the blocks represent metadata
2322  *
2323  */
2324 
2325 void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta)
2326 {
2327 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
2328 	struct gfs2_rgrpd *rgd;
2329 
2330 	rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE);
2331 	if (!rgd)
2332 		return;
2333 	trace_gfs2_block_alloc(ip, rgd, bstart, blen, GFS2_BLKST_FREE);
2334 	rgd->rd_free += blen;
2335 	rgd->rd_flags &= ~GFS2_RGF_TRIMMED;
2336 	gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
2337 	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
2338 	gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
2339 
2340 	/* Directories keep their data in the metadata address space */
2341 	if (meta || ip->i_depth)
2342 		gfs2_meta_wipe(ip, bstart, blen);
2343 }
2344 
2345 /**
2346  * gfs2_free_meta - free a contiguous run of metadata block(s)
2347  * @ip: the inode these blocks are being freed from
2348  * @bstart: first block of a run of contiguous blocks
2349  * @blen: the length of the block run
2350  *
2351  */
2352 
2353 void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen)
2354 {
2355 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
2356 
2357 	__gfs2_free_blocks(ip, bstart, blen, 1);
2358 	gfs2_statfs_change(sdp, 0, +blen, 0);
2359 	gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid);
2360 }
2361 
2362 void gfs2_unlink_di(struct inode *inode)
2363 {
2364 	struct gfs2_inode *ip = GFS2_I(inode);
2365 	struct gfs2_sbd *sdp = GFS2_SB(inode);
2366 	struct gfs2_rgrpd *rgd;
2367 	u64 blkno = ip->i_no_addr;
2368 
2369 	rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_UNLINKED);
2370 	if (!rgd)
2371 		return;
2372 	trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED);
2373 	gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
2374 	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
2375 	gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
2376 	update_rgrp_lvb_unlinked(rgd, 1);
2377 }
2378 
2379 static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno)
2380 {
2381 	struct gfs2_sbd *sdp = rgd->rd_sbd;
2382 	struct gfs2_rgrpd *tmp_rgd;
2383 
2384 	tmp_rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_FREE);
2385 	if (!tmp_rgd)
2386 		return;
2387 	gfs2_assert_withdraw(sdp, rgd == tmp_rgd);
2388 
2389 	if (!rgd->rd_dinodes)
2390 		gfs2_consist_rgrpd(rgd);
2391 	rgd->rd_dinodes--;
2392 	rgd->rd_free++;
2393 
2394 	gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
2395 	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
2396 	gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
2397 	update_rgrp_lvb_unlinked(rgd, -1);
2398 
2399 	gfs2_statfs_change(sdp, 0, +1, -1);
2400 }
2401 
2402 
2403 void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
2404 {
2405 	gfs2_free_uninit_di(rgd, ip->i_no_addr);
2406 	trace_gfs2_block_alloc(ip, rgd, ip->i_no_addr, 1, GFS2_BLKST_FREE);
2407 	gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid);
2408 	gfs2_meta_wipe(ip, ip->i_no_addr, 1);
2409 }
2410 
2411 /**
2412  * gfs2_check_blk_type - Check the type of a block
2413  * @sdp: The superblock
2414  * @no_addr: The block number to check
2415  * @type: The block type we are looking for
2416  *
2417  * Returns: 0 if the block type matches the expected type
2418  *          -ESTALE if it doesn't match
2419  *          or a negative errno if something went wrong while checking
2420  */
2421 
2422 int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type)
2423 {
2424 	struct gfs2_rgrpd *rgd;
2425 	struct gfs2_holder rgd_gh;
2426 	int error = -EINVAL;
2427 
2428 	rgd = gfs2_blk2rgrpd(sdp, no_addr, 1);
2429 	if (!rgd)
2430 		goto fail;
2431 
2432 	error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_SHARED, 0, &rgd_gh);
2433 	if (error)
2434 		goto fail;
2435 
2436 	if (gfs2_get_block_type(rgd, no_addr) != type)
2437 		error = -ESTALE;
2438 
2439 	gfs2_glock_dq_uninit(&rgd_gh);
2440 fail:
2441 	return error;
2442 }
2443 
2444 /**
2445  * gfs2_rlist_add - add a RG to a list of RGs
2446  * @ip: the inode
2447  * @rlist: the list of resource groups
2448  * @block: the block number
2449  *
2450  * Figure out what RG a block belongs to and add that RG to the list
2451  *
2452  * FIXME: Don't use NOFAIL
2453  *
2454  */
2455 
2456 void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist,
2457 		    u64 block)
2458 {
2459 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
2460 	struct gfs2_rgrpd *rgd;
2461 	struct gfs2_rgrpd **tmp;
2462 	unsigned int new_space;
2463 	unsigned int x;
2464 
2465 	if (gfs2_assert_warn(sdp, !rlist->rl_ghs))
2466 		return;
2467 
2468 	if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, block))
2469 		rgd = ip->i_rgd;
2470 	else
2471 		rgd = gfs2_blk2rgrpd(sdp, block, 1);
2472 	if (!rgd) {
2473 		fs_err(sdp, "rlist_add: no rgrp for block %llu\n", (unsigned long long)block);
2474 		return;
2475 	}
2476 	ip->i_rgd = rgd;
2477 
2478 	for (x = 0; x < rlist->rl_rgrps; x++)
2479 		if (rlist->rl_rgd[x] == rgd)
2480 			return;
2481 
2482 	if (rlist->rl_rgrps == rlist->rl_space) {
2483 		new_space = rlist->rl_space + 10;
2484 
2485 		tmp = kcalloc(new_space, sizeof(struct gfs2_rgrpd *),
2486 			      GFP_NOFS | __GFP_NOFAIL);
2487 
2488 		if (rlist->rl_rgd) {
2489 			memcpy(tmp, rlist->rl_rgd,
2490 			       rlist->rl_space * sizeof(struct gfs2_rgrpd *));
2491 			kfree(rlist->rl_rgd);
2492 		}
2493 
2494 		rlist->rl_space = new_space;
2495 		rlist->rl_rgd = tmp;
2496 	}
2497 
2498 	rlist->rl_rgd[rlist->rl_rgrps++] = rgd;
2499 }
2500 
2501 /**
2502  * gfs2_rlist_alloc - allocate and initialize an array of glock holders,
2503  *      one for each resource group already added to the rlist
2504  * @rlist: the list of resource groups
2505  * @state: the lock state to acquire the RG lock in
2506  *
2507  * FIXME: Don't use NOFAIL
2508  *
2509  */
2510 
2511 void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state)
2512 {
2513 	unsigned int x;
2514 
2515 	rlist->rl_ghs = kcalloc(rlist->rl_rgrps, sizeof(struct gfs2_holder),
2516 				GFP_NOFS | __GFP_NOFAIL);
2517 	for (x = 0; x < rlist->rl_rgrps; x++)
2518 		gfs2_holder_init(rlist->rl_rgd[x]->rd_gl,
2519 				state, 0,
2520 				&rlist->rl_ghs[x]);
2521 }
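
/*
 * A sketch of the typical caller sequence (modelled on the directory
 * leaf deallocation code, leaf_dealloc() in dir.c):
 *
 *	struct gfs2_rgrp_list rlist;
 *
 *	memset(&rlist, 0, sizeof(struct gfs2_rgrp_list));
 *	gfs2_rlist_add(ip, &rlist, block);   (once per block to free)
 *	gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE);
 *	error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
 *	... free the blocks under the held rgrp glocks ...
 *	gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs);
 *	gfs2_rlist_free(&rlist);
 */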
2522 
2523 /**
2524  * gfs2_rlist_free - free a resource group list
2525  * @rlist: the list of resource groups
2526  *
2527  */
2528 
2529 void gfs2_rlist_free(struct gfs2_rgrp_list *rlist)
2530 {
2531 	unsigned int x;
2532 
2533 	kfree(rlist->rl_rgd);
2534 
2535 	if (rlist->rl_ghs) {
2536 		for (x = 0; x < rlist->rl_rgrps; x++)
2537 			gfs2_holder_uninit(&rlist->rl_ghs[x]);
2538 		kfree(rlist->rl_ghs);
2539 		rlist->rl_ghs = NULL;
2540 	}
2541 }
2542 
2543