xref: /openbmc/linux/fs/gfs2/rgrp.c (revision e1f7c9ee)
1 /*
2  * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
3  * Copyright (C) 2004-2008 Red Hat, Inc.  All rights reserved.
4  *
5  * This copyrighted material is made available to anyone wishing to use,
6  * modify, copy, or redistribute it subject to the terms and conditions
7  * of the GNU General Public License version 2.
8  */
9 
10 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
11 
12 #include <linux/slab.h>
13 #include <linux/spinlock.h>
14 #include <linux/completion.h>
15 #include <linux/buffer_head.h>
16 #include <linux/fs.h>
17 #include <linux/gfs2_ondisk.h>
18 #include <linux/prefetch.h>
19 #include <linux/blkdev.h>
20 #include <linux/rbtree.h>
21 #include <linux/random.h>
22 
23 #include "gfs2.h"
24 #include "incore.h"
25 #include "glock.h"
26 #include "glops.h"
27 #include "lops.h"
28 #include "meta_io.h"
29 #include "quota.h"
30 #include "rgrp.h"
31 #include "super.h"
32 #include "trans.h"
33 #include "util.h"
34 #include "log.h"
35 #include "inode.h"
36 #include "trace_gfs2.h"
37 
38 #define BFITNOENT ((u32)~0)
39 #define NO_BLOCK ((u64)~0)
40 
41 #if BITS_PER_LONG == 32
42 #define LBITMASK   (0x55555555UL)
43 #define LBITSKIP55 (0x55555555UL)
44 #define LBITSKIP00 (0x00000000UL)
45 #else
46 #define LBITMASK   (0x5555555555555555UL)
47 #define LBITSKIP55 (0x5555555555555555UL)
48 #define LBITSKIP00 (0x0000000000000000UL)
49 #endif
50 
51 /*
52  * These routines are used by the resource group routines (rgrp.c)
53  * to keep track of block allocation.  Each block is represented by two
54  * bits.  So, each byte represents GFS2_NBBY (i.e. 4) blocks.
55  *
56  * 0 = Free
57  * 1 = Used (not metadata)
58  * 2 = Unlinked (still in use) inode
59  * 3 = Used (metadata)
60  */
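/*
 * To make the encoding concrete, here is a minimal sketch of reading a
 * block's two-bit state out of a bitmap byte array (illustrative only;
 * the helper name is hypothetical and not part of GFS2):
 */
#if 0
static unsigned char example_block_state(const unsigned char *bitmap,
					 unsigned int block)
{
	const unsigned char byte = bitmap[block / GFS2_NBBY];
	const unsigned int bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE;

	return (byte >> bit) & GFS2_BIT_MASK;	/* one of the states above */
}
#endif
/*
 * Worked example: the byte 0xe4 (binary 11 10 01 00) holds block 0 as
 * free (00), block 1 as used data (01), block 2 as unlinked (10) and
 * block 3 as used metadata (11); block n occupies bits 2n and 2n + 1.
 */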
61 
62 struct gfs2_extent {
63 	struct gfs2_rbm rbm;
64 	u32 len;
65 };
66 
67 static const char valid_change[16] = {
68 	        /* current */
69 	/* n */ 0, 1, 1, 1,
70 	/* e */ 1, 0, 0, 0,
71 	/* w */ 0, 0, 0, 1,
72 	        1, 0, 0, 0
73 };
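/*
 * The table is indexed as valid_change[new_state * 4 + cur_state]; the
 * row labels n/e/w spell "new", i.e. each row is a new state and each
 * column a current state.  For example, entry [0 * 4 + 0] is 0 because
 * freeing an already-free block can only mean bitmap corruption, while
 * entry [1 * 4 + 0] is 1 because a free block may become used data.
 */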
74 
75 static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext,
76 			 const struct gfs2_inode *ip, bool nowrap,
77 			 const struct gfs2_alloc_parms *ap);
78 
79 
80 /**
81  * gfs2_setbit - Set a bit in the bitmaps
82  * @rbm: The position of the bit to set
83  * @do_clone: Also set the clone bitmap, if it exists
84  * @new_state: the new state of the block
85  *
86  */
87 
88 static inline void gfs2_setbit(const struct gfs2_rbm *rbm, bool do_clone,
89 			       unsigned char new_state)
90 {
91 	unsigned char *byte1, *byte2, *end, cur_state;
92 	struct gfs2_bitmap *bi = rbm_bi(rbm);
93 	unsigned int buflen = bi->bi_len;
94 	const unsigned int bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE;
95 
96 	byte1 = bi->bi_bh->b_data + bi->bi_offset + (rbm->offset / GFS2_NBBY);
97 	end = bi->bi_bh->b_data + bi->bi_offset + buflen;
98 
99 	BUG_ON(byte1 >= end);
100 
101 	cur_state = (*byte1 >> bit) & GFS2_BIT_MASK;
102 
103 	if (unlikely(!valid_change[new_state * 4 + cur_state])) {
104 		pr_warn("buf_blk = 0x%x old_state=%d, new_state=%d\n",
105 			rbm->offset, cur_state, new_state);
106 		pr_warn("rgrp=0x%llx bi_start=0x%x\n",
107 			(unsigned long long)rbm->rgd->rd_addr, bi->bi_start);
108 		pr_warn("bi_offset=0x%x bi_len=0x%x\n",
109 			bi->bi_offset, bi->bi_len);
110 		dump_stack();
111 		gfs2_consist_rgrpd(rbm->rgd);
112 		return;
113 	}
114 	*byte1 ^= (cur_state ^ new_state) << bit;
115 
116 	if (do_clone && bi->bi_clone) {
117 		byte2 = bi->bi_clone + bi->bi_offset + (rbm->offset / GFS2_NBBY);
118 		cur_state = (*byte2 >> bit) & GFS2_BIT_MASK;
119 		*byte2 ^= (cur_state ^ new_state) << bit;
120 	}
121 }
122 
123 /**
124  * gfs2_testbit - test a bit in the bitmaps
125  * @rbm: The bit to test
126  *
127  * Returns: The two-bit block state of the requested bit
128  */
129 
130 static inline u8 gfs2_testbit(const struct gfs2_rbm *rbm)
131 {
132 	struct gfs2_bitmap *bi = rbm_bi(rbm);
133 	const u8 *buffer = bi->bi_bh->b_data + bi->bi_offset;
134 	const u8 *byte;
135 	unsigned int bit;
136 
137 	byte = buffer + (rbm->offset / GFS2_NBBY);
138 	bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE;
139 
140 	return (*byte >> bit) & GFS2_BIT_MASK;
141 }
142 
143 /**
144  * gfs2_bit_search
145  * @ptr: Pointer to bitmap data
146  * @mask: Mask to use (normally 0x55555.... but adjusted for search start)
147  * @state: The state we are searching for
148  *
149  * We xor the bitmap data with a pattern which is the bitwise opposite
150  * of what we are looking for; this gives rise to a pattern of ones
151  * wherever there is a match. Since we have two bits per entry, we
152  * take this pattern, shift it down by one place and then AND it with
153  * the original. All the even bit positions (0,2,4, etc) then represent
154  * successful matches, so we mask with 0x55555..... to remove the unwanted
155  * odd bit positions.
156  *
157  * This allows searching of a whole u64 at once (32 blocks) with a
158  * single test (on 64 bit arches).
159  */
160 
161 static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state)
162 {
163 	u64 tmp;
164 	static const u64 search[] = {
165 		[0] = 0xffffffffffffffffULL,
166 		[1] = 0xaaaaaaaaaaaaaaaaULL,
167 		[2] = 0x5555555555555555ULL,
168 		[3] = 0x0000000000000000ULL,
169 	};
170 	tmp = le64_to_cpu(*ptr) ^ search[state];
171 	tmp &= (tmp >> 1);
172 	tmp &= mask;
173 	return tmp;
174 }
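/*
 * An 8-bit walkthrough of the trick above (the real code does the same
 * with 64 bits at a time).  Searching for state 1 in the byte 0xd1,
 * which encodes the entries 01, 00, 01 and 11 for blocks 0..3:
 *
 *	0xd1 ^ search[1] (0xaa)  = 0x7b
 *	0x7b & (0x7b >> 1)       = 0x39
 *	0x39 & mask (0x55)       = 0x11
 *
 * Bits 0 and 4 of the result are set, i.e. entries 0 and 2, which are
 * exactly the two blocks in state 1.
 */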
175 
176 /**
177  * rs_cmp - multi-block reservation range compare
178  * @blk: absolute file system block number of the new reservation
179  * @len: number of blocks in the new reservation
180  * @rs: existing reservation to compare against
181  *
182  * Returns: 1 if the block range is beyond the reach of the reservation
183  *         -1 if the block range is before the start of the reservation
184  *          0 if the block range overlaps with the reservation
185  */
186 static inline int rs_cmp(u64 blk, u32 len, struct gfs2_blkreserv *rs)
187 {
188 	u64 startblk = gfs2_rbm_to_block(&rs->rs_rbm);
189 
190 	if (blk >= startblk + rs->rs_free)
191 		return 1;
192 	if (blk + len - 1 < startblk)
193 		return -1;
194 	return 0;
195 }
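/*
 * For example, a reservation starting at block 100 with rs_free = 10
 * covers blocks 100..109: rs_cmp(110, 5, rs) returns 1 (entirely after
 * the reservation), rs_cmp(95, 5, rs) returns -1 (95..99 ends before
 * block 100), and rs_cmp(98, 5, rs) returns 0 (98..102 overlaps it).
 */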
196 
197 /**
198  * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing
199  *       a block in a given allocation state.
200  * @buf: the buffer that holds the bitmaps
201  * @len: the length (in bytes) of the buffer
202  * @goal: start search at this block's bit-pair (within @buf)
203  * @state: GFS2_BLKST_XXX the state of the block we're looking for.
204  *
205  * Scope of @goal and returned block number is only within this bitmap buffer,
206  * not the entire rgrp or filesystem.  @buf will be offset from the actual
207  * beginning of a bitmap block buffer, skipping any header structures, but
208  * headers are always a multiple of 64 bits long so that the buffer is
209  * always aligned to a 64 bit boundary.
210  *
211  * The size of the buffer is in bytes, but it is assumed that it is
212  * always ok to read a complete multiple of 64 bits at the end
213  * of the block in case the end is not aligned to a natural boundary.
214  *
215  * Returns: the block number (bitmap buffer scope) that was found, or BFITNOENT
216  */
217 
218 static u32 gfs2_bitfit(const u8 *buf, const unsigned int len,
219 		       u32 goal, u8 state)
220 {
221 	u32 spoint = (goal << 1) & ((8*sizeof(u64)) - 1);
222 	const __le64 *ptr = ((__le64 *)buf) + (goal >> 5);
223 	const __le64 *end = (__le64 *)(buf + ALIGN(len, sizeof(u64)));
224 	u64 tmp;
225 	u64 mask = 0x5555555555555555ULL;
226 	u32 bit;
227 
228 	/* Mask off bits we don't care about at the start of the search */
229 	mask <<= spoint;
230 	tmp = gfs2_bit_search(ptr, mask, state);
231 	ptr++;
232 	while (tmp == 0 && ptr < end) {
233 		tmp = gfs2_bit_search(ptr, 0x5555555555555555ULL, state);
234 		ptr++;
235 	}
236 	/* Mask off any bits which are more than len bytes from the start */
237 	if (ptr == end && (len & (sizeof(u64) - 1)))
238 		tmp &= (((u64)~0) >> (64 - 8*(len & (sizeof(u64) - 1))));
239 	/* Didn't find anything, so return */
240 	if (tmp == 0)
241 		return BFITNOENT;
242 	ptr--;
243 	bit = __ffs64(tmp);
244 	bit /= 2;	/* two bits per entry in the bitmap */
245 	return (((const unsigned char *)ptr - buf) * GFS2_NBBY) + bit;
246 }
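/*
 * Worked example of the return value arithmetic: if the first match is
 * found in the third u64 of the buffer (so ptr - buf == 16 bytes after
 * the final ptr--) and __ffs64() reports bit 10 of tmp, then bit /= 2
 * gives entry 5 within that word and the function returns
 * 16 * GFS2_NBBY + 5 = 69, a bit-pair index relative to this buffer.
 */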
247 
248 /**
249  * gfs2_rbm_from_block - Set the rbm based upon rgd and block number
250  * @rbm: The rbm with rgd already set correctly
251  * @block: The block number (filesystem relative)
252  *
253  * This sets the bi and offset members of an rbm based on a
254  * resource group and a filesystem relative block number. The
255  * resource group must be set in the rbm on entry, the bi and
256  * offset members will be set by this function.
257  *
258  * Returns: 0 on success, or an error code
259  */
260 
261 static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block)
262 {
263 	u64 rblock = block - rbm->rgd->rd_data0;
264 
265 	if (WARN_ON_ONCE(rblock > UINT_MAX))
266 		return -EINVAL;
267 	if (block >= rbm->rgd->rd_data0 + rbm->rgd->rd_data)
268 		return -E2BIG;
269 
270 	rbm->bii = 0;
271 	rbm->offset = (u32)(rblock);
272 	/* Check if the block is within the first bitmap block */
273 	if (rbm->offset < rbm_bi(rbm)->bi_blocks)
274 		return 0;
275 
276 	/* Adjust for the size diff between gfs2_meta_header and gfs2_rgrp */
277 	rbm->offset += (sizeof(struct gfs2_rgrp) -
278 			sizeof(struct gfs2_meta_header)) * GFS2_NBBY;
279 	rbm->bii = rbm->offset / rbm->rgd->rd_sbd->sd_blocks_per_bitmap;
280 	rbm->offset -= rbm->bii * rbm->rgd->rd_sbd->sd_blocks_per_bitmap;
281 	return 0;
282 }
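/*
 * A worked example with assumed geometry (4K filesystem blocks and the
 * standard on-disk header sizes, giving sd_blocks_per_bitmap =
 * (4096 - sizeof(struct gfs2_meta_header)) * GFS2_NBBY = 16288): for
 * rblock = 20000, which lies beyond the first bitmap, the adjustment
 * adds (sizeof(struct gfs2_rgrp) - sizeof(struct gfs2_meta_header)) *
 * GFS2_NBBY = 416 to the offset, giving 20416, so bii = 20416 / 16288
 * = 1 and offset = 20416 - 16288 = 4128 within the second bitmap.
 */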
283 
284 /**
285  * gfs2_rbm_incr - increment an rbm structure
286  * @rbm: The rbm with rgd already set correctly
287  *
288  * This function takes an existing rbm structure and increments it to the next
289  * viable block offset.
290  *
291  * Returns: If incrementing the offset would cause the rbm to go past the
292  *          end of the rgrp, true is returned, otherwise false.
293  *
294  */
295 
296 static bool gfs2_rbm_incr(struct gfs2_rbm *rbm)
297 {
298 	if (rbm->offset + 1 < rbm_bi(rbm)->bi_blocks) { /* in the same bitmap */
299 		rbm->offset++;
300 		return false;
301 	}
302 	if (rbm->bii == rbm->rgd->rd_length - 1) /* at the last bitmap */
303 		return true;
304 
305 	rbm->offset = 0;
306 	rbm->bii++;
307 	return false;
308 }
309 
310 /**
311  * gfs2_unaligned_extlen - Look for free blocks which are not byte aligned
312  * @rbm: Position to search (value/result)
313  * @n_unaligned: Number of unaligned blocks to check
314  * @len: Decremented for each block found (terminate on zero)
315  *
316  * Returns: true if a non-free block is encountered
317  */
318 
319 static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *len)
320 {
321 	u32 n;
322 	u8 res;
323 
324 	for (n = 0; n < n_unaligned; n++) {
325 		res = gfs2_testbit(rbm);
326 		if (res != GFS2_BLKST_FREE)
327 			return true;
328 		(*len)--;
329 		if (*len == 0)
330 			return true;
331 		if (gfs2_rbm_incr(rbm))
332 			return true;
333 	}
334 
335 	return false;
336 }
337 
338 /**
339  * gfs2_free_extlen - Return extent length of free blocks
340  * @rrbm: Starting position
341  * @len: Max length to check
342  *
343  * Starting at the block specified by the rbm, see how many free blocks
344  * there are, not reading more than len blocks ahead. This can be done
345  * using memchr_inv when the blocks are byte aligned, but has to be done
346  * on a block by block basis in case of unaligned blocks. Also this
347  * function can cope with bitmap boundaries (although it must stop on
348  * a resource group boundary)
349  *
350  * Returns: Number of free blocks in the extent
351  */
352 
353 static u32 gfs2_free_extlen(const struct gfs2_rbm *rrbm, u32 len)
354 {
355 	struct gfs2_rbm rbm = *rrbm;
356 	u32 n_unaligned = rbm.offset & 3;
357 	u32 size = len;
358 	u32 bytes;
359 	u32 chunk_size;
360 	u8 *ptr, *start, *end;
361 	u64 block;
362 	struct gfs2_bitmap *bi;
363 
364 	if (n_unaligned &&
365 	    gfs2_unaligned_extlen(&rbm, 4 - n_unaligned, &len))
366 		goto out;
367 
368 	n_unaligned = len & 3;
369 	/* Start is now byte aligned */
370 	while (len > 3) {
371 		bi = rbm_bi(&rbm);
372 		start = bi->bi_bh->b_data;
373 		if (bi->bi_clone)
374 			start = bi->bi_clone;
375 		end = start + bi->bi_bh->b_size;
376 		start += bi->bi_offset;
377 		BUG_ON(rbm.offset & 3);
378 		start += (rbm.offset / GFS2_NBBY);
379 		bytes = min_t(u32, len / GFS2_NBBY, (end - start));
380 		ptr = memchr_inv(start, 0, bytes);
381 		chunk_size = ((ptr == NULL) ? bytes : (ptr - start));
382 		chunk_size *= GFS2_NBBY;
383 		BUG_ON(len < chunk_size);
384 		len -= chunk_size;
385 		block = gfs2_rbm_to_block(&rbm);
386 		if (gfs2_rbm_from_block(&rbm, block + chunk_size)) {
387 			n_unaligned = 0;
388 			break;
389 		}
390 		if (ptr) {
391 			n_unaligned = 3;
392 			break;
393 		}
394 		n_unaligned = len & 3;
395 	}
396 
397 	/* Deal with any bits left over at the end */
398 	if (n_unaligned)
399 		gfs2_unaligned_extlen(&rbm, n_unaligned, &len);
400 out:
401 	return size - len;
402 }
403 
404 /**
405  * gfs2_bitcount - count the number of bits in a certain state
406  * @rgd: the resource group descriptor
407  * @buffer: the buffer that holds the bitmaps
408  * @buflen: the length (in bytes) of the buffer
409  * @state: the state of the block we're looking for
410  *
411  * Returns: The number of bits
412  */
413 
414 static u32 gfs2_bitcount(struct gfs2_rgrpd *rgd, const u8 *buffer,
415 			 unsigned int buflen, u8 state)
416 {
417 	const u8 *byte = buffer;
418 	const u8 *end = buffer + buflen;
419 	const u8 state1 = state << 2;
420 	const u8 state2 = state << 4;
421 	const u8 state3 = state << 6;
422 	u32 count = 0;
423 
424 	for (; byte < end; byte++) {
425 		if (((*byte) & 0x03) == state)
426 			count++;
427 		if (((*byte) & 0x0C) == state1)
428 			count++;
429 		if (((*byte) & 0x30) == state2)
430 			count++;
431 		if (((*byte) & 0xC0) == state3)
432 			count++;
433 	}
434 
435 	return count;
436 }
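/*
 * For instance, counting state 1 in the byte 0xe4 (pairs 00, 01, 10, 11):
 * only the 0x0C comparison hits, since 0xe4 & 0x0C == 0x04 == state1, so
 * the byte contributes a single block in state 1.  A hypothetical sanity
 * check built on this (mirroring gfs2_rgrp_verify() below) might be:
 */
#if 0
	u32 total = 0, state;

	for (state = 0; state < 4; state++)
		total += gfs2_bitcount(rgd, bi->bi_bh->b_data + bi->bi_offset,
				       bi->bi_len, state);
	/* every bit-pair is in exactly one state */
	BUG_ON(total != bi->bi_len * GFS2_NBBY);
#endif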
437 
438 /**
439  * gfs2_rgrp_verify - Verify that a resource group is consistent
440  * @rgd: the rgrp
441  *
442  */
443 
444 void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd)
445 {
446 	struct gfs2_sbd *sdp = rgd->rd_sbd;
447 	struct gfs2_bitmap *bi = NULL;
448 	u32 length = rgd->rd_length;
449 	u32 count[4], tmp;
450 	int buf, x;
451 
452 	memset(count, 0, 4 * sizeof(u32));
453 
454 	/* Count # blocks in each of 4 possible allocation states */
455 	for (buf = 0; buf < length; buf++) {
456 		bi = rgd->rd_bits + buf;
457 		for (x = 0; x < 4; x++)
458 			count[x] += gfs2_bitcount(rgd,
459 						  bi->bi_bh->b_data +
460 						  bi->bi_offset,
461 						  bi->bi_len, x);
462 	}
463 
464 	if (count[0] != rgd->rd_free) {
465 		if (gfs2_consist_rgrpd(rgd))
466 			fs_err(sdp, "free data mismatch:  %u != %u\n",
467 			       count[0], rgd->rd_free);
468 		return;
469 	}
470 
471 	tmp = rgd->rd_data - rgd->rd_free - rgd->rd_dinodes;
472 	if (count[1] != tmp) {
473 		if (gfs2_consist_rgrpd(rgd))
474 			fs_err(sdp, "used data mismatch:  %u != %u\n",
475 			       count[1], tmp);
476 		return;
477 	}
478 
479 	if (count[2] + count[3] != rgd->rd_dinodes) {
480 		if (gfs2_consist_rgrpd(rgd))
481 			fs_err(sdp, "used metadata mismatch:  %u != %u\n",
482 			       count[2] + count[3], rgd->rd_dinodes);
483 		return;
484 	}
485 }
486 
487 static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block)
488 {
489 	u64 first = rgd->rd_data0;
490 	u64 last = first + rgd->rd_data;
491 	return first <= block && block < last;
492 }
493 
494 /**
495  * gfs2_blk2rgrpd - Find resource group for a given data/meta block number
496  * @sdp: The GFS2 superblock
497  * @blk: The data block number
498  * @exact: True if this needs to be an exact match
499  *
500  * Returns: The resource group, or NULL if not found
501  */
502 
503 struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk, bool exact)
504 {
505 	struct rb_node *n, *next;
506 	struct gfs2_rgrpd *cur;
507 
508 	spin_lock(&sdp->sd_rindex_spin);
509 	n = sdp->sd_rindex_tree.rb_node;
510 	while (n) {
511 		cur = rb_entry(n, struct gfs2_rgrpd, rd_node);
512 		next = NULL;
513 		if (blk < cur->rd_addr)
514 			next = n->rb_left;
515 		else if (blk >= cur->rd_data0 + cur->rd_data)
516 			next = n->rb_right;
517 		if (next == NULL) {
518 			spin_unlock(&sdp->sd_rindex_spin);
519 			if (exact) {
520 				if (blk < cur->rd_addr)
521 					return NULL;
522 				if (blk >= cur->rd_data0 + cur->rd_data)
523 					return NULL;
524 			}
525 			return cur;
526 		}
527 		n = next;
528 	}
529 	spin_unlock(&sdp->sd_rindex_spin);
530 
531 	return NULL;
532 }
533 
534 /**
535  * gfs2_rgrpd_get_first - get the first Resource Group in the filesystem
536  * @sdp: The GFS2 superblock
537  *
538  * Returns: The first rgrp in the filesystem
539  */
540 
541 struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp)
542 {
543 	const struct rb_node *n;
544 	struct gfs2_rgrpd *rgd;
545 
546 	spin_lock(&sdp->sd_rindex_spin);
547 	n = rb_first(&sdp->sd_rindex_tree);
548 	rgd = rb_entry(n, struct gfs2_rgrpd, rd_node);
549 	spin_unlock(&sdp->sd_rindex_spin);
550 
551 	return rgd;
552 }
553 
554 /**
555  * gfs2_rgrpd_get_next - get the next RG
556  * @rgd: the resource group descriptor
557  *
558  * Returns: The next rgrp
559  */
560 
561 struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd)
562 {
563 	struct gfs2_sbd *sdp = rgd->rd_sbd;
564 	const struct rb_node *n;
565 
566 	spin_lock(&sdp->sd_rindex_spin);
567 	n = rb_next(&rgd->rd_node);
568 	if (n == NULL)
569 		n = rb_first(&sdp->sd_rindex_tree);
570 
571 	if (unlikely(&rgd->rd_node == n)) {
572 		spin_unlock(&sdp->sd_rindex_spin);
573 		return NULL;
574 	}
575 	rgd = rb_entry(n, struct gfs2_rgrpd, rd_node);
576 	spin_unlock(&sdp->sd_rindex_spin);
577 	return rgd;
578 }
579 
580 void check_and_update_goal(struct gfs2_inode *ip)
581 {
582 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
583 	if (!ip->i_goal || gfs2_blk2rgrpd(sdp, ip->i_goal, 1) == NULL)
584 		ip->i_goal = ip->i_no_addr;
585 }
586 
587 void gfs2_free_clones(struct gfs2_rgrpd *rgd)
588 {
589 	int x;
590 
591 	for (x = 0; x < rgd->rd_length; x++) {
592 		struct gfs2_bitmap *bi = rgd->rd_bits + x;
593 		kfree(bi->bi_clone);
594 		bi->bi_clone = NULL;
595 	}
596 }
597 
598 /**
599  * gfs2_rs_alloc - make sure we have a reservation assigned to the inode
600  * @ip: the inode for this reservation
601  */
602 int gfs2_rs_alloc(struct gfs2_inode *ip)
603 {
604 	int error = 0;
605 
606 	down_write(&ip->i_rw_mutex);
607 	if (ip->i_res)
608 		goto out;
609 
610 	ip->i_res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS);
611 	if (!ip->i_res) {
612 		error = -ENOMEM;
613 		goto out;
614 	}
615 
616 	RB_CLEAR_NODE(&ip->i_res->rs_node);
617 out:
618 	up_write(&ip->i_rw_mutex);
619 	return error;
620 }
621 
622 static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs)
623 {
624 	gfs2_print_dbg(seq, "  B: n:%llu s:%llu b:%u f:%u\n",
625 		       (unsigned long long)rs->rs_inum,
626 		       (unsigned long long)gfs2_rbm_to_block(&rs->rs_rbm),
627 		       rs->rs_rbm.offset, rs->rs_free);
628 }
629 
630 /**
631  * __rs_deltree - remove a multi-block reservation from the rgd tree
632  * @rs: The reservation to remove
633  *
634  */
635 static void __rs_deltree(struct gfs2_blkreserv *rs)
636 {
637 	struct gfs2_rgrpd *rgd;
638 
639 	if (!gfs2_rs_active(rs))
640 		return;
641 
642 	rgd = rs->rs_rbm.rgd;
643 	trace_gfs2_rs(rs, TRACE_RS_TREEDEL);
644 	rb_erase(&rs->rs_node, &rgd->rd_rstree);
645 	RB_CLEAR_NODE(&rs->rs_node);
646 
647 	if (rs->rs_free) {
648 		struct gfs2_bitmap *bi = rbm_bi(&rs->rs_rbm);
649 
650 		/* return reserved blocks to the rgrp */
651 		BUG_ON(rs->rs_rbm.rgd->rd_reserved < rs->rs_free);
652 		rs->rs_rbm.rgd->rd_reserved -= rs->rs_free;
653 		/* The rgrp extent failure point is likely not to increase;
654 		   it will only do so if the freed blocks are somehow
655 		   contiguous with a span of free blocks that follows. Still,
656 		   it will force the number to be recalculated later. */
657 		rgd->rd_extfail_pt += rs->rs_free;
658 		rs->rs_free = 0;
659 		clear_bit(GBF_FULL, &bi->bi_flags);
660 	}
661 }
662 
663 /**
664  * gfs2_rs_deltree - remove a multi-block reservation from the rgd tree
665  * @rs: The reservation to remove
666  *
667  */
668 void gfs2_rs_deltree(struct gfs2_blkreserv *rs)
669 {
670 	struct gfs2_rgrpd *rgd;
671 
672 	rgd = rs->rs_rbm.rgd;
673 	if (rgd) {
674 		spin_lock(&rgd->rd_rsspin);
675 		__rs_deltree(rs);
676 		spin_unlock(&rgd->rd_rsspin);
677 	}
678 }
679 
680 /**
681  * gfs2_rs_delete - delete a multi-block reservation
682  * @ip: The inode for this reservation
683  * @wcount: The inode's write count, or NULL
684  *
685  */
686 void gfs2_rs_delete(struct gfs2_inode *ip, atomic_t *wcount)
687 {
688 	down_write(&ip->i_rw_mutex);
689 	if (ip->i_res && ((wcount == NULL) || (atomic_read(wcount) <= 1))) {
690 		gfs2_rs_deltree(ip->i_res);
691 		BUG_ON(ip->i_res->rs_free);
692 		kmem_cache_free(gfs2_rsrv_cachep, ip->i_res);
693 		ip->i_res = NULL;
694 	}
695 	up_write(&ip->i_rw_mutex);
696 }
697 
698 /**
699  * return_all_reservations - return all reserved blocks back to the rgrp.
700  * @rgd: the rgrp that needs its space back
701  *
702  * We previously reserved a bunch of blocks for allocation. Now we need to
703  * give them back. This leaves the reservation structures intact, but removes
704  * all of their corresponding "no-fly zones".
705  */
706 static void return_all_reservations(struct gfs2_rgrpd *rgd)
707 {
708 	struct rb_node *n;
709 	struct gfs2_blkreserv *rs;
710 
711 	spin_lock(&rgd->rd_rsspin);
712 	while ((n = rb_first(&rgd->rd_rstree))) {
713 		rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
714 		__rs_deltree(rs);
715 	}
716 	spin_unlock(&rgd->rd_rsspin);
717 }
718 
719 void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
720 {
721 	struct rb_node *n;
722 	struct gfs2_rgrpd *rgd;
723 	struct gfs2_glock *gl;
724 
725 	while ((n = rb_first(&sdp->sd_rindex_tree))) {
726 		rgd = rb_entry(n, struct gfs2_rgrpd, rd_node);
727 		gl = rgd->rd_gl;
728 
729 		rb_erase(n, &sdp->sd_rindex_tree);
730 
731 		if (gl) {
732 			spin_lock(&gl->gl_spin);
733 			gl->gl_object = NULL;
734 			spin_unlock(&gl->gl_spin);
735 			gfs2_glock_add_to_lru(gl);
736 			gfs2_glock_put(gl);
737 		}
738 
739 		gfs2_free_clones(rgd);
740 		kfree(rgd->rd_bits);
741 		return_all_reservations(rgd);
742 		kmem_cache_free(gfs2_rgrpd_cachep, rgd);
743 	}
744 }
745 
746 static void gfs2_rindex_print(const struct gfs2_rgrpd *rgd)
747 {
748 	pr_info("ri_addr = %llu\n", (unsigned long long)rgd->rd_addr);
749 	pr_info("ri_length = %u\n", rgd->rd_length);
750 	pr_info("ri_data0 = %llu\n", (unsigned long long)rgd->rd_data0);
751 	pr_info("ri_data = %u\n", rgd->rd_data);
752 	pr_info("ri_bitbytes = %u\n", rgd->rd_bitbytes);
753 }
754 
755 /**
756  * compute_bitstructs - Compute the bitmap sizes
757  * @rgd: The resource group descriptor
758  *
759  * Calculates bitmap descriptors, one for each block that contains bitmap data
760  *
761  * Returns: errno
762  */
763 
764 static int compute_bitstructs(struct gfs2_rgrpd *rgd)
765 {
766 	struct gfs2_sbd *sdp = rgd->rd_sbd;
767 	struct gfs2_bitmap *bi;
768 	u32 length = rgd->rd_length; /* # blocks in hdr & bitmap */
769 	u32 bytes_left, bytes;
770 	int x;
771 
772 	if (!length)
773 		return -EINVAL;
774 
775 	rgd->rd_bits = kcalloc(length, sizeof(struct gfs2_bitmap), GFP_NOFS);
776 	if (!rgd->rd_bits)
777 		return -ENOMEM;
778 
779 	bytes_left = rgd->rd_bitbytes;
780 
781 	for (x = 0; x < length; x++) {
782 		bi = rgd->rd_bits + x;
783 
784 		bi->bi_flags = 0;
785 		/* small rgrp; bitmap stored completely in header block */
786 		if (length == 1) {
787 			bytes = bytes_left;
788 			bi->bi_offset = sizeof(struct gfs2_rgrp);
789 			bi->bi_start = 0;
790 			bi->bi_len = bytes;
791 			bi->bi_blocks = bytes * GFS2_NBBY;
792 		/* header block */
793 		} else if (x == 0) {
794 			bytes = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_rgrp);
795 			bi->bi_offset = sizeof(struct gfs2_rgrp);
796 			bi->bi_start = 0;
797 			bi->bi_len = bytes;
798 			bi->bi_blocks = bytes * GFS2_NBBY;
799 		/* last block */
800 		} else if (x + 1 == length) {
801 			bytes = bytes_left;
802 			bi->bi_offset = sizeof(struct gfs2_meta_header);
803 			bi->bi_start = rgd->rd_bitbytes - bytes_left;
804 			bi->bi_len = bytes;
805 			bi->bi_blocks = bytes * GFS2_NBBY;
806 		/* other blocks */
807 		} else {
808 			bytes = sdp->sd_sb.sb_bsize -
809 				sizeof(struct gfs2_meta_header);
810 			bi->bi_offset = sizeof(struct gfs2_meta_header);
811 			bi->bi_start = rgd->rd_bitbytes - bytes_left;
812 			bi->bi_len = bytes;
813 			bi->bi_blocks = bytes * GFS2_NBBY;
814 		}
815 
816 		bytes_left -= bytes;
817 	}
818 
819 	if (bytes_left) {
820 		gfs2_consist_rgrpd(rgd);
821 		return -EIO;
822 	}
823 	bi = rgd->rd_bits + (length - 1);
824 	if ((bi->bi_start + bi->bi_len) * GFS2_NBBY != rgd->rd_data) {
825 		if (gfs2_consist_rgrpd(rgd)) {
826 			gfs2_rindex_print(rgd);
827 			fs_err(sdp, "start=%u len=%u offset=%u\n",
828 			       bi->bi_start, bi->bi_len, bi->bi_offset);
829 		}
830 		return -EIO;
831 	}
832 
833 	return 0;
834 }
835 
836 /**
837  * gfs2_ri_total - Total up the file system space, according to the rindex.
838  * @sdp: the filesystem
839  *
840  */
841 u64 gfs2_ri_total(struct gfs2_sbd *sdp)
842 {
843 	u64 total_data = 0;
844 	struct inode *inode = sdp->sd_rindex;
845 	struct gfs2_inode *ip = GFS2_I(inode);
846 	char buf[sizeof(struct gfs2_rindex)];
847 	int error, rgrps;
848 
849 	for (rgrps = 0;; rgrps++) {
850 		loff_t pos = rgrps * sizeof(struct gfs2_rindex);
851 
852 		if (pos + sizeof(struct gfs2_rindex) > i_size_read(inode))
853 			break;
854 		error = gfs2_internal_read(ip, buf, &pos,
855 					   sizeof(struct gfs2_rindex));
856 		if (error != sizeof(struct gfs2_rindex))
857 			break;
858 		total_data += be32_to_cpu(((struct gfs2_rindex *)buf)->ri_data);
859 	}
860 	return total_data;
861 }
862 
863 static int rgd_insert(struct gfs2_rgrpd *rgd)
864 {
865 	struct gfs2_sbd *sdp = rgd->rd_sbd;
866 	struct rb_node **newn = &sdp->sd_rindex_tree.rb_node, *parent = NULL;
867 
868 	/* Figure out where to put new node */
869 	while (*newn) {
870 		struct gfs2_rgrpd *cur = rb_entry(*newn, struct gfs2_rgrpd,
871 						  rd_node);
872 
873 		parent = *newn;
874 		if (rgd->rd_addr < cur->rd_addr)
875 			newn = &((*newn)->rb_left);
876 		else if (rgd->rd_addr > cur->rd_addr)
877 			newn = &((*newn)->rb_right);
878 		else
879 			return -EEXIST;
880 	}
881 
882 	rb_link_node(&rgd->rd_node, parent, newn);
883 	rb_insert_color(&rgd->rd_node, &sdp->sd_rindex_tree);
884 	sdp->sd_rgrps++;
885 	return 0;
886 }
887 
888 /**
889  * read_rindex_entry - Pull in a new resource index entry from the disk
890  * @ip: Pointer to the rindex inode
891  *
892  * Returns: 0 on success, > 0 on EOF, error code otherwise
893  */
894 
895 static int read_rindex_entry(struct gfs2_inode *ip)
896 {
897 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
898 	const unsigned bsize = sdp->sd_sb.sb_bsize;
899 	loff_t pos = sdp->sd_rgrps * sizeof(struct gfs2_rindex);
900 	struct gfs2_rindex buf;
901 	int error;
902 	struct gfs2_rgrpd *rgd;
903 
904 	if (pos >= i_size_read(&ip->i_inode))
905 		return 1;
906 
907 	error = gfs2_internal_read(ip, (char *)&buf, &pos,
908 				   sizeof(struct gfs2_rindex));
909 
910 	if (error != sizeof(struct gfs2_rindex))
911 		return (error == 0) ? 1 : error;
912 
913 	rgd = kmem_cache_zalloc(gfs2_rgrpd_cachep, GFP_NOFS);
914 	error = -ENOMEM;
915 	if (!rgd)
916 		return error;
917 
918 	rgd->rd_sbd = sdp;
919 	rgd->rd_addr = be64_to_cpu(buf.ri_addr);
920 	rgd->rd_length = be32_to_cpu(buf.ri_length);
921 	rgd->rd_data0 = be64_to_cpu(buf.ri_data0);
922 	rgd->rd_data = be32_to_cpu(buf.ri_data);
923 	rgd->rd_bitbytes = be32_to_cpu(buf.ri_bitbytes);
924 	spin_lock_init(&rgd->rd_rsspin);
925 
926 	error = compute_bitstructs(rgd);
927 	if (error)
928 		goto fail;
929 
930 	error = gfs2_glock_get(sdp, rgd->rd_addr,
931 			       &gfs2_rgrp_glops, CREATE, &rgd->rd_gl);
932 	if (error)
933 		goto fail;
934 
935 	rgd->rd_gl->gl_object = rgd;
936 	rgd->rd_gl->gl_vm.start = rgd->rd_addr * bsize;
937 	rgd->rd_gl->gl_vm.end = rgd->rd_gl->gl_vm.start + (rgd->rd_length * bsize) - 1;
938 	rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lksb.sb_lvbptr;
939 	rgd->rd_flags &= ~GFS2_RDF_UPTODATE;
940 	if (rgd->rd_data > sdp->sd_max_rg_data)
941 		sdp->sd_max_rg_data = rgd->rd_data;
942 	spin_lock(&sdp->sd_rindex_spin);
943 	error = rgd_insert(rgd);
944 	spin_unlock(&sdp->sd_rindex_spin);
945 	if (!error)
946 		return 0;
947 
948 	error = 0; /* someone else read in the rgrp; free it and ignore it */
949 	gfs2_glock_put(rgd->rd_gl);
950 
951 fail:
952 	kfree(rgd->rd_bits);
953 	kmem_cache_free(gfs2_rgrpd_cachep, rgd);
954 	return error;
955 }
956 
957 /**
958  * gfs2_ri_update - Pull in a new resource index from the disk
959  * @ip: pointer to the rindex inode
960  *
961  * Returns: 0 on successful update, error code otherwise
962  */
963 
964 static int gfs2_ri_update(struct gfs2_inode *ip)
965 {
966 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
967 	int error;
968 
969 	do {
970 		error = read_rindex_entry(ip);
971 	} while (error == 0);
972 
973 	if (error < 0)
974 		return error;
975 
976 	sdp->sd_rindex_uptodate = 1;
977 	return 0;
978 }
979 
980 /**
981  * gfs2_rindex_update - Update the rindex if required
982  * @sdp: The GFS2 superblock
983  *
984  * We grab a lock on the rindex inode to make sure that it doesn't
985  * change whilst we are performing an operation. We keep this lock
986  * for quite long periods of time compared to other locks. This
987  * doesn't matter, since it is shared and it is very, very rarely
988  * accessed in the exclusive mode (i.e. only when expanding the filesystem).
989  *
990  * This makes sure that we're using the latest copy of the resource index
991  * special file, which might have been updated if someone expanded the
992  * filesystem (via the gfs2_grow utility), which adds new resource groups.
993  *
994  * Returns: 0 on success, error code otherwise
995  */
996 
997 int gfs2_rindex_update(struct gfs2_sbd *sdp)
998 {
999 	struct gfs2_inode *ip = GFS2_I(sdp->sd_rindex);
1000 	struct gfs2_glock *gl = ip->i_gl;
1001 	struct gfs2_holder ri_gh;
1002 	int error = 0;
1003 	int unlock_required = 0;
1004 
1005 	/* Read new copy from disk if we don't have the latest */
1006 	if (!sdp->sd_rindex_uptodate) {
1007 		if (!gfs2_glock_is_locked_by_me(gl)) {
1008 			error = gfs2_glock_nq_init(gl, LM_ST_SHARED, 0, &ri_gh);
1009 			if (error)
1010 				return error;
1011 			unlock_required = 1;
1012 		}
1013 		if (!sdp->sd_rindex_uptodate)
1014 			error = gfs2_ri_update(ip);
1015 		if (unlock_required)
1016 			gfs2_glock_dq_uninit(&ri_gh);
1017 	}
1018 
1019 	return error;
1020 }
1021 
1022 static void gfs2_rgrp_in(struct gfs2_rgrpd *rgd, const void *buf)
1023 {
1024 	const struct gfs2_rgrp *str = buf;
1025 	u32 rg_flags;
1026 
1027 	rg_flags = be32_to_cpu(str->rg_flags);
1028 	rg_flags &= ~GFS2_RDF_MASK;
1029 	rgd->rd_flags &= GFS2_RDF_MASK;
1030 	rgd->rd_flags |= rg_flags;
1031 	rgd->rd_free = be32_to_cpu(str->rg_free);
1032 	rgd->rd_dinodes = be32_to_cpu(str->rg_dinodes);
1033 	rgd->rd_igeneration = be64_to_cpu(str->rg_igeneration);
1034 }
1035 
1036 static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf)
1037 {
1038 	struct gfs2_rgrp *str = buf;
1039 
1040 	str->rg_flags = cpu_to_be32(rgd->rd_flags & ~GFS2_RDF_MASK);
1041 	str->rg_free = cpu_to_be32(rgd->rd_free);
1042 	str->rg_dinodes = cpu_to_be32(rgd->rd_dinodes);
1043 	str->__pad = cpu_to_be32(0);
1044 	str->rg_igeneration = cpu_to_be64(rgd->rd_igeneration);
1045 	memset(&str->rg_reserved, 0, sizeof(str->rg_reserved));
1046 }
1047 
1048 static int gfs2_rgrp_lvb_valid(struct gfs2_rgrpd *rgd)
1049 {
1050 	struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl;
1051 	struct gfs2_rgrp *str = (struct gfs2_rgrp *)rgd->rd_bits[0].bi_bh->b_data;
1052 
1053 	if (rgl->rl_flags != str->rg_flags || rgl->rl_free != str->rg_free ||
1054 	    rgl->rl_dinodes != str->rg_dinodes ||
1055 	    rgl->rl_igeneration != str->rg_igeneration)
1056 		return 0;
1057 	return 1;
1058 }
1059 
1060 static void gfs2_rgrp_ondisk2lvb(struct gfs2_rgrp_lvb *rgl, const void *buf)
1061 {
1062 	const struct gfs2_rgrp *str = buf;
1063 
1064 	rgl->rl_magic = cpu_to_be32(GFS2_MAGIC);
1065 	rgl->rl_flags = str->rg_flags;
1066 	rgl->rl_free = str->rg_free;
1067 	rgl->rl_dinodes = str->rg_dinodes;
1068 	rgl->rl_igeneration = str->rg_igeneration;
1069 	rgl->__pad = 0UL;
1070 }
1071 
1072 static void update_rgrp_lvb_unlinked(struct gfs2_rgrpd *rgd, u32 change)
1073 {
1074 	struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl;
1075 	u32 unlinked = be32_to_cpu(rgl->rl_unlinked) + change;
1076 	rgl->rl_unlinked = cpu_to_be32(unlinked);
1077 }
1078 
1079 static u32 count_unlinked(struct gfs2_rgrpd *rgd)
1080 {
1081 	struct gfs2_bitmap *bi;
1082 	const u32 length = rgd->rd_length;
1083 	const u8 *buffer = NULL;
1084 	u32 i, goal, count = 0;
1085 
1086 	for (i = 0, bi = rgd->rd_bits; i < length; i++, bi++) {
1087 		goal = 0;
1088 		buffer = bi->bi_bh->b_data + bi->bi_offset;
1089 		WARN_ON(!buffer_uptodate(bi->bi_bh));
1090 		while (goal < bi->bi_len * GFS2_NBBY) {
1091 			goal = gfs2_bitfit(buffer, bi->bi_len, goal,
1092 					   GFS2_BLKST_UNLINKED);
1093 			if (goal == BFITNOENT)
1094 				break;
1095 			count++;
1096 			goal++;
1097 		}
1098 	}
1099 
1100 	return count;
1101 }
1102 
1103 
1104 /**
1105  * gfs2_rgrp_bh_get - Read in a RG's header and bitmaps
1106  * @rgd: the struct gfs2_rgrpd describing the RG to read in
1107  *
1108  * Read in all of a Resource Group's header and bitmap blocks.
1109  * Caller must eventually call gfs2_rgrp_go_unlock() to free the bitmaps.
1110  *
1111  * Returns: errno
1112  */
1113 
1114 static int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
1115 {
1116 	struct gfs2_sbd *sdp = rgd->rd_sbd;
1117 	struct gfs2_glock *gl = rgd->rd_gl;
1118 	unsigned int length = rgd->rd_length;
1119 	struct gfs2_bitmap *bi;
1120 	unsigned int x, y;
1121 	int error;
1122 
1123 	if (rgd->rd_bits[0].bi_bh != NULL)
1124 		return 0;
1125 
1126 	for (x = 0; x < length; x++) {
1127 		bi = rgd->rd_bits + x;
1128 		error = gfs2_meta_read(gl, rgd->rd_addr + x, 0, &bi->bi_bh);
1129 		if (error)
1130 			goto fail;
1131 	}
1132 
1133 	for (y = length; y--;) {
1134 		bi = rgd->rd_bits + y;
1135 		error = gfs2_meta_wait(sdp, bi->bi_bh);
1136 		if (error)
1137 			goto fail;
1138 		if (gfs2_metatype_check(sdp, bi->bi_bh, y ? GFS2_METATYPE_RB :
1139 					      GFS2_METATYPE_RG)) {
1140 			error = -EIO;
1141 			goto fail;
1142 		}
1143 	}
1144 
1145 	if (!(rgd->rd_flags & GFS2_RDF_UPTODATE)) {
1146 		for (x = 0; x < length; x++)
1147 			clear_bit(GBF_FULL, &rgd->rd_bits[x].bi_flags);
1148 		gfs2_rgrp_in(rgd, (rgd->rd_bits[0].bi_bh)->b_data);
1149 		rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK);
1150 		rgd->rd_free_clone = rgd->rd_free;
1151 		/* max out the rgrp allocation failure point */
1152 		rgd->rd_extfail_pt = rgd->rd_free;
1153 	}
1154 	if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) {
1155 		rgd->rd_rgl->rl_unlinked = cpu_to_be32(count_unlinked(rgd));
1156 		gfs2_rgrp_ondisk2lvb(rgd->rd_rgl,
1157 				     rgd->rd_bits[0].bi_bh->b_data);
1158 	} else if (sdp->sd_args.ar_rgrplvb) {
1159 		if (!gfs2_rgrp_lvb_valid(rgd)) {
1161 			gfs2_consist_rgrpd(rgd);
1162 			error = -EIO;
1163 			goto fail;
1164 		}
1165 		if (rgd->rd_rgl->rl_unlinked == 0)
1166 			rgd->rd_flags &= ~GFS2_RDF_CHECK;
1167 	}
1168 	return 0;
1169 
1170 fail:
1171 	while (x--) {
1172 		bi = rgd->rd_bits + x;
1173 		brelse(bi->bi_bh);
1174 		bi->bi_bh = NULL;
1175 		gfs2_assert_warn(sdp, !bi->bi_clone);
1176 	}
1177 
1178 	return error;
1179 }
1180 
1181 static int update_rgrp_lvb(struct gfs2_rgrpd *rgd)
1182 {
1183 	u32 rl_flags;
1184 
1185 	if (rgd->rd_flags & GFS2_RDF_UPTODATE)
1186 		return 0;
1187 
1188 	if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic)
1189 		return gfs2_rgrp_bh_get(rgd);
1190 
1191 	rl_flags = be32_to_cpu(rgd->rd_rgl->rl_flags);
1192 	rl_flags &= ~GFS2_RDF_MASK;
1193 	rgd->rd_flags &= GFS2_RDF_MASK;
1194 	rgd->rd_flags |= (rl_flags | GFS2_RDF_UPTODATE | GFS2_RDF_CHECK);
1195 	if (rgd->rd_rgl->rl_unlinked == 0)
1196 		rgd->rd_flags &= ~GFS2_RDF_CHECK;
1197 	rgd->rd_free = be32_to_cpu(rgd->rd_rgl->rl_free);
1198 	rgd->rd_free_clone = rgd->rd_free;
1199 	rgd->rd_dinodes = be32_to_cpu(rgd->rd_rgl->rl_dinodes);
1200 	rgd->rd_igeneration = be64_to_cpu(rgd->rd_rgl->rl_igeneration);
1201 	return 0;
1202 }
1203 
1204 int gfs2_rgrp_go_lock(struct gfs2_holder *gh)
1205 {
1206 	struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object;
1207 	struct gfs2_sbd *sdp = rgd->rd_sbd;
1208 
1209 	if (gh->gh_flags & GL_SKIP && sdp->sd_args.ar_rgrplvb)
1210 		return 0;
1211 	return gfs2_rgrp_bh_get(rgd);
1212 }
1213 
1214 /**
1215  * gfs2_rgrp_go_unlock - Release RG bitmaps read in with gfs2_rgrp_bh_get()
1216  * @gh: The glock holder for the resource group
1217  *
1218  */
1219 
1220 void gfs2_rgrp_go_unlock(struct gfs2_holder *gh)
1221 {
1222 	struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object;
1223 	int x, length = rgd->rd_length;
1224 
1225 	for (x = 0; x < length; x++) {
1226 		struct gfs2_bitmap *bi = rgd->rd_bits + x;
1227 		if (bi->bi_bh) {
1228 			brelse(bi->bi_bh);
1229 			bi->bi_bh = NULL;
1230 		}
1231 	}
1232 
1233 }
1234 
1235 int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
1236 			     struct buffer_head *bh,
1237 			     const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed)
1238 {
1239 	struct super_block *sb = sdp->sd_vfs;
1240 	u64 blk;
1241 	sector_t start = 0;
1242 	sector_t nr_blks = 0;
1243 	int rv;
1244 	unsigned int x;
1245 	u32 trimmed = 0;
1246 	u8 diff;
1247 
1248 	for (x = 0; x < bi->bi_len; x++) {
1249 		const u8 *clone = bi->bi_clone ? bi->bi_clone : bi->bi_bh->b_data;
1250 		clone += bi->bi_offset;
1251 		clone += x;
1252 		if (bh) {
1253 			const u8 *orig = bh->b_data + bi->bi_offset + x;
1254 			diff = ~(*orig | (*orig >> 1)) & (*clone | (*clone >> 1));
1255 		} else {
1256 			diff = ~(*clone | (*clone >> 1));
1257 		}
1258 		diff &= 0x55;
1259 		if (diff == 0)
1260 			continue;
1261 		blk = offset + ((bi->bi_start + x) * GFS2_NBBY);
1262 		while (diff) {
1263 			if (diff & 1) {
1264 				if (nr_blks == 0)
1265 					goto start_new_extent;
1266 				if ((start + nr_blks) != blk) {
1267 					if (nr_blks >= minlen) {
1268 						rv = sb_issue_discard(sb,
1269 							start, nr_blks,
1270 							GFP_NOFS, 0);
1271 						if (rv)
1272 							goto fail;
1273 						trimmed += nr_blks;
1274 					}
1275 					nr_blks = 0;
1276 start_new_extent:
1277 					start = blk;
1278 				}
1279 				nr_blks++;
1280 			}
1281 			diff >>= 2;
1282 			blk++;
1283 		}
1284 	}
1285 	if (nr_blks >= minlen) {
1286 		rv = sb_issue_discard(sb, start, nr_blks, GFP_NOFS, 0);
1287 		if (rv)
1288 			goto fail;
1289 		trimmed += nr_blks;
1290 	}
1291 	if (ptrimmed)
1292 		*ptrimmed = trimmed;
1293 	return 0;
1294 
1295 fail:
1296 	if (sdp->sd_args.ar_discard)
1297 		fs_warn(sdp, "error %d on discard request, turning discards off for this filesystem", rv);
1298 	sdp->sd_args.ar_discard = 0;
1299 	return -EIO;
1300 }
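/*
 * The diff computation above is worth unpacking: (x | (x >> 1)) has the
 * low bit of a pair set iff that pair is non-zero (block in use), so
 * ~(*orig | (*orig >> 1)) & (*clone | (*clone >> 1)) marks blocks that
 * are free in the bitmap passed via @bh but still allocated in the
 * clone, i.e. blocks freed since the clone was taken and therefore
 * candidates for discard.  Example: *orig = 0x00 and *clone = 0x01
 * give 0xff & 0x01 = 0x01 after the 0x55 mask, selecting only block 0
 * of that byte.  Without @bh, every currently-free block is selected.
 */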
1301 
1302 /**
1303  * gfs2_fitrim - Generate discard requests for unused bits of the filesystem
1304  * @filp: Any file on the filesystem
1305  * @argp: Pointer to the arguments (also used to pass result)
1306  *
1307  * Returns: 0 on success, otherwise error code
1308  */
1309 
1310 int gfs2_fitrim(struct file *filp, void __user *argp)
1311 {
1312 	struct inode *inode = file_inode(filp);
1313 	struct gfs2_sbd *sdp = GFS2_SB(inode);
1314 	struct request_queue *q = bdev_get_queue(sdp->sd_vfs->s_bdev);
1315 	struct buffer_head *bh;
1316 	struct gfs2_rgrpd *rgd;
1317 	struct gfs2_rgrpd *rgd_end;
1318 	struct gfs2_holder gh;
1319 	struct fstrim_range r;
1320 	int ret = 0;
1321 	u64 amt;
1322 	u64 trimmed = 0;
1323 	u64 start, end, minlen;
1324 	unsigned int x;
1325 	unsigned bs_shift = sdp->sd_sb.sb_bsize_shift;
1326 
1327 	if (!capable(CAP_SYS_ADMIN))
1328 		return -EPERM;
1329 
1330 	if (!blk_queue_discard(q))
1331 		return -EOPNOTSUPP;
1332 
1333 	if (copy_from_user(&r, argp, sizeof(r)))
1334 		return -EFAULT;
1335 
1336 	ret = gfs2_rindex_update(sdp);
1337 	if (ret)
1338 		return ret;
1339 
1340 	start = r.start >> bs_shift;
1341 	end = start + (r.len >> bs_shift);
1342 	minlen = max_t(u64, r.minlen,
1343 		       q->limits.discard_granularity) >> bs_shift;
1344 
1345 	if (end <= start || minlen > sdp->sd_max_rg_data)
1346 		return -EINVAL;
1347 
1348 	rgd = gfs2_blk2rgrpd(sdp, start, 0);
1349 	rgd_end = gfs2_blk2rgrpd(sdp, end, 0);
1350 
1351 	if ((gfs2_rgrpd_get_first(sdp) == gfs2_rgrpd_get_next(rgd_end))
1352 	    && (start > rgd_end->rd_data0 + rgd_end->rd_data))
1353 		return -EINVAL; /* start is beyond the end of the fs */
1354 
1355 	while (1) {
1356 
1357 		ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &gh);
1358 		if (ret)
1359 			goto out;
1360 
1361 		if (!(rgd->rd_flags & GFS2_RGF_TRIMMED)) {
1362 			/* Trim each bitmap in the rgrp */
1363 			for (x = 0; x < rgd->rd_length; x++) {
1364 				struct gfs2_bitmap *bi = rgd->rd_bits + x;
1365 				ret = gfs2_rgrp_send_discards(sdp,
1366 						rgd->rd_data0, NULL, bi, minlen,
1367 						&amt);
1368 				if (ret) {
1369 					gfs2_glock_dq_uninit(&gh);
1370 					goto out;
1371 				}
1372 				trimmed += amt;
1373 			}
1374 
1375 			/* Mark rgrp as having been trimmed */
1376 			ret = gfs2_trans_begin(sdp, RES_RG_HDR, 0);
1377 			if (ret == 0) {
1378 				bh = rgd->rd_bits[0].bi_bh;
1379 				rgd->rd_flags |= GFS2_RGF_TRIMMED;
1380 				gfs2_trans_add_meta(rgd->rd_gl, bh);
1381 				gfs2_rgrp_out(rgd, bh->b_data);
1382 				gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, bh->b_data);
1383 				gfs2_trans_end(sdp);
1384 			}
1385 		}
1386 		gfs2_glock_dq_uninit(&gh);
1387 
1388 		if (rgd == rgd_end)
1389 			break;
1390 
1391 		rgd = gfs2_rgrpd_get_next(rgd);
1392 	}
1393 
1394 out:
1395 	r.len = trimmed << bs_shift;
1396 	if (copy_to_user(argp, &r, sizeof(r)))
1397 		return -EFAULT;
1398 
1399 	return ret;
1400 }
1401 
1402 /**
1403  * rs_insert - insert a new multi-block reservation into the rgrp's rb_tree
1404  * @ip: the inode structure
1405  *
1406  */
1407 static void rs_insert(struct gfs2_inode *ip)
1408 {
1409 	struct rb_node **newn, *parent = NULL;
1410 	int rc;
1411 	struct gfs2_blkreserv *rs = ip->i_res;
1412 	struct gfs2_rgrpd *rgd = rs->rs_rbm.rgd;
1413 	u64 fsblock = gfs2_rbm_to_block(&rs->rs_rbm);
1414 
1415 	BUG_ON(gfs2_rs_active(rs));
1416 
1417 	spin_lock(&rgd->rd_rsspin);
1418 	newn = &rgd->rd_rstree.rb_node;
1419 	while (*newn) {
1420 		struct gfs2_blkreserv *cur =
1421 			rb_entry(*newn, struct gfs2_blkreserv, rs_node);
1422 
1423 		parent = *newn;
1424 		rc = rs_cmp(fsblock, rs->rs_free, cur);
1425 		if (rc > 0)
1426 			newn = &((*newn)->rb_right);
1427 		else if (rc < 0)
1428 			newn = &((*newn)->rb_left);
1429 		else {
1430 			spin_unlock(&rgd->rd_rsspin);
1431 			WARN_ON(1);
1432 			return;
1433 		}
1434 	}
1435 
1436 	rb_link_node(&rs->rs_node, parent, newn);
1437 	rb_insert_color(&rs->rs_node, &rgd->rd_rstree);
1438 
1439 	/* Do our rgrp accounting for the reservation */
1440 	rgd->rd_reserved += rs->rs_free; /* blocks reserved */
1441 	spin_unlock(&rgd->rd_rsspin);
1442 	trace_gfs2_rs(rs, TRACE_RS_INSERT);
1443 }
1444 
1445 /**
1446  * rg_mblk_search - find a group of multiple free blocks to form a reservation
1447  * @rgd: the resource group descriptor
1448  * @ip: pointer to the inode for which we're reserving blocks
1449  * @ap: the allocation parameters
1450  *
1451  */
1452 
1453 static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip,
1454 			   const struct gfs2_alloc_parms *ap)
1455 {
1456 	struct gfs2_rbm rbm = { .rgd = rgd, };
1457 	u64 goal;
1458 	struct gfs2_blkreserv *rs = ip->i_res;
1459 	u32 extlen;
1460 	u32 free_blocks = rgd->rd_free_clone - rgd->rd_reserved;
1461 	int ret;
1462 	struct inode *inode = &ip->i_inode;
1463 
1464 	if (S_ISDIR(inode->i_mode))
1465 		extlen = 1;
1466 	else {
1467 		extlen = max_t(u32, atomic_read(&rs->rs_sizehint), ap->target);
1468 		extlen = clamp(extlen, RGRP_RSRV_MINBLKS, free_blocks);
1469 	}
1470 	if ((rgd->rd_free_clone < rgd->rd_reserved) || (free_blocks < extlen))
1471 		return;
1472 
1473 	/* Find bitmap block that contains bits for goal block */
1474 	if (rgrp_contains_block(rgd, ip->i_goal))
1475 		goal = ip->i_goal;
1476 	else
1477 		goal = rgd->rd_last_alloc + rgd->rd_data0;
1478 
1479 	if (WARN_ON(gfs2_rbm_from_block(&rbm, goal)))
1480 		return;
1481 
1482 	ret = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &extlen, ip, true, ap);
1483 	if (ret == 0) {
1484 		rs->rs_rbm = rbm;
1485 		rs->rs_free = extlen;
1486 		rs->rs_inum = ip->i_no_addr;
1487 		rs_insert(ip);
1488 	} else {
1489 		if (goal == rgd->rd_last_alloc + rgd->rd_data0)
1490 			rgd->rd_last_alloc = 0;
1491 	}
1492 }
1493 
1494 /**
1495  * gfs2_next_unreserved_block - Return next block that is not reserved
1496  * @rgd: The resource group
1497  * @block: The starting block
1498  * @length: The required length
1499  * @ip: Ignore any reservations for this inode
1500  *
1501  * If the block does not appear in any reservation, then return the
1502  * block number unchanged. If it does appear in the reservation, then
1503  * keep looking through the tree of reservations in order to find the
1504  * first block number which is not reserved.
1505  */
1506 
1507 static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block,
1508 				      u32 length,
1509 				      const struct gfs2_inode *ip)
1510 {
1511 	struct gfs2_blkreserv *rs;
1512 	struct rb_node *n;
1513 	int rc;
1514 
1515 	spin_lock(&rgd->rd_rsspin);
1516 	n = rgd->rd_rstree.rb_node;
1517 	while (n) {
1518 		rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
1519 		rc = rs_cmp(block, length, rs);
1520 		if (rc < 0)
1521 			n = n->rb_left;
1522 		else if (rc > 0)
1523 			n = n->rb_right;
1524 		else
1525 			break;
1526 	}
1527 
1528 	if (n) {
1529 		while ((rs_cmp(block, length, rs) == 0) && (ip->i_res != rs)) {
1530 			block = gfs2_rbm_to_block(&rs->rs_rbm) + rs->rs_free;
1531 			n = n->rb_right;
1532 			if (n == NULL)
1533 				break;
1534 			rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
1535 		}
1536 	}
1537 
1538 	spin_unlock(&rgd->rd_rsspin);
1539 	return block;
1540 }
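/*
 * For example, with reservations covering blocks 40..59 and 60..79, a
 * request for blocks 50..59 first matches the 40..59 reservation, the
 * loop advances block to 60, finds 60..69 still overlaps the second
 * reservation, advances to 80, and returns 80 as the first block which
 * is clear of both reservations.
 */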
1541 
1542 /**
1543  * gfs2_reservation_check_and_update - Check for reservations during block alloc
1544  * @rbm: The current position in the resource group
1545  * @ip: The inode for which we are searching for blocks
1546  * @minext: The minimum extent length
1547  * @maxext: A pointer to the maximum extent structure
1548  *
1549  * This checks the current position in the rgrp to see whether there is
1550  * a reservation covering this block. If not then this function is a
1551  * no-op. If there is, then the position is moved to the end of the
1552  * contiguous reservation(s) so that we are pointing at the first
1553  * non-reserved block.
1554  *
1555  * Returns: 0 if no reservation, 1 if @rbm has changed, otherwise an error
1556  */
1557 
1558 static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm,
1559 					     const struct gfs2_inode *ip,
1560 					     u32 minext,
1561 					     struct gfs2_extent *maxext)
1562 {
1563 	u64 block = gfs2_rbm_to_block(rbm);
1564 	u32 extlen = 1;
1565 	u64 nblock;
1566 	int ret;
1567 
1568 	/*
1569 	 * If we have a minimum extent length, then skip over any extent
1570 	 * which is less than the min extent length in size.
1571 	 */
1572 	if (minext) {
1573 		extlen = gfs2_free_extlen(rbm, minext);
1574 		if (extlen <= maxext->len)
1575 			goto fail;
1576 	}
1577 
1578 	/*
1579 	 * Check the extent which has been found against the reservations
1580 	 * and skip if parts of it are already reserved
1581 	 */
1582 	nblock = gfs2_next_unreserved_block(rbm->rgd, block, extlen, ip);
1583 	if (nblock == block) {
1584 		if (!minext || extlen >= minext)
1585 			return 0;
1586 
1587 		if (extlen > maxext->len) {
1588 			maxext->len = extlen;
1589 			maxext->rbm = *rbm;
1590 		}
1591 fail:
1592 		nblock = block + extlen;
1593 	}
1594 	ret = gfs2_rbm_from_block(rbm, nblock);
1595 	if (ret < 0)
1596 		return ret;
1597 	return 1;
1598 }
1599 
1600 /**
1601  * gfs2_rbm_find - Look for blocks of a particular state
1602  * @rbm: Value/result starting position and final position
1603  * @state: The state which we want to find
1604  * @minext: Pointer to the requested extent length (NULL for a single block)
1605  *          This is updated to be the actual reservation size.
1606  * @ip: If set, check for reservations
1607  * @nowrap: Stop looking at the end of the rgrp, rather than wrapping
1608  *          around until we've reached the starting point.
1609  * @ap: the allocation parameters
1610  *
1611  * Side effects:
1612  * - If looking for free blocks, we set GBF_FULL on each bitmap which
1613  *   has no free blocks in it.
1614  * - If looking for free blocks, we set rd_extfail_pt on each rgrp which
1615  *   has come up short on a free block search.
1616  *
1617  * Returns: 0 on success, -ENOSPC if there is no block of the requested state
1618  */
1619 
1620 static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext,
1621 			 const struct gfs2_inode *ip, bool nowrap,
1622 			 const struct gfs2_alloc_parms *ap)
1623 {
1624 	struct buffer_head *bh;
1625 	int initial_bii;
1626 	u32 initial_offset;
1627 	int first_bii = rbm->bii;
1628 	u32 first_offset = rbm->offset;
1629 	u32 offset;
1630 	u8 *buffer;
1631 	int n = 0;
1632 	int iters = rbm->rgd->rd_length;
1633 	int ret;
1634 	struct gfs2_bitmap *bi;
1635 	struct gfs2_extent maxext = { .rbm.rgd = rbm->rgd, };
1636 
1637 	/* If we are not starting at the beginning of a bitmap, then we
1638 	 * need to add one to the bitmap count to ensure that we search
1639 	 * the starting bitmap twice.
1640 	 */
1641 	if (rbm->offset != 0)
1642 		iters++;
1643 
1644 	while (1) {
1645 		bi = rbm_bi(rbm);
1646 		if (test_bit(GBF_FULL, &bi->bi_flags) &&
1647 		    (state == GFS2_BLKST_FREE))
1648 			goto next_bitmap;
1649 
1650 		bh = bi->bi_bh;
1651 		buffer = bh->b_data + bi->bi_offset;
1652 		WARN_ON(!buffer_uptodate(bh));
1653 		if (state != GFS2_BLKST_UNLINKED && bi->bi_clone)
1654 			buffer = bi->bi_clone + bi->bi_offset;
1655 		initial_offset = rbm->offset;
1656 		offset = gfs2_bitfit(buffer, bi->bi_len, rbm->offset, state);
1657 		if (offset == BFITNOENT)
1658 			goto bitmap_full;
1659 		rbm->offset = offset;
1660 		if (ip == NULL)
1661 			return 0;
1662 
1663 		initial_bii = rbm->bii;
1664 		ret = gfs2_reservation_check_and_update(rbm, ip,
1665 							minext ? *minext : 0,
1666 							&maxext);
1667 		if (ret == 0)
1668 			return 0;
1669 		if (ret > 0) {
1670 			n += (rbm->bii - initial_bii);
1671 			goto next_iter;
1672 		}
1673 		if (ret == -E2BIG) {
1674 			rbm->bii = 0;
1675 			rbm->offset = 0;
1676 			n += (rbm->bii - initial_bii);
1677 			goto res_covered_end_of_rgrp;
1678 		}
1679 		return ret;
1680 
1681 bitmap_full:	/* Mark bitmap as full and fall through */
1682 		if ((state == GFS2_BLKST_FREE) && initial_offset == 0) {
1683 			struct gfs2_bitmap *bi = rbm_bi(rbm);
1684 			set_bit(GBF_FULL, &bi->bi_flags);
1685 		}
1686 
1687 next_bitmap:	/* Find next bitmap in the rgrp */
1688 		rbm->offset = 0;
1689 		rbm->bii++;
1690 		if (rbm->bii == rbm->rgd->rd_length)
1691 			rbm->bii = 0;
1692 res_covered_end_of_rgrp:
1693 		if ((rbm->bii == 0) && nowrap)
1694 			break;
1695 		n++;
1696 next_iter:
1697 		if (n >= iters)
1698 			break;
1699 	}
1700 
1701 	if (minext == NULL || state != GFS2_BLKST_FREE)
1702 		return -ENOSPC;
1703 
1704 	/* If the extent was too small, and it's smaller than the smallest
1705 	   to have failed before, remember for future reference that it's
1706 	   useless to search this rgrp again for this amount or more. */
1707 	if ((first_offset == 0) && (first_bii == 0) &&
1708 	    (*minext < rbm->rgd->rd_extfail_pt))
1709 		rbm->rgd->rd_extfail_pt = *minext;
1710 
1711 	/* If the maximum extent we found is big enough to fulfill the
1712 	   minimum requirements, use it anyway. */
1713 	if (maxext.len) {
1714 		*rbm = maxext.rbm;
1715 		*minext = maxext.len;
1716 		return 0;
1717 	}
1718 
1719 	return -ENOSPC;
1720 }
1721 
1722 /**
1723  * try_rgrp_unlink - Look for any unlinked, allocated, but unused inodes
1724  * @rgd: The rgrp
1725  * @last_unlinked: block address of the last dinode we unlinked
1726  * @skip: block address we should explicitly not unlink
1727  *
1728  * This function returns nothing; any unlinked inodes found are queued
1729  * to the delete workqueue for disposal.
1730  */
1731 
1732 static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip)
1733 {
1734 	u64 block;
1735 	struct gfs2_sbd *sdp = rgd->rd_sbd;
1736 	struct gfs2_glock *gl;
1737 	struct gfs2_inode *ip;
1738 	int error;
1739 	int found = 0;
1740 	struct gfs2_rbm rbm = { .rgd = rgd, .bii = 0, .offset = 0 };
1741 
1742 	while (1) {
1743 		down_write(&sdp->sd_log_flush_lock);
1744 		error = gfs2_rbm_find(&rbm, GFS2_BLKST_UNLINKED, NULL, NULL,
1745 				      true, NULL);
1746 		up_write(&sdp->sd_log_flush_lock);
1747 		if (error == -ENOSPC)
1748 			break;
1749 		if (WARN_ON_ONCE(error))
1750 			break;
1751 
1752 		block = gfs2_rbm_to_block(&rbm);
1753 		if (gfs2_rbm_from_block(&rbm, block + 1))
1754 			break;
1755 		if (*last_unlinked != NO_BLOCK && block <= *last_unlinked)
1756 			continue;
1757 		if (block == skip)
1758 			continue;
1759 		*last_unlinked = block;
1760 
1761 		error = gfs2_glock_get(sdp, block, &gfs2_inode_glops, CREATE, &gl);
1762 		if (error)
1763 			continue;
1764 
1765 		/* If the inode is already in cache, we can ignore it here
1766 		 * because the existing inode disposal code will deal with
1767 		 * it when all refs have gone away. Accessing gl_object like
1768 		 * this is not safe in general. Here it is ok because we do
1769 		 * not dereference the pointer, and we only need an approx
1770 		 * answer to whether it is NULL or not.
1771 		 */
1772 		ip = gl->gl_object;
1773 
1774 		if (ip || queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0)
1775 			gfs2_glock_put(gl);
1776 		else
1777 			found++;
1778 
1779 		/* Limit reclaim to sensible number of tasks */
1780 		if (found > NR_CPUS)
1781 			return;
1782 	}
1783 
1784 	rgd->rd_flags &= ~GFS2_RDF_CHECK;
1785 	return;
1786 }
1787 
1788 /**
1789  * gfs2_rgrp_congested - Use stats to figure out whether an rgrp is congested
1790  * @rgd: The rgrp in question
1791  * @loops: An indication of how picky we can be (0=very, 1=less so)
1792  *
1793  * This function uses the recently added glock statistics in order to
1794  * figure out whether a particular resource group is suffering from
1795  * contention from multiple nodes. This is done purely on the basis
1796  * of timings, since this is the only data we have to work with and
1797  * our aim here is to reject a resource group which is highly contended
1798  * but (very important) not to do this too often in order to ensure that
1799  * we do not end up introducing fragmentation by changing resource
1800  * groups when not actually required.
1801  *
1802  * The calculation is fairly simple, we want to know whether the SRTTB
1803  * (i.e. smoothed round trip time for blocking operations) to acquire
1804  * the lock for this rgrp's glock is significantly greater than the
1805  * time taken for resource groups on average. We introduce a margin in
1806  * the form of the variable @var which is computed as the sum of the two
1807  * respective variances, and multiplied by a factor depending on @loops
1808  * and whether we have a lot of data to base the decision on. This is
1809  * then tested against the square difference of the means in order to
1810  * decide whether the result is statistically significant or not.
1811  *
1812  * Returns: A boolean verdict on the congestion status
1813  */
1814 
1815 static bool gfs2_rgrp_congested(const struct gfs2_rgrpd *rgd, int loops)
1816 {
1817 	const struct gfs2_glock *gl = rgd->rd_gl;
1818 	const struct gfs2_sbd *sdp = gl->gl_sbd;
1819 	struct gfs2_lkstats *st;
1820 	s64 r_dcount, l_dcount;
1821 	s64 r_srttb, l_srttb;
1822 	s64 srttb_diff;
1823 	s64 sqr_diff;
1824 	s64 var;
1825 
1826 	preempt_disable();
1827 	st = &this_cpu_ptr(sdp->sd_lkstats)->lkstats[LM_TYPE_RGRP];
1828 	r_srttb = st->stats[GFS2_LKS_SRTTB];
1829 	r_dcount = st->stats[GFS2_LKS_DCOUNT];
1830 	var = st->stats[GFS2_LKS_SRTTVARB] +
1831 	      gl->gl_stats.stats[GFS2_LKS_SRTTVARB];
1832 	preempt_enable();
1833 
1834 	l_srttb = gl->gl_stats.stats[GFS2_LKS_SRTTB];
1835 	l_dcount = gl->gl_stats.stats[GFS2_LKS_DCOUNT];
1836 
1837 	if ((l_dcount < 1) || (r_dcount < 1) || (r_srttb == 0))
1838 		return false;
1839 
1840 	srttb_diff = r_srttb - l_srttb;
1841 	sqr_diff = srttb_diff * srttb_diff;
1842 
1843 	var *= 2;
1844 	if (l_dcount < 8 || r_dcount < 8)
1845 		var *= 2;
1846 	if (loops == 1)
1847 		var *= 2;
1848 
1849 	return ((srttb_diff < 0) && (sqr_diff > var));
1850 }
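
/*
 * Editor's note: a worked example of the test above, with made-up
 * numbers. Suppose the CPU-local averages over all rgrps give
 * r_srttb = 100 and the two variances sum to var = 400, while this
 * rgrp's glock shows l_srttb = 160, both dcounts are >= 8 and
 * loops == 0:
 *
 *	srttb_diff = 100 - 160 = -60
 *	sqr_diff   = (-60)^2   = 3600
 *	var        = 400 * 2   = 800	(base scaling only)
 *
 *	return (-60 < 0) && (3600 > 800);	-> true, i.e. congested
 *
 * Note that srttb_diff must be negative: only an rgrp that is slower
 * than the average can be reported as congested, however large the
 * squared difference becomes.
 */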
1851 
1852 /**
1853  * gfs2_rgrp_used_recently - test if an rgrp's glock was recently used
1854  * @rs: The block reservation with the rgrp to test
1855  * @msecs: The time limit in milliseconds
1856  *
1857  * Returns: True if the rgrp glock has been used within the time limit
1858  */
1859 static bool gfs2_rgrp_used_recently(const struct gfs2_blkreserv *rs,
1860 				    u64 msecs)
1861 {
1862 	u64 tdiff;
1863 
1864 	tdiff = ktime_to_ns(ktime_sub(ktime_get_real(),
1865 				      rs->rs_rbm.rgd->rd_gl->gl_dstamp));
1866 
1867 	return tdiff > (msecs * 1000 * 1000);
1868 }
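
/*
 * Editor's note: gl_dstamp is a ktime_t, so the comparison above is in
 * nanoseconds; msecs * 1000 * 1000 converts the caller's limit. The only
 * caller in this file passes msecs = 1000, i.e. a one second window.
 */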
1869 
1870 static u32 gfs2_orlov_skip(const struct gfs2_inode *ip)
1871 {
1872 	const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1873 	u32 skip;
1874 
1875 	get_random_bytes(&skip, sizeof(skip));
1876 	return skip % sdp->sd_rgrps;
1877 }
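
/*
 * Editor's note: the random value returned above is consumed as a
 * countdown in gfs2_inplace_reserve() ("if (skip && skip--)"), so an
 * Orlov-style allocation starts its scan at a uniformly random rgrp
 * instead of the goal rgrp, spreading directory allocations across
 * the filesystem.
 */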
1878 
1879 static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *begin)
1880 {
1881 	struct gfs2_rgrpd *rgd = *pos;
1882 	struct gfs2_sbd *sdp = rgd->rd_sbd;
1883 
1884 	rgd = gfs2_rgrpd_get_next(rgd);
1885 	if (rgd == NULL)
1886 		rgd = gfs2_rgrpd_get_first(sdp);
1887 	*pos = rgd;
1888 	if (rgd != begin) /* If we didn't wrap */
1889 		return true;
1890 	return false;
1891 }
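
/*
 * Editor's note: an illustrative sketch (not part of the original file)
 * of how gfs2_select_rgrp() drives a full scan; try_this_rgrp() is a
 * hypothetical helper standing in for the body of the loop in
 * gfs2_inplace_reserve() below:
 *
 *	struct gfs2_rgrpd *begin = start_rgd, *pos = start_rgd;
 *
 *	do {
 *		if (try_this_rgrp(pos))
 *			break;
 *	} while (gfs2_select_rgrp(&pos, begin));
 *
 * Each call advances *pos to the next rgrp, wrapping to the first one
 * at the end of the set, and returns false once the scan arrives back
 * at @begin, i.e. after every rgrp has been visited once.
 */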
1892 
1893 /**
1894  * gfs2_inplace_reserve - Reserve space in the filesystem
1895  * @ip: the inode to reserve space for
1896  * @ap: the allocation parameters
1897  *
1898  * Returns: errno
1899  */
1900 
1901 int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *ap)
1902 {
1903 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1904 	struct gfs2_rgrpd *begin = NULL;
1905 	struct gfs2_blkreserv *rs = ip->i_res;
1906 	int error = 0, rg_locked, flags = 0;
1907 	u64 last_unlinked = NO_BLOCK;
1908 	int loops = 0;
1909 	u32 skip = 0;
1910 
1911 	if (sdp->sd_args.ar_rgrplvb)
1912 		flags |= GL_SKIP;
1913 	if (gfs2_assert_warn(sdp, ap->target))
1914 		return -EINVAL;
1915 	if (gfs2_rs_active(rs)) {
1916 		begin = rs->rs_rbm.rgd;
1917 	} else if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) {
1918 		rs->rs_rbm.rgd = begin = ip->i_rgd;
1919 	} else {
1920 		check_and_update_goal(ip);
1921 		rs->rs_rbm.rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1);
1922 	}
1923 	if (S_ISDIR(ip->i_inode.i_mode) && (ap->aflags & GFS2_AF_ORLOV))
1924 		skip = gfs2_orlov_skip(ip);
1925 	if (rs->rs_rbm.rgd == NULL)
1926 		return -EBADSLT;
1927 
1928 	while (loops < 3) {
1929 		rg_locked = 1;
1930 
1931 		if (!gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) {
1932 			rg_locked = 0;
1933 			if (skip && skip--)
1934 				goto next_rgrp;
1935 			if (!gfs2_rs_active(rs) && (loops < 2) &&
1936 			     gfs2_rgrp_used_recently(rs, 1000) &&
1937 			     gfs2_rgrp_congested(rs->rs_rbm.rgd, loops))
1938 				goto next_rgrp;
1939 			error = gfs2_glock_nq_init(rs->rs_rbm.rgd->rd_gl,
1940 						   LM_ST_EXCLUSIVE, flags,
1941 						   &rs->rs_rgd_gh);
1942 			if (unlikely(error))
1943 				return error;
1944 			if (!gfs2_rs_active(rs) && (loops < 2) &&
1945 			    gfs2_rgrp_congested(rs->rs_rbm.rgd, loops))
1946 				goto skip_rgrp;
1947 			if (sdp->sd_args.ar_rgrplvb) {
1948 				error = update_rgrp_lvb(rs->rs_rbm.rgd);
1949 				if (unlikely(error)) {
1950 					gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
1951 					return error;
1952 				}
1953 			}
1954 		}
1955 
1956 		/* Skip unusable resource groups */
1957 		if ((rs->rs_rbm.rgd->rd_flags & (GFS2_RGF_NOALLOC |
1958 						 GFS2_RDF_ERROR)) ||
1959 		    (ap->target > rs->rs_rbm.rgd->rd_extfail_pt))
1960 			goto skip_rgrp;
1961 
1962 		if (sdp->sd_args.ar_rgrplvb)
1963 			gfs2_rgrp_bh_get(rs->rs_rbm.rgd);
1964 
1965 		/* Get a reservation if we don't already have one */
1966 		if (!gfs2_rs_active(rs))
1967 			rg_mblk_search(rs->rs_rbm.rgd, ip, ap);
1968 
1969 		/* Skip rgrps when we can't get a reservation on first pass */
1970 		if (!gfs2_rs_active(rs) && (loops < 1))
1971 			goto check_rgrp;
1972 
1973 		/* If rgrp has enough free space, use it */
1974 		if (rs->rs_rbm.rgd->rd_free_clone >= ap->target) {
1975 			ip->i_rgd = rs->rs_rbm.rgd;
1976 			return 0;
1977 		}
1978 
1979 check_rgrp:
1980 		/* Check for unlinked inodes which can be reclaimed */
1981 		if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK)
1982 			try_rgrp_unlink(rs->rs_rbm.rgd, &last_unlinked,
1983 					ip->i_no_addr);
1984 skip_rgrp:
1985 		/* Drop reservation, if we couldn't use reserved rgrp */
1986 		if (gfs2_rs_active(rs))
1987 			gfs2_rs_deltree(rs);
1988 
1989 		/* Unlock rgrp if required */
1990 		if (!rg_locked)
1991 			gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
1992 next_rgrp:
1993 		/* Find the next rgrp, and continue looking */
1994 		if (gfs2_select_rgrp(&rs->rs_rbm.rgd, begin))
1995 			continue;
1996 		if (skip)
1997 			continue;
1998 
1999 		/* If we've scanned all the rgrps, but found no free blocks
2000 		 * then this checks for some less likely conditions before
2001 		 * trying again.
2002 		 */
2003 		loops++;
2004 		/* Check that fs hasn't grown if writing to rindex */
2005 		if (ip == GFS2_I(sdp->sd_rindex) && !sdp->sd_rindex_uptodate) {
2006 			error = gfs2_ri_update(ip);
2007 			if (error)
2008 				return error;
2009 		}
2010 		/* Flushing the log may release space */
2011 		if (loops == 2)
2012 			gfs2_log_flush(sdp, NULL, NORMAL_FLUSH);
2013 	}
2014 
2015 	return -ENOSPC;
2016 }
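
/*
 * Editor's summary of the retry strategy above: the while loop makes up
 * to three passes over the rgrps. On the first pass an rgrp is only used
 * if a multi-block reservation can be made in it, and on the first two
 * passes rgrps that look congested are skipped (without even trying the
 * lock if the rgrp's glock was used within the last second); the final
 * pass accepts any rgrp with enough free space. Rgrps flagged
 * GFS2_RDF_CHECK are scanned for unlinked inodes to reclaim along the
 * way, and the log is flushed before the final pass since that may
 * release space.
 */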
2017 
2018 /**
2019  * gfs2_inplace_release - release an inplace reservation
2020  * @ip: the inode the reservation was taken out on
2021  *
2022  * Release a reservation made by gfs2_inplace_reserve().
2023  */
2024 
2025 void gfs2_inplace_release(struct gfs2_inode *ip)
2026 {
2027 	struct gfs2_blkreserv *rs = ip->i_res;
2028 
2029 	if (rs->rs_rgd_gh.gh_gl)
2030 		gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
2031 }
2032 
2033 /**
2034  * gfs2_get_block_type - Determine the type of a block in a resource group
2035  * @rgd: the resource group holding the block
2036  * @block: the block number
2037  *
2038  * Returns: The block type (GFS2_BLKST_*)
2039  */
2040 
2041 static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block)
2042 {
2043 	struct gfs2_rbm rbm = { .rgd = rgd, };
2044 	int ret;
2045 
2046 	ret = gfs2_rbm_from_block(&rbm, block);
2047 	WARN_ON_ONCE(ret != 0);
2048 
2049 	return gfs2_testbit(&rbm);
2050 }
2051 
2052 
2053 /**
2054  * gfs2_alloc_extent - allocate an extent from a given bitmap
2055  * @rbm: the resource group information
2056  * @dinode: TRUE if the first block we allocate is for a dinode
2057  * @n: The extent length (value/result)
2058  *
2059  * Add the bitmap buffer(s) to the transaction.
2060  * Set the found bits to their new allocation state (dinode or used).
2061  */
2062 static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode,
2063 			     unsigned int *n)
2064 {
2065 	struct gfs2_rbm pos = { .rgd = rbm->rgd, };
2066 	const unsigned int elen = *n;
2067 	u64 block;
2068 	int ret;
2069 
2070 	*n = 1;
2071 	block = gfs2_rbm_to_block(rbm);
2072 	gfs2_trans_add_meta(rbm->rgd->rd_gl, rbm_bi(rbm)->bi_bh);
2073 	gfs2_setbit(rbm, true, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
2074 	block++;
2075 	while (*n < elen) {
2076 		ret = gfs2_rbm_from_block(&pos, block);
2077 		if (ret || gfs2_testbit(&pos) != GFS2_BLKST_FREE)
2078 			break;
2079 		gfs2_trans_add_meta(pos.rgd->rd_gl, rbm_bi(&pos)->bi_bh);
2080 		gfs2_setbit(&pos, true, GFS2_BLKST_USED);
2081 		(*n)++;
2082 		block++;
2083 	}
2084 }
2085 
2086 /**
2087  * rgblk_free - Change alloc state of given block(s)
2088  * @sdp: the filesystem
2089  * @bstart: the start of a run of blocks to free
2090  * @blen: the length of the block run (all must lie within ONE RG!)
2091  * @new_state: GFS2_BLKST_XXX the after-allocation block state
2092  *
2093  * Returns:  Resource group containing the block(s)
2094  */
2095 
2096 static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
2097 				     u32 blen, unsigned char new_state)
2098 {
2099 	struct gfs2_rbm rbm;
2100 	struct gfs2_bitmap *bi, *bi_prev = NULL;
2101 
2102 	rbm.rgd = gfs2_blk2rgrpd(sdp, bstart, 1);
2103 	if (!rbm.rgd) {
2104 		if (gfs2_consist(sdp))
2105 			fs_err(sdp, "block = %llu\n", (unsigned long long)bstart);
2106 		return NULL;
2107 	}
2108 
2109 	gfs2_rbm_from_block(&rbm, bstart);
2110 	while (blen--) {
2111 		bi = rbm_bi(&rbm);
2112 		if (bi != bi_prev) {
2113 			if (!bi->bi_clone) {
2114 				bi->bi_clone = kmalloc(bi->bi_bh->b_size,
2115 						      GFP_NOFS | __GFP_NOFAIL);
2116 				memcpy(bi->bi_clone + bi->bi_offset,
2117 				       bi->bi_bh->b_data + bi->bi_offset,
2118 				       bi->bi_len);
2119 			}
2120 			gfs2_trans_add_meta(rbm.rgd->rd_gl, bi->bi_bh);
2121 			bi_prev = bi;
2122 		}
2123 		gfs2_setbit(&rbm, false, new_state);
2124 		gfs2_rbm_incr(&rbm);
2125 	}
2126 
2127 	return rbm.rgd;
2128 }
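
/*
 * Editor's note: the kmalloc/memcpy above is a copy-on-write of the
 * bitmap. bi_clone snapshots the bitmap as it was before these blocks
 * were freed, and the allocator consults the clone (where one exists)
 * when searching for free blocks, so a just-freed block cannot be
 * handed out again (and possibly overwritten) before the transaction
 * that freed it has been committed to the log.
 */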
2129 
2130 /**
2131  * gfs2_rgrp_dump - print out an rgrp
2132  * @seq: The iterator
2133  * @gl: The glock in question
2134  *
2135  */
2136 
2137 void gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl)
2138 {
2139 	struct gfs2_rgrpd *rgd = gl->gl_object;
2140 	struct gfs2_blkreserv *trs;
2141 	const struct rb_node *n;
2142 
2143 	if (rgd == NULL)
2144 		return;
2145 	gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u r:%u e:%u\n",
2146 		       (unsigned long long)rgd->rd_addr, rgd->rd_flags,
2147 		       rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes,
2148 		       rgd->rd_reserved, rgd->rd_extfail_pt);
2149 	spin_lock(&rgd->rd_rsspin);
2150 	for (n = rb_first(&rgd->rd_rstree); n; n = rb_next(&trs->rs_node)) {
2151 		trs = rb_entry(n, struct gfs2_blkreserv, rs_node);
2152 		dump_rs(seq, trs);
2153 	}
2154 	spin_unlock(&rgd->rd_rsspin);
2155 }
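
/*
 * Editor's key to the "R:" line above, matching the fields from left to
 * right: n = rgrp address, f = flags, b = free/free_clone block counts,
 * i = number of dinodes, r = reserved blocks, e = extent failure point
 * (rd_extfail_pt).
 */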
2156 
2157 static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd)
2158 {
2159 	struct gfs2_sbd *sdp = rgd->rd_sbd;
2160 	fs_warn(sdp, "rgrp %llu has an error, marking it readonly until umount\n",
2161 		(unsigned long long)rgd->rd_addr);
2162 	fs_warn(sdp, "umount on all nodes and run fsck.gfs2 to fix the error\n");
2163 	gfs2_rgrp_dump(NULL, rgd->rd_gl);
2164 	rgd->rd_flags |= GFS2_RDF_ERROR;
2165 }
2166 
2167 /**
2168  * gfs2_adjust_reservation - Adjust (or remove) a reservation after allocation
2169  * @ip: The inode we have just allocated blocks for
2170  * @rbm: The start of the allocated blocks
2171  * @len: The extent length
2172  *
2173  * Adjusts a reservation after an allocation has taken place. If the
2174  * reservation does not match the allocation, or if it is now empty
2175  * then it is removed.
2176  */
2177 
2178 static void gfs2_adjust_reservation(struct gfs2_inode *ip,
2179 				    const struct gfs2_rbm *rbm, unsigned len)
2180 {
2181 	struct gfs2_blkreserv *rs = ip->i_res;
2182 	struct gfs2_rgrpd *rgd = rbm->rgd;
2183 	unsigned rlen;
2184 	u64 block;
2185 	int ret;
2186 
2187 	spin_lock(&rgd->rd_rsspin);
2188 	if (gfs2_rs_active(rs)) {
2189 		if (gfs2_rbm_eq(&rs->rs_rbm, rbm)) {
2190 			block = gfs2_rbm_to_block(rbm);
2191 			ret = gfs2_rbm_from_block(&rs->rs_rbm, block + len);
2192 			rlen = min(rs->rs_free, len);
2193 			rs->rs_free -= rlen;
2194 			rgd->rd_reserved -= rlen;
2195 			trace_gfs2_rs(rs, TRACE_RS_CLAIM);
2196 			if (rs->rs_free && !ret)
2197 				goto out;
2198 		}
2199 		__rs_deltree(rs);
2200 	}
2201 out:
2202 	spin_unlock(&rgd->rd_rsspin);
2203 }
2204 
2205 /**
2206  * gfs2_set_alloc_start - Set starting point for block allocation
2207  * @rbm: The rbm which will be set to the required location
2208  * @ip: The gfs2 inode
2209  * @dinode: Flag to say if allocation includes a new inode
2210  *
2211  * This sets the starting point from the reservation if one is active;
2212  * otherwise it falls back to guessing a start point based on the
2213  * inode's goal block or the last allocation point in the rgrp.
2214  */
2215 
2216 static void gfs2_set_alloc_start(struct gfs2_rbm *rbm,
2217 				 const struct gfs2_inode *ip, bool dinode)
2218 {
2219 	u64 goal;
2220 
2221 	if (gfs2_rs_active(ip->i_res)) {
2222 		*rbm = ip->i_res->rs_rbm;
2223 		return;
2224 	}
2225 
2226 	if (!dinode && rgrp_contains_block(rbm->rgd, ip->i_goal))
2227 		goal = ip->i_goal;
2228 	else
2229 		goal = rbm->rgd->rd_last_alloc + rbm->rgd->rd_data0;
2230 
2231 	gfs2_rbm_from_block(rbm, goal);
2232 }
2233 
2234 /**
2235  * gfs2_alloc_blocks - Allocate one or more blocks of data and/or a dinode
2236  * @ip: the inode to allocate the block for
2237  * @bn: Used to return the starting block number
2238  * @nblocks: requested number of blocks/extent length (value/result)
2239  * @dinode: 1 if we're allocating a dinode block, else 0
2240  * @generation: the generation number of the inode
2241  *
2242  * Returns: 0 or error
2243  */
2244 
2245 int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
2246 		      bool dinode, u64 *generation)
2247 {
2248 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
2249 	struct buffer_head *dibh;
2250 	struct gfs2_rbm rbm = { .rgd = ip->i_rgd, };
2251 	unsigned int ndata;
2252 	u64 block; /* block, within the file system scope */
2253 	int error;
2254 
2255 	gfs2_set_alloc_start(&rbm, ip, dinode);
2256 	error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, ip, false, NULL);
2257 
2258 	if (error == -ENOSPC) {
2259 		gfs2_set_alloc_start(&rbm, ip, dinode);
2260 		error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, NULL, false,
2261 				      NULL);
2262 	}
2263 
2264 	/* Since all blocks are reserved in advance, this shouldn't happen */
2265 	if (error) {
2266 		fs_warn(sdp, "inum=%llu error=%d, nblocks=%u, full=%d fail_pt=%d\n",
2267 			(unsigned long long)ip->i_no_addr, error, *nblocks,
2268 			test_bit(GBF_FULL, &rbm.rgd->rd_bits->bi_flags),
2269 			rbm.rgd->rd_extfail_pt);
2270 		goto rgrp_error;
2271 	}
2272 
2273 	gfs2_alloc_extent(&rbm, dinode, nblocks);
2274 	block = gfs2_rbm_to_block(&rbm);
2275 	rbm.rgd->rd_last_alloc = block - rbm.rgd->rd_data0;
2276 	if (gfs2_rs_active(ip->i_res))
2277 		gfs2_adjust_reservation(ip, &rbm, *nblocks);
2278 	ndata = *nblocks;
2279 	if (dinode)
2280 		ndata--;
2281 
2282 	if (!dinode) {
2283 		ip->i_goal = block + ndata - 1;
2284 		error = gfs2_meta_inode_buffer(ip, &dibh);
2285 		if (error == 0) {
2286 			struct gfs2_dinode *di =
2287 				(struct gfs2_dinode *)dibh->b_data;
2288 			gfs2_trans_add_meta(ip->i_gl, dibh);
2289 			di->di_goal_meta = di->di_goal_data =
2290 				cpu_to_be64(ip->i_goal);
2291 			brelse(dibh);
2292 		}
2293 	}
2294 	if (rbm.rgd->rd_free < *nblocks) {
2295 		pr_warn("nblocks=%u\n", *nblocks);
2296 		goto rgrp_error;
2297 	}
2298 
2299 	rbm.rgd->rd_free -= *nblocks;
2300 	if (dinode) {
2301 		rbm.rgd->rd_dinodes++;
2302 		*generation = rbm.rgd->rd_igeneration++;
2303 		if (*generation == 0)
2304 			*generation = rbm.rgd->rd_igeneration++;
2305 	}
2306 
2307 	gfs2_trans_add_meta(rbm.rgd->rd_gl, rbm.rgd->rd_bits[0].bi_bh);
2308 	gfs2_rgrp_out(rbm.rgd, rbm.rgd->rd_bits[0].bi_bh->b_data);
2309 	gfs2_rgrp_ondisk2lvb(rbm.rgd->rd_rgl, rbm.rgd->rd_bits[0].bi_bh->b_data);
2310 
2311 	gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0);
2312 	if (dinode)
2313 		gfs2_trans_add_unrevoke(sdp, block, *nblocks);
2314 
2315 	gfs2_quota_change(ip, *nblocks, ip->i_inode.i_uid, ip->i_inode.i_gid);
2316 
2317 	rbm.rgd->rd_free_clone -= *nblocks;
2318 	trace_gfs2_block_alloc(ip, rbm.rgd, block, *nblocks,
2319 			       dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
2320 	*bn = block;
2321 	return 0;
2322 
2323 rgrp_error:
2324 	gfs2_rgrp_error(rbm.rgd);
2325 	return -EIO;
2326 }
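
/*
 * Editor's note: an illustrative calling sequence for the allocator
 * above (a sketch only; quota checks and error handling are
 * abbreviated). Allocating a single data block for @ip:
 *
 *	struct gfs2_alloc_parms ap = { .target = 1 };
 *	unsigned int n = 1;
 *	u64 bn;
 *	int error;
 *
 *	error = gfs2_inplace_reserve(ip, &ap);
 *	if (error)
 *		return error;
 *	error = gfs2_alloc_blocks(ip, &bn, &n, false, NULL);
 *	gfs2_inplace_release(ip);
 *
 * On success @bn holds the first allocated block and @n the extent
 * length actually obtained; the generation pointer may be NULL since
 * it is only written when a dinode is being allocated.
 */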
2327 
2328 /**
2329  * __gfs2_free_blocks - free a contiguous run of block(s)
2330  * @ip: the inode these blocks are being freed from
2331  * @bstart: first block of a run of contiguous blocks
2332  * @blen: the length of the block run
2333  * @meta: 1 if the blocks represent metadata
2334  *
2335  */
2336 
2337 void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta)
2338 {
2339 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
2340 	struct gfs2_rgrpd *rgd;
2341 
2342 	rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE);
2343 	if (!rgd)
2344 		return;
2345 	trace_gfs2_block_alloc(ip, rgd, bstart, blen, GFS2_BLKST_FREE);
2346 	rgd->rd_free += blen;
2347 	rgd->rd_flags &= ~GFS2_RGF_TRIMMED;
2348 	gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
2349 	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
2350 	gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
2351 
2352 	/* Directories keep their data in the metadata address space */
2353 	if (meta || ip->i_depth)
2354 		gfs2_meta_wipe(ip, bstart, blen);
2355 }
2356 
2357 /**
2358  * gfs2_free_meta - free a contiguous run of metadata block(s)
2359  * @ip: the inode these blocks are being freed from
2360  * @bstart: first block of a run of contiguous blocks
2361  * @blen: the length of the block run
2362  *
2363  */
2364 
2365 void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen)
2366 {
2367 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
2368 
2369 	__gfs2_free_blocks(ip, bstart, blen, 1);
2370 	gfs2_statfs_change(sdp, 0, +blen, 0);
2371 	gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid);
2372 }
2373 
2374 void gfs2_unlink_di(struct inode *inode)
2375 {
2376 	struct gfs2_inode *ip = GFS2_I(inode);
2377 	struct gfs2_sbd *sdp = GFS2_SB(inode);
2378 	struct gfs2_rgrpd *rgd;
2379 	u64 blkno = ip->i_no_addr;
2380 
2381 	rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_UNLINKED);
2382 	if (!rgd)
2383 		return;
2384 	trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED);
2385 	gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
2386 	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
2387 	gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
2388 	update_rgrp_lvb_unlinked(rgd, 1);
2389 }
2390 
2391 static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno)
2392 {
2393 	struct gfs2_sbd *sdp = rgd->rd_sbd;
2394 	struct gfs2_rgrpd *tmp_rgd;
2395 
2396 	tmp_rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_FREE);
2397 	if (!tmp_rgd)
2398 		return;
2399 	gfs2_assert_withdraw(sdp, rgd == tmp_rgd);
2400 
2401 	if (!rgd->rd_dinodes)
2402 		gfs2_consist_rgrpd(rgd);
2403 	rgd->rd_dinodes--;
2404 	rgd->rd_free++;
2405 
2406 	gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
2407 	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
2408 	gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
2409 	update_rgrp_lvb_unlinked(rgd, -1);
2410 
2411 	gfs2_statfs_change(sdp, 0, +1, -1);
2412 }
2413 
2414 
2415 void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
2416 {
2417 	gfs2_free_uninit_di(rgd, ip->i_no_addr);
2418 	trace_gfs2_block_alloc(ip, rgd, ip->i_no_addr, 1, GFS2_BLKST_FREE);
2419 	gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid);
2420 	gfs2_meta_wipe(ip, ip->i_no_addr, 1);
2421 }
2422 
2423 /**
2424  * gfs2_check_blk_type - Check the type of a block
2425  * @sdp: The superblock
2426  * @no_addr: The block number to check
2427  * @type: The block type we are looking for
2428  *
2429  * Returns: 0 if the block type matches the expected type
2430  *          -ESTALE if it doesn't match
2431  *          or -ve errno if something went wrong while checking
2432  *          or a negative errno if something went wrong while checking
2433 
2434 int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type)
2435 {
2436 	struct gfs2_rgrpd *rgd;
2437 	struct gfs2_holder rgd_gh;
2438 	int error = -EINVAL;
2439 
2440 	rgd = gfs2_blk2rgrpd(sdp, no_addr, 1);
2441 	if (!rgd)
2442 		goto fail;
2443 
2444 	error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_SHARED, 0, &rgd_gh);
2445 	if (error)
2446 		goto fail;
2447 
2448 	if (gfs2_get_block_type(rgd, no_addr) != type)
2449 		error = -ESTALE;
2450 
2451 	gfs2_glock_dq_uninit(&rgd_gh);
2452 fail:
2453 	return error;
2454 }
2455 
2456 /**
2457  * gfs2_rlist_add - add a RG to a list of RGs
2458  * @ip: the inode
2459  * @rlist: the list of resource groups
2460  * @block: the block
2461  *
2462  * Figure out what RG a block belongs to and add that RG to the list
2463  *
2464  * FIXME: Don't use NOFAIL
2465  *
2466  */
2467 
2468 void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist,
2469 		    u64 block)
2470 {
2471 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
2472 	struct gfs2_rgrpd *rgd;
2473 	struct gfs2_rgrpd **tmp;
2474 	unsigned int new_space;
2475 	unsigned int x;
2476 
2477 	if (gfs2_assert_warn(sdp, !rlist->rl_ghs))
2478 		return;
2479 
2480 	if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, block))
2481 		rgd = ip->i_rgd;
2482 	else
2483 		rgd = gfs2_blk2rgrpd(sdp, block, 1);
2484 	if (!rgd) {
2485 		fs_err(sdp, "rlist_add: no rgrp for block %llu\n", (unsigned long long)block);
2486 		return;
2487 	}
2488 	ip->i_rgd = rgd;
2489 
2490 	for (x = 0; x < rlist->rl_rgrps; x++)
2491 		if (rlist->rl_rgd[x] == rgd)
2492 			return;
2493 
2494 	if (rlist->rl_rgrps == rlist->rl_space) {
2495 		new_space = rlist->rl_space + 10;
2496 
2497 		tmp = kcalloc(new_space, sizeof(struct gfs2_rgrpd *),
2498 			      GFP_NOFS | __GFP_NOFAIL);
2499 
2500 		if (rlist->rl_rgd) {
2501 			memcpy(tmp, rlist->rl_rgd,
2502 			       rlist->rl_space * sizeof(struct gfs2_rgrpd *));
2503 			kfree(rlist->rl_rgd);
2504 		}
2505 
2506 		rlist->rl_space = new_space;
2507 		rlist->rl_rgd = tmp;
2508 	}
2509 
2510 	rlist->rl_rgd[rlist->rl_rgrps++] = rgd;
2511 }
2512 
2513 /**
2514  * gfs2_rlist_alloc - all RGs have been added to the rlist, now allocate
2515  *      and initialize an array of glock holders for them
2516  * @rlist: the list of resource groups
2517  * @state: the lock state to acquire the RG lock in
2518  *
2519  * FIXME: Don't use NOFAIL
2520  *
2521  */
2522 
2523 void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state)
2524 {
2525 	unsigned int x;
2526 
2527 	rlist->rl_ghs = kcalloc(rlist->rl_rgrps, sizeof(struct gfs2_holder),
2528 				GFP_NOFS | __GFP_NOFAIL);
2529 	for (x = 0; x < rlist->rl_rgrps; x++)
2530 		gfs2_holder_init(rlist->rl_rgd[x]->rd_gl,
2531 				state, 0,
2532 				&rlist->rl_ghs[x]);
2533 }
2534 
2535 /**
2536  * gfs2_rlist_free - free a resource group list
2537  * @rlist: the list of resource groups
2538  *
2539  */
2540 
2541 void gfs2_rlist_free(struct gfs2_rgrp_list *rlist)
2542 {
2543 	unsigned int x;
2544 
2545 	kfree(rlist->rl_rgd);
2546 
2547 	if (rlist->rl_ghs) {
2548 		for (x = 0; x < rlist->rl_rgrps; x++)
2549 			gfs2_holder_uninit(&rlist->rl_ghs[x]);
2550 		kfree(rlist->rl_ghs);
2551 		rlist->rl_ghs = NULL;
2552 	}
2553 }
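
/*
 * Editor's note: the three rlist helpers above form a small lifecycle.
 * A sketch of typical use, calling gfs2_rlist_add() once per block of
 * interest (error handling omitted; compare the truncate code in
 * bmap.c):
 *
 *	struct gfs2_rgrp_list rlist;
 *	int error;
 *
 *	memset(&rlist, 0, sizeof(rlist));
 *	gfs2_rlist_add(ip, &rlist, block);
 *	gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE);
 *	error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
 *	if (!error) {
 *		(... modify the rgrps ...)
 *		gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs);
 *	}
 *	gfs2_rlist_free(&rlist);
 */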
2554 
2555