/*
 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
 * Copyright (C) 2004-2008 Red Hat, Inc.  All rights reserved.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
 * of the GNU General Public License version 2.
 */

#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/fs.h>
#include <linux/gfs2_ondisk.h>
#include <linux/prefetch.h>
#include <linux/blkdev.h>
#include <linux/rbtree.h>
#include <linux/random.h>

#include "gfs2.h"
#include "incore.h"
#include "glock.h"
#include "glops.h"
#include "lops.h"
#include "meta_io.h"
#include "quota.h"
#include "rgrp.h"
#include "super.h"
#include "trans.h"
#include "util.h"
#include "log.h"
#include "inode.h"
#include "trace_gfs2.h"

#define BFITNOENT ((u32)~0)
#define NO_BLOCK ((u64)~0)

#if BITS_PER_LONG == 32
#define LBITMASK   (0x55555555UL)
#define LBITSKIP55 (0x55555555UL)
#define LBITSKIP00 (0x00000000UL)
#else
#define LBITMASK   (0x5555555555555555UL)
#define LBITSKIP55 (0x5555555555555555UL)
#define LBITSKIP00 (0x0000000000000000UL)
#endif

/*
 * These routines are used by the resource group routines (rgrp.c)
 * to keep track of block allocation.  Each block is represented by two
 * bits.  So, each byte represents GFS2_NBBY (i.e. 4) blocks.
 *
 * 0 = Free
 * 1 = Used (not metadata)
 * 2 = Unlinked (still in use) inode
 * 3 = Used (metadata)
 */
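
/*
 * Worked example (illustrative only): block 0 of a byte occupies the two
 * least significant bits (see gfs2_setbit below), so the byte 0xE4
 * (binary 11 10 01 00) encodes four blocks as:
 *
 *   block 0 -> 00 = free
 *   block 1 -> 01 = used (data)
 *   block 2 -> 10 = unlinked inode
 *   block 3 -> 11 = used (metadata)
 */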

struct gfs2_extent {
	struct gfs2_rbm rbm;
	u32 len;
};

static const char valid_change[16] = {
	        /* current */
	/* n */ 0, 1, 1, 1,
	/* e */ 1, 0, 0, 0,
	/* w */ 0, 0, 0, 1,
	        1, 0, 0, 0
};

static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext,
			 const struct gfs2_inode *ip, bool nowrap,
			 const struct gfs2_alloc_parms *ap);


/**
 * gfs2_setbit - Set a bit in the bitmaps
 * @rbm: The position of the bit to set
 * @do_clone: Also set the clone bitmap, if it exists
 * @new_state: the new state of the block
 *
 */

static inline void gfs2_setbit(const struct gfs2_rbm *rbm, bool do_clone,
			       unsigned char new_state)
{
	unsigned char *byte1, *byte2, *end, cur_state;
	struct gfs2_bitmap *bi = rbm_bi(rbm);
	unsigned int buflen = bi->bi_len;
	const unsigned int bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE;

	byte1 = bi->bi_bh->b_data + bi->bi_offset + (rbm->offset / GFS2_NBBY);
	end = bi->bi_bh->b_data + bi->bi_offset + buflen;

	BUG_ON(byte1 >= end);

	cur_state = (*byte1 >> bit) & GFS2_BIT_MASK;

	if (unlikely(!valid_change[new_state * 4 + cur_state])) {
		printk(KERN_WARNING "GFS2: buf_blk = 0x%x old_state=%d, "
		       "new_state=%d\n", rbm->offset, cur_state, new_state);
		printk(KERN_WARNING "GFS2: rgrp=0x%llx bi_start=0x%x\n",
		       (unsigned long long)rbm->rgd->rd_addr, bi->bi_start);
		printk(KERN_WARNING "GFS2: bi_offset=0x%x bi_len=0x%x\n",
		       bi->bi_offset, bi->bi_len);
		dump_stack();
		gfs2_consist_rgrpd(rbm->rgd);
		return;
	}
	*byte1 ^= (cur_state ^ new_state) << bit;

	if (do_clone && bi->bi_clone) {
		byte2 = bi->bi_clone + bi->bi_offset + (rbm->offset / GFS2_NBBY);
		cur_state = (*byte2 >> bit) & GFS2_BIT_MASK;
		*byte2 ^= (cur_state ^ new_state) << bit;
	}
}

/**
 * gfs2_testbit - test a bit in the bitmaps
 * @rbm: The bit to test
 *
 * Returns: The two bit block state of the requested bit
 */

static inline u8 gfs2_testbit(const struct gfs2_rbm *rbm)
{
	struct gfs2_bitmap *bi = rbm_bi(rbm);
	const u8 *buffer = bi->bi_bh->b_data + bi->bi_offset;
	const u8 *byte;
	unsigned int bit;

	byte = buffer + (rbm->offset / GFS2_NBBY);
	bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE;

	return (*byte >> bit) & GFS2_BIT_MASK;
}

/**
 * gfs2_bit_search
 * @ptr: Pointer to bitmap data
 * @mask: Mask to use (normally 0x55555.... but adjusted for search start)
 * @state: The state we are searching for
 *
 * We XOR the bitmap data with a pattern which is the bitwise opposite
 * of what we are looking for; this gives rise to a pattern of ones
 * wherever there is a match. Since we have two bits per entry, we
 * take this pattern, shift it down by one place and then AND it with
 * the original. All the even bit positions (0,2,4, etc) then represent
 * successful matches, so we mask with 0x55555..... to remove the unwanted
 * odd bit positions.
 *
 * This allows searching of a whole u64 at once (32 blocks) with a
 * single test (on 64 bit arches).
 */

static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state)
{
	u64 tmp;
	static const u64 search[] = {
		[0] = 0xffffffffffffffffULL,
		[1] = 0xaaaaaaaaaaaaaaaaULL,
		[2] = 0x5555555555555555ULL,
		[3] = 0x0000000000000000ULL,
	};
	tmp = le64_to_cpu(*ptr) ^ search[state];
	tmp &= (tmp >> 1);
	tmp &= mask;
	return tmp;
}
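
/*
 * Illustrative trace (values invented for the example): searching the
 * word 0x00000000000000E4 for GFS2_BLKST_FREE (state 0) gives
 * tmp = word ^ search[0] = ~word = 0xFFFFFFFFFFFFFF1B; after
 * tmp &= (tmp >> 1) and masking with 0x5555555555555555ULL the result
 * is 0x5555555555555501: bit 0 flags block 0 as free and the even bits
 * above byte 0 flag blocks 4..31 (the all-zero bytes) as free, while
 * the used/unlinked pairs in 0xE4 (blocks 1-3) produce no set even bit.
 */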

/**
 * rs_cmp - multi-block reservation range compare
 * @blk: absolute file system block number of the new reservation
 * @len: number of blocks in the new reservation
 * @rs: existing reservation to compare against
 *
 * returns: 1 if the block range is beyond the reach of the reservation
 *         -1 if the block range is before the start of the reservation
 *          0 if the block range overlaps with the reservation
 */
static inline int rs_cmp(u64 blk, u32 len, struct gfs2_blkreserv *rs)
{
	u64 startblk = gfs2_rbm_to_block(&rs->rs_rbm);

	if (blk >= startblk + rs->rs_free)
		return 1;
	if (blk + len - 1 < startblk)
		return -1;
	return 0;
}
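
/*
 * Example (illustrative numbers): for a reservation starting at block
 * 1000 with rs_free = 10 (covering blocks 1000..1009),
 * rs_cmp(1010, 5, rs) returns 1 (entirely past the reservation),
 * rs_cmp(995, 5, rs) returns -1 (ends at block 999, before it), and
 * rs_cmp(999, 2, rs) returns 0 (block 1000 overlaps).
 */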

/**
 * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing
 *       a block in a given allocation state.
 * @buf: the buffer that holds the bitmaps
 * @len: the length (in bytes) of the buffer
 * @goal: start search at this block's bit-pair (within @buf)
 * @state: GFS2_BLKST_XXX the state of the block we're looking for.
 *
 * Scope of @goal and returned block number is only within this bitmap buffer,
 * not entire rgrp or filesystem.  @buf will be offset from the actual
 * beginning of a bitmap block buffer, skipping any header structures, but
 * headers are always a multiple of 64 bits long so that the buffer is
 * always aligned to a 64 bit boundary.
 *
 * The size of the buffer is in bytes, but it is assumed that it is
 * always ok to read a complete multiple of 64 bits at the end
 * of the block in case the end is not aligned to a natural boundary.
 *
 * Returns: the block number (bitmap buffer scope) that was found
 */

static u32 gfs2_bitfit(const u8 *buf, const unsigned int len,
		       u32 goal, u8 state)
{
	u32 spoint = (goal << 1) & ((8*sizeof(u64)) - 1);
	const __le64 *ptr = ((__le64 *)buf) + (goal >> 5);
	const __le64 *end = (__le64 *)(buf + ALIGN(len, sizeof(u64)));
	u64 tmp;
	u64 mask = 0x5555555555555555ULL;
	u32 bit;

	/* Mask off bits we don't care about at the start of the search */
	mask <<= spoint;
	tmp = gfs2_bit_search(ptr, mask, state);
	ptr++;
	while(tmp == 0 && ptr < end) {
		tmp = gfs2_bit_search(ptr, 0x5555555555555555ULL, state);
		ptr++;
	}
	/* Mask off any bits which are more than len bytes from the start */
	if (ptr == end && (len & (sizeof(u64) - 1)))
		tmp &= (((u64)~0) >> (64 - 8*(len & (sizeof(u64) - 1))));
	/* Didn't find anything, so return */
	if (tmp == 0)
		return BFITNOENT;
	ptr--;
	bit = __ffs64(tmp);
	bit /= 2;	/* two bits per entry in the bitmap */
	return (((const unsigned char *)ptr - buf) * GFS2_NBBY) + bit;
}
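
/*
 * Example of the start-of-search arithmetic (illustrative only): with
 * goal = 37, the block lives in u64 word 37 >> 5 = 1 (each word covers
 * 32 blocks), and spoint = (37 << 1) & 63 = 10, i.e. block 5 within
 * that word, at bit offset 5 * 2 = 10. Shifting the 0x5555... mask
 * left by spoint discards matches for blocks 32..36 of the first word.
 */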

/**
 * gfs2_rbm_from_block - Set the rbm based upon rgd and block number
 * @rbm: The rbm with rgd already set correctly
 * @block: The block number (filesystem relative)
 *
 * This sets the bi and offset members of an rbm based on a
 * resource group and a filesystem relative block number. The
 * resource group must be set in the rbm on entry, the bi and
 * offset members will be set by this function.
 *
 * Returns: 0 on success, or an error code
 */

static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block)
{
	u64 rblock = block - rbm->rgd->rd_data0;

	if (WARN_ON_ONCE(rblock > UINT_MAX))
		return -EINVAL;
	if (block >= rbm->rgd->rd_data0 + rbm->rgd->rd_data)
		return -E2BIG;

	rbm->bii = 0;
	rbm->offset = (u32)(rblock);
	/* Check if the block is within the first bitmap block */
	if (rbm->offset < rbm_bi(rbm)->bi_blocks)
		return 0;

	/* Adjust for the size diff between gfs2_meta_header and gfs2_rgrp */
	rbm->offset += (sizeof(struct gfs2_rgrp) -
			sizeof(struct gfs2_meta_header)) * GFS2_NBBY;
	rbm->bii = rbm->offset / rbm->rgd->rd_sbd->sd_blocks_per_bitmap;
	rbm->offset -= rbm->bii * rbm->rgd->rd_sbd->sd_blocks_per_bitmap;
	return 0;
}
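
/*
 * Worked example (assumed sizes, for illustration only): taking the
 * usual on-disk sizes of 24 bytes for gfs2_meta_header and 128 bytes
 * for gfs2_rgrp, the adjustment above is (128 - 24) * GFS2_NBBY = 416
 * block positions. With, say, sd_blocks_per_bitmap = 16288 (a 4KiB
 * block size), a relative block of 20000 becomes offset
 * 20000 + 416 = 20416, so bii = 20416 / 16288 = 1 and
 * offset = 20416 - 16288 = 4128 within bitmap block 1.
 */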

/**
 * gfs2_rbm_incr - increment an rbm structure
 * @rbm: The rbm with rgd already set correctly
 *
 * This function takes an existing rbm structure and increments it to the next
 * viable block offset.
 *
 * Returns: If incrementing the offset would cause the rbm to go past the
 *          end of the rgrp, true is returned, otherwise false.
 *
 */

static bool gfs2_rbm_incr(struct gfs2_rbm *rbm)
{
	if (rbm->offset + 1 < rbm_bi(rbm)->bi_blocks) { /* in the same bitmap */
		rbm->offset++;
		return false;
	}
	if (rbm->bii == rbm->rgd->rd_length - 1) /* at the last bitmap */
		return true;

	rbm->offset = 0;
	rbm->bii++;
	return false;
}

/**
 * gfs2_unaligned_extlen - Look for free blocks which are not byte aligned
 * @rbm: Position to search (value/result)
 * @n_unaligned: Number of unaligned blocks to check
 * @len: Decremented for each block found (terminate on zero)
 *
 * Returns: true if a non-free block is encountered
 */

static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *len)
{
	u32 n;
	u8 res;

	for (n = 0; n < n_unaligned; n++) {
		res = gfs2_testbit(rbm);
		if (res != GFS2_BLKST_FREE)
			return true;
		(*len)--;
		if (*len == 0)
			return true;
		if (gfs2_rbm_incr(rbm))
			return true;
	}

	return false;
}

/**
 * gfs2_free_extlen - Return extent length of free blocks
 * @rrbm: Starting position
 * @len: Max length to check
 *
 * Starting at the block specified by the rbm, see how many free blocks
 * there are, not reading more than len blocks ahead. This can be done
 * using memchr_inv when the blocks are byte aligned, but has to be done
 * on a block by block basis in case of unaligned blocks. Also this
 * function can cope with bitmap boundaries (although it must stop on
 * a resource group boundary)
 *
 * Returns: Number of free blocks in the extent
 */

static u32 gfs2_free_extlen(const struct gfs2_rbm *rrbm, u32 len)
{
	struct gfs2_rbm rbm = *rrbm;
	u32 n_unaligned = rbm.offset & 3;
	u32 size = len;
	u32 bytes;
	u32 chunk_size;
	u8 *ptr, *start, *end;
	u64 block;
	struct gfs2_bitmap *bi;

	if (n_unaligned &&
	    gfs2_unaligned_extlen(&rbm, 4 - n_unaligned, &len))
		goto out;

	n_unaligned = len & 3;
	/* Start is now byte aligned */
	while (len > 3) {
		bi = rbm_bi(&rbm);
		start = bi->bi_bh->b_data;
		if (bi->bi_clone)
			start = bi->bi_clone;
		end = start + bi->bi_bh->b_size;
		start += bi->bi_offset;
		BUG_ON(rbm.offset & 3);
		start += (rbm.offset / GFS2_NBBY);
		bytes = min_t(u32, len / GFS2_NBBY, (end - start));
		ptr = memchr_inv(start, 0, bytes);
		chunk_size = ((ptr == NULL) ? bytes : (ptr - start));
		chunk_size *= GFS2_NBBY;
		BUG_ON(len < chunk_size);
		len -= chunk_size;
		block = gfs2_rbm_to_block(&rbm);
		if (gfs2_rbm_from_block(&rbm, block + chunk_size)) {
			n_unaligned = 0;
			break;
		}
		if (ptr) {
			n_unaligned = 3;
			break;
		}
		n_unaligned = len & 3;
	}

	/* Deal with any bits left over at the end */
	if (n_unaligned)
		gfs2_unaligned_extlen(&rbm, n_unaligned, &len);
out:
	return size - len;
}
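
/*
 * Illustrative walk-through: starting at offset 6 (n_unaligned = 2),
 * the two blocks up to the next byte boundary are tested one at a time
 * via gfs2_testbit(). From the byte boundary onwards, a 0x00 byte
 * means four free blocks at once, so memchr_inv() skips whole runs of
 * free bytes; the first non-zero byte (if any) ends the byte-aligned
 * chunk and leaves at most 3 trailing blocks to test individually.
 */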

/**
 * gfs2_bitcount - count the number of blocks in a specified state
 * @rgd: the resource group descriptor
 * @buffer: the buffer that holds the bitmaps
 * @buflen: the length (in bytes) of the buffer
 * @state: the state of the block we're looking for
 *
 * Returns: The number of blocks
 */

static u32 gfs2_bitcount(struct gfs2_rgrpd *rgd, const u8 *buffer,
			 unsigned int buflen, u8 state)
{
	const u8 *byte = buffer;
	const u8 *end = buffer + buflen;
	const u8 state1 = state << 2;
	const u8 state2 = state << 4;
	const u8 state3 = state << 6;
	u32 count = 0;

	for (; byte < end; byte++) {
		if (((*byte) & 0x03) == state)
			count++;
		if (((*byte) & 0x0C) == state1)
			count++;
		if (((*byte) & 0x30) == state2)
			count++;
		if (((*byte) & 0xC0) == state3)
			count++;
	}

	return count;
}
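
/*
 * Example (illustrative): counting GFS2_BLKST_FREE (state 0) in the
 * byte 0xE4 (blocks 0..3 in states 0,1,2,3), only the 0x03 mask
 * matches (0xE4 & 0x03 == 0), so the byte contributes 1 to the count;
 * a 0x00 byte would match all four masks and contribute 4.
 */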

/**
 * gfs2_rgrp_verify - Verify that a resource group is consistent
 * @rgd: the rgrp
 *
 */

void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd)
{
	struct gfs2_sbd *sdp = rgd->rd_sbd;
	struct gfs2_bitmap *bi = NULL;
	u32 length = rgd->rd_length;
	u32 count[4], tmp;
	int buf, x;

	memset(count, 0, 4 * sizeof(u32));

	/* Count # blocks in each of 4 possible allocation states */
	for (buf = 0; buf < length; buf++) {
		bi = rgd->rd_bits + buf;
		for (x = 0; x < 4; x++)
			count[x] += gfs2_bitcount(rgd,
						  bi->bi_bh->b_data +
						  bi->bi_offset,
						  bi->bi_len, x);
	}

	if (count[0] != rgd->rd_free) {
		if (gfs2_consist_rgrpd(rgd))
			fs_err(sdp, "free data mismatch:  %u != %u\n",
			       count[0], rgd->rd_free);
		return;
	}

	tmp = rgd->rd_data - rgd->rd_free - rgd->rd_dinodes;
	if (count[1] != tmp) {
		if (gfs2_consist_rgrpd(rgd))
			fs_err(sdp, "used data mismatch:  %u != %u\n",
			       count[1], tmp);
		return;
	}

	if (count[2] + count[3] != rgd->rd_dinodes) {
		if (gfs2_consist_rgrpd(rgd))
			fs_err(sdp, "used metadata mismatch:  %u != %u\n",
			       count[2] + count[3], rgd->rd_dinodes);
		return;
	}
}

static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block)
{
	u64 first = rgd->rd_data0;
	u64 last = first + rgd->rd_data;
	return first <= block && block < last;
}

/**
 * gfs2_blk2rgrpd - Find resource group for a given data/meta block number
 * @sdp: The GFS2 superblock
 * @blk: The data block number
 * @exact: True if this needs to be an exact match
 *
 * Returns: The resource group, or NULL if not found
 */

struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk, bool exact)
{
	struct rb_node *n, *next;
	struct gfs2_rgrpd *cur;

	spin_lock(&sdp->sd_rindex_spin);
	n = sdp->sd_rindex_tree.rb_node;
	while (n) {
		cur = rb_entry(n, struct gfs2_rgrpd, rd_node);
		next = NULL;
		if (blk < cur->rd_addr)
			next = n->rb_left;
		else if (blk >= cur->rd_data0 + cur->rd_data)
			next = n->rb_right;
		if (next == NULL) {
			spin_unlock(&sdp->sd_rindex_spin);
			if (exact) {
				if (blk < cur->rd_addr)
					return NULL;
				if (blk >= cur->rd_data0 + cur->rd_data)
					return NULL;
			}
			return cur;
		}
		n = next;
	}
	spin_unlock(&sdp->sd_rindex_spin);

	return NULL;
}

/**
 * gfs2_rgrpd_get_first - get the first Resource Group in the filesystem
 * @sdp: The GFS2 superblock
 *
 * Returns: The first rgrp in the filesystem
 */

struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp)
{
	const struct rb_node *n;
	struct gfs2_rgrpd *rgd;

	spin_lock(&sdp->sd_rindex_spin);
	n = rb_first(&sdp->sd_rindex_tree);
	rgd = rb_entry(n, struct gfs2_rgrpd, rd_node);
	spin_unlock(&sdp->sd_rindex_spin);

	return rgd;
}

/**
 * gfs2_rgrpd_get_next - get the next RG
 * @rgd: the resource group descriptor
 *
 * Returns: The next rgrp
 */

struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd)
{
	struct gfs2_sbd *sdp = rgd->rd_sbd;
	const struct rb_node *n;

	spin_lock(&sdp->sd_rindex_spin);
	n = rb_next(&rgd->rd_node);
	if (n == NULL)
		n = rb_first(&sdp->sd_rindex_tree);

	if (unlikely(&rgd->rd_node == n)) {
		spin_unlock(&sdp->sd_rindex_spin);
		return NULL;
	}
	rgd = rb_entry(n, struct gfs2_rgrpd, rd_node);
	spin_unlock(&sdp->sd_rindex_spin);
	return rgd;
}

void gfs2_free_clones(struct gfs2_rgrpd *rgd)
{
	int x;

	for (x = 0; x < rgd->rd_length; x++) {
		struct gfs2_bitmap *bi = rgd->rd_bits + x;
		kfree(bi->bi_clone);
		bi->bi_clone = NULL;
	}
}

/**
 * gfs2_rs_alloc - make sure we have a reservation assigned to the inode
 * @ip: the inode for this reservation
 */
int gfs2_rs_alloc(struct gfs2_inode *ip)
{
	int error = 0;

	down_write(&ip->i_rw_mutex);
	if (ip->i_res)
		goto out;

	ip->i_res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS);
	if (!ip->i_res) {
		error = -ENOMEM;
		goto out;
	}

	RB_CLEAR_NODE(&ip->i_res->rs_node);
out:
	up_write(&ip->i_rw_mutex);
	return error;
}

static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs)
{
	gfs2_print_dbg(seq, "  B: n:%llu s:%llu b:%u f:%u\n",
		       (unsigned long long)rs->rs_inum,
		       (unsigned long long)gfs2_rbm_to_block(&rs->rs_rbm),
		       rs->rs_rbm.offset, rs->rs_free);
}

/**
 * __rs_deltree - remove a multi-block reservation from the rgd tree
 * @rs: The reservation to remove
 *
 */
static void __rs_deltree(struct gfs2_blkreserv *rs)
{
	struct gfs2_rgrpd *rgd;

	if (!gfs2_rs_active(rs))
		return;

	rgd = rs->rs_rbm.rgd;
	trace_gfs2_rs(rs, TRACE_RS_TREEDEL);
	rb_erase(&rs->rs_node, &rgd->rd_rstree);
	RB_CLEAR_NODE(&rs->rs_node);

	if (rs->rs_free) {
		struct gfs2_bitmap *bi = rbm_bi(&rs->rs_rbm);

		/* return reserved blocks to the rgrp */
		BUG_ON(rs->rs_rbm.rgd->rd_reserved < rs->rs_free);
		rs->rs_rbm.rgd->rd_reserved -= rs->rs_free;
		/* The rgrp extent failure point is likely not to increase;
		   it will only do so if the freed blocks are somehow
		   contiguous with a span of free blocks that follows. Still,
		   it will force the number to be recalculated later. */
		rgd->rd_extfail_pt += rs->rs_free;
		rs->rs_free = 0;
		clear_bit(GBF_FULL, &bi->bi_flags);
	}
}

/**
 * gfs2_rs_deltree - remove a multi-block reservation from the rgd tree
 * @rs: The reservation to remove
 *
 */
void gfs2_rs_deltree(struct gfs2_blkreserv *rs)
{
	struct gfs2_rgrpd *rgd;

	rgd = rs->rs_rbm.rgd;
	if (rgd) {
		spin_lock(&rgd->rd_rsspin);
		__rs_deltree(rs);
		spin_unlock(&rgd->rd_rsspin);
	}
}

/**
 * gfs2_rs_delete - delete a multi-block reservation
 * @ip: The inode for this reservation
 * @wcount: The inode's write count, or NULL
 *
 */
void gfs2_rs_delete(struct gfs2_inode *ip, atomic_t *wcount)
{
	down_write(&ip->i_rw_mutex);
	if (ip->i_res && ((wcount == NULL) || (atomic_read(wcount) <= 1))) {
		gfs2_rs_deltree(ip->i_res);
		BUG_ON(ip->i_res->rs_free);
		kmem_cache_free(gfs2_rsrv_cachep, ip->i_res);
		ip->i_res = NULL;
	}
	up_write(&ip->i_rw_mutex);
}

/**
 * return_all_reservations - return all reserved blocks back to the rgrp.
 * @rgd: the rgrp that needs its space back
 *
 * We previously reserved a bunch of blocks for allocation. Now we need to
 * give them back. This leaves the reservation structures intact, but removes
 * all of their corresponding "no-fly zones".
 */
static void return_all_reservations(struct gfs2_rgrpd *rgd)
{
	struct rb_node *n;
	struct gfs2_blkreserv *rs;

	spin_lock(&rgd->rd_rsspin);
	while ((n = rb_first(&rgd->rd_rstree))) {
		rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
		__rs_deltree(rs);
	}
	spin_unlock(&rgd->rd_rsspin);
}

void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
{
	struct rb_node *n;
	struct gfs2_rgrpd *rgd;
	struct gfs2_glock *gl;

	while ((n = rb_first(&sdp->sd_rindex_tree))) {
		rgd = rb_entry(n, struct gfs2_rgrpd, rd_node);
		gl = rgd->rd_gl;

		rb_erase(n, &sdp->sd_rindex_tree);

		if (gl) {
			spin_lock(&gl->gl_spin);
			gl->gl_object = NULL;
			spin_unlock(&gl->gl_spin);
			gfs2_glock_add_to_lru(gl);
			gfs2_glock_put(gl);
		}

		gfs2_free_clones(rgd);
		kfree(rgd->rd_bits);
		return_all_reservations(rgd);
		kmem_cache_free(gfs2_rgrpd_cachep, rgd);
	}
}

static void gfs2_rindex_print(const struct gfs2_rgrpd *rgd)
{
	printk(KERN_INFO "  ri_addr = %llu\n", (unsigned long long)rgd->rd_addr);
	printk(KERN_INFO "  ri_length = %u\n", rgd->rd_length);
	printk(KERN_INFO "  ri_data0 = %llu\n", (unsigned long long)rgd->rd_data0);
	printk(KERN_INFO "  ri_data = %u\n", rgd->rd_data);
	printk(KERN_INFO "  ri_bitbytes = %u\n", rgd->rd_bitbytes);
}

/**
 * compute_bitstructs - Compute the bitmap sizes
 * @rgd: The resource group descriptor
 *
 * Calculates bitmap descriptors, one for each block that contains bitmap data
 *
 * Returns: errno
 */

static int compute_bitstructs(struct gfs2_rgrpd *rgd)
{
	struct gfs2_sbd *sdp = rgd->rd_sbd;
	struct gfs2_bitmap *bi;
	u32 length = rgd->rd_length; /* # blocks in hdr & bitmap */
	u32 bytes_left, bytes;
	int x;

	if (!length)
		return -EINVAL;

	rgd->rd_bits = kcalloc(length, sizeof(struct gfs2_bitmap), GFP_NOFS);
	if (!rgd->rd_bits)
		return -ENOMEM;

	bytes_left = rgd->rd_bitbytes;

	for (x = 0; x < length; x++) {
		bi = rgd->rd_bits + x;

		bi->bi_flags = 0;
		/* small rgrp; bitmap stored completely in header block */
		if (length == 1) {
			bytes = bytes_left;
			bi->bi_offset = sizeof(struct gfs2_rgrp);
			bi->bi_start = 0;
			bi->bi_len = bytes;
			bi->bi_blocks = bytes * GFS2_NBBY;
		/* header block */
		} else if (x == 0) {
			bytes = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_rgrp);
			bi->bi_offset = sizeof(struct gfs2_rgrp);
			bi->bi_start = 0;
			bi->bi_len = bytes;
			bi->bi_blocks = bytes * GFS2_NBBY;
		/* last block */
		} else if (x + 1 == length) {
			bytes = bytes_left;
			bi->bi_offset = sizeof(struct gfs2_meta_header);
			bi->bi_start = rgd->rd_bitbytes - bytes_left;
			bi->bi_len = bytes;
			bi->bi_blocks = bytes * GFS2_NBBY;
		/* other blocks */
		} else {
			bytes = sdp->sd_sb.sb_bsize -
				sizeof(struct gfs2_meta_header);
			bi->bi_offset = sizeof(struct gfs2_meta_header);
			bi->bi_start = rgd->rd_bitbytes - bytes_left;
			bi->bi_len = bytes;
			bi->bi_blocks = bytes * GFS2_NBBY;
		}

		bytes_left -= bytes;
	}

	if (bytes_left) {
		gfs2_consist_rgrpd(rgd);
		return -EIO;
	}
	bi = rgd->rd_bits + (length - 1);
	if ((bi->bi_start + bi->bi_len) * GFS2_NBBY != rgd->rd_data) {
		if (gfs2_consist_rgrpd(rgd)) {
			gfs2_rindex_print(rgd);
			fs_err(sdp, "start=%u len=%u offset=%u\n",
			       bi->bi_start, bi->bi_len, bi->bi_offset);
		}
		return -EIO;
	}

	return 0;
}

/**
 * gfs2_ri_total - Total up the file system space, according to the rindex.
 * @sdp: the filesystem
 *
 */
u64 gfs2_ri_total(struct gfs2_sbd *sdp)
{
	u64 total_data = 0;
	struct inode *inode = sdp->sd_rindex;
	struct gfs2_inode *ip = GFS2_I(inode);
	char buf[sizeof(struct gfs2_rindex)];
	int error, rgrps;

	for (rgrps = 0;; rgrps++) {
		loff_t pos = rgrps * sizeof(struct gfs2_rindex);

		if (pos + sizeof(struct gfs2_rindex) > i_size_read(inode))
			break;
		error = gfs2_internal_read(ip, buf, &pos,
					   sizeof(struct gfs2_rindex));
		if (error != sizeof(struct gfs2_rindex))
			break;
		total_data += be32_to_cpu(((struct gfs2_rindex *)buf)->ri_data);
	}
	return total_data;
}

static int rgd_insert(struct gfs2_rgrpd *rgd)
{
	struct gfs2_sbd *sdp = rgd->rd_sbd;
	struct rb_node **newn = &sdp->sd_rindex_tree.rb_node, *parent = NULL;

	/* Figure out where to put new node */
	while (*newn) {
		struct gfs2_rgrpd *cur = rb_entry(*newn, struct gfs2_rgrpd,
						  rd_node);

		parent = *newn;
		if (rgd->rd_addr < cur->rd_addr)
			newn = &((*newn)->rb_left);
		else if (rgd->rd_addr > cur->rd_addr)
			newn = &((*newn)->rb_right);
		else
			return -EEXIST;
	}

	rb_link_node(&rgd->rd_node, parent, newn);
	rb_insert_color(&rgd->rd_node, &sdp->sd_rindex_tree);
	sdp->sd_rgrps++;
	return 0;
}

/**
 * read_rindex_entry - Pull in a new resource index entry from the disk
 * @ip: Pointer to the rindex inode
 *
 * Returns: 0 on success, > 0 on EOF, error code otherwise
 */

static int read_rindex_entry(struct gfs2_inode *ip)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	const unsigned bsize = sdp->sd_sb.sb_bsize;
	loff_t pos = sdp->sd_rgrps * sizeof(struct gfs2_rindex);
	struct gfs2_rindex buf;
	int error;
	struct gfs2_rgrpd *rgd;

	if (pos >= i_size_read(&ip->i_inode))
		return 1;

	error = gfs2_internal_read(ip, (char *)&buf, &pos,
				   sizeof(struct gfs2_rindex));

	if (error != sizeof(struct gfs2_rindex))
		return (error == 0) ? 1 : error;

	rgd = kmem_cache_zalloc(gfs2_rgrpd_cachep, GFP_NOFS);
	error = -ENOMEM;
	if (!rgd)
		return error;

	rgd->rd_sbd = sdp;
	rgd->rd_addr = be64_to_cpu(buf.ri_addr);
	rgd->rd_length = be32_to_cpu(buf.ri_length);
	rgd->rd_data0 = be64_to_cpu(buf.ri_data0);
	rgd->rd_data = be32_to_cpu(buf.ri_data);
	rgd->rd_bitbytes = be32_to_cpu(buf.ri_bitbytes);
	spin_lock_init(&rgd->rd_rsspin);

	error = compute_bitstructs(rgd);
	if (error)
		goto fail;

	error = gfs2_glock_get(sdp, rgd->rd_addr,
			       &gfs2_rgrp_glops, CREATE, &rgd->rd_gl);
	if (error)
		goto fail;

	rgd->rd_gl->gl_object = rgd;
	rgd->rd_gl->gl_vm.start = rgd->rd_addr * bsize;
	rgd->rd_gl->gl_vm.end = rgd->rd_gl->gl_vm.start + (rgd->rd_length * bsize) - 1;
	rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lksb.sb_lvbptr;
	rgd->rd_flags &= ~GFS2_RDF_UPTODATE;
	if (rgd->rd_data > sdp->sd_max_rg_data)
		sdp->sd_max_rg_data = rgd->rd_data;
	spin_lock(&sdp->sd_rindex_spin);
	error = rgd_insert(rgd);
	spin_unlock(&sdp->sd_rindex_spin);
	if (!error)
		return 0;

	error = 0; /* someone else read in the rgrp; free it and ignore it */
	gfs2_glock_put(rgd->rd_gl);

fail:
	kfree(rgd->rd_bits);
	kmem_cache_free(gfs2_rgrpd_cachep, rgd);
	return error;
}

/**
 * gfs2_ri_update - Pull in a new resource index from the disk
 * @ip: pointer to the rindex inode
 *
 * Returns: 0 on successful update, error code otherwise
 */

static int gfs2_ri_update(struct gfs2_inode *ip)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	int error;

	do {
		error = read_rindex_entry(ip);
	} while (error == 0);

	if (error < 0)
		return error;

	sdp->sd_rindex_uptodate = 1;
	return 0;
}

/**
 * gfs2_rindex_update - Update the rindex if required
 * @sdp: The GFS2 superblock
 *
 * We grab a lock on the rindex inode to make sure that it doesn't
 * change whilst we are performing an operation. We keep this lock
 * for quite long periods of time compared to other locks. This
 * doesn't matter, since it is shared and it is very, very rarely
 * accessed in the exclusive mode (i.e. only when expanding the filesystem).
 *
 * This makes sure that we're using the latest copy of the resource index
 * special file, which might have been updated if someone expanded the
 * filesystem (via gfs2_grow utility), which adds new resource groups.
 *
 * Returns: 0 on success, error code otherwise
 */

int gfs2_rindex_update(struct gfs2_sbd *sdp)
{
	struct gfs2_inode *ip = GFS2_I(sdp->sd_rindex);
	struct gfs2_glock *gl = ip->i_gl;
	struct gfs2_holder ri_gh;
	int error = 0;
	int unlock_required = 0;

	/* Read new copy from disk if we don't have the latest */
	if (!sdp->sd_rindex_uptodate) {
		if (!gfs2_glock_is_locked_by_me(gl)) {
			error = gfs2_glock_nq_init(gl, LM_ST_SHARED, 0, &ri_gh);
			if (error)
				return error;
			unlock_required = 1;
		}
		if (!sdp->sd_rindex_uptodate)
			error = gfs2_ri_update(ip);
		if (unlock_required)
			gfs2_glock_dq_uninit(&ri_gh);
	}

	return error;
}

static void gfs2_rgrp_in(struct gfs2_rgrpd *rgd, const void *buf)
{
	const struct gfs2_rgrp *str = buf;
	u32 rg_flags;

	rg_flags = be32_to_cpu(str->rg_flags);
	rg_flags &= ~GFS2_RDF_MASK;
	rgd->rd_flags &= GFS2_RDF_MASK;
	rgd->rd_flags |= rg_flags;
	rgd->rd_free = be32_to_cpu(str->rg_free);
	rgd->rd_dinodes = be32_to_cpu(str->rg_dinodes);
	rgd->rd_igeneration = be64_to_cpu(str->rg_igeneration);
}

static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf)
{
	struct gfs2_rgrp *str = buf;

	str->rg_flags = cpu_to_be32(rgd->rd_flags & ~GFS2_RDF_MASK);
	str->rg_free = cpu_to_be32(rgd->rd_free);
	str->rg_dinodes = cpu_to_be32(rgd->rd_dinodes);
	str->__pad = cpu_to_be32(0);
	str->rg_igeneration = cpu_to_be64(rgd->rd_igeneration);
	memset(&str->rg_reserved, 0, sizeof(str->rg_reserved));
}

static int gfs2_rgrp_lvb_valid(struct gfs2_rgrpd *rgd)
{
	struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl;
	struct gfs2_rgrp *str = (struct gfs2_rgrp *)rgd->rd_bits[0].bi_bh->b_data;

	if (rgl->rl_flags != str->rg_flags || rgl->rl_free != str->rg_free ||
	    rgl->rl_dinodes != str->rg_dinodes ||
	    rgl->rl_igeneration != str->rg_igeneration)
		return 0;
	return 1;
}

static void gfs2_rgrp_ondisk2lvb(struct gfs2_rgrp_lvb *rgl, const void *buf)
{
	const struct gfs2_rgrp *str = buf;

	rgl->rl_magic = cpu_to_be32(GFS2_MAGIC);
	rgl->rl_flags = str->rg_flags;
	rgl->rl_free = str->rg_free;
	rgl->rl_dinodes = str->rg_dinodes;
	rgl->rl_igeneration = str->rg_igeneration;
	rgl->__pad = 0UL;
}

static void update_rgrp_lvb_unlinked(struct gfs2_rgrpd *rgd, u32 change)
{
	struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl;
	u32 unlinked = be32_to_cpu(rgl->rl_unlinked) + change;
	rgl->rl_unlinked = cpu_to_be32(unlinked);
}

static u32 count_unlinked(struct gfs2_rgrpd *rgd)
{
	struct gfs2_bitmap *bi;
	const u32 length = rgd->rd_length;
	const u8 *buffer = NULL;
	u32 i, goal, count = 0;

	for (i = 0, bi = rgd->rd_bits; i < length; i++, bi++) {
		goal = 0;
		buffer = bi->bi_bh->b_data + bi->bi_offset;
		WARN_ON(!buffer_uptodate(bi->bi_bh));
		while (goal < bi->bi_len * GFS2_NBBY) {
			goal = gfs2_bitfit(buffer, bi->bi_len, goal,
					   GFS2_BLKST_UNLINKED);
			if (goal == BFITNOENT)
				break;
			count++;
			goal++;
		}
	}

	return count;
}

/**
 * gfs2_rgrp_bh_get - Read in a RG's header and bitmaps
 * @rgd: the struct gfs2_rgrpd describing the RG to read in
 *
 * Read in all of a Resource Group's header and bitmap blocks.
 * Caller must eventually call gfs2_rgrp_go_unlock() to release the bitmaps.
 *
 * Returns: errno
 */

int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
{
	struct gfs2_sbd *sdp = rgd->rd_sbd;
	struct gfs2_glock *gl = rgd->rd_gl;
	unsigned int length = rgd->rd_length;
	struct gfs2_bitmap *bi;
	unsigned int x, y;
	int error;

	if (rgd->rd_bits[0].bi_bh != NULL)
		return 0;

	for (x = 0; x < length; x++) {
		bi = rgd->rd_bits + x;
		error = gfs2_meta_read(gl, rgd->rd_addr + x, 0, &bi->bi_bh);
		if (error)
			goto fail;
	}

	for (y = length; y--;) {
		bi = rgd->rd_bits + y;
		error = gfs2_meta_wait(sdp, bi->bi_bh);
		if (error)
			goto fail;
		if (gfs2_metatype_check(sdp, bi->bi_bh, y ? GFS2_METATYPE_RB :
					      GFS2_METATYPE_RG)) {
			error = -EIO;
			goto fail;
		}
	}

	if (!(rgd->rd_flags & GFS2_RDF_UPTODATE)) {
		for (x = 0; x < length; x++)
			clear_bit(GBF_FULL, &rgd->rd_bits[x].bi_flags);
		gfs2_rgrp_in(rgd, (rgd->rd_bits[0].bi_bh)->b_data);
		rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK);
		rgd->rd_free_clone = rgd->rd_free;
		/* max out the rgrp allocation failure point */
		rgd->rd_extfail_pt = rgd->rd_free;
	}
	if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) {
		rgd->rd_rgl->rl_unlinked = cpu_to_be32(count_unlinked(rgd));
		gfs2_rgrp_ondisk2lvb(rgd->rd_rgl,
				     rgd->rd_bits[0].bi_bh->b_data);
	} else if (sdp->sd_args.ar_rgrplvb) {
		if (!gfs2_rgrp_lvb_valid(rgd)) {
			gfs2_consist_rgrpd(rgd);
			error = -EIO;
			goto fail;
		}
		if (rgd->rd_rgl->rl_unlinked == 0)
			rgd->rd_flags &= ~GFS2_RDF_CHECK;
	}
	return 0;

fail:
	while (x--) {
		bi = rgd->rd_bits + x;
		brelse(bi->bi_bh);
		bi->bi_bh = NULL;
		gfs2_assert_warn(sdp, !bi->bi_clone);
	}

	return error;
}

int update_rgrp_lvb(struct gfs2_rgrpd *rgd)
{
	u32 rl_flags;

	if (rgd->rd_flags & GFS2_RDF_UPTODATE)
		return 0;

	if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic)
		return gfs2_rgrp_bh_get(rgd);

	rl_flags = be32_to_cpu(rgd->rd_rgl->rl_flags);
	rl_flags &= ~GFS2_RDF_MASK;
	rgd->rd_flags &= GFS2_RDF_MASK;
	rgd->rd_flags |= (rl_flags | GFS2_RDF_UPTODATE | GFS2_RDF_CHECK);
	if (rgd->rd_rgl->rl_unlinked == 0)
		rgd->rd_flags &= ~GFS2_RDF_CHECK;
	rgd->rd_free = be32_to_cpu(rgd->rd_rgl->rl_free);
	rgd->rd_free_clone = rgd->rd_free;
	rgd->rd_dinodes = be32_to_cpu(rgd->rd_rgl->rl_dinodes);
	rgd->rd_igeneration = be64_to_cpu(rgd->rd_rgl->rl_igeneration);
	return 0;
}

int gfs2_rgrp_go_lock(struct gfs2_holder *gh)
{
	struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object;
	struct gfs2_sbd *sdp = rgd->rd_sbd;

	if (gh->gh_flags & GL_SKIP && sdp->sd_args.ar_rgrplvb)
		return 0;
	return gfs2_rgrp_bh_get(rgd);
}

/**
 * gfs2_rgrp_go_unlock - Release RG bitmaps read in with gfs2_rgrp_bh_get()
 * @gh: The glock holder for the resource group
 *
 */

void gfs2_rgrp_go_unlock(struct gfs2_holder *gh)
{
	struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object;
	int x, length = rgd->rd_length;

	for (x = 0; x < length; x++) {
		struct gfs2_bitmap *bi = rgd->rd_bits + x;
		if (bi->bi_bh) {
			brelse(bi->bi_bh);
			bi->bi_bh = NULL;
		}
	}
}

int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
			    struct buffer_head *bh,
			    const struct gfs2_bitmap *bi, unsigned minlen,
			    u64 *ptrimmed)
{
	struct super_block *sb = sdp->sd_vfs;
	u64 blk;
	sector_t start = 0;
	sector_t nr_blks = 0;
	int rv;
	unsigned int x;
	u32 trimmed = 0;
	u8 diff;

	for (x = 0; x < bi->bi_len; x++) {
		const u8 *clone = bi->bi_clone ? bi->bi_clone : bi->bi_bh->b_data;
		clone += bi->bi_offset;
		clone += x;
		if (bh) {
			const u8 *orig = bh->b_data + bi->bi_offset + x;
			diff = ~(*orig | (*orig >> 1)) & (*clone | (*clone >> 1));
		} else {
			diff = ~(*clone | (*clone >> 1));
		}
		diff &= 0x55;
		if (diff == 0)
			continue;
		blk = offset + ((bi->bi_start + x) * GFS2_NBBY);
		while(diff) {
			if (diff & 1) {
				if (nr_blks == 0)
					goto start_new_extent;
				if ((start + nr_blks) != blk) {
					if (nr_blks >= minlen) {
						rv = sb_issue_discard(sb,
							start, nr_blks,
							GFP_NOFS, 0);
						if (rv)
							goto fail;
						trimmed += nr_blks;
					}
					nr_blks = 0;
start_new_extent:
					start = blk;
				}
				nr_blks++;
			}
			diff >>= 2;
			blk++;
		}
	}
	if (nr_blks >= minlen) {
		rv = sb_issue_discard(sb, start, nr_blks, GFP_NOFS, 0);
		if (rv)
			goto fail;
		trimmed += nr_blks;
	}
	if (ptrimmed)
		*ptrimmed = trimmed;
	return 0;

fail:
	if (sdp->sd_args.ar_discard)
		fs_warn(sdp, "error %d on discard request, turning discards off for this filesystem", rv);
	sdp->sd_args.ar_discard = 0;
	return -EIO;
}
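
/*
 * Example of the diff computation (illustrative values): with a
 * committed byte *orig = 0x00 (all four blocks free on disk) and a
 * clone byte *clone = 0x05 (blocks 0 and 1 were in use), we get
 * ~(0x00 | 0x00) & (0x05 | 0x02) & 0x55 = 0x05: bits 0 and 2 flag
 * blocks 0 and 1 as newly freed and therefore candidates for discard.
 */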

/**
 * gfs2_fitrim - Generate discard requests for unused bits of the filesystem
 * @filp: Any file on the filesystem
 * @argp: Pointer to the arguments (also used to pass result)
 *
 * Returns: 0 on success, otherwise error code
 */

int gfs2_fitrim(struct file *filp, void __user *argp)
{
	struct inode *inode = file_inode(filp);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct request_queue *q = bdev_get_queue(sdp->sd_vfs->s_bdev);
	struct buffer_head *bh;
	struct gfs2_rgrpd *rgd;
	struct gfs2_rgrpd *rgd_end;
	struct gfs2_holder gh;
	struct fstrim_range r;
	int ret = 0;
	u64 amt;
	u64 trimmed = 0;
	u64 start, end, minlen;
	unsigned int x;
	unsigned bs_shift = sdp->sd_sb.sb_bsize_shift;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (!blk_queue_discard(q))
		return -EOPNOTSUPP;

	if (copy_from_user(&r, argp, sizeof(r)))
		return -EFAULT;

	ret = gfs2_rindex_update(sdp);
	if (ret)
		return ret;

	start = r.start >> bs_shift;
	end = start + (r.len >> bs_shift);
	minlen = max_t(u64, r.minlen,
		       q->limits.discard_granularity) >> bs_shift;

	if (end <= start || minlen > sdp->sd_max_rg_data)
		return -EINVAL;

	rgd = gfs2_blk2rgrpd(sdp, start, 0);
	rgd_end = gfs2_blk2rgrpd(sdp, end, 0);

	if ((gfs2_rgrpd_get_first(sdp) == gfs2_rgrpd_get_next(rgd_end))
	    && (start > rgd_end->rd_data0 + rgd_end->rd_data))
		return -EINVAL; /* start is beyond the end of the fs */

	while (1) {
		ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &gh);
		if (ret)
			goto out;

		if (!(rgd->rd_flags & GFS2_RGF_TRIMMED)) {
			/* Trim each bitmap in the rgrp */
			for (x = 0; x < rgd->rd_length; x++) {
				struct gfs2_bitmap *bi = rgd->rd_bits + x;
				ret = gfs2_rgrp_send_discards(sdp,
						rgd->rd_data0, NULL, bi, minlen,
						&amt);
				if (ret) {
					gfs2_glock_dq_uninit(&gh);
					goto out;
				}
				trimmed += amt;
			}

			/* Mark rgrp as having been trimmed */
			ret = gfs2_trans_begin(sdp, RES_RG_HDR, 0);
			if (ret == 0) {
				bh = rgd->rd_bits[0].bi_bh;
				rgd->rd_flags |= GFS2_RGF_TRIMMED;
				gfs2_trans_add_meta(rgd->rd_gl, bh);
				gfs2_rgrp_out(rgd, bh->b_data);
				gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, bh->b_data);
				gfs2_trans_end(sdp);
			}
		}
		gfs2_glock_dq_uninit(&gh);

		if (rgd == rgd_end)
			break;

		rgd = gfs2_rgrpd_get_next(rgd);
	}

out:
	r.len = trimmed << bs_shift;
	if (copy_to_user(argp, &r, sizeof(r)))
		return -EFAULT;

	return ret;
}

/**
 * rs_insert - insert a new multi-block reservation into the rgrp's rb_tree
 * @ip: the inode structure
 *
 */
static void rs_insert(struct gfs2_inode *ip)
{
	struct rb_node **newn, *parent = NULL;
	int rc;
	struct gfs2_blkreserv *rs = ip->i_res;
	struct gfs2_rgrpd *rgd = rs->rs_rbm.rgd;
	u64 fsblock = gfs2_rbm_to_block(&rs->rs_rbm);

	BUG_ON(gfs2_rs_active(rs));

	spin_lock(&rgd->rd_rsspin);
	newn = &rgd->rd_rstree.rb_node;
	while (*newn) {
		struct gfs2_blkreserv *cur =
			rb_entry(*newn, struct gfs2_blkreserv, rs_node);

		parent = *newn;
		rc = rs_cmp(fsblock, rs->rs_free, cur);
		if (rc > 0)
			newn = &((*newn)->rb_right);
		else if (rc < 0)
			newn = &((*newn)->rb_left);
		else {
			spin_unlock(&rgd->rd_rsspin);
			WARN_ON(1);
			return;
		}
	}

	rb_link_node(&rs->rs_node, parent, newn);
	rb_insert_color(&rs->rs_node, &rgd->rd_rstree);

	/* Do our rgrp accounting for the reservation */
	rgd->rd_reserved += rs->rs_free; /* blocks reserved */
	spin_unlock(&rgd->rd_rsspin);
	trace_gfs2_rs(rs, TRACE_RS_INSERT);
}

/**
 * rg_mblk_search - find a group of multiple free blocks to form a reservation
 * @rgd: the resource group descriptor
 * @ip: pointer to the inode for which we're reserving blocks
 * @ap: the allocation parameters
 *
 */

static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip,
			   const struct gfs2_alloc_parms *ap)
{
	struct gfs2_rbm rbm = { .rgd = rgd, };
	u64 goal;
	struct gfs2_blkreserv *rs = ip->i_res;
	u32 extlen;
	u32 free_blocks = rgd->rd_free_clone - rgd->rd_reserved;
	int ret;
	struct inode *inode = &ip->i_inode;

	if (S_ISDIR(inode->i_mode))
		extlen = 1;
	else {
		extlen = max_t(u32, atomic_read(&rs->rs_sizehint), ap->target);
		extlen = clamp(extlen, RGRP_RSRV_MINBLKS, free_blocks);
	}
	if ((rgd->rd_free_clone < rgd->rd_reserved) || (free_blocks < extlen))
		return;

	/* Find bitmap block that contains bits for goal block */
	if (rgrp_contains_block(rgd, ip->i_goal))
		goal = ip->i_goal;
	else
		goal = rgd->rd_last_alloc + rgd->rd_data0;

	if (WARN_ON(gfs2_rbm_from_block(&rbm, goal)))
		return;

	ret = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &extlen, ip, true, ap);
	if (ret == 0) {
		rs->rs_rbm = rbm;
		rs->rs_free = extlen;
		rs->rs_inum = ip->i_no_addr;
		rs_insert(ip);
	} else {
		if (goal == rgd->rd_last_alloc + rgd->rd_data0)
			rgd->rd_last_alloc = 0;
	}
}

/**
 * gfs2_next_unreserved_block - Return next block that is not reserved
 * @rgd: The resource group
 * @block: The starting block
 * @length: The required length
 * @ip: Ignore any reservations for this inode
 *
 * If the block does not appear in any reservation, then return the
 * block number unchanged. If it does appear in the reservation, then
 * keep looking through the tree of reservations in order to find the
 * first block number which is not reserved.
 */

static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block,
				      u32 length,
				      const struct gfs2_inode *ip)
{
	struct gfs2_blkreserv *rs;
	struct rb_node *n;
	int rc;

	spin_lock(&rgd->rd_rsspin);
	n = rgd->rd_rstree.rb_node;
	while (n) {
		rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
		rc = rs_cmp(block, length, rs);
		if (rc < 0)
			n = n->rb_left;
		else if (rc > 0)
			n = n->rb_right;
		else
			break;
	}

	if (n) {
		while ((rs_cmp(block, length, rs) == 0) && (ip->i_res != rs)) {
			block = gfs2_rbm_to_block(&rs->rs_rbm) + rs->rs_free;
			n = n->rb_right;
			if (n == NULL)
				break;
			rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
		}
	}

	spin_unlock(&rgd->rd_rsspin);
	return block;
}

/**
 * gfs2_reservation_check_and_update - Check for reservations during block alloc
 * @rbm: The current position in the resource group
 * @ip: The inode for which we are searching for blocks
 * @minext: The minimum extent length
 * @maxext: A pointer to the maximum extent structure
 *
 * This checks the current position in the rgrp to see whether there is
 * a reservation covering this block. If not then this function is a
 * no-op. If there is, then the position is moved to the end of the
 * contiguous reservation(s) so that we are pointing at the first
 * non-reserved block.
 *
 * Returns: 0 if no reservation, 1 if @rbm has changed, otherwise an error
 */

static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm,
					     const struct gfs2_inode *ip,
					     u32 minext,
					     struct gfs2_extent *maxext)
{
	u64 block = gfs2_rbm_to_block(rbm);
	u32 extlen = 1;
	u64 nblock;
	int ret;

	/*
	 * If we have a minimum extent length, then skip over any extent
	 * which is less than the min extent length in size.
	 */
	if (minext) {
		extlen = gfs2_free_extlen(rbm, minext);
		if (extlen <= maxext->len)
			goto fail;
	}

	/*
	 * Check the extent which has been found against the reservations
	 * and skip if parts of it are already reserved
	 */
	nblock = gfs2_next_unreserved_block(rbm->rgd, block, extlen, ip);
	if (nblock == block) {
		if (!minext || extlen >= minext)
			return 0;

		if (extlen > maxext->len) {
			maxext->len = extlen;
			maxext->rbm = *rbm;
		}
fail:
		nblock = block + extlen;
	}
	ret = gfs2_rbm_from_block(rbm, nblock);
	if (ret < 0)
		return ret;
	return 1;
}

/**
 * gfs2_rbm_find - Look for blocks of a particular state
 * @rbm: Value/result starting position and final position
 * @state: The state which we want to find
 * @minext: Pointer to the requested extent length (NULL for a single block)
 *          This is updated to be the actual reservation size.
 * @ip: If set, check for reservations
 * @nowrap: Stop looking at the end of the rgrp, rather than wrapping
 *          around until we've reached the starting point.
 * @ap: the allocation parameters
 *
 * Side effects:
 * - If looking for free blocks, we set GBF_FULL on each bitmap which
 *   has no free blocks in it.
 * - If looking for free blocks, we set rd_extfail_pt on each rgrp which
 *   has come up short on a free block search.
 *
 * Returns: 0 on success, -ENOSPC if there is no block of the requested state
 */

static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext,
			 const struct gfs2_inode *ip, bool nowrap,
			 const struct gfs2_alloc_parms *ap)
{
	struct buffer_head *bh;
	int initial_bii;
	u32 initial_offset;
	int first_bii = rbm->bii;
	u32 first_offset = rbm->offset;
	u32 offset;
	u8 *buffer;
	int n = 0;
	int iters = rbm->rgd->rd_length;
	int ret;
	struct gfs2_bitmap *bi;
	struct gfs2_extent maxext = { .rbm.rgd = rbm->rgd, };

	/* If we are not starting at the beginning of a bitmap, then we
	 * need to add one to the bitmap count to ensure that we search
	 * the starting bitmap twice.
	 */
	if (rbm->offset != 0)
		iters++;

	while(1) {
		bi = rbm_bi(rbm);
		if (test_bit(GBF_FULL, &bi->bi_flags) &&
		    (state == GFS2_BLKST_FREE))
			goto next_bitmap;

		bh = bi->bi_bh;
		buffer = bh->b_data + bi->bi_offset;
		WARN_ON(!buffer_uptodate(bh));
		if (state != GFS2_BLKST_UNLINKED && bi->bi_clone)
			buffer = bi->bi_clone + bi->bi_offset;
		initial_offset = rbm->offset;
		offset = gfs2_bitfit(buffer, bi->bi_len, rbm->offset, state);
		if (offset == BFITNOENT)
			goto bitmap_full;
		rbm->offset = offset;
		if (ip == NULL)
			return 0;

		initial_bii = rbm->bii;
		ret = gfs2_reservation_check_and_update(rbm, ip,
							minext ? *minext : 0,
							&maxext);
		if (ret == 0)
			return 0;
		if (ret > 0) {
			n += (rbm->bii - initial_bii);
			goto next_iter;
		}
		if (ret == -E2BIG) {
			rbm->bii = 0;
			rbm->offset = 0;
			n += (rbm->bii - initial_bii);
			goto res_covered_end_of_rgrp;
		}
		return ret;

bitmap_full:	/* Mark bitmap as full and fall through */
		if ((state == GFS2_BLKST_FREE) && initial_offset == 0) {
			struct gfs2_bitmap *bi = rbm_bi(rbm);
			set_bit(GBF_FULL, &bi->bi_flags);
		}

next_bitmap:	/* Find next bitmap in the rgrp */
		rbm->offset = 0;
		rbm->bii++;
		if (rbm->bii == rbm->rgd->rd_length)
			rbm->bii = 0;
res_covered_end_of_rgrp:
		if ((rbm->bii == 0) && nowrap)
			break;
		n++;
next_iter:
		if (n >= iters)
			break;
	}

	if (minext == NULL || state != GFS2_BLKST_FREE)
		return -ENOSPC;

	/* If the extent was too small, and it's smaller than the smallest
	   to have failed before, remember for future reference that it's
	   useless to search this rgrp again for this amount or more. */
	if ((first_offset == 0) && (first_bii == 0) &&
	    (*minext < rbm->rgd->rd_extfail_pt))
		rbm->rgd->rd_extfail_pt = *minext;

	/* If the maximum extent we found is big enough to fulfill the
	   minimum requirements, use it anyway. */
	if (maxext.len) {
		*rbm = maxext.rbm;
		*minext = maxext.len;
		return 0;
	}

	return -ENOSPC;
}

/**
 * try_rgrp_unlink - Look for any unlinked, allocated, but unused inodes
 * @rgd: The rgrp
 * @last_unlinked: block address of the last dinode we unlinked
 * @skip: block address we should explicitly not unlink
 *
 * Returns nothing; any unlinked dinodes found are handed to the delete
 * workqueue via their glocks.
 */

static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip)
{
	u64 block;
	struct gfs2_sbd *sdp = rgd->rd_sbd;
	struct gfs2_glock *gl;
	struct gfs2_inode *ip;
	int error;
	int found = 0;
	struct gfs2_rbm rbm = { .rgd = rgd, .bii = 0, .offset = 0 };

	while (1) {
		down_write(&sdp->sd_log_flush_lock);
		error = gfs2_rbm_find(&rbm, GFS2_BLKST_UNLINKED, NULL, NULL,
				      true, NULL);
		up_write(&sdp->sd_log_flush_lock);
		if (error == -ENOSPC)
			break;
		if (WARN_ON_ONCE(error))
			break;

		block = gfs2_rbm_to_block(&rbm);
		if (gfs2_rbm_from_block(&rbm, block + 1))
			break;
		if (*last_unlinked != NO_BLOCK && block <= *last_unlinked)
			continue;
		if (block == skip)
			continue;
		*last_unlinked = block;

		error = gfs2_glock_get(sdp, block, &gfs2_inode_glops, CREATE, &gl);
		if (error)
			continue;

		/* If the inode is already in cache, we can ignore it here
		 * because the existing inode disposal code will deal with
		 * it when all refs have gone away. Accessing gl_object like
		 * this is not safe in general. Here it is ok because we do
		 * not dereference the pointer, and we only need an approx
		 * answer to whether it is NULL or not.
		 */
		ip = gl->gl_object;

		if (ip || queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0)
			gfs2_glock_put(gl);
		else
			found++;

		/* Limit reclaim to sensible number of tasks */
		if (found > NR_CPUS)
			return;
	}

	rgd->rd_flags &= ~GFS2_RDF_CHECK;
	return;
}

/**
 * gfs2_rgrp_congested - Use stats to figure out whether an rgrp is congested
 * @rgd: The rgrp in question
 * @loops: An indication of how picky we can be (0=very, 1=less so)
 *
 * This function uses the recently added glock statistics in order to
 * figure out whether a particular resource group is suffering from
 * contention from multiple nodes. This is done purely on the basis
 * of timings, since this is the only data we have to work with and
 * our aim here is to reject a resource group which is highly contended
 * but (very important) not to do this too often in order to ensure that
 * we do not end up introducing fragmentation by changing resource
 * groups when not actually required.
 *
 * The calculation is fairly simple, we want to know whether the SRTTB
 * (i.e. smoothed round trip time for blocking operations) to acquire
 * the lock for this rgrp's glock is significantly greater than the
 * time taken for resource groups on average. We introduce a margin in
 * the form of the variable @var which is computed as the sum of the two
 * respective variances, and multiplied by a factor depending on @loops
 * and whether we have a lot of data to base the decision on. This is
 * then tested against the square difference of the means in order to
 * decide whether the result is statistically significant or not.
 *
 * Returns: A boolean verdict on the congestion status
 */
1805 
1806 static bool gfs2_rgrp_congested(const struct gfs2_rgrpd *rgd, int loops)
1807 {
1808 	const struct gfs2_glock *gl = rgd->rd_gl;
1809 	const struct gfs2_sbd *sdp = gl->gl_sbd;
1810 	struct gfs2_lkstats *st;
1811 	s64 r_dcount, l_dcount;
1812 	s64 r_srttb, l_srttb;
1813 	s64 srttb_diff;
1814 	s64 sqr_diff;
1815 	s64 var;
1816 
1817 	preempt_disable();
1818 	st = &this_cpu_ptr(sdp->sd_lkstats)->lkstats[LM_TYPE_RGRP];
1819 	r_srttb = st->stats[GFS2_LKS_SRTTB];
1820 	r_dcount = st->stats[GFS2_LKS_DCOUNT];
1821 	var = st->stats[GFS2_LKS_SRTTVARB] +
1822 	      gl->gl_stats.stats[GFS2_LKS_SRTTVARB];
1823 	preempt_enable();
1824 
1825 	l_srttb = gl->gl_stats.stats[GFS2_LKS_SRTTB];
1826 	l_dcount = gl->gl_stats.stats[GFS2_LKS_DCOUNT];
1827 
1828 	if ((l_dcount < 1) || (r_dcount < 1) || (r_srttb == 0))
1829 		return false;
1830 
1831 	srttb_diff = r_srttb - l_srttb;
1832 	sqr_diff = srttb_diff * srttb_diff;
1833 
1834 	var *= 2;
1835 	if (l_dcount < 8 || r_dcount < 8)
1836 		var *= 2;
1837 	if (loops == 1)
1838 		var *= 2;
1839 
1840 	return ((srttb_diff < 0) && (sqr_diff > var));
1841 }
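
/*
 * Editorial sketch, not part of the original file: the verdict above
 * reduces to plain arithmetic.  With d = r_srttb - l_srttb and v the
 * sum of the two SRTTB variances, the rgrp counts as congested when
 * d < 0 (the local mean is the larger) and d^2 > margin, where
 * margin = 2 * v, doubled again for small sample counts and once more
 * on the second loop.  Restated in isolation:
 */
static inline bool congestion_rule(s64 r_srttb, s64 l_srttb, s64 var_sum,
				   s64 l_dcount, s64 r_dcount, int loops)
{
	s64 d = r_srttb - l_srttb;
	s64 margin = var_sum * 2;

	if (l_dcount < 8 || r_dcount < 8)
		margin *= 2;	/* few samples: demand a bigger difference */
	if (loops == 1)
		margin *= 2;	/* second pass: be less picky */

	return (d < 0) && (d * d > margin);
}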
1842 
1843 /**
1844  * gfs2_rgrp_used_recently - check the time since the rgrp glock was used
1845  * @rs: The block reservation with the rgrp to test
1846  * @msecs: The time limit in milliseconds
1847  *
1848  * Returns: True if more than @msecs have elapsed since the glock was last used
1849  */
1850 static bool gfs2_rgrp_used_recently(const struct gfs2_blkreserv *rs,
1851 				    u64 msecs)
1852 {
1853 	u64 tdiff;
1854 
1855 	tdiff = ktime_to_ns(ktime_sub(ktime_get_real(),
1856                             rs->rs_rbm.rgd->rd_gl->gl_dstamp));
1857 
1858 	return tdiff > (msecs * 1000 * 1000);
1859 }
1860 
1861 static u32 gfs2_orlov_skip(const struct gfs2_inode *ip)
1862 {
1863 	const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1864 	u32 skip;
1865 
1866 	get_random_bytes(&skip, sizeof(skip));
1867 	return skip % sdp->sd_rgrps;
1868 }
1869 
1870 static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *begin)
1871 {
1872 	struct gfs2_rgrpd *rgd = *pos;
1873 	struct gfs2_sbd *sdp = rgd->rd_sbd;
1874 
1875 	rgd = gfs2_rgrpd_get_next(rgd);
1876 	if (rgd == NULL)
1877 		rgd = gfs2_rgrpd_get_first(sdp);
1878 	*pos = rgd;
1879 	if (rgd != begin) /* If we didn't wrap */
1880 		return true;
1881 	return false;
1882 }
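
/*
 * Editorial sketch, not part of the original file: gfs2_select_rgrp()
 * above treats the rgrp list as a ring - advance one step, wrap back to
 * the first rgrp at the end, and report false once the walk returns to
 * where it began.  The same step over array indices (n > 0 assumed):
 */
static inline bool ring_step(unsigned int *pos, unsigned int begin,
			     unsigned int n)
{
	*pos = (*pos + 1) % n;	/* advance, wrapping at the end */
	return *pos != begin;	/* false once we are back at the start */
}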
1883 
1884 /**
1885  * gfs2_inplace_reserve - Reserve space in the filesystem
1886  * @ip: the inode to reserve space for
1887  * @ap: the allocation parameters
1888  *
1889  * Returns: errno
1890  */
1891 
1892 int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *ap)
1893 {
1894 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1895 	struct gfs2_rgrpd *begin = NULL;
1896 	struct gfs2_blkreserv *rs = ip->i_res;
1897 	int error = 0, rg_locked, flags = 0;
1898 	u64 last_unlinked = NO_BLOCK;
1899 	int loops = 0;
1900 	u32 skip = 0;
1901 
1902 	if (sdp->sd_args.ar_rgrplvb)
1903 		flags |= GL_SKIP;
1904 	if (gfs2_assert_warn(sdp, ap->target))
1905 		return -EINVAL;
1906 	if (gfs2_rs_active(rs)) {
1907 		begin = rs->rs_rbm.rgd;
1908 	} else if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) {
1909 		rs->rs_rbm.rgd = begin = ip->i_rgd;
1910 	} else {
1911 		rs->rs_rbm.rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1);
1912 	}
1913 	if (S_ISDIR(ip->i_inode.i_mode) && (ap->aflags & GFS2_AF_ORLOV))
1914 		skip = gfs2_orlov_skip(ip);
1915 	if (rs->rs_rbm.rgd == NULL)
1916 		return -EBADSLT;
1917 
1918 	while (loops < 3) {
1919 		rg_locked = 1;
1920 
1921 		if (!gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) {
1922 			rg_locked = 0;
1923 			if (skip && skip--)
1924 				goto next_rgrp;
1925 			if (!gfs2_rs_active(rs) && (loops < 2) &&
1926 			     gfs2_rgrp_used_recently(rs, 1000) &&
1927 			     gfs2_rgrp_congested(rs->rs_rbm.rgd, loops))
1928 				goto next_rgrp;
1929 			error = gfs2_glock_nq_init(rs->rs_rbm.rgd->rd_gl,
1930 						   LM_ST_EXCLUSIVE, flags,
1931 						   &rs->rs_rgd_gh);
1932 			if (unlikely(error))
1933 				return error;
1934 			if (!gfs2_rs_active(rs) && (loops < 2) &&
1935 			    gfs2_rgrp_congested(rs->rs_rbm.rgd, loops))
1936 				goto skip_rgrp;
1937 			if (sdp->sd_args.ar_rgrplvb) {
1938 				error = update_rgrp_lvb(rs->rs_rbm.rgd);
1939 				if (unlikely(error)) {
1940 					gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
1941 					return error;
1942 				}
1943 			}
1944 		}
1945 
1946 		/* Skip unusable resource groups */
1947 		if ((rs->rs_rbm.rgd->rd_flags & (GFS2_RGF_NOALLOC |
1948 						 GFS2_RDF_ERROR)) ||
1949 		    (ap->target > rs->rs_rbm.rgd->rd_extfail_pt))
1950 			goto skip_rgrp;
1951 
1952 		if (sdp->sd_args.ar_rgrplvb)
1953 			gfs2_rgrp_bh_get(rs->rs_rbm.rgd);
1954 
1955 		/* Get a reservation if we don't already have one */
1956 		if (!gfs2_rs_active(rs))
1957 			rg_mblk_search(rs->rs_rbm.rgd, ip, ap);
1958 
1959 		/* Skip rgrps when we can't get a reservation on first pass */
1960 		if (!gfs2_rs_active(rs) && (loops < 1))
1961 			goto check_rgrp;
1962 
1963 		/* If rgrp has enough free space, use it */
1964 		if (rs->rs_rbm.rgd->rd_free_clone >= ap->target) {
1965 			ip->i_rgd = rs->rs_rbm.rgd;
1966 			return 0;
1967 		}
1968 
1969 check_rgrp:
1970 		/* Check for unlinked inodes which can be reclaimed */
1971 		if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK)
1972 			try_rgrp_unlink(rs->rs_rbm.rgd, &last_unlinked,
1973 					ip->i_no_addr);
1974 skip_rgrp:
1975 		/* Drop reservation, if we couldn't use reserved rgrp */
1976 		if (gfs2_rs_active(rs))
1977 			gfs2_rs_deltree(rs);
1978 
1979 		/* Unlock rgrp if required */
1980 		if (!rg_locked)
1981 			gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
1982 next_rgrp:
1983 		/* Find the next rgrp, and continue looking */
1984 		if (gfs2_select_rgrp(&rs->rs_rbm.rgd, begin))
1985 			continue;
1986 		if (skip)
1987 			continue;
1988 
1989 		/* If we've scanned all the rgrps but found no free blocks,
1990 		 * then check for some less likely conditions before trying
1991 		 * again.
1992 		 */
1993 		loops++;
1994 		/* Check that fs hasn't grown if writing to rindex */
1995 		if (ip == GFS2_I(sdp->sd_rindex) && !sdp->sd_rindex_uptodate) {
1996 			error = gfs2_ri_update(ip);
1997 			if (error)
1998 				return error;
1999 		}
2000 		/* Flushing the log may release space */
2001 		if (loops == 2)
2002 			gfs2_log_flush(sdp, NULL);
2003 	}
2004 
2005 	return -ENOSPC;
2006 }
2007 
2008 /**
2009  * gfs2_inplace_release - release an inplace reservation
2010  * @ip: the inode the reservation was taken out on
2011  *
2012  * Release a reservation made by gfs2_inplace_reserve().
2013  */
2014 
2015 void gfs2_inplace_release(struct gfs2_inode *ip)
2016 {
2017 	struct gfs2_blkreserv *rs = ip->i_res;
2018 
2019 	if (rs->rs_rgd_gh.gh_gl)
2020 		gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
2021 }
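
/*
 * Editorial sketch, not part of the original file: a typical caller
 * brackets an allocation with gfs2_inplace_reserve() and
 * gfs2_inplace_release().  The function name and target length below
 * are illustrative placeholders, not real gfs2 callers.
 */
static inline int example_reserve(struct gfs2_inode *ip, u32 blocks)
{
	struct gfs2_alloc_parms ap = { .target = blocks };
	int error;

	error = gfs2_inplace_reserve(ip, &ap);
	if (error)
		return error;	/* e.g. -ENOSPC after all three loops */

	/* ... build a transaction and allocate via gfs2_alloc_blocks() ... */

	gfs2_inplace_release(ip);
	return 0;
}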
2022 
2023 /**
2024  * gfs2_get_block_type - Check that a block in a resource group is of a given type
2025  * @rgd: the resource group holding the block
2026  * @block: the block number
2027  *
2028  * Returns: The block type (GFS2_BLKST_*)
2029  */
2030 
2031 static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block)
2032 {
2033 	struct gfs2_rbm rbm = { .rgd = rgd, };
2034 	int ret;
2035 
2036 	ret = gfs2_rbm_from_block(&rbm, block);
2037 	WARN_ON_ONCE(ret != 0);
2038 
2039 	return gfs2_testbit(&rbm);
2040 }
2041 
2042 
2043 /**
2044  * gfs2_alloc_extent - allocate an extent from a given bitmap
2045  * @rbm: the position of the first block of the extent
2046  * @dinode: TRUE if the first block we allocate is for a dinode
2047  * @n: The extent length (value/result)
2048  *
2049  * Add the bitmap buffer(s) to the transaction and mark the first block
2050  * GFS2_BLKST_DINODE or GFS2_BLKST_USED (per @dinode), and the rest GFS2_BLKST_USED.
2051  */
2052 static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode,
2053 			     unsigned int *n)
2054 {
2055 	struct gfs2_rbm pos = { .rgd = rbm->rgd, };
2056 	const unsigned int elen = *n;
2057 	u64 block;
2058 	int ret;
2059 
2060 	*n = 1;
2061 	block = gfs2_rbm_to_block(rbm);
2062 	gfs2_trans_add_meta(rbm->rgd->rd_gl, rbm_bi(rbm)->bi_bh);
2063 	gfs2_setbit(rbm, true, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
2064 	block++;
2065 	while (*n < elen) {
2066 		ret = gfs2_rbm_from_block(&pos, block);
2067 		if (ret || gfs2_testbit(&pos) != GFS2_BLKST_FREE)
2068 			break;
2069 		gfs2_trans_add_meta(pos.rgd->rd_gl, rbm_bi(&pos)->bi_bh);
2070 		gfs2_setbit(&pos, true, GFS2_BLKST_USED);
2071 		(*n)++;
2072 		block++;
2073 	}
2074 }
2075 
2076 /**
2077  * rgblk_free - Change alloc state of given block(s)
2078  * @sdp: the filesystem
2079  * @bstart: the start of a run of blocks to free
2080  * @blen: the length of the block run (all must lie within ONE RG!)
2081  * @new_state: the new state of the block(s) (GFS2_BLKST_*)
2082  *
2083  * Returns:  Resource group containing the block(s)
2084  */
2085 
2086 static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
2087 				     u32 blen, unsigned char new_state)
2088 {
2089 	struct gfs2_rbm rbm;
2090 	struct gfs2_bitmap *bi;
2091 
2092 	rbm.rgd = gfs2_blk2rgrpd(sdp, bstart, 1);
2093 	if (!rbm.rgd) {
2094 		if (gfs2_consist(sdp))
2095 			fs_err(sdp, "block = %llu\n", (unsigned long long)bstart);
2096 		return NULL;
2097 	}
2098 
2099 	while (blen--) {
2100 		gfs2_rbm_from_block(&rbm, bstart);
2101 		bi = rbm_bi(&rbm);
2102 		bstart++;
2103 		if (!bi->bi_clone) {
2104 			bi->bi_clone = kmalloc(bi->bi_bh->b_size,
2105 					       GFP_NOFS | __GFP_NOFAIL);
2106 			memcpy(bi->bi_clone + bi->bi_offset,
2107 			       bi->bi_bh->b_data + bi->bi_offset, bi->bi_len);
2108 		}
2109 		gfs2_trans_add_meta(rbm.rgd->rd_gl, bi->bi_bh);
2110 		gfs2_setbit(&rbm, false, new_state);
2111 	}
2112 
2113 	return rbm.rgd;
2114 }
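
/*
 * Editorial sketch, not part of the original file: rgblk_free() above
 * clones a bitmap buffer lazily - the first state change against a
 * buffer copies its current contents, so the pre-change bit pattern
 * survives until the transaction commits.  The copy-on-first-write
 * step in miniature:
 */
static inline unsigned char *clone_on_first_write(unsigned char **clone,
						  const unsigned char *orig,
						  size_t len)
{
	if (!*clone) {
		*clone = kmalloc(len, GFP_NOFS | __GFP_NOFAIL);
		memcpy(*clone, orig, len);
	}
	return *clone;
}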
2115 
2116 /**
2117  * gfs2_rgrp_dump - print out an rgrp
2118  * @seq: The iterator
2119  * @gl: The glock in question
2120  *
2121  */
2122 
2123 void gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl)
2124 {
2125 	struct gfs2_rgrpd *rgd = gl->gl_object;
2126 	struct gfs2_blkreserv *trs;
2127 	const struct rb_node *n;
2128 
2129 	if (rgd == NULL)
2130 		return;
2131 	gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u r:%u e:%u\n",
2132 		       (unsigned long long)rgd->rd_addr, rgd->rd_flags,
2133 		       rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes,
2134 		       rgd->rd_reserved, rgd->rd_extfail_pt);
2135 	spin_lock(&rgd->rd_rsspin);
2136 	for (n = rb_first(&rgd->rd_rstree); n; n = rb_next(&trs->rs_node)) {
2137 		trs = rb_entry(n, struct gfs2_blkreserv, rs_node);
2138 		dump_rs(seq, trs);
2139 	}
2140 	spin_unlock(&rgd->rd_rsspin);
2141 }
2142 
2143 static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd)
2144 {
2145 	struct gfs2_sbd *sdp = rgd->rd_sbd;
2146 	fs_warn(sdp, "rgrp %llu has an error, marking it readonly until umount\n",
2147 		(unsigned long long)rgd->rd_addr);
2148 	fs_warn(sdp, "umount on all nodes and run fsck.gfs2 to fix the error\n");
2149 	gfs2_rgrp_dump(NULL, rgd->rd_gl);
2150 	rgd->rd_flags |= GFS2_RDF_ERROR;
2151 }
2152 
2153 /**
2154  * gfs2_adjust_reservation - Adjust (or remove) a reservation after allocation
2155  * @ip: The inode we have just allocated blocks for
2156  * @rbm: The start of the allocated blocks
2157  * @len: The extent length
2158  *
2159  * Adjusts a reservation after an allocation has taken place. If the
2160  * reservation does not match the allocation, or if it is now empty,
2161  * then it is removed.
2162  */
2163 
2164 static void gfs2_adjust_reservation(struct gfs2_inode *ip,
2165 				    const struct gfs2_rbm *rbm, unsigned len)
2166 {
2167 	struct gfs2_blkreserv *rs = ip->i_res;
2168 	struct gfs2_rgrpd *rgd = rbm->rgd;
2169 	unsigned rlen;
2170 	u64 block;
2171 	int ret;
2172 
2173 	spin_lock(&rgd->rd_rsspin);
2174 	if (gfs2_rs_active(rs)) {
2175 		if (gfs2_rbm_eq(&rs->rs_rbm, rbm)) {
2176 			block = gfs2_rbm_to_block(rbm);
2177 			ret = gfs2_rbm_from_block(&rs->rs_rbm, block + len);
2178 			rlen = min(rs->rs_free, len);
2179 			rs->rs_free -= rlen;
2180 			rgd->rd_reserved -= rlen;
2181 			trace_gfs2_rs(rs, TRACE_RS_CLAIM);
2182 			if (rs->rs_free && !ret)
2183 				goto out;
2184 		}
2185 		__rs_deltree(rs);
2186 	}
2187 out:
2188 	spin_unlock(&rgd->rd_rsspin);
2189 }
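
/*
 * Editorial worked example for the function above: suppose a reservation
 * begins at block B with rs_free = 10 and an allocation claims 4 blocks
 * starting at B.  rs_rbm then advances to B + 4, rs_free and rd_reserved
 * both drop by min(10, 4) = 4, and the reservation survives with
 * rs_free = 6.  Had the allocation started elsewhere, or had rs_free
 * reached zero, the reservation would instead be removed via
 * __rs_deltree().
 */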
2190 
2191 /**
2192  * gfs2_set_alloc_start - Set starting point for block allocation
2193  * @rbm: The rbm which will be set to the required location
2194  * @ip: The gfs2 inode
2195  * @dinode: Flag to say if allocation includes a new inode
2196  *
2197  * This sets the starting point from the reservation if one is active;
2198  * otherwise it falls back to guessing a start point based on the
2199  * inode's goal block or the last allocation point in the rgrp.
2200  */
2201 
2202 static void gfs2_set_alloc_start(struct gfs2_rbm *rbm,
2203 				 const struct gfs2_inode *ip, bool dinode)
2204 {
2205 	u64 goal;
2206 
2207 	if (gfs2_rs_active(ip->i_res)) {
2208 		*rbm = ip->i_res->rs_rbm;
2209 		return;
2210 	}
2211 
2212 	if (!dinode && rgrp_contains_block(rbm->rgd, ip->i_goal))
2213 		goal = ip->i_goal;
2214 	else
2215 		goal = rbm->rgd->rd_last_alloc + rbm->rgd->rd_data0;
2216 
2217 	gfs2_rbm_from_block(rbm, goal);
2218 }
2219 
2220 /**
2221  * gfs2_alloc_blocks - Allocate one or more blocks of data and/or a dinode
2222  * @ip: the inode to allocate the block for
2223  * @bn: Used to return the starting block number
2224  * @nblocks: requested number of blocks/extent length (value/result)
2225  * @dinode: 1 if we're allocating a dinode block, else 0
2226  * @generation: the generation number of the inode
2227  *
2228  * Returns: 0 or error
2229  */
2230 
2231 int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
2232 		      bool dinode, u64 *generation)
2233 {
2234 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
2235 	struct buffer_head *dibh;
2236 	struct gfs2_rbm rbm = { .rgd = ip->i_rgd, };
2237 	unsigned int ndata;
2238 	u64 block; /* block, within the file system scope */
2239 	int error;
2240 
2241 	gfs2_set_alloc_start(&rbm, ip, dinode);
2242 	error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, ip, false, NULL);
2243 
2244 	if (error == -ENOSPC) {
2245 		gfs2_set_alloc_start(&rbm, ip, dinode);
2246 		error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, NULL, false,
2247 				      NULL);
2248 	}
2249 
2250 	/* Since all blocks are reserved in advance, this shouldn't happen */
2251 	if (error) {
2252 		fs_warn(sdp, "inum=%llu error=%d, nblocks=%u, full=%d fail_pt=%d\n",
2253 			(unsigned long long)ip->i_no_addr, error, *nblocks,
2254 			test_bit(GBF_FULL, &rbm.rgd->rd_bits->bi_flags),
2255 			rbm.rgd->rd_extfail_pt);
2256 		goto rgrp_error;
2257 	}
2258 
2259 	gfs2_alloc_extent(&rbm, dinode, nblocks);
2260 	block = gfs2_rbm_to_block(&rbm);
2261 	rbm.rgd->rd_last_alloc = block - rbm.rgd->rd_data0;
2262 	if (gfs2_rs_active(ip->i_res))
2263 		gfs2_adjust_reservation(ip, &rbm, *nblocks);
2264 	ndata = *nblocks;
2265 	if (dinode)
2266 		ndata--;
2267 
2268 	if (!dinode) {
2269 		ip->i_goal = block + ndata - 1;
2270 		error = gfs2_meta_inode_buffer(ip, &dibh);
2271 		if (error == 0) {
2272 			struct gfs2_dinode *di =
2273 				(struct gfs2_dinode *)dibh->b_data;
2274 			gfs2_trans_add_meta(ip->i_gl, dibh);
2275 			di->di_goal_meta = di->di_goal_data =
2276 				cpu_to_be64(ip->i_goal);
2277 			brelse(dibh);
2278 		}
2279 	}
2280 	if (rbm.rgd->rd_free < *nblocks) {
2281 		printk(KERN_WARNING "nblocks=%u\n", *nblocks);
2282 		goto rgrp_error;
2283 	}
2284 
2285 	rbm.rgd->rd_free -= *nblocks;
2286 	if (dinode) {
2287 		rbm.rgd->rd_dinodes++;
2288 		*generation = rbm.rgd->rd_igeneration++;
2289 		if (*generation == 0)
2290 			*generation = rbm.rgd->rd_igeneration++;
2291 	}
2292 
2293 	gfs2_trans_add_meta(rbm.rgd->rd_gl, rbm.rgd->rd_bits[0].bi_bh);
2294 	gfs2_rgrp_out(rbm.rgd, rbm.rgd->rd_bits[0].bi_bh->b_data);
2295 	gfs2_rgrp_ondisk2lvb(rbm.rgd->rd_rgl, rbm.rgd->rd_bits[0].bi_bh->b_data);
2296 
2297 	gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0);
2298 	if (dinode)
2299 		gfs2_trans_add_unrevoke(sdp, block, 1);
2300 
2301 	gfs2_quota_change(ip, *nblocks, ip->i_inode.i_uid, ip->i_inode.i_gid);
2302 
2303 	rbm.rgd->rd_free_clone -= *nblocks;
2304 	trace_gfs2_block_alloc(ip, rbm.rgd, block, *nblocks,
2305 			       dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
2306 	*bn = block;
2307 	return 0;
2308 
2309 rgrp_error:
2310 	gfs2_rgrp_error(rbm.rgd);
2311 	return -EIO;
2312 }
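
/*
 * Editorial sketch, not part of the original file: @nblocks is
 * value/result - the caller asks for an extent length and
 * gfs2_alloc_blocks() trims it to what was actually allocated
 * contiguously.  The function name and request size are placeholders.
 */
static inline int example_alloc(struct gfs2_inode *ip, u64 *blkno)
{
	unsigned int n = 32;	/* request up to 32 blocks */
	int error;

	/* not a dinode, so no generation number is produced */
	error = gfs2_alloc_blocks(ip, blkno, &n, false, NULL);
	if (error)
		return error;
	/* on success, n holds the extent length actually allocated */
	return 0;
}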
2313 
2314 /**
2315  * __gfs2_free_blocks - free a contiguous run of block(s)
2316  * @ip: the inode these blocks are being freed from
2317  * @bstart: first block of a run of contiguous blocks
2318  * @blen: the length of the block run
2319  * @meta: 1 if the blocks represent metadata
2320  *
2321  */
2322 
2323 void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta)
2324 {
2325 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
2326 	struct gfs2_rgrpd *rgd;
2327 
2328 	rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE);
2329 	if (!rgd)
2330 		return;
2331 	trace_gfs2_block_alloc(ip, rgd, bstart, blen, GFS2_BLKST_FREE);
2332 	rgd->rd_free += blen;
2333 	rgd->rd_flags &= ~GFS2_RGF_TRIMMED;
2334 	gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
2335 	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
2336 	gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
2337 
2338 	/* Directories keep their data in the metadata address space */
2339 	if (meta || ip->i_depth)
2340 		gfs2_meta_wipe(ip, bstart, blen);
2341 }
2342 
2343 /**
2344  * gfs2_free_meta - free a contiguous run of metadata block(s)
2345  * @ip: the inode these blocks are being freed from
2346  * @bstart: first block of a run of contiguous blocks
2347  * @blen: the length of the block run
2348  *
2349  */
2350 
2351 void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen)
2352 {
2353 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
2354 
2355 	__gfs2_free_blocks(ip, bstart, blen, 1);
2356 	gfs2_statfs_change(sdp, 0, +blen, 0);
2357 	gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid);
2358 }
2359 
2360 void gfs2_unlink_di(struct inode *inode)
2361 {
2362 	struct gfs2_inode *ip = GFS2_I(inode);
2363 	struct gfs2_sbd *sdp = GFS2_SB(inode);
2364 	struct gfs2_rgrpd *rgd;
2365 	u64 blkno = ip->i_no_addr;
2366 
2367 	rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_UNLINKED);
2368 	if (!rgd)
2369 		return;
2370 	trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED);
2371 	gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
2372 	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
2373 	gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
2374 	update_rgrp_lvb_unlinked(rgd, 1);
2375 }
2376 
2377 static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno)
2378 {
2379 	struct gfs2_sbd *sdp = rgd->rd_sbd;
2380 	struct gfs2_rgrpd *tmp_rgd;
2381 
2382 	tmp_rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_FREE);
2383 	if (!tmp_rgd)
2384 		return;
2385 	gfs2_assert_withdraw(sdp, rgd == tmp_rgd);
2386 
2387 	if (!rgd->rd_dinodes)
2388 		gfs2_consist_rgrpd(rgd);
2389 	rgd->rd_dinodes--;
2390 	rgd->rd_free++;
2391 
2392 	gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
2393 	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
2394 	gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
2395 	update_rgrp_lvb_unlinked(rgd, -1);
2396 
2397 	gfs2_statfs_change(sdp, 0, +1, -1);
2398 }
2399 
2400 
2401 void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
2402 {
2403 	gfs2_free_uninit_di(rgd, ip->i_no_addr);
2404 	trace_gfs2_block_alloc(ip, rgd, ip->i_no_addr, 1, GFS2_BLKST_FREE);
2405 	gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid);
2406 	gfs2_meta_wipe(ip, ip->i_no_addr, 1);
2407 }
2408 
2409 /**
2410  * gfs2_check_blk_type - Check the type of a block
2411  * @sdp: The superblock
2412  * @no_addr: The block number to check
2413  * @type: The block type we are looking for
2414  *
2415  * Returns: 0 if the block type matches the expected type
2416  *          -ESTALE if it doesn't match
2417  *          or -ve errno if something went wrong while checking
2418  */
2419 
2420 int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type)
2421 {
2422 	struct gfs2_rgrpd *rgd;
2423 	struct gfs2_holder rgd_gh;
2424 	int error = -EINVAL;
2425 
2426 	rgd = gfs2_blk2rgrpd(sdp, no_addr, 1);
2427 	if (!rgd)
2428 		goto fail;
2429 
2430 	error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_SHARED, 0, &rgd_gh);
2431 	if (error)
2432 		goto fail;
2433 
2434 	if (gfs2_get_block_type(rgd, no_addr) != type)
2435 		error = -ESTALE;
2436 
2437 	gfs2_glock_dq_uninit(&rgd_gh);
2438 fail:
2439 	return error;
2440 }
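
/*
 * Editorial sketch, not part of the original file: a caller can verify
 * that a block still holds, say, an unlinked dinode before acting on
 * it.  The expected type here is an assumption for illustration.
 */
static inline bool example_is_unlinked_dinode(struct gfs2_sbd *sdp,
					      u64 no_addr)
{
	/* 0 means the type matched; -ESTALE means it did not */
	return gfs2_check_blk_type(sdp, no_addr, GFS2_BLKST_UNLINKED) == 0;
}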
2441 
2442 /**
2443  * gfs2_rlist_add - add a RG to a list of RGs
2444  * @ip: the inode
2445  * @rlist: the list of resource groups
2446  * @block: the block
2447  *
2448  * Figure out what RG a block belongs to and add that RG to the list
2449  *
2450  * FIXME: Don't use NOFAIL
2451  *
2452  */
2453 
2454 void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist,
2455 		    u64 block)
2456 {
2457 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
2458 	struct gfs2_rgrpd *rgd;
2459 	struct gfs2_rgrpd **tmp;
2460 	unsigned int new_space;
2461 	unsigned int x;
2462 
2463 	if (gfs2_assert_warn(sdp, !rlist->rl_ghs))
2464 		return;
2465 
2466 	if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, block))
2467 		rgd = ip->i_rgd;
2468 	else
2469 		rgd = gfs2_blk2rgrpd(sdp, block, 1);
2470 	if (!rgd) {
2471 		fs_err(sdp, "rlist_add: no rgrp for block %llu\n", (unsigned long long)block);
2472 		return;
2473 	}
2474 	ip->i_rgd = rgd;
2475 
2476 	for (x = 0; x < rlist->rl_rgrps; x++)
2477 		if (rlist->rl_rgd[x] == rgd)
2478 			return;
2479 
2480 	if (rlist->rl_rgrps == rlist->rl_space) {
2481 		new_space = rlist->rl_space + 10;
2482 
2483 		tmp = kcalloc(new_space, sizeof(struct gfs2_rgrpd *),
2484 			      GFP_NOFS | __GFP_NOFAIL);
2485 
2486 		if (rlist->rl_rgd) {
2487 			memcpy(tmp, rlist->rl_rgd,
2488 			       rlist->rl_space * sizeof(struct gfs2_rgrpd *));
2489 			kfree(rlist->rl_rgd);
2490 		}
2491 
2492 		rlist->rl_space = new_space;
2493 		rlist->rl_rgd = tmp;
2494 	}
2495 
2496 	rlist->rl_rgd[rlist->rl_rgrps++] = rgd;
2497 }
2498 
2499 /**
2500  * gfs2_rlist_alloc - allocate and initialize an array of glock holders,
2501  *      one for each resource group on the rlist
2502  * @rlist: the list of resource groups
2503  * @state: the lock state to acquire the RG lock in
2504  *
2505  * FIXME: Don't use NOFAIL
2506  *
2507  */
2508 
2509 void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state)
2510 {
2511 	unsigned int x;
2512 
2513 	rlist->rl_ghs = kcalloc(rlist->rl_rgrps, sizeof(struct gfs2_holder),
2514 				GFP_NOFS | __GFP_NOFAIL);
2515 	for (x = 0; x < rlist->rl_rgrps; x++)
2516 		gfs2_holder_init(rlist->rl_rgd[x]->rd_gl,
2517 				state, 0,
2518 				&rlist->rl_ghs[x]);
2519 }
2520 
2521 /**
2522  * gfs2_rlist_free - free a resource group list
2523  * @rlist: the list of resource groups
2524  *
2525  */
2526 
2527 void gfs2_rlist_free(struct gfs2_rgrp_list *rlist)
2528 {
2529 	unsigned int x;
2530 
2531 	kfree(rlist->rl_rgd);
2532 
2533 	if (rlist->rl_ghs) {
2534 		for (x = 0; x < rlist->rl_rgrps; x++)
2535 			gfs2_holder_uninit(&rlist->rl_ghs[x]);
2536 		kfree(rlist->rl_ghs);
2537 		rlist->rl_ghs = NULL;
2538 	}
2539 }
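
/*
 * Editorial sketch, not part of the original file: the usual rlist life
 * cycle - collect the rgrps touched by a set of blocks, take all of
 * their glocks, do the work, then drop everything.  gfs2_glock_nq_m()
 * and gfs2_glock_dq_m() are the multi-holder helpers from glock.c; the
 * other names are placeholders.
 */
static inline int example_rlist_cycle(struct gfs2_inode *ip, u64 *blocks,
				      unsigned int nblocks)
{
	struct gfs2_rgrp_list rlist;
	unsigned int x;
	int error;

	memset(&rlist, 0, sizeof(struct gfs2_rgrp_list));
	for (x = 0; x < nblocks; x++)
		gfs2_rlist_add(ip, &rlist, blocks[x]);

	gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE);
	error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
	if (!error) {
		/* ... free or relabel blocks in the locked rgrps ... */
		gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs);
	}
	gfs2_rlist_free(&rlist);
	return error;
}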
2540 
2541