xref: /openbmc/linux/fs/gfs2/rgrp.c (revision 22246614)
1 /*
2  * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
3  * Copyright (C) 2004-2008 Red Hat, Inc.  All rights reserved.
4  *
5  * This copyrighted material is made available to anyone wishing to use,
6  * modify, copy, or redistribute it subject to the terms and conditions
7  * of the GNU General Public License version 2.
8  */
9 
10 #include <linux/slab.h>
11 #include <linux/spinlock.h>
12 #include <linux/completion.h>
13 #include <linux/buffer_head.h>
14 #include <linux/fs.h>
15 #include <linux/gfs2_ondisk.h>
16 #include <linux/lm_interface.h>
17 #include <linux/prefetch.h>
18 
19 #include "gfs2.h"
20 #include "incore.h"
21 #include "glock.h"
22 #include "glops.h"
23 #include "lops.h"
24 #include "meta_io.h"
25 #include "quota.h"
26 #include "rgrp.h"
27 #include "super.h"
28 #include "trans.h"
29 #include "util.h"
30 #include "log.h"
31 #include "inode.h"
32 #include "ops_address.h"
33 
34 #define BFITNOENT ((u32)~0)
35 #define NO_BLOCK ((u64)~0)
36 
37 #if BITS_PER_LONG == 32
38 #define LBITMASK   (0x55555555UL)
39 #define LBITSKIP55 (0x55555555UL)
40 #define LBITSKIP00 (0x00000000UL)
41 #else
42 #define LBITMASK   (0x5555555555555555UL)
43 #define LBITSKIP55 (0x5555555555555555UL)
44 #define LBITSKIP00 (0x0000000000000000UL)
45 #endif
46 
47 /*
48  * These routines are used by the resource group routines (rgrp.c)
49  * to keep track of block allocation.  Each block is represented by two
50  * bits.  So, each byte represents GFS2_NBBY (i.e. 4) blocks.
51  *
52  * 0 = Free
53  * 1 = Used (not metadata)
54  * 2 = Unlinked (still in use) inode
55  * 3 = Used (metadata)
56  */
57 
/*
 * valid_change[new_state * 4 + cur_state] is nonzero iff changing a
 * block's bitmap entry from cur_state to new_state is a legal transition.
 * A zero entry indicates on-disk corruption; gfs2_setbit() flags the
 * rgrp inconsistent when it hits one.
 */
static const char valid_change[16] = {
	        /* current */
	/* n */ 0, 1, 1, 1,
	/* e */ 1, 0, 0, 0,
	/* w */ 0, 0, 0, 1,
	        1, 0, 0, 0
};

/* Forward declaration: search one rgrp for (up to *n consecutive) blocks in
   old_state, flipping them to new_state; defined later in this file. */
static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
                        unsigned char old_state, unsigned char new_state,
			unsigned int *n);
69 
70 /**
71  * gfs2_setbit - Set a bit in the bitmaps
72  * @buffer: the buffer that holds the bitmaps
73  * @buflen: the length (in bytes) of the buffer
74  * @block: the block to set
75  * @new_state: the new state of the block
76  *
77  */
78 
79 static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf1,
80 			       unsigned char *buf2, unsigned int offset,
81 			       unsigned int buflen, u32 block,
82 			       unsigned char new_state)
83 {
84 	unsigned char *byte1, *byte2, *end, cur_state;
85 	const unsigned int bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE;
86 
87 	byte1 = buf1 + offset + (block / GFS2_NBBY);
88 	end = buf1 + offset + buflen;
89 
90 	BUG_ON(byte1 >= end);
91 
92 	cur_state = (*byte1 >> bit) & GFS2_BIT_MASK;
93 
94 	if (unlikely(!valid_change[new_state * 4 + cur_state])) {
95 		gfs2_consist_rgrpd(rgd);
96 		return;
97 	}
98 	*byte1 ^= (cur_state ^ new_state) << bit;
99 
100 	if (buf2) {
101 		byte2 = buf2 + offset + (block / GFS2_NBBY);
102 		cur_state = (*byte2 >> bit) & GFS2_BIT_MASK;
103 		*byte2 ^= (cur_state ^ new_state) << bit;
104 	}
105 }
106 
107 /**
108  * gfs2_testbit - test a bit in the bitmaps
109  * @buffer: the buffer that holds the bitmaps
110  * @buflen: the length (in bytes) of the buffer
111  * @block: the block to read
112  *
113  */
114 
115 static inline unsigned char gfs2_testbit(struct gfs2_rgrpd *rgd,
116 					 const unsigned char *buffer,
117 					 unsigned int buflen, u32 block)
118 {
119 	const unsigned char *byte, *end;
120 	unsigned char cur_state;
121 	unsigned int bit;
122 
123 	byte = buffer + (block / GFS2_NBBY);
124 	bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE;
125 	end = buffer + buflen;
126 
127 	gfs2_assert(rgd->rd_sbd, byte < end);
128 
129 	cur_state = (*byte >> bit) & GFS2_BIT_MASK;
130 
131 	return cur_state;
132 }
133 
/**
 * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing
 *       a block in a given allocation state.
 * @buffer: the buffer that holds the bitmaps
 * @buflen: the length (in bytes) of the buffer
 * @goal: start search at this block's bit-pair (within @buffer)
 * @old_state: GFS2_BLKST_XXX the state of the block we're looking for.
 *
 * Scope of @goal and returned block number is only within this bitmap buffer,
 * not entire rgrp or filesystem.  @buffer will be offset from the actual
 * beginning of a bitmap block buffer, skipping any header structures.
 *
 * The scan alternates between a byte-at-a-time phase (for the unaligned
 * prefix and for re-checking a word that may contain a hit) and a fast
 * word-at-a-time phase that skips whole words known not to contain the
 * wanted state.
 *
 * Return: the block number (bitmap buffer scope) that was found, or
 * BFITNOENT if no block in @old_state exists at or after @goal
 */

static u32 gfs2_bitfit(const u8 *buffer, unsigned int buflen, u32 goal,
		       u8 old_state)
{
	const u8 *byte, *start, *end;
	int bit, startbit;
	u32 g1, g2, misaligned;
	unsigned long *plong;
	unsigned long lskipval;

	/* Word pattern that means "nothing interesting here": when looking
	   for a used state, skip all-zero words; otherwise skip words whose
	   low bit of every pair is set (0x55... pattern) */
	lskipval = (old_state & GFS2_BLKST_USED) ? LBITSKIP00 : LBITSKIP55;
	g1 = (goal / GFS2_NBBY);
	start = buffer + g1;
	byte = start;
        end = buffer + buflen;
	/* First unsigned-long-aligned byte at or after the goal */
	g2 = ALIGN(g1, sizeof(unsigned long));
	plong = (unsigned long *)(buffer + g2);
	startbit = bit = (goal % GFS2_NBBY) * GFS2_BIT_SIZE;
	/* Bytes to scan individually before word alignment is reached */
	misaligned = g2 - g1;
	if (!misaligned)
		goto ulong_aligned;
/* parse the bitmap a byte at a time */
misaligned:
	while (byte < end) {
		if (((*byte >> bit) & GFS2_BIT_MASK) == old_state) {
			/* Translate (byte, bit) back into a block number
			   relative to the original goal */
			return goal +
				(((byte - start) * GFS2_NBBY) +
				 ((bit - startbit) >> 1));
		}
		bit += GFS2_BIT_SIZE;
		if (bit >= GFS2_NBBY * GFS2_BIT_SIZE) {
			bit = 0;
			byte++;
			misaligned--;
			if (!misaligned) {
				/* Reached word alignment: switch to the
				   fast word-skipping phase */
				plong = (unsigned long *)byte;
				goto ulong_aligned;
			}
		}
	}
	return BFITNOENT;

/* parse the bitmap a unsigned long at a time */
ulong_aligned:
	/* Stop at "end - 1" or else prefetch can go past the end and segfault.
	   We could "if" it but we'd lose some of the performance gained.
	   This way will only slow down searching the very last 4/8 bytes
	   depending on architecture.  I've experimented with several ways
	   of writing this section such as using an else before the goto
	   but this one seems to be the fastest. */
	while ((unsigned char *)plong < end - 1) {
		prefetch(plong + 1);
		if (((*plong) & LBITMASK) != lskipval)
			break;
		plong++;
	}
	if ((unsigned char *)plong < end) {
		/* A word may contain a hit (or is the final, unprefetchable
		   word): fall back to byte scanning for one word's worth */
		byte = (const u8 *)plong;
		misaligned += sizeof(unsigned long) - 1;
		goto misaligned;
	}
	return BFITNOENT;
}
211 
212 /**
213  * gfs2_bitcount - count the number of bits in a certain state
214  * @buffer: the buffer that holds the bitmaps
215  * @buflen: the length (in bytes) of the buffer
216  * @state: the state of the block we're looking for
217  *
218  * Returns: The number of bits
219  */
220 
221 static u32 gfs2_bitcount(struct gfs2_rgrpd *rgd, const u8 *buffer,
222 			 unsigned int buflen, u8 state)
223 {
224 	const u8 *byte = buffer;
225 	const u8 *end = buffer + buflen;
226 	const u8 state1 = state << 2;
227 	const u8 state2 = state << 4;
228 	const u8 state3 = state << 6;
229 	u32 count = 0;
230 
231 	for (; byte < end; byte++) {
232 		if (((*byte) & 0x03) == state)
233 			count++;
234 		if (((*byte) & 0x0C) == state1)
235 			count++;
236 		if (((*byte) & 0x30) == state2)
237 			count++;
238 		if (((*byte) & 0xC0) == state3)
239 			count++;
240 	}
241 
242 	return count;
243 }
244 
245 /**
246  * gfs2_rgrp_verify - Verify that a resource group is consistent
247  * @sdp: the filesystem
248  * @rgd: the rgrp
249  *
250  */
251 
252 void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd)
253 {
254 	struct gfs2_sbd *sdp = rgd->rd_sbd;
255 	struct gfs2_bitmap *bi = NULL;
256 	u32 length = rgd->rd_length;
257 	u32 count[4], tmp;
258 	int buf, x;
259 
260 	memset(count, 0, 4 * sizeof(u32));
261 
262 	/* Count # blocks in each of 4 possible allocation states */
263 	for (buf = 0; buf < length; buf++) {
264 		bi = rgd->rd_bits + buf;
265 		for (x = 0; x < 4; x++)
266 			count[x] += gfs2_bitcount(rgd,
267 						  bi->bi_bh->b_data +
268 						  bi->bi_offset,
269 						  bi->bi_len, x);
270 	}
271 
272 	if (count[0] != rgd->rd_rg.rg_free) {
273 		if (gfs2_consist_rgrpd(rgd))
274 			fs_err(sdp, "free data mismatch:  %u != %u\n",
275 			       count[0], rgd->rd_rg.rg_free);
276 		return;
277 	}
278 
279 	tmp = rgd->rd_data -
280 		rgd->rd_rg.rg_free -
281 		rgd->rd_rg.rg_dinodes;
282 	if (count[1] + count[2] != tmp) {
283 		if (gfs2_consist_rgrpd(rgd))
284 			fs_err(sdp, "used data mismatch:  %u != %u\n",
285 			       count[1], tmp);
286 		return;
287 	}
288 
289 	if (count[3] != rgd->rd_rg.rg_dinodes) {
290 		if (gfs2_consist_rgrpd(rgd))
291 			fs_err(sdp, "used metadata mismatch:  %u != %u\n",
292 			       count[3], rgd->rd_rg.rg_dinodes);
293 		return;
294 	}
295 
296 	if (count[2] > count[3]) {
297 		if (gfs2_consist_rgrpd(rgd))
298 			fs_err(sdp, "unlinked inodes > inodes:  %u\n",
299 			       count[2]);
300 		return;
301 	}
302 
303 }
304 
305 static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block)
306 {
307 	u64 first = rgd->rd_data0;
308 	u64 last = first + rgd->rd_data;
309 	return first <= block && block < last;
310 }
311 
312 /**
313  * gfs2_blk2rgrpd - Find resource group for a given data/meta block number
314  * @sdp: The GFS2 superblock
315  * @n: The data block number
316  *
317  * Returns: The resource group, or NULL if not found
318  */
319 
320 struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk)
321 {
322 	struct gfs2_rgrpd *rgd;
323 
324 	spin_lock(&sdp->sd_rindex_spin);
325 
326 	list_for_each_entry(rgd, &sdp->sd_rindex_mru_list, rd_list_mru) {
327 		if (rgrp_contains_block(rgd, blk)) {
328 			list_move(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list);
329 			spin_unlock(&sdp->sd_rindex_spin);
330 			return rgd;
331 		}
332 	}
333 
334 	spin_unlock(&sdp->sd_rindex_spin);
335 
336 	return NULL;
337 }
338 
339 /**
340  * gfs2_rgrpd_get_first - get the first Resource Group in the filesystem
341  * @sdp: The GFS2 superblock
342  *
343  * Returns: The first rgrp in the filesystem
344  */
345 
346 struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp)
347 {
348 	gfs2_assert(sdp, !list_empty(&sdp->sd_rindex_list));
349 	return list_entry(sdp->sd_rindex_list.next, struct gfs2_rgrpd, rd_list);
350 }
351 
352 /**
353  * gfs2_rgrpd_get_next - get the next RG
354  * @rgd: A RG
355  *
356  * Returns: The next rgrp
357  */
358 
359 struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd)
360 {
361 	if (rgd->rd_list.next == &rgd->rd_sbd->sd_rindex_list)
362 		return NULL;
363 	return list_entry(rgd->rd_list.next, struct gfs2_rgrpd, rd_list);
364 }
365 
/*
 * clear_rgrpdi - Tear down all in-core resource group descriptors
 * @sdp: the filesystem
 *
 * First empties the "recent" list under the rindex spinlock, then frees
 * every rgrp on the master rindex list together with its bitmap
 * descriptors, dropping the glock reference each one holds.  Callers
 * serialize via sd_rindex_mutex (see gfs2_clear_rgrpd / gfs2_ri_update).
 */
static void clear_rgrpdi(struct gfs2_sbd *sdp)
{
	struct list_head *head;
	struct gfs2_rgrpd *rgd;
	struct gfs2_glock *gl;

	spin_lock(&sdp->sd_rindex_spin);
	sdp->sd_rindex_forward = NULL;
	head = &sdp->sd_rindex_recent_list;
	while (!list_empty(head)) {
		rgd = list_entry(head->next, struct gfs2_rgrpd, rd_recent);
		list_del(&rgd->rd_recent);
	}
	spin_unlock(&sdp->sd_rindex_spin);

	head = &sdp->sd_rindex_list;
	while (!list_empty(head)) {
		rgd = list_entry(head->next, struct gfs2_rgrpd, rd_list);
		gl = rgd->rd_gl;

		list_del(&rgd->rd_list);
		list_del(&rgd->rd_list_mru);

		if (gl) {
			/* Break the glock's back-pointer before dropping
			   the reference taken in read_rindex_entry() */
			gl->gl_object = NULL;
			gfs2_glock_put(gl);
		}

		kfree(rgd->rd_bits);
		kmem_cache_free(gfs2_rgrpd_cachep, rgd);
	}
}
398 
/* gfs2_clear_rgrpd - Free all in-core rgrp state, serialized against
   concurrent rindex updates by sd_rindex_mutex. */
void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
{
	mutex_lock(&sdp->sd_rindex_mutex);
	clear_rgrpdi(sdp);
	mutex_unlock(&sdp->sd_rindex_mutex);
}
405 
/* Dump an rgrp's rindex fields to the kernel log; used by
   compute_bitstructs() when reporting an inconsistent rgrp. */
static void gfs2_rindex_print(const struct gfs2_rgrpd *rgd)
{
	printk(KERN_INFO "  ri_addr = %llu\n", (unsigned long long)rgd->rd_addr);
	printk(KERN_INFO "  ri_length = %u\n", rgd->rd_length);
	printk(KERN_INFO "  ri_data0 = %llu\n", (unsigned long long)rgd->rd_data0);
	printk(KERN_INFO "  ri_data = %u\n", rgd->rd_data);
	printk(KERN_INFO "  ri_bitbytes = %u\n", rgd->rd_bitbytes);
}
414 
/**
 * compute_bitstructs - Compute the bitmap sizes
 * @rgd: The resource group descriptor
 *
 * Calculates bitmap descriptors, one for each block that contains bitmap
 * data.  Block 0 carries a struct gfs2_rgrp header; the remaining blocks
 * carry only a struct gfs2_meta_header before their bitmap bytes.
 *
 * Returns: errno
 */

static int compute_bitstructs(struct gfs2_rgrpd *rgd)
{
	struct gfs2_sbd *sdp = rgd->rd_sbd;
	struct gfs2_bitmap *bi;
	u32 length = rgd->rd_length; /* # blocks in hdr & bitmap */
	u32 bytes_left, bytes;
	int x;

	/* An rgrp with no bitmap blocks is corrupt */
	if (!length)
		return -EINVAL;

	rgd->rd_bits = kcalloc(length, sizeof(struct gfs2_bitmap), GFP_NOFS);
	if (!rgd->rd_bits)
		return -ENOMEM;

	bytes_left = rgd->rd_bitbytes;

	for (x = 0; x < length; x++) {
		bi = rgd->rd_bits + x;

		/* small rgrp; bitmap stored completely in header block */
		if (length == 1) {
			bytes = bytes_left;
			bi->bi_offset = sizeof(struct gfs2_rgrp);
			bi->bi_start = 0;
			bi->bi_len = bytes;
		/* header block */
		} else if (x == 0) {
			bytes = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_rgrp);
			bi->bi_offset = sizeof(struct gfs2_rgrp);
			bi->bi_start = 0;
			bi->bi_len = bytes;
		/* last block */
		} else if (x + 1 == length) {
			bytes = bytes_left;
			bi->bi_offset = sizeof(struct gfs2_meta_header);
			bi->bi_start = rgd->rd_bitbytes - bytes_left;
			bi->bi_len = bytes;
		/* other blocks */
		} else {
			bytes = sdp->sd_sb.sb_bsize -
				sizeof(struct gfs2_meta_header);
			bi->bi_offset = sizeof(struct gfs2_meta_header);
			bi->bi_start = rgd->rd_bitbytes - bytes_left;
			bi->bi_len = bytes;
		}

		bytes_left -= bytes;
	}

	/* Every byte of rd_bitbytes must be covered by some bitmap block */
	if (bytes_left) {
		gfs2_consist_rgrpd(rgd);
		return -EIO;
	}
	/* The last bitmap must end exactly at rd_data blocks (GFS2_NBBY
	   blocks per byte) */
	bi = rgd->rd_bits + (length - 1);
	if ((bi->bi_start + bi->bi_len) * GFS2_NBBY != rgd->rd_data) {
		if (gfs2_consist_rgrpd(rgd)) {
			gfs2_rindex_print(rgd);
			fs_err(sdp, "start=%u len=%u offset=%u\n",
			       bi->bi_start, bi->bi_len, bi->bi_offset);
		}
		return -EIO;
	}

	return 0;
}
490 
491 /**
492  * gfs2_ri_total - Total up the file system space, according to the rindex.
493  *
494  */
495 u64 gfs2_ri_total(struct gfs2_sbd *sdp)
496 {
497 	u64 total_data = 0;
498 	struct inode *inode = sdp->sd_rindex;
499 	struct gfs2_inode *ip = GFS2_I(inode);
500 	char buf[sizeof(struct gfs2_rindex)];
501 	struct file_ra_state ra_state;
502 	int error, rgrps;
503 
504 	mutex_lock(&sdp->sd_rindex_mutex);
505 	file_ra_state_init(&ra_state, inode->i_mapping);
506 	for (rgrps = 0;; rgrps++) {
507 		loff_t pos = rgrps * sizeof(struct gfs2_rindex);
508 
509 		if (pos + sizeof(struct gfs2_rindex) >= ip->i_di.di_size)
510 			break;
511 		error = gfs2_internal_read(ip, &ra_state, buf, &pos,
512 					   sizeof(struct gfs2_rindex));
513 		if (error != sizeof(struct gfs2_rindex))
514 			break;
515 		total_data += be32_to_cpu(((struct gfs2_rindex *)buf)->ri_data);
516 	}
517 	mutex_unlock(&sdp->sd_rindex_mutex);
518 	return total_data;
519 }
520 
/* Convert an on-disk (big-endian) rindex record into the in-core
   rgrp descriptor fields. */
static void gfs2_rindex_in(struct gfs2_rgrpd *rgd, const void *buf)
{
	const struct gfs2_rindex *str = buf;

	rgd->rd_addr = be64_to_cpu(str->ri_addr);
	rgd->rd_length = be32_to_cpu(str->ri_length);
	rgd->rd_data0 = be64_to_cpu(str->ri_data0);
	rgd->rd_data = be32_to_cpu(str->ri_data);
	rgd->rd_bitbytes = be32_to_cpu(str->ri_bitbytes);
}
531 
/**
 * read_rindex_entry - Pull in a new resource index entry from the disk
 * @ip: the rindex inode
 * @ra_state: readahead state for the sequential scan of the rindex file
 *
 * Reads the rindex record at index sdp->sd_rgrps, allocates an in-core
 * rgrp descriptor for it, links it onto the rindex lists and attaches
 * its glock.
 *
 * Returns: 0 on success (including a clean EOF, where the read returns
 * 0 bytes and no new rgrp is created), error code otherwise
 */

static int read_rindex_entry(struct gfs2_inode *ip,
			     struct file_ra_state *ra_state)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	loff_t pos = sdp->sd_rgrps * sizeof(struct gfs2_rindex);
	char buf[sizeof(struct gfs2_rindex)];
	int error;
	struct gfs2_rgrpd *rgd;

	error = gfs2_internal_read(ip, ra_state, buf, &pos,
				   sizeof(struct gfs2_rindex));
	if (!error)
		return 0;	/* EOF: nothing more to read */
	if (error != sizeof(struct gfs2_rindex)) {
		if (error > 0)	/* short read: partial trailing record */
			error = -EIO;
		return error;
	}

	rgd = kmem_cache_zalloc(gfs2_rgrpd_cachep, GFP_NOFS);
	error = -ENOMEM;
	if (!rgd)
		return error;

	mutex_init(&rgd->rd_mutex);
	lops_init_le(&rgd->rd_le, &gfs2_rg_lops);
	rgd->rd_sbd = sdp;

	/* Link the descriptor before filling it in: on any later failure
	   the caller tears everything down via clear_rgrpdi() */
	list_add_tail(&rgd->rd_list, &sdp->sd_rindex_list);
	list_add_tail(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list);

	gfs2_rindex_in(rgd, buf);
	error = compute_bitstructs(rgd);
	if (error)
		return error;

	error = gfs2_glock_get(sdp, rgd->rd_addr,
			       &gfs2_rgrp_glops, CREATE, &rgd->rd_gl);
	if (error)
		return error;

	/* Glock now points back at its rgrp; header must be (re)read before
	   the counters are trusted, and the rgrp scanned for unlinked
	   inodes */
	rgd->rd_gl->gl_object = rgd;
	rgd->rd_flags &= ~GFS2_RDF_UPTODATE;
	rgd->rd_flags |= GFS2_RDF_CHECK;
	return error;
}
585 
/**
 * gfs2_ri_update - Pull in a new resource index from the disk
 * @ip: pointer to the rindex inode
 *
 * Rebuilds the complete in-core rgrp list from the rindex file.  The
 * rindex size must be an exact multiple of the record size; anything
 * else means the inode is corrupt.  Caller holds sd_rindex_mutex (see
 * gfs2_rindex_hold).
 *
 * Returns: 0 on successful update, error code otherwise
 */

static int gfs2_ri_update(struct gfs2_inode *ip)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct inode *inode = &ip->i_inode;
	struct file_ra_state ra_state;
	u64 rgrp_count = ip->i_di.di_size;
	int error;

	/* do_div() leaves the quotient (record count) in rgrp_count and
	   returns the remainder, which must be zero */
	if (do_div(rgrp_count, sizeof(struct gfs2_rindex))) {
		gfs2_consist_inode(ip);
		return -EIO;
	}

	clear_rgrpdi(sdp);

	file_ra_state_init(&ra_state, inode->i_mapping);
	for (sdp->sd_rgrps = 0; sdp->sd_rgrps < rgrp_count; sdp->sd_rgrps++) {
		error = read_rindex_entry(ip, &ra_state);
		if (error) {
			/* Don't leave a half-built rgrp list behind */
			clear_rgrpdi(sdp);
			return error;
		}
	}

	sdp->sd_rindex_uptodate = 1;
	return 0;
}
620 
/**
 * gfs2_ri_update_special - Pull in a new resource index from the disk
 *
 * This is a special version that's safe to call from gfs2_inplace_reserve_i.
 * In this case we know that we don't have any resource groups in memory yet,
 * so unlike gfs2_ri_update() it does not clear the existing list first and
 * it sizes the scan by the rindex file size rather than a precomputed count.
 *
 * @ip: pointer to the rindex inode
 *
 * Returns: 0 on successful update, error code otherwise
 */
static int gfs2_ri_update_special(struct gfs2_inode *ip)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct inode *inode = &ip->i_inode;
	struct file_ra_state ra_state;
	int error;

	file_ra_state_init(&ra_state, inode->i_mapping);
	for (sdp->sd_rgrps = 0;; sdp->sd_rgrps++) {
		/* Ignore partials */
		if ((sdp->sd_rgrps + 1) * sizeof(struct gfs2_rindex) >
		    ip->i_di.di_size)
			break;
		error = read_rindex_entry(ip, &ra_state);
		if (error) {
			/* Tear down whatever was built before the failure */
			clear_rgrpdi(sdp);
			return error;
		}
	}

	sdp->sd_rindex_uptodate = 1;
	return 0;
}
654 
/**
 * gfs2_rindex_hold - Grab a lock on the rindex
 * @sdp: The GFS2 superblock
 * @ri_gh: the glock holder
 *
 * We grab a lock on the rindex inode to make sure that it doesn't
 * change whilst we are performing an operation. We keep this lock
 * for quite long periods of time compared to other locks. This
 * doesn't matter, since it is shared and it is very, very rarely
 * accessed in the exclusive mode (i.e. only when expanding the filesystem).
 *
 * This makes sure that we're using the latest copy of the resource index
 * special file, which might have been updated if someone expanded the
 * filesystem (via gfs2_grow utility), which adds new resource groups.
 *
 * Returns: 0 on success, error code otherwise
 */

int gfs2_rindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ri_gh)
{
	struct gfs2_inode *ip = GFS2_I(sdp->sd_rindex);
	struct gfs2_glock *gl = ip->i_gl;
	int error;

	error = gfs2_glock_nq_init(gl, LM_ST_SHARED, 0, ri_gh);
	if (error)
		return error;

	/* Read new copy from disk if we don't have the latest.  The flag is
	   re-checked under the mutex so only one task does the re-read. */
	if (!sdp->sd_rindex_uptodate) {
		mutex_lock(&sdp->sd_rindex_mutex);
		if (!sdp->sd_rindex_uptodate) {
			error = gfs2_ri_update(ip);
			if (error)
				/* Don't hand the caller a held glock on
				   failure */
				gfs2_glock_dq_uninit(ri_gh);
		}
		mutex_unlock(&sdp->sd_rindex_mutex);
	}

	return error;
}
696 
/* Convert an on-disk (big-endian) rgrp header into the in-core rgrp
   counters, translating the GFS2_RGF_NOALLOC on-disk flag into the
   in-core GFS2_RDF_NOALLOC bit. */
static void gfs2_rgrp_in(struct gfs2_rgrpd *rgd, const void *buf)
{
	const struct gfs2_rgrp *str = buf;
	struct gfs2_rgrp_host *rg = &rgd->rd_rg;
	u32 rg_flags;

	rg_flags = be32_to_cpu(str->rg_flags);
	if (rg_flags & GFS2_RGF_NOALLOC)
		rgd->rd_flags |= GFS2_RDF_NOALLOC;
	else
		rgd->rd_flags &= ~GFS2_RDF_NOALLOC;
	rg->rg_free = be32_to_cpu(str->rg_free);
	rg->rg_dinodes = be32_to_cpu(str->rg_dinodes);
	rg->rg_igeneration = be64_to_cpu(str->rg_igeneration);
}
712 
/* Inverse of gfs2_rgrp_in: write the in-core rgrp counters back into the
   on-disk (big-endian) rgrp header, zeroing padding and reserved bytes. */
static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf)
{
	struct gfs2_rgrp *str = buf;
	struct gfs2_rgrp_host *rg = &rgd->rd_rg;
	u32 rg_flags = 0;

	if (rgd->rd_flags & GFS2_RDF_NOALLOC)
		rg_flags |= GFS2_RGF_NOALLOC;
	str->rg_flags = cpu_to_be32(rg_flags);
	str->rg_free = cpu_to_be32(rg->rg_free);
	str->rg_dinodes = cpu_to_be32(rg->rg_dinodes);
	str->__pad = cpu_to_be32(0);
	str->rg_igeneration = cpu_to_be64(rg->rg_igeneration);
	memset(&str->rg_reserved, 0, sizeof(str->rg_reserved));
}
728 
/**
 * gfs2_rgrp_bh_get - Read in a RG's header and bitmaps
 * @rgd: the struct gfs2_rgrpd describing the RG to read in
 *
 * Read in all of a Resource Group's header and bitmap blocks.
 * Caller must eventually call gfs2_rgrp_relse() to free the bitmaps.
 *
 * Reads are issued for all blocks before waiting on any of them, so the
 * I/O overlaps; rd_bh_count reference-counts the buffers so concurrent
 * holders share one read.
 *
 * Returns: errno
 */

int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
{
	struct gfs2_sbd *sdp = rgd->rd_sbd;
	struct gfs2_glock *gl = rgd->rd_gl;
	unsigned int length = rgd->rd_length;
	struct gfs2_bitmap *bi;
	unsigned int x, y;
	int error;

	mutex_lock(&rgd->rd_mutex);

	/* Fast path: buffers already in memory, just take another ref */
	spin_lock(&sdp->sd_rindex_spin);
	if (rgd->rd_bh_count) {
		rgd->rd_bh_count++;
		spin_unlock(&sdp->sd_rindex_spin);
		mutex_unlock(&rgd->rd_mutex);
		return 0;
	}
	spin_unlock(&sdp->sd_rindex_spin);

	/* Issue reads for every header/bitmap block first... */
	for (x = 0; x < length; x++) {
		bi = rgd->rd_bits + x;
		error = gfs2_meta_read(gl, rgd->rd_addr + x, 0, &bi->bi_bh);
		if (error)
			goto fail;
	}

	/* ...then wait for completion and verify the metadata type:
	   block 0 is the RG header, the rest are RB bitmap blocks */
	for (y = length; y--;) {
		bi = rgd->rd_bits + y;
		error = gfs2_meta_wait(sdp, bi->bi_bh);
		if (error)
			goto fail;
		if (gfs2_metatype_check(sdp, bi->bi_bh, y ? GFS2_METATYPE_RB :
					      GFS2_METATYPE_RG)) {
			error = -EIO;
			goto fail;
		}
	}

	/* Refresh the in-core counters from the header if they are stale */
	if (!(rgd->rd_flags & GFS2_RDF_UPTODATE)) {
		gfs2_rgrp_in(rgd, (rgd->rd_bits[0].bi_bh)->b_data);
		rgd->rd_flags |= GFS2_RDF_UPTODATE;
	}

	spin_lock(&sdp->sd_rindex_spin);
	rgd->rd_free_clone = rgd->rd_rg.rg_free;
	rgd->rd_bh_count++;
	spin_unlock(&sdp->sd_rindex_spin);

	mutex_unlock(&rgd->rd_mutex);

	return 0;

fail:
	/* Release any buffers read before the failure (x is the count of
	   successfully issued reads) */
	while (x--) {
		bi = rgd->rd_bits + x;
		brelse(bi->bi_bh);
		bi->bi_bh = NULL;
		gfs2_assert_warn(sdp, !bi->bi_clone);
	}
	mutex_unlock(&rgd->rd_mutex);

	return error;
}
803 
804 void gfs2_rgrp_bh_hold(struct gfs2_rgrpd *rgd)
805 {
806 	struct gfs2_sbd *sdp = rgd->rd_sbd;
807 
808 	spin_lock(&sdp->sd_rindex_spin);
809 	gfs2_assert_warn(rgd->rd_sbd, rgd->rd_bh_count);
810 	rgd->rd_bh_count++;
811 	spin_unlock(&sdp->sd_rindex_spin);
812 }
813 
814 /**
815  * gfs2_rgrp_bh_put - Release RG bitmaps read in with gfs2_rgrp_bh_get()
816  * @rgd: the struct gfs2_rgrpd describing the RG to read in
817  *
818  */
819 
820 void gfs2_rgrp_bh_put(struct gfs2_rgrpd *rgd)
821 {
822 	struct gfs2_sbd *sdp = rgd->rd_sbd;
823 	int x, length = rgd->rd_length;
824 
825 	spin_lock(&sdp->sd_rindex_spin);
826 	gfs2_assert_warn(rgd->rd_sbd, rgd->rd_bh_count);
827 	if (--rgd->rd_bh_count) {
828 		spin_unlock(&sdp->sd_rindex_spin);
829 		return;
830 	}
831 
832 	for (x = 0; x < length; x++) {
833 		struct gfs2_bitmap *bi = rgd->rd_bits + x;
834 		kfree(bi->bi_clone);
835 		bi->bi_clone = NULL;
836 		brelse(bi->bi_bh);
837 		bi->bi_bh = NULL;
838 	}
839 
840 	spin_unlock(&sdp->sd_rindex_spin);
841 }
842 
843 void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd)
844 {
845 	struct gfs2_sbd *sdp = rgd->rd_sbd;
846 	unsigned int length = rgd->rd_length;
847 	unsigned int x;
848 
849 	for (x = 0; x < length; x++) {
850 		struct gfs2_bitmap *bi = rgd->rd_bits + x;
851 		if (!bi->bi_clone)
852 			continue;
853 		memcpy(bi->bi_clone + bi->bi_offset,
854 		       bi->bi_bh->b_data + bi->bi_offset, bi->bi_len);
855 	}
856 
857 	spin_lock(&sdp->sd_rindex_spin);
858 	rgd->rd_free_clone = rgd->rd_rg.rg_free;
859 	spin_unlock(&sdp->sd_rindex_spin);
860 }
861 
/**
 * gfs2_alloc_get - get the struct gfs2_alloc structure for an inode
 * @ip: the incore GFS2 inode structure
 *
 * Allocates a fresh, zeroed gfs2_alloc and attaches it to the inode.
 * The inode must not already have one (BUG_ON otherwise).
 *
 * Returns: the struct gfs2_alloc, or NULL if the allocation failed
 */

struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip)
{
	BUG_ON(ip->i_alloc != NULL);
	ip->i_alloc = kzalloc(sizeof(struct gfs2_alloc), GFP_KERNEL);
	return ip->i_alloc;
}
875 
876 /**
877  * try_rgrp_fit - See if a given reservation will fit in a given RG
878  * @rgd: the RG data
879  * @al: the struct gfs2_alloc structure describing the reservation
880  *
881  * If there's room for the requested blocks to be allocated from the RG:
882  *   Sets the $al_rgd field in @al.
883  *
884  * Returns: 1 on success (it fits), 0 on failure (it doesn't fit)
885  */
886 
887 static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_alloc *al)
888 {
889 	struct gfs2_sbd *sdp = rgd->rd_sbd;
890 	int ret = 0;
891 
892 	if (rgd->rd_flags & GFS2_RDF_NOALLOC)
893 		return 0;
894 
895 	spin_lock(&sdp->sd_rindex_spin);
896 	if (rgd->rd_free_clone >= al->al_requested) {
897 		al->al_rgd = rgd;
898 		ret = 1;
899 	}
900 	spin_unlock(&sdp->sd_rindex_spin);
901 
902 	return ret;
903 }
904 
/**
 * try_rgrp_unlink - Look for any unlinked, allocated, but unused inodes
 * @rgd: The rgrp
 * @last_unlinked: highest unlinked block address already handled; updated
 *	here so repeat calls don't return the same inode again
 *
 * Scans the rgrp's bitmaps for blocks in the GFS2_BLKST_UNLINKED state
 * and tries to look up the corresponding inode.
 *
 * Returns: The inode, if one has been found; otherwise NULL, with the
 * rgrp's GFS2_RDF_CHECK flag cleared so the scan isn't repeated
 */

static struct inode *try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked)
{
	struct inode *inode;
	u32 goal = 0, block;
	u64 no_addr;
	struct gfs2_sbd *sdp = rgd->rd_sbd;
	unsigned int n;

	for(;;) {
		if (goal >= rgd->rd_data)
			break;
		/* Hold the log flush lock across the bitmap search */
		down_write(&sdp->sd_log_flush_lock);
		n = 1;
		block = rgblk_search(rgd, goal, GFS2_BLKST_UNLINKED,
				     GFS2_BLKST_UNLINKED, &n);
		up_write(&sdp->sd_log_flush_lock);
		if (block == BFITNOENT)
			break;
		/* rgblk_search can return a block < goal, so we need to
		   keep it marching forward. */
		no_addr = block + rgd->rd_data0;
		goal++;
		/* Skip addresses already processed in an earlier pass */
		if (*last_unlinked != NO_BLOCK && no_addr <= *last_unlinked)
			continue;
		*last_unlinked = no_addr;
		inode = gfs2_inode_lookup(rgd->rd_sbd->sd_vfs, DT_UNKNOWN,
					  no_addr, -1, 1);
		if (!IS_ERR(inode))
			return inode;
	}

	/* Entire rgrp scanned without a usable hit: no need to re-check */
	rgd->rd_flags &= ~GFS2_RDF_CHECK;
	return NULL;
}
946 
947 /**
948  * recent_rgrp_first - get first RG from "recent" list
949  * @sdp: The GFS2 superblock
950  * @rglast: address of the rgrp used last
951  *
952  * Returns: The first rgrp in the recent list
953  */
954 
955 static struct gfs2_rgrpd *recent_rgrp_first(struct gfs2_sbd *sdp,
956 					    u64 rglast)
957 {
958 	struct gfs2_rgrpd *rgd;
959 
960 	spin_lock(&sdp->sd_rindex_spin);
961 
962 	if (rglast) {
963 		list_for_each_entry(rgd, &sdp->sd_rindex_recent_list, rd_recent) {
964 			if (rgrp_contains_block(rgd, rglast))
965 				goto out;
966 		}
967 	}
968 	rgd = NULL;
969 	if (!list_empty(&sdp->sd_rindex_recent_list))
970 		rgd = list_entry(sdp->sd_rindex_recent_list.next,
971 				 struct gfs2_rgrpd, rd_recent);
972 out:
973 	spin_unlock(&sdp->sd_rindex_spin);
974 	return rgd;
975 }
976 
/**
 * recent_rgrp_next - get next RG from "recent" list
 * @cur_rgd: current rgrp
 * @remove: nonzero to also drop @cur_rgd from the recent list
 *
 * If @cur_rgd is on the recent list, returns its successor (or NULL at
 * the end of the list).  If @cur_rgd is no longer on the list, falls
 * back to the head of the list.
 *
 * Returns: The next rgrp in the recent list, or NULL
 */

static struct gfs2_rgrpd *recent_rgrp_next(struct gfs2_rgrpd *cur_rgd,
					   int remove)
{
	struct gfs2_sbd *sdp = cur_rgd->rd_sbd;
	struct list_head *head;
	struct gfs2_rgrpd *rgd;

	spin_lock(&sdp->sd_rindex_spin);

	head = &sdp->sd_rindex_recent_list;

	list_for_each_entry(rgd, head, rd_recent) {
		if (rgd == cur_rgd) {
			/* Pick the successor before (possibly) unlinking
			   cur_rgd from the list */
			if (cur_rgd->rd_recent.next != head)
				rgd = list_entry(cur_rgd->rd_recent.next,
						 struct gfs2_rgrpd, rd_recent);
			else
				rgd = NULL;

			if (remove)
				list_del(&cur_rgd->rd_recent);

			goto out;
		}
	}

	/* cur_rgd wasn't on the list: restart from the head if non-empty */
	rgd = NULL;
	if (!list_empty(head))
		rgd = list_entry(head->next, struct gfs2_rgrpd, rd_recent);

out:
	spin_unlock(&sdp->sd_rindex_spin);
	return rgd;
}
1019 
/**
 * recent_rgrp_add - add an RG to tail of "recent" list
 * @new_rgd: The rgrp to add
 *
 * Does nothing if @new_rgd is already on the list, or if the list has
 * reached its cap of sd_rgrps / journal-count entries (each journal gets
 * a proportional share of the recent list).
 */

static void recent_rgrp_add(struct gfs2_rgrpd *new_rgd)
{
	struct gfs2_sbd *sdp = new_rgd->rd_sbd;
	struct gfs2_rgrpd *rgd;
	unsigned int count = 0;
	unsigned int max = sdp->sd_rgrps / gfs2_jindex_size(sdp);

	spin_lock(&sdp->sd_rindex_spin);

	list_for_each_entry(rgd, &sdp->sd_rindex_recent_list, rd_recent) {
		if (rgd == new_rgd)
			goto out;	/* already present */

		if (++count >= max)
			goto out;	/* list is full */
	}
	list_add_tail(&new_rgd->rd_recent, &sdp->sd_rindex_recent_list);

out:
	spin_unlock(&sdp->sd_rindex_spin);
}
1047 
/**
 * forward_rgrp_get - get an rgrp to try next from full list
 * @sdp: The GFS2 superblock
 *
 * If no forward rgrp has been chosen yet, picks a starting point spread
 * across the filesystem by this node's journal id, so different nodes
 * begin their full-list scans in different regions.
 *
 * Returns: The rgrp to try next
 */

static struct gfs2_rgrpd *forward_rgrp_get(struct gfs2_sbd *sdp)
{
	struct gfs2_rgrpd *rgd;
	unsigned int journals = gfs2_jindex_size(sdp);
	unsigned int rg = 0, x;

	spin_lock(&sdp->sd_rindex_spin);

	rgd = sdp->sd_rindex_forward;
	if (!rgd) {
		/* Start this journal's scan rg = rgrps * jid / journals
		   of the way through the rgrp list */
		if (sdp->sd_rgrps >= journals)
			rg = sdp->sd_rgrps * sdp->sd_jdesc->jd_jid / journals;

		for (x = 0, rgd = gfs2_rgrpd_get_first(sdp); x < rg;
		     x++, rgd = gfs2_rgrpd_get_next(rgd))
			/* Do Nothing */;

		sdp->sd_rindex_forward = rgd;
	}

	spin_unlock(&sdp->sd_rindex_spin);

	return rgd;
}
1079 
/**
 * forward_rgrp_set - set the forward rgrp pointer
 * @sdp: the filesystem
 * @rgd: The new forward rgrp
 *
 * Records where the next full-list scan should resume (see
 * forward_rgrp_get).
 */

static void forward_rgrp_set(struct gfs2_sbd *sdp, struct gfs2_rgrpd *rgd)
{
	spin_lock(&sdp->sd_rindex_spin);
	sdp->sd_rindex_forward = rgd;
	spin_unlock(&sdp->sd_rindex_spin);
}
1093 
1094 /**
1095  * get_local_rgrp - Choose and lock a rgrp for allocation
1096  * @ip: the inode to reserve space for
1097  * @rgp: the chosen and locked rgrp
1098  *
1099  * Try to acquire rgrp in way which avoids contending with others.
1100  *
1101  * Returns: errno
1102  */
1103 
1104 static struct inode *get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
1105 {
1106 	struct inode *inode = NULL;
1107 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1108 	struct gfs2_rgrpd *rgd, *begin = NULL;
1109 	struct gfs2_alloc *al = ip->i_alloc;
1110 	int flags = LM_FLAG_TRY;
1111 	int skipped = 0;
1112 	int loops = 0;
1113 	int error, rg_locked;
1114 
1115 	/* Try recently successful rgrps */
1116 
1117 	rgd = recent_rgrp_first(sdp, ip->i_goal);
1118 
1119 	while (rgd) {
1120 		rg_locked = 0;
1121 
1122 		if (gfs2_glock_is_locked_by_me(rgd->rd_gl)) {
1123 			rg_locked = 1;
1124 			error = 0;
1125 		} else {
1126 			error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
1127 						   LM_FLAG_TRY, &al->al_rgd_gh);
1128 		}
1129 		switch (error) {
1130 		case 0:
1131 			if (try_rgrp_fit(rgd, al))
1132 				goto out;
1133 			if (rgd->rd_flags & GFS2_RDF_CHECK)
1134 				inode = try_rgrp_unlink(rgd, last_unlinked);
1135 			if (!rg_locked)
1136 				gfs2_glock_dq_uninit(&al->al_rgd_gh);
1137 			if (inode)
1138 				return inode;
1139 			rgd = recent_rgrp_next(rgd, 1);
1140 			break;
1141 
1142 		case GLR_TRYFAILED:
1143 			rgd = recent_rgrp_next(rgd, 0);
1144 			break;
1145 
1146 		default:
1147 			return ERR_PTR(error);
1148 		}
1149 	}
1150 
1151 	/* Go through full list of rgrps */
1152 
1153 	begin = rgd = forward_rgrp_get(sdp);
1154 
1155 	for (;;) {
1156 		rg_locked = 0;
1157 
1158 		if (gfs2_glock_is_locked_by_me(rgd->rd_gl)) {
1159 			rg_locked = 1;
1160 			error = 0;
1161 		} else {
1162 			error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, flags,
1163 						   &al->al_rgd_gh);
1164 		}
1165 		switch (error) {
1166 		case 0:
1167 			if (try_rgrp_fit(rgd, al))
1168 				goto out;
1169 			if (rgd->rd_flags & GFS2_RDF_CHECK)
1170 				inode = try_rgrp_unlink(rgd, last_unlinked);
1171 			if (!rg_locked)
1172 				gfs2_glock_dq_uninit(&al->al_rgd_gh);
1173 			if (inode)
1174 				return inode;
1175 			break;
1176 
1177 		case GLR_TRYFAILED:
1178 			skipped++;
1179 			break;
1180 
1181 		default:
1182 			return ERR_PTR(error);
1183 		}
1184 
1185 		rgd = gfs2_rgrpd_get_next(rgd);
1186 		if (!rgd)
1187 			rgd = gfs2_rgrpd_get_first(sdp);
1188 
1189 		if (rgd == begin) {
1190 			if (++loops >= 3)
1191 				return ERR_PTR(-ENOSPC);
1192 			if (!skipped)
1193 				loops++;
1194 			flags = 0;
1195 			if (loops == 2)
1196 				gfs2_log_flush(sdp, NULL);
1197 		}
1198 	}
1199 
1200 out:
1201 	if (begin) {
1202 		recent_rgrp_add(rgd);
1203 		rgd = gfs2_rgrpd_get_next(rgd);
1204 		if (!rgd)
1205 			rgd = gfs2_rgrpd_get_first(sdp);
1206 		forward_rgrp_set(sdp, rgd);
1207 	}
1208 
1209 	return NULL;
1210 }
1211 
1212 /**
1213  * gfs2_inplace_reserve_i - Reserve space in the filesystem
1214  * @ip: the inode to reserve space for
1215  *
1216  * Returns: errno
1217  */
1218 
1219 int gfs2_inplace_reserve_i(struct gfs2_inode *ip, char *file, unsigned int line)
1220 {
1221 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1222 	struct gfs2_alloc *al = ip->i_alloc;
1223 	struct inode *inode;
1224 	int error = 0;
1225 	u64 last_unlinked = NO_BLOCK;
1226 
1227 	if (gfs2_assert_warn(sdp, al->al_requested))
1228 		return -EINVAL;
1229 
1230 try_again:
1231 	/* We need to hold the rindex unless the inode we're using is
1232 	   the rindex itself, in which case it's already held. */
1233 	if (ip != GFS2_I(sdp->sd_rindex))
1234 		error = gfs2_rindex_hold(sdp, &al->al_ri_gh);
1235 	else if (!sdp->sd_rgrps) /* We may not have the rindex read in, so: */
1236 		error = gfs2_ri_update_special(ip);
1237 
1238 	if (error)
1239 		return error;
1240 
1241 	inode = get_local_rgrp(ip, &last_unlinked);
1242 	if (inode) {
1243 		if (ip != GFS2_I(sdp->sd_rindex))
1244 			gfs2_glock_dq_uninit(&al->al_ri_gh);
1245 		if (IS_ERR(inode))
1246 			return PTR_ERR(inode);
1247 		iput(inode);
1248 		gfs2_log_flush(sdp, NULL);
1249 		goto try_again;
1250 	}
1251 
1252 	al->al_file = file;
1253 	al->al_line = line;
1254 
1255 	return 0;
1256 }
1257 
1258 /**
1259  * gfs2_inplace_release - release an inplace reservation
1260  * @ip: the inode the reservation was taken out on
1261  *
1262  * Release a reservation made by gfs2_inplace_reserve().
1263  */
1264 
1265 void gfs2_inplace_release(struct gfs2_inode *ip)
1266 {
1267 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1268 	struct gfs2_alloc *al = ip->i_alloc;
1269 
1270 	if (gfs2_assert_warn(sdp, al->al_alloced <= al->al_requested) == -1)
1271 		fs_warn(sdp, "al_alloced = %u, al_requested = %u "
1272 			     "al_file = %s, al_line = %u\n",
1273 		             al->al_alloced, al->al_requested, al->al_file,
1274 			     al->al_line);
1275 
1276 	al->al_rgd = NULL;
1277 	if (al->al_rgd_gh.gh_gl)
1278 		gfs2_glock_dq_uninit(&al->al_rgd_gh);
1279 	if (ip != GFS2_I(sdp->sd_rindex))
1280 		gfs2_glock_dq_uninit(&al->al_ri_gh);
1281 }
1282 
1283 /**
1284  * gfs2_get_block_type - Check a block in a RG is of given type
1285  * @rgd: the resource group holding the block
1286  * @block: the block number
1287  *
1288  * Returns: The block type (GFS2_BLKST_*)
1289  */
1290 
1291 unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block)
1292 {
1293 	struct gfs2_bitmap *bi = NULL;
1294 	u32 length, rgrp_block, buf_block;
1295 	unsigned int buf;
1296 	unsigned char type;
1297 
1298 	length = rgd->rd_length;
1299 	rgrp_block = block - rgd->rd_data0;
1300 
1301 	for (buf = 0; buf < length; buf++) {
1302 		bi = rgd->rd_bits + buf;
1303 		if (rgrp_block < (bi->bi_start + bi->bi_len) * GFS2_NBBY)
1304 			break;
1305 	}
1306 
1307 	gfs2_assert(rgd->rd_sbd, buf < length);
1308 	buf_block = rgrp_block - bi->bi_start * GFS2_NBBY;
1309 
1310 	type = gfs2_testbit(rgd, bi->bi_bh->b_data + bi->bi_offset,
1311 			   bi->bi_len, buf_block);
1312 
1313 	return type;
1314 }
1315 
1316 /**
1317  * rgblk_search - find a block in @old_state, change allocation
1318  *           state to @new_state
1319  * @rgd: the resource group descriptor
1320  * @goal: the goal block within the RG (start here to search for avail block)
1321  * @old_state: GFS2_BLKST_XXX the before-allocation state to find
1322  * @new_state: GFS2_BLKST_XXX the after-allocation block state
1323  * @n: The extent length
1324  *
1325  * Walk rgrp's bitmap to find bits that represent a block in @old_state.
1326  * Add the found bitmap buffer to the transaction.
1327  * Set the found bits to @new_state to change block's allocation state.
1328  *
1329  * This function never fails, because we wouldn't call it unless we
1330  * know (from reservation results, etc.) that a block is available.
1331  *
1332  * Scope of @goal and returned block is just within rgrp, not the whole
1333  * filesystem.
1334  *
1335  * Returns:  the block number allocated
1336  */
1337 
1338 static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
1339 			unsigned char old_state, unsigned char new_state,
1340 			unsigned int *n)
1341 {
1342 	struct gfs2_bitmap *bi = NULL;
1343 	const u32 length = rgd->rd_length;
1344 	u32 blk = 0;
1345 	unsigned int buf, x;
1346 	const unsigned int elen = *n;
1347 	const u8 *buffer;
1348 
1349 	*n = 0;
1350 	/* Find bitmap block that contains bits for goal block */
1351 	for (buf = 0; buf < length; buf++) {
1352 		bi = rgd->rd_bits + buf;
1353 		if (goal < (bi->bi_start + bi->bi_len) * GFS2_NBBY)
1354 			break;
1355 	}
1356 
1357 	gfs2_assert(rgd->rd_sbd, buf < length);
1358 
1359 	/* Convert scope of "goal" from rgrp-wide to within found bit block */
1360 	goal -= bi->bi_start * GFS2_NBBY;
1361 
1362 	/* Search (up to entire) bitmap in this rgrp for allocatable block.
1363 	   "x <= length", instead of "x < length", because we typically start
1364 	   the search in the middle of a bit block, but if we can't find an
1365 	   allocatable block anywhere else, we want to be able wrap around and
1366 	   search in the first part of our first-searched bit block.  */
1367 	for (x = 0; x <= length; x++) {
1368 		/* The GFS2_BLKST_UNLINKED state doesn't apply to the clone
1369 		   bitmaps, so we must search the originals for that. */
1370 		buffer = bi->bi_bh->b_data + bi->bi_offset;
1371 		if (old_state != GFS2_BLKST_UNLINKED && bi->bi_clone)
1372 			buffer = bi->bi_clone + bi->bi_offset;
1373 
1374 		blk = gfs2_bitfit(buffer, bi->bi_len, goal, old_state);
1375 		if (blk != BFITNOENT)
1376 			break;
1377 
1378 		/* Try next bitmap block (wrap back to rgrp header if at end) */
1379 		buf = (buf + 1) % length;
1380 		bi = rgd->rd_bits + buf;
1381 		goal = 0;
1382 	}
1383 
1384 	if (blk != BFITNOENT && old_state != new_state) {
1385 		*n = 1;
1386 		gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
1387 		gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset,
1388 			    bi->bi_len, blk, new_state);
1389 		goal = blk;
1390 		while (*n < elen) {
1391 			goal++;
1392 			if (goal >= (bi->bi_len * GFS2_NBBY))
1393 				break;
1394 			if (gfs2_testbit(rgd, buffer, bi->bi_len, goal) !=
1395 			    GFS2_BLKST_FREE)
1396 				break;
1397 			gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone,
1398 				    bi->bi_offset, bi->bi_len, goal,
1399 				    new_state);
1400 			(*n)++;
1401 		}
1402 	}
1403 
1404 	return (blk == BFITNOENT) ? blk : (bi->bi_start * GFS2_NBBY) + blk;
1405 }
1406 
1407 /**
1408  * rgblk_free - Change alloc state of given block(s)
1409  * @sdp: the filesystem
1410  * @bstart: the start of a run of blocks to free
1411  * @blen: the length of the block run (all must lie within ONE RG!)
1412  * @new_state: GFS2_BLKST_XXX the after-allocation block state
1413  *
1414  * Returns:  Resource group containing the block(s)
1415  */
1416 
1417 static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
1418 				     u32 blen, unsigned char new_state)
1419 {
1420 	struct gfs2_rgrpd *rgd;
1421 	struct gfs2_bitmap *bi = NULL;
1422 	u32 length, rgrp_blk, buf_blk;
1423 	unsigned int buf;
1424 
1425 	rgd = gfs2_blk2rgrpd(sdp, bstart);
1426 	if (!rgd) {
1427 		if (gfs2_consist(sdp))
1428 			fs_err(sdp, "block = %llu\n", (unsigned long long)bstart);
1429 		return NULL;
1430 	}
1431 
1432 	length = rgd->rd_length;
1433 
1434 	rgrp_blk = bstart - rgd->rd_data0;
1435 
1436 	while (blen--) {
1437 		for (buf = 0; buf < length; buf++) {
1438 			bi = rgd->rd_bits + buf;
1439 			if (rgrp_blk < (bi->bi_start + bi->bi_len) * GFS2_NBBY)
1440 				break;
1441 		}
1442 
1443 		gfs2_assert(rgd->rd_sbd, buf < length);
1444 
1445 		buf_blk = rgrp_blk - bi->bi_start * GFS2_NBBY;
1446 		rgrp_blk++;
1447 
1448 		if (!bi->bi_clone) {
1449 			bi->bi_clone = kmalloc(bi->bi_bh->b_size,
1450 					       GFP_NOFS | __GFP_NOFAIL);
1451 			memcpy(bi->bi_clone + bi->bi_offset,
1452 			       bi->bi_bh->b_data + bi->bi_offset,
1453 			       bi->bi_len);
1454 		}
1455 		gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
1456 		gfs2_setbit(rgd, bi->bi_bh->b_data, NULL, bi->bi_offset,
1457 			    bi->bi_len, buf_blk, new_state);
1458 	}
1459 
1460 	return rgd;
1461 }
1462 
1463 /**
1464  * gfs2_alloc_block - Allocate a block
1465  * @ip: the inode to allocate the block for
1466  *
1467  * Returns: the allocated block
1468  */
1469 
1470 u64 gfs2_alloc_block(struct gfs2_inode *ip, unsigned int *n)
1471 {
1472 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1473 	struct gfs2_alloc *al = ip->i_alloc;
1474 	struct gfs2_rgrpd *rgd = al->al_rgd;
1475 	u32 goal, blk;
1476 	u64 block;
1477 
1478 	if (rgrp_contains_block(rgd, ip->i_goal))
1479 		goal = ip->i_goal - rgd->rd_data0;
1480 	else
1481 		goal = rgd->rd_last_alloc;
1482 
1483 	blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, GFS2_BLKST_USED, n);
1484 	BUG_ON(blk == BFITNOENT);
1485 
1486 	rgd->rd_last_alloc = blk;
1487 	block = rgd->rd_data0 + blk;
1488 	ip->i_goal = block;
1489 
1490 	gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free >= *n);
1491 	rgd->rd_rg.rg_free -= *n;
1492 
1493 	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
1494 	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
1495 
1496 	al->al_alloced += *n;
1497 
1498 	gfs2_statfs_change(sdp, 0, -*n, 0);
1499 	gfs2_quota_change(ip, *n, ip->i_inode.i_uid, ip->i_inode.i_gid);
1500 
1501 	spin_lock(&sdp->sd_rindex_spin);
1502 	rgd->rd_free_clone -= *n;
1503 	spin_unlock(&sdp->sd_rindex_spin);
1504 
1505 	return block;
1506 }
1507 
1508 /**
1509  * gfs2_alloc_di - Allocate a dinode
1510  * @dip: the directory that the inode is going in
1511  *
1512  * Returns: the block allocated
1513  */
1514 
1515 u64 gfs2_alloc_di(struct gfs2_inode *dip, u64 *generation)
1516 {
1517 	struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
1518 	struct gfs2_alloc *al = dip->i_alloc;
1519 	struct gfs2_rgrpd *rgd = al->al_rgd;
1520 	u32 blk;
1521 	u64 block;
1522 	unsigned int n = 1;
1523 
1524 	blk = rgblk_search(rgd, rgd->rd_last_alloc,
1525 			   GFS2_BLKST_FREE, GFS2_BLKST_DINODE, &n);
1526 	BUG_ON(blk == BFITNOENT);
1527 
1528 	rgd->rd_last_alloc = blk;
1529 
1530 	block = rgd->rd_data0 + blk;
1531 
1532 	gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free);
1533 	rgd->rd_rg.rg_free--;
1534 	rgd->rd_rg.rg_dinodes++;
1535 	*generation = rgd->rd_rg.rg_igeneration++;
1536 	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
1537 	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
1538 
1539 	al->al_alloced++;
1540 
1541 	gfs2_statfs_change(sdp, 0, -1, +1);
1542 	gfs2_trans_add_unrevoke(sdp, block, 1);
1543 
1544 	spin_lock(&sdp->sd_rindex_spin);
1545 	rgd->rd_free_clone--;
1546 	spin_unlock(&sdp->sd_rindex_spin);
1547 
1548 	return block;
1549 }
1550 
1551 /**
1552  * gfs2_free_data - free a contiguous run of data block(s)
1553  * @ip: the inode these blocks are being freed from
1554  * @bstart: first block of a run of contiguous blocks
1555  * @blen: the length of the block run
1556  *
1557  */
1558 
1559 void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen)
1560 {
1561 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1562 	struct gfs2_rgrpd *rgd;
1563 
1564 	rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE);
1565 	if (!rgd)
1566 		return;
1567 
1568 	rgd->rd_rg.rg_free += blen;
1569 
1570 	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
1571 	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
1572 
1573 	gfs2_trans_add_rg(rgd);
1574 
1575 	gfs2_statfs_change(sdp, 0, +blen, 0);
1576 	gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid);
1577 }
1578 
1579 /**
1580  * gfs2_free_meta - free a contiguous run of data block(s)
1581  * @ip: the inode these blocks are being freed from
1582  * @bstart: first block of a run of contiguous blocks
1583  * @blen: the length of the block run
1584  *
1585  */
1586 
1587 void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen)
1588 {
1589 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1590 	struct gfs2_rgrpd *rgd;
1591 
1592 	rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE);
1593 	if (!rgd)
1594 		return;
1595 
1596 	rgd->rd_rg.rg_free += blen;
1597 
1598 	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
1599 	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
1600 
1601 	gfs2_trans_add_rg(rgd);
1602 
1603 	gfs2_statfs_change(sdp, 0, +blen, 0);
1604 	gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid);
1605 	gfs2_meta_wipe(ip, bstart, blen);
1606 }
1607 
/**
 * gfs2_unlink_di - Mark an inode's dinode block as unlinked
 * @inode: the inode whose dinode block is being unlinked
 *
 * Sets the block's bitmap state to GFS2_BLKST_UNLINKED so that it can
 * be found and deallocated later (see try_rgrp_unlink()).
 */
void gfs2_unlink_di(struct inode *inode)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct gfs2_rgrpd *rgd;
	u64 blkno = ip->i_no_addr;

	rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_UNLINKED);
	if (!rgd)
		return;
	/* Journal the updated rgrp header */
	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
	gfs2_trans_add_rg(rgd);
}
1622 
/**
 * gfs2_free_uninit_di - free a dinode block (bitmap and rgrp counts only)
 * @rgd: the resource group that contains the dinode
 * @blkno: the block number of the dinode to free
 *
 * Quota and metadata wipe are handled by the caller (gfs2_free_di()).
 */
static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno)
{
	struct gfs2_sbd *sdp = rgd->rd_sbd;
	struct gfs2_rgrpd *tmp_rgd;

	tmp_rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_FREE);
	if (!tmp_rgd)
		return;
	/* The block must belong to the rgrp the caller handed us */
	gfs2_assert_withdraw(sdp, rgd == tmp_rgd);

	if (!rgd->rd_rg.rg_dinodes)
		gfs2_consist_rgrpd(rgd);
	rgd->rd_rg.rg_dinodes--;
	rgd->rd_rg.rg_free++;

	/* Journal the updated rgrp header */
	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);

	/* One more free block, one fewer dinode */
	gfs2_statfs_change(sdp, 0, +1, -1);
	gfs2_trans_add_rg(rgd);
}
1644 
1645 
/**
 * gfs2_free_di - free a dinode
 * @rgd: the resource group that contains the dinode
 * @ip: the inode representing the dinode to free
 *
 * Frees the dinode block, reverses its quota charge, and wipes its
 * metadata out of the in-core log.
 */
void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
{
	gfs2_free_uninit_di(rgd, ip->i_no_addr);
	gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid);
	gfs2_meta_wipe(ip, ip->i_no_addr, 1);
}
1652 
1653 /**
1654  * gfs2_rlist_add - add a RG to a list of RGs
1655  * @sdp: the filesystem
1656  * @rlist: the list of resource groups
1657  * @block: the block
1658  *
1659  * Figure out what RG a block belongs to and add that RG to the list
1660  *
1661  * FIXME: Don't use NOFAIL
1662  *
1663  */
1664 
1665 void gfs2_rlist_add(struct gfs2_sbd *sdp, struct gfs2_rgrp_list *rlist,
1666 		    u64 block)
1667 {
1668 	struct gfs2_rgrpd *rgd;
1669 	struct gfs2_rgrpd **tmp;
1670 	unsigned int new_space;
1671 	unsigned int x;
1672 
1673 	if (gfs2_assert_warn(sdp, !rlist->rl_ghs))
1674 		return;
1675 
1676 	rgd = gfs2_blk2rgrpd(sdp, block);
1677 	if (!rgd) {
1678 		if (gfs2_consist(sdp))
1679 			fs_err(sdp, "block = %llu\n", (unsigned long long)block);
1680 		return;
1681 	}
1682 
1683 	for (x = 0; x < rlist->rl_rgrps; x++)
1684 		if (rlist->rl_rgd[x] == rgd)
1685 			return;
1686 
1687 	if (rlist->rl_rgrps == rlist->rl_space) {
1688 		new_space = rlist->rl_space + 10;
1689 
1690 		tmp = kcalloc(new_space, sizeof(struct gfs2_rgrpd *),
1691 			      GFP_NOFS | __GFP_NOFAIL);
1692 
1693 		if (rlist->rl_rgd) {
1694 			memcpy(tmp, rlist->rl_rgd,
1695 			       rlist->rl_space * sizeof(struct gfs2_rgrpd *));
1696 			kfree(rlist->rl_rgd);
1697 		}
1698 
1699 		rlist->rl_space = new_space;
1700 		rlist->rl_rgd = tmp;
1701 	}
1702 
1703 	rlist->rl_rgd[rlist->rl_rgrps++] = rgd;
1704 }
1705 
1706 /**
1707  * gfs2_rlist_alloc - all RGs have been added to the rlist, now allocate
1708  *      and initialize an array of glock holders for them
1709  * @rlist: the list of resource groups
1710  * @state: the lock state to acquire the RG lock in
1711  * @flags: the modifier flags for the holder structures
1712  *
1713  * FIXME: Don't use NOFAIL
1714  *
1715  */
1716 
1717 void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state)
1718 {
1719 	unsigned int x;
1720 
1721 	rlist->rl_ghs = kcalloc(rlist->rl_rgrps, sizeof(struct gfs2_holder),
1722 				GFP_NOFS | __GFP_NOFAIL);
1723 	for (x = 0; x < rlist->rl_rgrps; x++)
1724 		gfs2_holder_init(rlist->rl_rgd[x]->rd_gl,
1725 				state, 0,
1726 				&rlist->rl_ghs[x]);
1727 }
1728 
1729 /**
1730  * gfs2_rlist_free - free a resource group list
1731  * @list: the list of resource groups
1732  *
1733  */
1734 
1735 void gfs2_rlist_free(struct gfs2_rgrp_list *rlist)
1736 {
1737 	unsigned int x;
1738 
1739 	kfree(rlist->rl_rgd);
1740 
1741 	if (rlist->rl_ghs) {
1742 		for (x = 0; x < rlist->rl_rgrps; x++)
1743 			gfs2_holder_uninit(&rlist->rl_ghs[x]);
1744 		kfree(rlist->rl_ghs);
1745 	}
1746 }
1747 
1748