xref: /openbmc/linux/fs/gfs2/rgrp.c (revision 9ac8d3fb)
1 /*
2  * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
3  * Copyright (C) 2004-2008 Red Hat, Inc.  All rights reserved.
4  *
5  * This copyrighted material is made available to anyone wishing to use,
6  * modify, copy, or redistribute it subject to the terms and conditions
7  * of the GNU General Public License version 2.
8  */
9 
10 #include <linux/slab.h>
11 #include <linux/spinlock.h>
12 #include <linux/completion.h>
13 #include <linux/buffer_head.h>
14 #include <linux/fs.h>
15 #include <linux/gfs2_ondisk.h>
16 #include <linux/lm_interface.h>
17 #include <linux/prefetch.h>
18 
19 #include "gfs2.h"
20 #include "incore.h"
21 #include "glock.h"
22 #include "glops.h"
23 #include "lops.h"
24 #include "meta_io.h"
25 #include "quota.h"
26 #include "rgrp.h"
27 #include "super.h"
28 #include "trans.h"
29 #include "util.h"
30 #include "log.h"
31 #include "inode.h"
32 #include "ops_address.h"
33 
34 #define BFITNOENT ((u32)~0)
35 #define NO_BLOCK ((u64)~0)
36 
37 #if BITS_PER_LONG == 32
38 #define LBITMASK   (0x55555555UL)
39 #define LBITSKIP55 (0x55555555UL)
40 #define LBITSKIP00 (0x00000000UL)
41 #else
42 #define LBITMASK   (0x5555555555555555UL)
43 #define LBITSKIP55 (0x5555555555555555UL)
44 #define LBITSKIP00 (0x0000000000000000UL)
45 #endif
46 
47 /*
48  * These routines are used by the resource group routines (rgrp.c)
49  * to keep track of block allocation.  Each block is represented by two
50  * bits.  So, each byte represents GFS2_NBBY (i.e. 4) blocks.
51  *
52  * 0 = Free
53  * 1 = Used (not metadata)
54  * 2 = Unlinked (still in use) inode
55  * 3 = Used (metadata)
56  */
57 
/*
 * valid_change[new_state * 4 + cur_state] is nonzero when changing a
 * block's two-bit bitmap state from cur_state to new_state is a legal
 * transition (see gfs2_setbit()).  Rows are indexed by the new state,
 * columns by the current state.
 */
static const char valid_change[16] = {
	        /* current */
	/* n */ 0, 1, 1, 1,
	/* e */ 1, 0, 0, 0,
	/* w */ 0, 0, 0, 1,
	        1, 0, 0, 0
};
65 
66 static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
67                         unsigned char old_state, unsigned char new_state,
68 			unsigned int *n);
69 
/**
 * gfs2_setbit - Set the state of a block's bit-pair in the bitmap(s)
 * @rgd: the resource group descriptor (used for consistency reporting)
 * @buf1: the primary bitmap buffer
 * @buf2: optional second bitmap buffer to mirror the change into
 * @offset: byte offset of the bitmap data within the buffers
 * @buflen: the length (in bytes) of the bitmap data
 * @block: the block to set (relative to this bitmap buffer)
 * @new_state: the new state of the block
 *
 */
78 
static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf1,
			       unsigned char *buf2, unsigned int offset,
			       unsigned int buflen, u32 block,
			       unsigned char new_state)
{
	unsigned char *byte1, *byte2, *end, cur_state;
	/* Two bits per block: bit position of this block within its byte */
	const unsigned int bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE;

	byte1 = buf1 + offset + (block / GFS2_NBBY);
	end = buf1 + offset + buflen;

	BUG_ON(byte1 >= end);

	cur_state = (*byte1 >> bit) & GFS2_BIT_MASK;

	/* Reject transitions the valid_change[] table marks illegal;
	   flag the rgrp inconsistent instead of corrupting the bitmap */
	if (unlikely(!valid_change[new_state * 4 + cur_state])) {
		gfs2_consist_rgrpd(rgd);
		return;
	}
	/* XOR with (old ^ new) flips exactly the bits that differ */
	*byte1 ^= (cur_state ^ new_state) << bit;

	/* Mirror the change into the second bitmap buffer, if supplied */
	if (buf2) {
		byte2 = buf2 + offset + (block / GFS2_NBBY);
		cur_state = (*byte2 >> bit) & GFS2_BIT_MASK;
		*byte2 ^= (cur_state ^ new_state) << bit;
	}
}
106 
/**
 * gfs2_testbit - test a bit in the bitmaps
 * @rgd: the resource group descriptor (used for assertion reporting)
 * @buffer: the buffer that holds the bitmaps
 * @buflen: the length (in bytes) of the buffer
 * @block: the block to read
 *
 * Returns: the two-bit allocation state of the block
 */
114 
115 static inline unsigned char gfs2_testbit(struct gfs2_rgrpd *rgd,
116 					 const unsigned char *buffer,
117 					 unsigned int buflen, u32 block)
118 {
119 	const unsigned char *byte, *end;
120 	unsigned char cur_state;
121 	unsigned int bit;
122 
123 	byte = buffer + (block / GFS2_NBBY);
124 	bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE;
125 	end = buffer + buflen;
126 
127 	gfs2_assert(rgd->rd_sbd, byte < end);
128 
129 	cur_state = (*byte >> bit) & GFS2_BIT_MASK;
130 
131 	return cur_state;
132 }
133 
134 /**
135  * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing
136  *       a block in a given allocation state.
137  * @buffer: the buffer that holds the bitmaps
138  * @buflen: the length (in bytes) of the buffer
139  * @goal: start search at this block's bit-pair (within @buffer)
140  * @old_state: GFS2_BLKST_XXX the state of the block we're looking for.
141  *
142  * Scope of @goal and returned block number is only within this bitmap buffer,
143  * not entire rgrp or filesystem.  @buffer will be offset from the actual
144  * beginning of a bitmap block buffer, skipping any header structures.
145  *
146  * Return: the block number (bitmap buffer scope) that was found
147  */
148 
static u32 gfs2_bitfit(const u8 *buffer, unsigned int buflen, u32 goal,
		       u8 old_state)
{
	const u8 *byte, *start, *end;
	int bit, startbit;
	u32 g1, g2, misaligned;
	unsigned long *plong;
	unsigned long lskipval;

	/* Word-scan skip pattern.  LBITMASK keeps only the low bit of each
	   2-bit pair (the "used" bit).  Searching for a used state: skip
	   words whose used-bits are all clear (LBITSKIP00).  Searching for
	   free/unlinked: skip words whose used-bits are all set (LBITSKIP55). */
	lskipval = (old_state & GFS2_BLKST_USED) ? LBITSKIP00 : LBITSKIP55;
	g1 = (goal / GFS2_NBBY);
	start = buffer + g1;
	byte = start;
        end = buffer + buflen;
	/* First unsigned-long-aligned byte at or after the goal */
	g2 = ALIGN(g1, sizeof(unsigned long));
	plong = (unsigned long *)(buffer + g2);
	startbit = bit = (goal % GFS2_NBBY) * GFS2_BIT_SIZE;
	misaligned = g2 - g1;
	if (!misaligned)
		goto ulong_aligned;
/* parse the bitmap a byte at a time */
misaligned:
	while (byte < end) {
		if (((*byte >> bit) & GFS2_BIT_MASK) == old_state) {
			/* Hit: translate byte/bit offsets back into a block
			   number relative to the original goal */
			return goal +
				(((byte - start) * GFS2_NBBY) +
				 ((bit - startbit) >> 1));
		}
		bit += GFS2_BIT_SIZE;
		if (bit >= GFS2_NBBY * GFS2_BIT_SIZE) {
			bit = 0;
			byte++;
			misaligned--;
			if (!misaligned) {
				/* Reached word alignment; switch to the
				   faster unsigned-long scan */
				plong = (unsigned long *)byte;
				goto ulong_aligned;
			}
		}
	}
	return BFITNOENT;

/* parse the bitmap a unsigned long at a time */
ulong_aligned:
	/* Stop at "end - 1" or else prefetch can go past the end and segfault.
	   We could "if" it but we'd lose some of the performance gained.
	   This way will only slow down searching the very last 4/8 bytes
	   depending on architecture.  I've experimented with several ways
	   of writing this section such as using an else before the goto
	   but this one seems to be the fastest. */
	while ((unsigned char *)plong < end - sizeof(unsigned long)) {
		prefetch(plong + 1);
		if (((*plong) & LBITMASK) != lskipval)
			break;
		plong++;
	}
	if ((unsigned char *)plong < end) {
		/* A word that may contain a match: re-scan it bytewise.
		   misaligned is bumped so the byte loop hands control back
		   to the word loop once this word is consumed. */
		byte = (const u8 *)plong;
		misaligned += sizeof(unsigned long) - 1;
		goto misaligned;
	}
	return BFITNOENT;
}
211 
212 /**
213  * gfs2_bitcount - count the number of bits in a certain state
214  * @buffer: the buffer that holds the bitmaps
215  * @buflen: the length (in bytes) of the buffer
216  * @state: the state of the block we're looking for
217  *
218  * Returns: The number of bits
219  */
220 
221 static u32 gfs2_bitcount(struct gfs2_rgrpd *rgd, const u8 *buffer,
222 			 unsigned int buflen, u8 state)
223 {
224 	const u8 *byte = buffer;
225 	const u8 *end = buffer + buflen;
226 	const u8 state1 = state << 2;
227 	const u8 state2 = state << 4;
228 	const u8 state3 = state << 6;
229 	u32 count = 0;
230 
231 	for (; byte < end; byte++) {
232 		if (((*byte) & 0x03) == state)
233 			count++;
234 		if (((*byte) & 0x0C) == state1)
235 			count++;
236 		if (((*byte) & 0x30) == state2)
237 			count++;
238 		if (((*byte) & 0xC0) == state3)
239 			count++;
240 	}
241 
242 	return count;
243 }
244 
/**
 * gfs2_rgrp_verify - Verify that a resource group is consistent
 * @rgd: the rgrp
 *
 */
251 
252 void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd)
253 {
254 	struct gfs2_sbd *sdp = rgd->rd_sbd;
255 	struct gfs2_bitmap *bi = NULL;
256 	u32 length = rgd->rd_length;
257 	u32 count[4], tmp;
258 	int buf, x;
259 
260 	memset(count, 0, 4 * sizeof(u32));
261 
262 	/* Count # blocks in each of 4 possible allocation states */
263 	for (buf = 0; buf < length; buf++) {
264 		bi = rgd->rd_bits + buf;
265 		for (x = 0; x < 4; x++)
266 			count[x] += gfs2_bitcount(rgd,
267 						  bi->bi_bh->b_data +
268 						  bi->bi_offset,
269 						  bi->bi_len, x);
270 	}
271 
272 	if (count[0] != rgd->rd_rg.rg_free) {
273 		if (gfs2_consist_rgrpd(rgd))
274 			fs_err(sdp, "free data mismatch:  %u != %u\n",
275 			       count[0], rgd->rd_rg.rg_free);
276 		return;
277 	}
278 
279 	tmp = rgd->rd_data -
280 		rgd->rd_rg.rg_free -
281 		rgd->rd_rg.rg_dinodes;
282 	if (count[1] + count[2] != tmp) {
283 		if (gfs2_consist_rgrpd(rgd))
284 			fs_err(sdp, "used data mismatch:  %u != %u\n",
285 			       count[1], tmp);
286 		return;
287 	}
288 
289 	if (count[3] != rgd->rd_rg.rg_dinodes) {
290 		if (gfs2_consist_rgrpd(rgd))
291 			fs_err(sdp, "used metadata mismatch:  %u != %u\n",
292 			       count[3], rgd->rd_rg.rg_dinodes);
293 		return;
294 	}
295 
296 	if (count[2] > count[3]) {
297 		if (gfs2_consist_rgrpd(rgd))
298 			fs_err(sdp, "unlinked inodes > inodes:  %u\n",
299 			       count[2]);
300 		return;
301 	}
302 
303 }
304 
305 static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block)
306 {
307 	u64 first = rgd->rd_data0;
308 	u64 last = first + rgd->rd_data;
309 	return first <= block && block < last;
310 }
311 
/**
 * gfs2_blk2rgrpd - Find resource group for a given data/meta block number
 * @sdp: The GFS2 superblock
 * @blk: The data block number
 *
 * Returns: The resource group, or NULL if not found
 */
319 
struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk)
{
	struct gfs2_rgrpd *rgd;

	spin_lock(&sdp->sd_rindex_spin);

	/* Scan in most-recently-used order; on a hit, move the rgrp to
	   the front of the MRU list so future lookups find it quickly */
	list_for_each_entry(rgd, &sdp->sd_rindex_mru_list, rd_list_mru) {
		if (rgrp_contains_block(rgd, blk)) {
			list_move(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list);
			spin_unlock(&sdp->sd_rindex_spin);
			return rgd;
		}
	}

	spin_unlock(&sdp->sd_rindex_spin);

	return NULL;
}
338 
339 /**
340  * gfs2_rgrpd_get_first - get the first Resource Group in the filesystem
341  * @sdp: The GFS2 superblock
342  *
343  * Returns: The first rgrp in the filesystem
344  */
345 
346 struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp)
347 {
348 	gfs2_assert(sdp, !list_empty(&sdp->sd_rindex_list));
349 	return list_entry(sdp->sd_rindex_list.next, struct gfs2_rgrpd, rd_list);
350 }
351 
352 /**
353  * gfs2_rgrpd_get_next - get the next RG
354  * @rgd: A RG
355  *
356  * Returns: The next rgrp
357  */
358 
359 struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd)
360 {
361 	if (rgd->rd_list.next == &rgd->rd_sbd->sd_rindex_list)
362 		return NULL;
363 	return list_entry(rgd->rd_list.next, struct gfs2_rgrpd, rd_list);
364 }
365 
/* Free every cached resource group descriptor.  Caller must serialize
 * (see gfs2_clear_rgrpd() which takes sd_rindex_mutex). */
static void clear_rgrpdi(struct gfs2_sbd *sdp)
{
	struct list_head *head;
	struct gfs2_rgrpd *rgd;
	struct gfs2_glock *gl;

	/* Invalidate the cached "forward" scan pointer first */
	spin_lock(&sdp->sd_rindex_spin);
	sdp->sd_rindex_forward = NULL;
	spin_unlock(&sdp->sd_rindex_spin);

	head = &sdp->sd_rindex_list;
	while (!list_empty(head)) {
		rgd = list_entry(head->next, struct gfs2_rgrpd, rd_list);
		gl = rgd->rd_gl;

		list_del(&rgd->rd_list);
		list_del(&rgd->rd_list_mru);

		/* An rgd that failed setup partway may have no glock yet;
		   otherwise drop the reference taken by read_rindex_entry() */
		if (gl) {
			gl->gl_object = NULL;
			gfs2_glock_put(gl);
		}

		kfree(rgd->rd_bits);
		kmem_cache_free(gfs2_rgrpd_cachep, rgd);
	}
}
393 
/* Tear down all cached rgrp data, serialized against concurrent rindex
 * updates by sd_rindex_mutex. */
void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
{
	mutex_lock(&sdp->sd_rindex_mutex);
	clear_rgrpdi(sdp);
	mutex_unlock(&sdp->sd_rindex_mutex);
}
400 
/* Dump the cached rindex fields of @rgd to the kernel log; used when a
 * consistency check fails (see compute_bitstructs()). */
static void gfs2_rindex_print(const struct gfs2_rgrpd *rgd)
{
	printk(KERN_INFO "  ri_addr = %llu\n", (unsigned long long)rgd->rd_addr);
	printk(KERN_INFO "  ri_length = %u\n", rgd->rd_length);
	printk(KERN_INFO "  ri_data0 = %llu\n", (unsigned long long)rgd->rd_data0);
	printk(KERN_INFO "  ri_data = %u\n", rgd->rd_data);
	printk(KERN_INFO "  ri_bitbytes = %u\n", rgd->rd_bitbytes);
}
409 
/**
 * compute_bitstructs - Compute the bitmap sizes
 * @rgd: The resource group descriptor
 *
 * Calculates bitmap descriptors, one for each block that contains bitmap data
 *
 * Returns: errno
 */
418 
static int compute_bitstructs(struct gfs2_rgrpd *rgd)
{
	struct gfs2_sbd *sdp = rgd->rd_sbd;
	struct gfs2_bitmap *bi;
	u32 length = rgd->rd_length; /* # blocks in hdr & bitmap */
	u32 bytes_left, bytes;
	int x;

	if (!length)
		return -EINVAL;

	rgd->rd_bits = kcalloc(length, sizeof(struct gfs2_bitmap), GFP_NOFS);
	if (!rgd->rd_bits)
		return -ENOMEM;

	/* Distribute rd_bitbytes of bitmap data over the length blocks;
	   block 0 carries a gfs2_rgrp header, the rest a meta header */
	bytes_left = rgd->rd_bitbytes;

	for (x = 0; x < length; x++) {
		bi = rgd->rd_bits + x;

		/* small rgrp; bitmap stored completely in header block */
		if (length == 1) {
			bytes = bytes_left;
			bi->bi_offset = sizeof(struct gfs2_rgrp);
			bi->bi_start = 0;
			bi->bi_len = bytes;
		/* header block */
		} else if (x == 0) {
			bytes = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_rgrp);
			bi->bi_offset = sizeof(struct gfs2_rgrp);
			bi->bi_start = 0;
			bi->bi_len = bytes;
		/* last block */
		} else if (x + 1 == length) {
			bytes = bytes_left;
			bi->bi_offset = sizeof(struct gfs2_meta_header);
			bi->bi_start = rgd->rd_bitbytes - bytes_left;
			bi->bi_len = bytes;
		/* other blocks */
		} else {
			bytes = sdp->sd_sb.sb_bsize -
				sizeof(struct gfs2_meta_header);
			bi->bi_offset = sizeof(struct gfs2_meta_header);
			bi->bi_start = rgd->rd_bitbytes - bytes_left;
			bi->bi_len = bytes;
		}

		bytes_left -= bytes;
	}

	/* All of rd_bitbytes must be accounted for exactly */
	if (bytes_left) {
		gfs2_consist_rgrpd(rgd);
		return -EIO;
	}
	/* The last bitmap must end exactly at rd_data blocks
	   (each bitmap byte describes GFS2_NBBY blocks) */
	bi = rgd->rd_bits + (length - 1);
	if ((bi->bi_start + bi->bi_len) * GFS2_NBBY != rgd->rd_data) {
		if (gfs2_consist_rgrpd(rgd)) {
			gfs2_rindex_print(rgd);
			fs_err(sdp, "start=%u len=%u offset=%u\n",
			       bi->bi_start, bi->bi_len, bi->bi_offset);
		}
		return -EIO;
	}

	return 0;
}
485 
/**
 * gfs2_ri_total - Total up the file system space, according to the rindex.
 * @sdp: the filesystem
 *
 * Returns: the sum of the ri_data fields of all complete rindex entries
 */
u64 gfs2_ri_total(struct gfs2_sbd *sdp)
{
	u64 total_data = 0;
	struct inode *inode = sdp->sd_rindex;
	struct gfs2_inode *ip = GFS2_I(inode);
	char buf[sizeof(struct gfs2_rindex)];
	struct file_ra_state ra_state;
	int error, rgrps;

	mutex_lock(&sdp->sd_rindex_mutex);
	file_ra_state_init(&ra_state, inode->i_mapping);
	for (rgrps = 0;; rgrps++) {
		loff_t pos = rgrps * sizeof(struct gfs2_rindex);

		/* Stop after the last complete on-disk entry */
		if (pos + sizeof(struct gfs2_rindex) >= ip->i_di.di_size)
			break;
		error = gfs2_internal_read(ip, &ra_state, buf, &pos,
					   sizeof(struct gfs2_rindex));
		/* NOTE(review): a short read just ends the walk; the
		   partial total is still returned (best-effort sum) */
		if (error != sizeof(struct gfs2_rindex))
			break;
		total_data += be32_to_cpu(((struct gfs2_rindex *)buf)->ri_data);
	}
	mutex_unlock(&sdp->sd_rindex_mutex);
	return total_data;
}
515 
516 static void gfs2_rindex_in(struct gfs2_rgrpd *rgd, const void *buf)
517 {
518 	const struct gfs2_rindex *str = buf;
519 
520 	rgd->rd_addr = be64_to_cpu(str->ri_addr);
521 	rgd->rd_length = be32_to_cpu(str->ri_length);
522 	rgd->rd_data0 = be64_to_cpu(str->ri_data0);
523 	rgd->rd_data = be32_to_cpu(str->ri_data);
524 	rgd->rd_bitbytes = be32_to_cpu(str->ri_bitbytes);
525 }
526 
/**
 * read_rindex_entry - Pull in a new resource index entry from the disk
 * @ip: the rindex inode
 * @ra_state: readahead state for the rindex file
 *
 * Returns: 0 on success, error code otherwise
 */
533 
static int read_rindex_entry(struct gfs2_inode *ip,
			     struct file_ra_state *ra_state)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	/* Next unread entry, derived from how many rgrps we've built */
	loff_t pos = sdp->sd_rgrps * sizeof(struct gfs2_rindex);
	char buf[sizeof(struct gfs2_rindex)];
	int error;
	struct gfs2_rgrpd *rgd;

	error = gfs2_internal_read(ip, ra_state, buf, &pos,
				   sizeof(struct gfs2_rindex));
	/* Zero bytes read: end of the rindex file, not an error */
	if (!error)
		return 0;
	if (error != sizeof(struct gfs2_rindex)) {
		if (error > 0)
			error = -EIO;	/* partial entry */
		return error;
	}

	rgd = kmem_cache_zalloc(gfs2_rgrpd_cachep, GFP_NOFS);
	error = -ENOMEM;
	if (!rgd)
		return error;

	mutex_init(&rgd->rd_mutex);
	lops_init_le(&rgd->rd_le, &gfs2_rg_lops);
	rgd->rd_sbd = sdp;

	/* Link in before further setup; on error below, the caller tears
	   the whole list down via clear_rgrpdi(), freeing this rgd too */
	list_add_tail(&rgd->rd_list, &sdp->sd_rindex_list);
	list_add_tail(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list);

	gfs2_rindex_in(rgd, buf);
	error = compute_bitstructs(rgd);
	if (error)
		return error;

	error = gfs2_glock_get(sdp, rgd->rd_addr,
			       &gfs2_rgrp_glops, CREATE, &rgd->rd_gl);
	if (error)
		return error;

	rgd->rd_gl->gl_object = rgd;
	/* Header must be re-read, and unlinked inodes re-scanned */
	rgd->rd_flags &= ~GFS2_RDF_UPTODATE;
	rgd->rd_flags |= GFS2_RDF_CHECK;
	return error;
}
580 
581 /**
582  * gfs2_ri_update - Pull in a new resource index from the disk
583  * @ip: pointer to the rindex inode
584  *
585  * Returns: 0 on successful update, error code otherwise
586  */
587 
static int gfs2_ri_update(struct gfs2_inode *ip)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct inode *inode = &ip->i_inode;
	struct file_ra_state ra_state;
	u64 rgrp_count = ip->i_di.di_size;
	int error;

	/* The rindex file size must be an exact multiple of the entry
	   size; do_div() leaves the entry count in rgrp_count */
	if (do_div(rgrp_count, sizeof(struct gfs2_rindex))) {
		gfs2_consist_inode(ip);
		return -EIO;
	}

	/* Drop whatever we had cached before rebuilding from disk */
	clear_rgrpdi(sdp);

	file_ra_state_init(&ra_state, inode->i_mapping);
	for (sdp->sd_rgrps = 0; sdp->sd_rgrps < rgrp_count; sdp->sd_rgrps++) {
		error = read_rindex_entry(ip, &ra_state);
		if (error) {
			/* A partially-built list is useless; free it all */
			clear_rgrpdi(sdp);
			return error;
		}
	}

	sdp->sd_rindex_uptodate = 1;
	return 0;
}
615 
616 /**
617  * gfs2_ri_update_special - Pull in a new resource index from the disk
618  *
619  * This is a special version that's safe to call from gfs2_inplace_reserve_i.
620  * In this case we know that we don't have any resource groups in memory yet.
621  *
622  * @ip: pointer to the rindex inode
623  *
624  * Returns: 0 on successful update, error code otherwise
625  */
static int gfs2_ri_update_special(struct gfs2_inode *ip)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct inode *inode = &ip->i_inode;
	struct file_ra_state ra_state;
	int error;

	/* Unlike gfs2_ri_update(), no clear_rgrpdi() up front: the caller
	   guarantees no rgrps are cached yet (see comment above) */
	file_ra_state_init(&ra_state, inode->i_mapping);
	for (sdp->sd_rgrps = 0;; sdp->sd_rgrps++) {
		/* Ignore partials: stop before the first incomplete entry */
		if ((sdp->sd_rgrps + 1) * sizeof(struct gfs2_rindex) >
		    ip->i_di.di_size)
			break;
		error = read_rindex_entry(ip, &ra_state);
		if (error) {
			clear_rgrpdi(sdp);
			return error;
		}
	}

	sdp->sd_rindex_uptodate = 1;
	return 0;
}
649 
650 /**
651  * gfs2_rindex_hold - Grab a lock on the rindex
652  * @sdp: The GFS2 superblock
653  * @ri_gh: the glock holder
654  *
655  * We grab a lock on the rindex inode to make sure that it doesn't
656  * change whilst we are performing an operation. We keep this lock
657  * for quite long periods of time compared to other locks. This
658  * doesn't matter, since it is shared and it is very, very rarely
659  * accessed in the exclusive mode (i.e. only when expanding the filesystem).
660  *
661  * This makes sure that we're using the latest copy of the resource index
662  * special file, which might have been updated if someone expanded the
663  * filesystem (via gfs2_grow utility), which adds new resource groups.
664  *
665  * Returns: 0 on success, error code otherwise
666  */
667 
int gfs2_rindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ri_gh)
{
	struct gfs2_inode *ip = GFS2_I(sdp->sd_rindex);
	struct gfs2_glock *gl = ip->i_gl;
	int error;

	error = gfs2_glock_nq_init(gl, LM_ST_SHARED, 0, ri_gh);
	if (error)
		return error;

	/* Read new copy from disk if we don't have the latest.
	   Double-checked under sd_rindex_mutex: another task may have
	   refreshed the rindex while we waited for the mutex. */
	if (!sdp->sd_rindex_uptodate) {
		mutex_lock(&sdp->sd_rindex_mutex);
		if (!sdp->sd_rindex_uptodate) {
			error = gfs2_ri_update(ip);
			/* Don't hand the caller a held glock on failure */
			if (error)
				gfs2_glock_dq_uninit(ri_gh);
		}
		mutex_unlock(&sdp->sd_rindex_mutex);
	}

	return error;
}
691 
692 static void gfs2_rgrp_in(struct gfs2_rgrpd *rgd, const void *buf)
693 {
694 	const struct gfs2_rgrp *str = buf;
695 	struct gfs2_rgrp_host *rg = &rgd->rd_rg;
696 	u32 rg_flags;
697 
698 	rg_flags = be32_to_cpu(str->rg_flags);
699 	if (rg_flags & GFS2_RGF_NOALLOC)
700 		rgd->rd_flags |= GFS2_RDF_NOALLOC;
701 	else
702 		rgd->rd_flags &= ~GFS2_RDF_NOALLOC;
703 	rg->rg_free = be32_to_cpu(str->rg_free);
704 	rg->rg_dinodes = be32_to_cpu(str->rg_dinodes);
705 	rg->rg_igeneration = be64_to_cpu(str->rg_igeneration);
706 }
707 
708 static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf)
709 {
710 	struct gfs2_rgrp *str = buf;
711 	struct gfs2_rgrp_host *rg = &rgd->rd_rg;
712 	u32 rg_flags = 0;
713 
714 	if (rgd->rd_flags & GFS2_RDF_NOALLOC)
715 		rg_flags |= GFS2_RGF_NOALLOC;
716 	str->rg_flags = cpu_to_be32(rg_flags);
717 	str->rg_free = cpu_to_be32(rg->rg_free);
718 	str->rg_dinodes = cpu_to_be32(rg->rg_dinodes);
719 	str->__pad = cpu_to_be32(0);
720 	str->rg_igeneration = cpu_to_be64(rg->rg_igeneration);
721 	memset(&str->rg_reserved, 0, sizeof(str->rg_reserved));
722 }
723 
/**
 * gfs2_rgrp_bh_get - Read in a RG's header and bitmaps
 * @rgd: the struct gfs2_rgrpd describing the RG to read in
 *
 * Read in all of a Resource Group's header and bitmap blocks.
 * Caller must eventually call gfs2_rgrp_bh_put() to free the bitmaps.
 *
 * Returns: errno
 */
733 
int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
{
	struct gfs2_sbd *sdp = rgd->rd_sbd;
	struct gfs2_glock *gl = rgd->rd_gl;
	unsigned int length = rgd->rd_length;
	struct gfs2_bitmap *bi;
	unsigned int x, y;
	int error;

	mutex_lock(&rgd->rd_mutex);

	/* Buffers already read in: just take another reference */
	spin_lock(&sdp->sd_rindex_spin);
	if (rgd->rd_bh_count) {
		rgd->rd_bh_count++;
		spin_unlock(&sdp->sd_rindex_spin);
		mutex_unlock(&rgd->rd_mutex);
		return 0;
	}
	spin_unlock(&sdp->sd_rindex_spin);

	/* Issue reads for all header/bitmap blocks first... */
	for (x = 0; x < length; x++) {
		bi = rgd->rd_bits + x;
		error = gfs2_meta_read(gl, rgd->rd_addr + x, 0, &bi->bi_bh);
		if (error)
			goto fail;
	}

	/* ...then wait for completion and verify metatypes: block 0 is
	   the RG header, the rest are RB bitmap blocks */
	for (y = length; y--;) {
		bi = rgd->rd_bits + y;
		error = gfs2_meta_wait(sdp, bi->bi_bh);
		if (error)
			goto fail;
		if (gfs2_metatype_check(sdp, bi->bi_bh, y ? GFS2_METATYPE_RB :
					      GFS2_METATYPE_RG)) {
			error = -EIO;
			goto fail;
		}
	}

	/* Refresh the in-core header from block 0 if it went stale */
	if (!(rgd->rd_flags & GFS2_RDF_UPTODATE)) {
		gfs2_rgrp_in(rgd, (rgd->rd_bits[0].bi_bh)->b_data);
		rgd->rd_flags |= GFS2_RDF_UPTODATE;
	}

	spin_lock(&sdp->sd_rindex_spin);
	rgd->rd_free_clone = rgd->rd_rg.rg_free;
	rgd->rd_bh_count++;
	spin_unlock(&sdp->sd_rindex_spin);

	mutex_unlock(&rgd->rd_mutex);

	return 0;

fail:
	/* Release the buffers obtained before the failure point */
	while (x--) {
		bi = rgd->rd_bits + x;
		brelse(bi->bi_bh);
		bi->bi_bh = NULL;
		gfs2_assert_warn(sdp, !bi->bi_clone);
	}
	mutex_unlock(&rgd->rd_mutex);

	return error;
}
798 
799 void gfs2_rgrp_bh_hold(struct gfs2_rgrpd *rgd)
800 {
801 	struct gfs2_sbd *sdp = rgd->rd_sbd;
802 
803 	spin_lock(&sdp->sd_rindex_spin);
804 	gfs2_assert_warn(rgd->rd_sbd, rgd->rd_bh_count);
805 	rgd->rd_bh_count++;
806 	spin_unlock(&sdp->sd_rindex_spin);
807 }
808 
/**
 * gfs2_rgrp_bh_put - Release RG bitmaps read in with gfs2_rgrp_bh_get()
 * @rgd: the struct gfs2_rgrpd describing the RG to release
 *
 */
814 
815 void gfs2_rgrp_bh_put(struct gfs2_rgrpd *rgd)
816 {
817 	struct gfs2_sbd *sdp = rgd->rd_sbd;
818 	int x, length = rgd->rd_length;
819 
820 	spin_lock(&sdp->sd_rindex_spin);
821 	gfs2_assert_warn(rgd->rd_sbd, rgd->rd_bh_count);
822 	if (--rgd->rd_bh_count) {
823 		spin_unlock(&sdp->sd_rindex_spin);
824 		return;
825 	}
826 
827 	for (x = 0; x < length; x++) {
828 		struct gfs2_bitmap *bi = rgd->rd_bits + x;
829 		kfree(bi->bi_clone);
830 		bi->bi_clone = NULL;
831 		brelse(bi->bi_bh);
832 		bi->bi_bh = NULL;
833 	}
834 
835 	spin_unlock(&sdp->sd_rindex_spin);
836 }
837 
/* Resync every clone bitmap with the real bitmap data in the buffer
 * heads, and reset the clone free-block count to the real one. */
void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd)
{
	struct gfs2_sbd *sdp = rgd->rd_sbd;
	unsigned int length = rgd->rd_length;
	unsigned int x;

	for (x = 0; x < length; x++) {
		struct gfs2_bitmap *bi = rgd->rd_bits + x;
		/* Blocks without a clone need no refresh */
		if (!bi->bi_clone)
			continue;
		memcpy(bi->bi_clone + bi->bi_offset,
		       bi->bi_bh->b_data + bi->bi_offset, bi->bi_len);
	}

	spin_lock(&sdp->sd_rindex_spin);
	rgd->rd_free_clone = rgd->rd_rg.rg_free;
	spin_unlock(&sdp->sd_rindex_spin);
}
856 
857 /**
858  * gfs2_alloc_get - get the struct gfs2_alloc structure for an inode
859  * @ip: the incore GFS2 inode structure
860  *
861  * Returns: the struct gfs2_alloc
862  */
863 
864 struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip)
865 {
866 	BUG_ON(ip->i_alloc != NULL);
867 	ip->i_alloc = kzalloc(sizeof(struct gfs2_alloc), GFP_KERNEL);
868 	return ip->i_alloc;
869 }
870 
871 /**
872  * try_rgrp_fit - See if a given reservation will fit in a given RG
873  * @rgd: the RG data
874  * @al: the struct gfs2_alloc structure describing the reservation
875  *
876  * If there's room for the requested blocks to be allocated from the RG:
877  *   Sets the $al_rgd field in @al.
878  *
879  * Returns: 1 on success (it fits), 0 on failure (it doesn't fit)
880  */
881 
882 static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_alloc *al)
883 {
884 	struct gfs2_sbd *sdp = rgd->rd_sbd;
885 	int ret = 0;
886 
887 	if (rgd->rd_flags & GFS2_RDF_NOALLOC)
888 		return 0;
889 
890 	spin_lock(&sdp->sd_rindex_spin);
891 	if (rgd->rd_free_clone >= al->al_requested) {
892 		al->al_rgd = rgd;
893 		ret = 1;
894 	}
895 	spin_unlock(&sdp->sd_rindex_spin);
896 
897 	return ret;
898 }
899 
/**
 * try_rgrp_unlink - Look for any unlinked, allocated, but unused inodes
 * @rgd: The rgrp
 * @last_unlinked: highest unlinked block address already tried (in/out)
 *
 * Returns: The inode, if one has been found
 */
906 
static struct inode *try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked)
{
	struct inode *inode;
	u32 goal = 0, block;
	u64 no_addr;
	struct gfs2_sbd *sdp = rgd->rd_sbd;
	unsigned int n;

	for(;;) {
		if (goal >= rgd->rd_data)
			break;
		/* NOTE(review): sd_log_flush_lock is write-held across the
		   search, presumably to keep the bitmap stable against a
		   concurrent log flush — confirm against log.c */
		down_write(&sdp->sd_log_flush_lock);
		n = 1;
		block = rgblk_search(rgd, goal, GFS2_BLKST_UNLINKED,
				     GFS2_BLKST_UNLINKED, &n);
		up_write(&sdp->sd_log_flush_lock);
		if (block == BFITNOENT)
			break;
		/* rgblk_search can return a block < goal, so we need to
		   keep it marching forward. */
		no_addr = block + rgd->rd_data0;
		goal++;
		/* Skip addresses already tried on an earlier pass */
		if (*last_unlinked != NO_BLOCK && no_addr <= *last_unlinked)
			continue;
		*last_unlinked = no_addr;
		inode = gfs2_inode_lookup(rgd->rd_sbd->sd_vfs, DT_UNKNOWN,
					  no_addr, -1, 1);
		if (!IS_ERR(inode))
			return inode;
	}

	/* Whole rgrp scanned without a hit: no need to check it again */
	rgd->rd_flags &= ~GFS2_RDF_CHECK;
	return NULL;
}
941 
942 /**
943  * recent_rgrp_next - get next RG from "recent" list
944  * @cur_rgd: current rgrp
945  *
946  * Returns: The next rgrp in the recent list
947  */
948 
949 static struct gfs2_rgrpd *recent_rgrp_next(struct gfs2_rgrpd *cur_rgd)
950 {
951 	struct gfs2_sbd *sdp = cur_rgd->rd_sbd;
952 	struct list_head *head;
953 	struct gfs2_rgrpd *rgd;
954 
955 	spin_lock(&sdp->sd_rindex_spin);
956 	head = &sdp->sd_rindex_mru_list;
957 	if (unlikely(cur_rgd->rd_list_mru.next == head)) {
958 		spin_unlock(&sdp->sd_rindex_spin);
959 		return NULL;
960 	}
961 	rgd = list_entry(cur_rgd->rd_list_mru.next, struct gfs2_rgrpd, rd_list_mru);
962 	spin_unlock(&sdp->sd_rindex_spin);
963 	return rgd;
964 }
965 
966 /**
967  * forward_rgrp_get - get an rgrp to try next from full list
968  * @sdp: The GFS2 superblock
969  *
970  * Returns: The rgrp to try next
971  */
972 
static struct gfs2_rgrpd *forward_rgrp_get(struct gfs2_sbd *sdp)
{
	struct gfs2_rgrpd *rgd;
	unsigned int journals = gfs2_jindex_size(sdp);
	unsigned int rg = 0, x;

	spin_lock(&sdp->sd_rindex_spin);

	rgd = sdp->sd_rindex_forward;
	if (!rgd) {
		/* No forward pointer yet: pick a starting rgrp offset by
		   our journal id, so different nodes start their scans at
		   different points in the list */
		if (sdp->sd_rgrps >= journals)
			rg = sdp->sd_rgrps * sdp->sd_jdesc->jd_jid / journals;

		/* Walk forward to the rg'th rgrp */
		for (x = 0, rgd = gfs2_rgrpd_get_first(sdp); x < rg;
		     x++, rgd = gfs2_rgrpd_get_next(rgd))
			/* Do Nothing */;

		sdp->sd_rindex_forward = rgd;
	}

	spin_unlock(&sdp->sd_rindex_spin);

	return rgd;
}
997 
998 /**
999  * forward_rgrp_set - set the forward rgrp pointer
1000  * @sdp: the filesystem
1001  * @rgd: The new forward rgrp
1002  *
1003  */
1004 
static void forward_rgrp_set(struct gfs2_sbd *sdp, struct gfs2_rgrpd *rgd)
{
	/* sd_rindex_forward is protected by sd_rindex_spin */
	spin_lock(&sdp->sd_rindex_spin);
	sdp->sd_rindex_forward = rgd;
	spin_unlock(&sdp->sd_rindex_spin);
}
1011 
1012 /**
1013  * get_local_rgrp - Choose and lock a rgrp for allocation
1014  * @ip: the inode to reserve space for
1015  * @rgp: the chosen and locked rgrp
1016  *
1017  * Try to acquire rgrp in way which avoids contending with others.
1018  *
1019  * Returns: errno
1020  */
1021 
static struct inode *get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
{
	struct inode *inode = NULL;
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct gfs2_rgrpd *rgd, *begin = NULL;
	struct gfs2_alloc *al = ip->i_alloc;
	int flags = LM_FLAG_TRY;
	int skipped = 0;
	int loops = 0;
	int error, rg_locked;

	/* First pass: start at the rgrp containing the inode's goal block
	   and walk the MRU list, using trylocks only */
	rgd = gfs2_blk2rgrpd(sdp, ip->i_goal);

	while (rgd) {
		rg_locked = 0;

		/* We may already hold this rgrp's glock (e.g. when
		   allocating on behalf of the rindex itself) */
		if (gfs2_glock_is_locked_by_me(rgd->rd_gl)) {
			rg_locked = 1;
			error = 0;
		} else {
			error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
						   LM_FLAG_TRY, &al->al_rgd_gh);
		}
		switch (error) {
		case 0:
			/* On success the rgrp glock stays held for the caller */
			if (try_rgrp_fit(rgd, al))
				goto out;
			if (rgd->rd_flags & GFS2_RDF_CHECK)
				inode = try_rgrp_unlink(rgd, last_unlinked);
			if (!rg_locked)
				gfs2_glock_dq_uninit(&al->al_rgd_gh);
			if (inode)
				return inode;
			/* fall through */
		case GLR_TRYFAILED:
			rgd = recent_rgrp_next(rgd);
			break;

		default:
			return ERR_PTR(error);
		}
	}

	/* Second pass: go through the full list of rgrps, starting at the
	   per-journal forward pointer */

	begin = rgd = forward_rgrp_get(sdp);

	for (;;) {
		rg_locked = 0;

		if (gfs2_glock_is_locked_by_me(rgd->rd_gl)) {
			rg_locked = 1;
			error = 0;
		} else {
			error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, flags,
						   &al->al_rgd_gh);
		}
		switch (error) {
		case 0:
			if (try_rgrp_fit(rgd, al))
				goto out;
			if (rgd->rd_flags & GFS2_RDF_CHECK)
				inode = try_rgrp_unlink(rgd, last_unlinked);
			if (!rg_locked)
				gfs2_glock_dq_uninit(&al->al_rgd_gh);
			if (inode)
				return inode;
			break;

		case GLR_TRYFAILED:
			skipped++;
			break;

		default:
			return ERR_PTR(error);
		}

		rgd = gfs2_rgrpd_get_next(rgd);
		if (!rgd)
			rgd = gfs2_rgrpd_get_first(sdp);

		/* Wrapped all the way around: drop LM_FLAG_TRY (block on
		   glocks), flush the log on the second lap, give up with
		   ENOSPC on the third */
		if (rgd == begin) {
			if (++loops >= 3)
				return ERR_PTR(-ENOSPC);
			if (!skipped)
				loops++;
			flags = 0;
			if (loops == 2)
				gfs2_log_flush(sdp, NULL);
		}
	}

out:
	/* Second-pass success: mark this rgrp most-recently-used and
	   advance the forward pointer so the next scan starts elsewhere */
	if (begin) {
		spin_lock(&sdp->sd_rindex_spin);
		list_move(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list);
		spin_unlock(&sdp->sd_rindex_spin);
		rgd = gfs2_rgrpd_get_next(rgd);
		if (!rgd)
			rgd = gfs2_rgrpd_get_first(sdp);
		forward_rgrp_set(sdp, rgd);
	}

	return NULL;
}
1127 
/**
 * gfs2_inplace_reserve_i - Reserve space in the filesystem
 * @ip: the inode to reserve space for
 * @file: source file of the caller (recorded for allocation debugging)
 * @line: source line of the caller (recorded for allocation debugging)
 *
 * Takes the rindex glock (unless @ip is the rindex inode itself, in
 * which case it is already held) and searches the resource groups for
 * one that can satisfy al->al_requested blocks.
 *
 * Returns: errno
 */

int gfs2_inplace_reserve_i(struct gfs2_inode *ip, char *file, unsigned int line)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct gfs2_alloc *al = ip->i_alloc;
	struct inode *inode;
	int error = 0;
	u64 last_unlinked = NO_BLOCK;

	/* A reservation for zero blocks indicates a caller bug */
	if (gfs2_assert_warn(sdp, al->al_requested))
		return -EINVAL;

try_again:
	/* We need to hold the rindex unless the inode we're using is
	   the rindex itself, in which case it's already held. */
	if (ip != GFS2_I(sdp->sd_rindex))
		error = gfs2_rindex_hold(sdp, &al->al_ri_gh);
	else if (!sdp->sd_rgrps) /* We may not have the rindex read in, so: */
		error = gfs2_ri_update_special(ip);

	if (error)
		return error;

	/* get_local_rgrp() returns NULL on success, an ERR_PTR on a hard
	   error, or an unlinked-but-allocated inode it discovered while
	   scanning; that inode must be disposed of before retrying. */
	inode = get_local_rgrp(ip, &last_unlinked);
	if (inode) {
		if (ip != GFS2_I(sdp->sd_rindex))
			gfs2_glock_dq_uninit(&al->al_ri_gh);
		if (IS_ERR(inode))
			return PTR_ERR(inode);
		/* Drop the last reference so the inode is deallocated,
		   flush the log to make its blocks reusable, then retry. */
		iput(inode);
		gfs2_log_flush(sdp, NULL);
		goto try_again;
	}

	al->al_file = file;
	al->al_line = line;

	return 0;
}
1173 
/**
 * gfs2_inplace_release - release an inplace reservation
 * @ip: the inode the reservation was taken out on
 *
 * Release a reservation made by gfs2_inplace_reserve().  Warns if more
 * blocks were allocated than were reserved, then drops the rgrp glock
 * and (unless @ip is the rindex inode) the rindex glock.
 */

void gfs2_inplace_release(struct gfs2_inode *ip)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct gfs2_alloc *al = ip->i_alloc;

	/* al_file/al_line identify which caller made the reservation */
	if (gfs2_assert_warn(sdp, al->al_alloced <= al->al_requested) == -1)
		fs_warn(sdp, "al_alloced = %u, al_requested = %u "
			     "al_file = %s, al_line = %u\n",
		             al->al_alloced, al->al_requested, al->al_file,
			     al->al_line);

	al->al_rgd = NULL;
	/* gh_gl is non-NULL only if the rgrp glock is actually held */
	if (al->al_rgd_gh.gh_gl)
		gfs2_glock_dq_uninit(&al->al_rgd_gh);
	if (ip != GFS2_I(sdp->sd_rindex))
		gfs2_glock_dq_uninit(&al->al_ri_gh);
}
1198 
1199 /**
1200  * gfs2_get_block_type - Check a block in a RG is of given type
1201  * @rgd: the resource group holding the block
1202  * @block: the block number
1203  *
1204  * Returns: The block type (GFS2_BLKST_*)
1205  */
1206 
1207 unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block)
1208 {
1209 	struct gfs2_bitmap *bi = NULL;
1210 	u32 length, rgrp_block, buf_block;
1211 	unsigned int buf;
1212 	unsigned char type;
1213 
1214 	length = rgd->rd_length;
1215 	rgrp_block = block - rgd->rd_data0;
1216 
1217 	for (buf = 0; buf < length; buf++) {
1218 		bi = rgd->rd_bits + buf;
1219 		if (rgrp_block < (bi->bi_start + bi->bi_len) * GFS2_NBBY)
1220 			break;
1221 	}
1222 
1223 	gfs2_assert(rgd->rd_sbd, buf < length);
1224 	buf_block = rgrp_block - bi->bi_start * GFS2_NBBY;
1225 
1226 	type = gfs2_testbit(rgd, bi->bi_bh->b_data + bi->bi_offset,
1227 			   bi->bi_len, buf_block);
1228 
1229 	return type;
1230 }
1231 
/**
 * rgblk_search - find a block in @old_state, change allocation
 *           state to @new_state
 * @rgd: the resource group descriptor
 * @goal: the goal block within the RG (start here to search for avail block)
 * @old_state: GFS2_BLKST_XXX the before-allocation state to find
 * @new_state: GFS2_BLKST_XXX the after-allocation block state
 * @n: On entry, the requested extent length; on exit, the number of
 *     blocks whose state was actually changed (0 if none)
 *
 * Walk rgrp's bitmap to find bits that represent a block in @old_state.
 * Add the found bitmap buffer to the transaction.
 * Set the found bits to @new_state to change block's allocation state.
 * After the first block, the extent is extended up to the requested
 * length, but only within the same bitmap buffer and only over blocks
 * that are GFS2_BLKST_FREE.
 *
 * This function never fails, because we wouldn't call it unless we
 * know (from reservation results, etc.) that a block is available.
 *
 * Scope of @goal and returned block is just within rgrp, not the whole
 * filesystem.
 *
 * Returns:  the block number allocated (rgrp-relative), or BFITNOENT
 */

static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
			unsigned char old_state, unsigned char new_state,
			unsigned int *n)
{
	struct gfs2_bitmap *bi = NULL;
	const u32 length = rgd->rd_length;
	u32 blk = 0;
	unsigned int buf, x;
	const unsigned int elen = *n;
	const u8 *buffer;

	*n = 0;
	/* Find bitmap block that contains bits for goal block */
	for (buf = 0; buf < length; buf++) {
		bi = rgd->rd_bits + buf;
		if (goal < (bi->bi_start + bi->bi_len) * GFS2_NBBY)
			break;
	}

	gfs2_assert(rgd->rd_sbd, buf < length);

	/* Convert scope of "goal" from rgrp-wide to within found bit block */
	goal -= bi->bi_start * GFS2_NBBY;

	/* Search (up to entire) bitmap in this rgrp for allocatable block.
	   "x <= length", instead of "x < length", because we typically start
	   the search in the middle of a bit block, but if we can't find an
	   allocatable block anywhere else, we want to be able wrap around and
	   search in the first part of our first-searched bit block.  */
	for (x = 0; x <= length; x++) {
		/* The GFS2_BLKST_UNLINKED state doesn't apply to the clone
		   bitmaps, so we must search the originals for that. */
		buffer = bi->bi_bh->b_data + bi->bi_offset;
		if (old_state != GFS2_BLKST_UNLINKED && bi->bi_clone)
			buffer = bi->bi_clone + bi->bi_offset;

		blk = gfs2_bitfit(buffer, bi->bi_len, goal, old_state);
		if (blk != BFITNOENT)
			break;

		/* Try next bitmap block (wrap back to rgrp header if at end) */
		buf = (buf + 1) % length;
		bi = rgd->rd_bits + buf;
		goal = 0;
	}

	/* Found a block and actually changing state: set the first bit,
	   then try extending the extent to elen blocks.  The extension
	   stops at the end of this bitmap buffer or at the first
	   non-FREE block, whichever comes first. */
	if (blk != BFITNOENT && old_state != new_state) {
		*n = 1;
		gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
		gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset,
			    bi->bi_len, blk, new_state);
		goal = blk;
		while (*n < elen) {
			goal++;
			if (goal >= (bi->bi_len * GFS2_NBBY))
				break;
			if (gfs2_testbit(rgd, buffer, bi->bi_len, goal) !=
			    GFS2_BLKST_FREE)
				break;
			gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone,
				    bi->bi_offset, bi->bi_len, goal,
				    new_state);
			(*n)++;
		}
	}

	/* Convert the hit back to an rgrp-relative block number */
	return (blk == BFITNOENT) ? blk : (bi->bi_start * GFS2_NBBY) + blk;
}
1322 
/**
 * rgblk_free - Change alloc state of given block(s)
 * @sdp: the filesystem
 * @bstart: the start of a run of blocks to free
 * @blen: the length of the block run (all must lie within ONE RG!)
 * @new_state: GFS2_BLKST_XXX the after-allocation block state
 *
 * Returns:  Resource group containing the block(s), or NULL if @bstart
 *           maps to no resource group (filesystem inconsistency)
 */

static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
				     u32 blen, unsigned char new_state)
{
	struct gfs2_rgrpd *rgd;
	struct gfs2_bitmap *bi = NULL;
	u32 length, rgrp_blk, buf_blk;
	unsigned int buf;

	rgd = gfs2_blk2rgrpd(sdp, bstart);
	if (!rgd) {
		if (gfs2_consist(sdp))
			fs_err(sdp, "block = %llu\n", (unsigned long long)bstart);
		return NULL;
	}

	length = rgd->rd_length;

	/* Convert to an rgrp-relative block number */
	rgrp_blk = bstart - rgd->rd_data0;

	while (blen--) {
		/* Find the bitmap buffer whose range covers this block */
		for (buf = 0; buf < length; buf++) {
			bi = rgd->rd_bits + buf;
			if (rgrp_blk < (bi->bi_start + bi->bi_len) * GFS2_NBBY)
				break;
		}

		gfs2_assert(rgd->rd_sbd, buf < length);

		buf_blk = rgrp_blk - bi->bi_start * GFS2_NBBY;
		rgrp_blk++;

		/* Take a private copy of the bitmap before the first
		   modification; rgblk_search() prefers bi_clone when
		   searching, so allocators keep seeing the old state. */
		if (!bi->bi_clone) {
			bi->bi_clone = kmalloc(bi->bi_bh->b_size,
					       GFP_NOFS | __GFP_NOFAIL);
			memcpy(bi->bi_clone + bi->bi_offset,
			       bi->bi_bh->b_data + bi->bi_offset,
			       bi->bi_len);
		}
		gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
		/* NULL clone arg: only the real (on-disk) bitmap changes */
		gfs2_setbit(rgd, bi->bi_bh->b_data, NULL, bi->bi_offset,
			    bi->bi_len, buf_blk, new_state);
	}

	return rgd;
}
1378 
/**
 * gfs2_alloc_block - Allocate a block
 * @ip: the inode to allocate the block for
 * @n: requested extent length; set to the number of blocks allocated
 *
 * Returns: the first allocated block (filesystem-relative)
 */

u64 gfs2_alloc_block(struct gfs2_inode *ip, unsigned int *n)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct gfs2_alloc *al = ip->i_alloc;
	struct gfs2_rgrpd *rgd = al->al_rgd;
	u32 goal, blk;
	u64 block;

	/* Start at the inode's goal block if it lies in this rgrp,
	   otherwise at the rgrp's last allocation point */
	if (rgrp_contains_block(rgd, ip->i_goal))
		goal = ip->i_goal - rgd->rd_data0;
	else
		goal = rgd->rd_last_alloc;

	blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, GFS2_BLKST_USED, n);
	/* The reservation guarantees free blocks, so a miss is a bug */
	BUG_ON(blk == BFITNOENT);

	rgd->rd_last_alloc = blk;
	block = rgd->rd_data0 + blk;
	ip->i_goal = block;

	gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free >= *n);
	rgd->rd_rg.rg_free -= *n;

	/* The rgrp header lives in the first bitmap buffer */
	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);

	al->al_alloced += *n;

	/* NOTE(review): unlike gfs2_alloc_di(), no gfs2_trans_add_unrevoke()
	   is done here -- confirm data allocations need no revoke handling */
	gfs2_statfs_change(sdp, 0, -(s64)*n, 0);
	gfs2_quota_change(ip, *n, ip->i_inode.i_uid, ip->i_inode.i_gid);

	/* rd_free_clone is protected by sd_rindex_spin */
	spin_lock(&sdp->sd_rindex_spin);
	rgd->rd_free_clone -= *n;
	spin_unlock(&sdp->sd_rindex_spin);

	return block;
}
1423 
/**
 * gfs2_alloc_di - Allocate a dinode
 * @dip: the directory that the inode is going in
 * @generation: set to the generation number of the new dinode
 *
 * Returns: the block allocated
 */

u64 gfs2_alloc_di(struct gfs2_inode *dip, u64 *generation)
{
	struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
	struct gfs2_alloc *al = dip->i_alloc;
	struct gfs2_rgrpd *rgd = al->al_rgd;
	u32 blk;
	u64 block;
	unsigned int n = 1;	/* a dinode is always exactly one block */

	blk = rgblk_search(rgd, rgd->rd_last_alloc,
			   GFS2_BLKST_FREE, GFS2_BLKST_DINODE, &n);
	/* The reservation guarantees a free block, so a miss is a bug */
	BUG_ON(blk == BFITNOENT);

	rgd->rd_last_alloc = blk;

	block = rgd->rd_data0 + blk;

	/* One fewer free block, one more dinode in this rgrp */
	gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free);
	rgd->rd_rg.rg_free--;
	rgd->rd_rg.rg_dinodes++;
	*generation = rgd->rd_rg.rg_igeneration++;
	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);

	al->al_alloced++;

	gfs2_statfs_change(sdp, 0, -1, +1);
	/* presumably cancels any pending revoke for this block before
	   it is reused -- see gfs2_trans_add_unrevoke() */
	gfs2_trans_add_unrevoke(sdp, block, 1);

	/* rd_free_clone is protected by sd_rindex_spin */
	spin_lock(&sdp->sd_rindex_spin);
	rgd->rd_free_clone--;
	spin_unlock(&sdp->sd_rindex_spin);

	return block;
}
1466 
1467 /**
1468  * gfs2_free_data - free a contiguous run of data block(s)
1469  * @ip: the inode these blocks are being freed from
1470  * @bstart: first block of a run of contiguous blocks
1471  * @blen: the length of the block run
1472  *
1473  */
1474 
1475 void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen)
1476 {
1477 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1478 	struct gfs2_rgrpd *rgd;
1479 
1480 	rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE);
1481 	if (!rgd)
1482 		return;
1483 
1484 	rgd->rd_rg.rg_free += blen;
1485 
1486 	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
1487 	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
1488 
1489 	gfs2_trans_add_rg(rgd);
1490 
1491 	gfs2_statfs_change(sdp, 0, +blen, 0);
1492 	gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid);
1493 }
1494 
1495 /**
1496  * gfs2_free_meta - free a contiguous run of data block(s)
1497  * @ip: the inode these blocks are being freed from
1498  * @bstart: first block of a run of contiguous blocks
1499  * @blen: the length of the block run
1500  *
1501  */
1502 
1503 void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen)
1504 {
1505 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1506 	struct gfs2_rgrpd *rgd;
1507 
1508 	rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE);
1509 	if (!rgd)
1510 		return;
1511 
1512 	rgd->rd_rg.rg_free += blen;
1513 
1514 	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
1515 	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
1516 
1517 	gfs2_trans_add_rg(rgd);
1518 
1519 	gfs2_statfs_change(sdp, 0, +blen, 0);
1520 	gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid);
1521 	gfs2_meta_wipe(ip, bstart, blen);
1522 }
1523 
1524 void gfs2_unlink_di(struct inode *inode)
1525 {
1526 	struct gfs2_inode *ip = GFS2_I(inode);
1527 	struct gfs2_sbd *sdp = GFS2_SB(inode);
1528 	struct gfs2_rgrpd *rgd;
1529 	u64 blkno = ip->i_no_addr;
1530 
1531 	rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_UNLINKED);
1532 	if (!rgd)
1533 		return;
1534 	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
1535 	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
1536 	gfs2_trans_add_rg(rgd);
1537 }
1538 
/**
 * gfs2_free_uninit_di - free a dinode block without quota/meta cleanup
 * @rgd: the resource group expected to contain the dinode
 * @blkno: the block number of the dinode
 *
 * Marks the block free in its rgrp bitmap and moves one block from the
 * rgrp's dinode count to its free count.  Callers needing quota and
 * metadata cleanup use gfs2_free_di() instead.
 */

static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno)
{
	struct gfs2_sbd *sdp = rgd->rd_sbd;
	struct gfs2_rgrpd *tmp_rgd;

	tmp_rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_FREE);
	if (!tmp_rgd)
		return;
	/* The block must really belong to the rgrp we were given */
	gfs2_assert_withdraw(sdp, rgd == tmp_rgd);

	if (!rgd->rd_rg.rg_dinodes)
		gfs2_consist_rgrpd(rgd);
	rgd->rd_rg.rg_dinodes--;
	rgd->rd_rg.rg_free++;

	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);

	/* One more free block, one fewer dinode */
	gfs2_statfs_change(sdp, 0, +1, -1);
	gfs2_trans_add_rg(rgd);
}
1560 
1561 
1562 void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
1563 {
1564 	gfs2_free_uninit_di(rgd, ip->i_no_addr);
1565 	gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid);
1566 	gfs2_meta_wipe(ip, ip->i_no_addr, 1);
1567 }
1568 
1569 /**
1570  * gfs2_rlist_add - add a RG to a list of RGs
1571  * @sdp: the filesystem
1572  * @rlist: the list of resource groups
1573  * @block: the block
1574  *
1575  * Figure out what RG a block belongs to and add that RG to the list
1576  *
1577  * FIXME: Don't use NOFAIL
1578  *
1579  */
1580 
1581 void gfs2_rlist_add(struct gfs2_sbd *sdp, struct gfs2_rgrp_list *rlist,
1582 		    u64 block)
1583 {
1584 	struct gfs2_rgrpd *rgd;
1585 	struct gfs2_rgrpd **tmp;
1586 	unsigned int new_space;
1587 	unsigned int x;
1588 
1589 	if (gfs2_assert_warn(sdp, !rlist->rl_ghs))
1590 		return;
1591 
1592 	rgd = gfs2_blk2rgrpd(sdp, block);
1593 	if (!rgd) {
1594 		if (gfs2_consist(sdp))
1595 			fs_err(sdp, "block = %llu\n", (unsigned long long)block);
1596 		return;
1597 	}
1598 
1599 	for (x = 0; x < rlist->rl_rgrps; x++)
1600 		if (rlist->rl_rgd[x] == rgd)
1601 			return;
1602 
1603 	if (rlist->rl_rgrps == rlist->rl_space) {
1604 		new_space = rlist->rl_space + 10;
1605 
1606 		tmp = kcalloc(new_space, sizeof(struct gfs2_rgrpd *),
1607 			      GFP_NOFS | __GFP_NOFAIL);
1608 
1609 		if (rlist->rl_rgd) {
1610 			memcpy(tmp, rlist->rl_rgd,
1611 			       rlist->rl_space * sizeof(struct gfs2_rgrpd *));
1612 			kfree(rlist->rl_rgd);
1613 		}
1614 
1615 		rlist->rl_space = new_space;
1616 		rlist->rl_rgd = tmp;
1617 	}
1618 
1619 	rlist->rl_rgd[rlist->rl_rgrps++] = rgd;
1620 }
1621 
1622 /**
1623  * gfs2_rlist_alloc - all RGs have been added to the rlist, now allocate
1624  *      and initialize an array of glock holders for them
1625  * @rlist: the list of resource groups
1626  * @state: the lock state to acquire the RG lock in
1627  * @flags: the modifier flags for the holder structures
1628  *
1629  * FIXME: Don't use NOFAIL
1630  *
1631  */
1632 
1633 void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state)
1634 {
1635 	unsigned int x;
1636 
1637 	rlist->rl_ghs = kcalloc(rlist->rl_rgrps, sizeof(struct gfs2_holder),
1638 				GFP_NOFS | __GFP_NOFAIL);
1639 	for (x = 0; x < rlist->rl_rgrps; x++)
1640 		gfs2_holder_init(rlist->rl_rgd[x]->rd_gl,
1641 				state, 0,
1642 				&rlist->rl_ghs[x]);
1643 }
1644 
1645 /**
1646  * gfs2_rlist_free - free a resource group list
1647  * @list: the list of resource groups
1648  *
1649  */
1650 
1651 void gfs2_rlist_free(struct gfs2_rgrp_list *rlist)
1652 {
1653 	unsigned int x;
1654 
1655 	kfree(rlist->rl_rgd);
1656 
1657 	if (rlist->rl_ghs) {
1658 		for (x = 0; x < rlist->rl_rgrps; x++)
1659 			gfs2_holder_uninit(&rlist->rl_ghs[x]);
1660 		kfree(rlist->rl_ghs);
1661 	}
1662 }
1663 
1664