1 /* 2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 3 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. 4 * 5 * This copyrighted material is made available to anyone wishing to use, 6 * modify, copy, or redistribute it subject to the terms and conditions 7 * of the GNU General Public License version 2. 8 */ 9 10 #include <linux/slab.h> 11 #include <linux/spinlock.h> 12 #include <linux/completion.h> 13 #include <linux/buffer_head.h> 14 #include <linux/fs.h> 15 #include <linux/gfs2_ondisk.h> 16 #include <linux/prefetch.h> 17 #include <linux/blkdev.h> 18 #include <linux/rbtree.h> 19 20 #include "gfs2.h" 21 #include "incore.h" 22 #include "glock.h" 23 #include "glops.h" 24 #include "lops.h" 25 #include "meta_io.h" 26 #include "quota.h" 27 #include "rgrp.h" 28 #include "super.h" 29 #include "trans.h" 30 #include "util.h" 31 #include "log.h" 32 #include "inode.h" 33 #include "trace_gfs2.h" 34 35 #define BFITNOENT ((u32)~0) 36 #define NO_BLOCK ((u64)~0) 37 38 #define RSRV_CONTENTION_FACTOR 4 39 #define RGRP_RSRV_MAX_CONTENDERS 2 40 41 #if BITS_PER_LONG == 32 42 #define LBITMASK (0x55555555UL) 43 #define LBITSKIP55 (0x55555555UL) 44 #define LBITSKIP00 (0x00000000UL) 45 #else 46 #define LBITMASK (0x5555555555555555UL) 47 #define LBITSKIP55 (0x5555555555555555UL) 48 #define LBITSKIP00 (0x0000000000000000UL) 49 #endif 50 51 /* 52 * These routines are used by the resource group routines (rgrp.c) 53 * to keep track of block allocation. Each block is represented by two 54 * bits. So, each byte represents GFS2_NBBY (i.e. 4) blocks. 55 * 56 * 0 = Free 57 * 1 = Used (not metadata) 58 * 2 = Unlinked (still in use) inode 59 * 3 = Used (metadata) 60 */ 61 62 static const char valid_change[16] = { 63 /* current */ 64 /* n */ 0, 1, 1, 1, 65 /* e */ 1, 0, 0, 0, 66 /* w */ 0, 0, 0, 1, 67 1, 0, 0, 0 68 }; 69 70 static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, 71 unsigned char old_state, 72 struct gfs2_bitmap **rbi); 73 74 /** 75 * gfs2_setbit - Set a bit in the bitmaps 76 * @rgd: the resource group descriptor 77 * @buf2: the clone buffer that holds the bitmaps 78 * @bi: the bitmap structure 79 * @block: the block to set 80 * @new_state: the new state of the block 81 * 82 */ 83 84 static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf2, 85 struct gfs2_bitmap *bi, u32 block, 86 unsigned char new_state) 87 { 88 unsigned char *byte1, *byte2, *end, cur_state; 89 unsigned int buflen = bi->bi_len; 90 const unsigned int bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE; 91 92 byte1 = bi->bi_bh->b_data + bi->bi_offset + (block / GFS2_NBBY); 93 end = bi->bi_bh->b_data + bi->bi_offset + buflen; 94 95 BUG_ON(byte1 >= end); 96 97 cur_state = (*byte1 >> bit) & GFS2_BIT_MASK; 98 99 if (unlikely(!valid_change[new_state * 4 + cur_state])) { 100 printk(KERN_WARNING "GFS2: buf_blk = 0x%llx old_state=%d, " 101 "new_state=%d\n", 102 (unsigned long long)block, cur_state, new_state); 103 printk(KERN_WARNING "GFS2: rgrp=0x%llx bi_start=0x%lx\n", 104 (unsigned long long)rgd->rd_addr, 105 (unsigned long)bi->bi_start); 106 printk(KERN_WARNING "GFS2: bi_offset=0x%lx bi_len=0x%lx\n", 107 (unsigned long)bi->bi_offset, 108 (unsigned long)bi->bi_len); 109 dump_stack(); 110 gfs2_consist_rgrpd(rgd); 111 return; 112 } 113 *byte1 ^= (cur_state ^ new_state) << bit; 114 115 if (buf2) { 116 byte2 = buf2 + bi->bi_offset + (block / GFS2_NBBY); 117 cur_state = (*byte2 >> bit) & GFS2_BIT_MASK; 118 *byte2 ^= (cur_state ^ new_state) << bit; 119 } 120 } 121 122 /** 123 * gfs2_testbit - test a bit in the bitmaps 124 * @rgd: the resource group descriptor 125 * @buffer: the buffer that holds the bitmaps 126 * @buflen: the length (in bytes) of the buffer 127 * @block: the block to read 128 * 129 */ 130 131 static inline unsigned char gfs2_testbit(struct gfs2_rgrpd *rgd, 132 const unsigned char *buffer, 133 unsigned int buflen, u32 block) 134 { 135 const unsigned char *byte, *end; 136 unsigned char cur_state; 137 unsigned int bit; 138 139 byte = buffer + (block / GFS2_NBBY); 140 bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE; 141 end = buffer + buflen; 142 143 gfs2_assert(rgd->rd_sbd, byte < end); 144 145 cur_state = (*byte >> bit) & GFS2_BIT_MASK; 146 147 return cur_state; 148 } 149 150 /** 151 * gfs2_bit_search 152 * @ptr: Pointer to bitmap data 153 * @mask: Mask to use (normally 0x55555.... but adjusted for search start) 154 * @state: The state we are searching for 155 * 156 * We xor the bitmap data with a patter which is the bitwise opposite 157 * of what we are looking for, this gives rise to a pattern of ones 158 * wherever there is a match. Since we have two bits per entry, we 159 * take this pattern, shift it down by one place and then and it with 160 * the original. All the even bit positions (0,2,4, etc) then represent 161 * successful matches, so we mask with 0x55555..... to remove the unwanted 162 * odd bit positions. 163 * 164 * This allows searching of a whole u64 at once (32 blocks) with a 165 * single test (on 64 bit arches). 166 */ 167 168 static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state) 169 { 170 u64 tmp; 171 static const u64 search[] = { 172 [0] = 0xffffffffffffffffULL, 173 [1] = 0xaaaaaaaaaaaaaaaaULL, 174 [2] = 0x5555555555555555ULL, 175 [3] = 0x0000000000000000ULL, 176 }; 177 tmp = le64_to_cpu(*ptr) ^ search[state]; 178 tmp &= (tmp >> 1); 179 tmp &= mask; 180 return tmp; 181 } 182 183 /** 184 * rs_cmp - multi-block reservation range compare 185 * @blk: absolute file system block number of the new reservation 186 * @len: number of blocks in the new reservation 187 * @rs: existing reservation to compare against 188 * 189 * returns: 1 if the block range is beyond the reach of the reservation 190 * -1 if the block range is before the start of the reservation 191 * 0 if the block range overlaps with the reservation 192 */ 193 static inline int rs_cmp(u64 blk, u32 len, struct gfs2_blkreserv *rs) 194 { 195 u64 startblk = gfs2_rs_startblk(rs); 196 197 if (blk >= startblk + rs->rs_free) 198 return 1; 199 if (blk + len - 1 < startblk) 200 return -1; 201 return 0; 202 } 203 204 /** 205 * rs_find - Find a rgrp multi-block reservation that contains a given block 206 * @rgd: The rgrp 207 * @rgblk: The block we're looking for, relative to the rgrp 208 */ 209 static struct gfs2_blkreserv *rs_find(struct gfs2_rgrpd *rgd, u32 rgblk) 210 { 211 struct rb_node **newn; 212 int rc; 213 u64 fsblk = rgblk + rgd->rd_data0; 214 215 spin_lock(&rgd->rd_rsspin); 216 newn = &rgd->rd_rstree.rb_node; 217 while (*newn) { 218 struct gfs2_blkreserv *cur = 219 rb_entry(*newn, struct gfs2_blkreserv, rs_node); 220 rc = rs_cmp(fsblk, 1, cur); 221 if (rc < 0) 222 newn = &((*newn)->rb_left); 223 else if (rc > 0) 224 newn = &((*newn)->rb_right); 225 else { 226 spin_unlock(&rgd->rd_rsspin); 227 return cur; 228 } 229 } 230 spin_unlock(&rgd->rd_rsspin); 231 return NULL; 232 } 233 234 /** 235 * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing 236 * a block in a given allocation state. 237 * @buf: the buffer that holds the bitmaps 238 * @len: the length (in bytes) of the buffer 239 * @goal: start search at this block's bit-pair (within @buffer) 240 * @state: GFS2_BLKST_XXX the state of the block we're looking for. 241 * 242 * Scope of @goal and returned block number is only within this bitmap buffer, 243 * not entire rgrp or filesystem. @buffer will be offset from the actual 244 * beginning of a bitmap block buffer, skipping any header structures, but 245 * headers are always a multiple of 64 bits long so that the buffer is 246 * always aligned to a 64 bit boundary. 247 * 248 * The size of the buffer is in bytes, but is it assumed that it is 249 * always ok to read a complete multiple of 64 bits at the end 250 * of the block in case the end is no aligned to a natural boundary. 251 * 252 * Return: the block number (bitmap buffer scope) that was found 253 */ 254 255 static u32 gfs2_bitfit(const u8 *buf, const unsigned int len, 256 u32 goal, u8 state) 257 { 258 u32 spoint = (goal << 1) & ((8*sizeof(u64)) - 1); 259 const __le64 *ptr = ((__le64 *)buf) + (goal >> 5); 260 const __le64 *end = (__le64 *)(buf + ALIGN(len, sizeof(u64))); 261 u64 tmp; 262 u64 mask = 0x5555555555555555ULL; 263 u32 bit; 264 265 BUG_ON(state > 3); 266 267 /* Mask off bits we don't care about at the start of the search */ 268 mask <<= spoint; 269 tmp = gfs2_bit_search(ptr, mask, state); 270 ptr++; 271 while(tmp == 0 && ptr < end) { 272 tmp = gfs2_bit_search(ptr, 0x5555555555555555ULL, state); 273 ptr++; 274 } 275 /* Mask off any bits which are more than len bytes from the start */ 276 if (ptr == end && (len & (sizeof(u64) - 1))) 277 tmp &= (((u64)~0) >> (64 - 8*(len & (sizeof(u64) - 1)))); 278 /* Didn't find anything, so return */ 279 if (tmp == 0) 280 return BFITNOENT; 281 ptr--; 282 bit = __ffs64(tmp); 283 bit /= 2; /* two bits per entry in the bitmap */ 284 return (((const unsigned char *)ptr - buf) * GFS2_NBBY) + bit; 285 } 286 287 /** 288 * gfs2_bitcount - count the number of bits in a certain state 289 * @rgd: the resource group descriptor 290 * @buffer: the buffer that holds the bitmaps 291 * @buflen: the length (in bytes) of the buffer 292 * @state: the state of the block we're looking for 293 * 294 * Returns: The number of bits 295 */ 296 297 static u32 gfs2_bitcount(struct gfs2_rgrpd *rgd, const u8 *buffer, 298 unsigned int buflen, u8 state) 299 { 300 const u8 *byte = buffer; 301 const u8 *end = buffer + buflen; 302 const u8 state1 = state << 2; 303 const u8 state2 = state << 4; 304 const u8 state3 = state << 6; 305 u32 count = 0; 306 307 for (; byte < end; byte++) { 308 if (((*byte) & 0x03) == state) 309 count++; 310 if (((*byte) & 0x0C) == state1) 311 count++; 312 if (((*byte) & 0x30) == state2) 313 count++; 314 if (((*byte) & 0xC0) == state3) 315 count++; 316 } 317 318 return count; 319 } 320 321 /** 322 * gfs2_rgrp_verify - Verify that a resource group is consistent 323 * @rgd: the rgrp 324 * 325 */ 326 327 void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd) 328 { 329 struct gfs2_sbd *sdp = rgd->rd_sbd; 330 struct gfs2_bitmap *bi = NULL; 331 u32 length = rgd->rd_length; 332 u32 count[4], tmp; 333 int buf, x; 334 335 memset(count, 0, 4 * sizeof(u32)); 336 337 /* Count # blocks in each of 4 possible allocation states */ 338 for (buf = 0; buf < length; buf++) { 339 bi = rgd->rd_bits + buf; 340 for (x = 0; x < 4; x++) 341 count[x] += gfs2_bitcount(rgd, 342 bi->bi_bh->b_data + 343 bi->bi_offset, 344 bi->bi_len, x); 345 } 346 347 if (count[0] != rgd->rd_free) { 348 if (gfs2_consist_rgrpd(rgd)) 349 fs_err(sdp, "free data mismatch: %u != %u\n", 350 count[0], rgd->rd_free); 351 return; 352 } 353 354 tmp = rgd->rd_data - rgd->rd_free - rgd->rd_dinodes; 355 if (count[1] != tmp) { 356 if (gfs2_consist_rgrpd(rgd)) 357 fs_err(sdp, "used data mismatch: %u != %u\n", 358 count[1], tmp); 359 return; 360 } 361 362 if (count[2] + count[3] != rgd->rd_dinodes) { 363 if (gfs2_consist_rgrpd(rgd)) 364 fs_err(sdp, "used metadata mismatch: %u != %u\n", 365 count[2] + count[3], rgd->rd_dinodes); 366 return; 367 } 368 } 369 370 static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block) 371 { 372 u64 first = rgd->rd_data0; 373 u64 last = first + rgd->rd_data; 374 return first <= block && block < last; 375 } 376 377 /** 378 * gfs2_blk2rgrpd - Find resource group for a given data/meta block number 379 * @sdp: The GFS2 superblock 380 * @blk: The data block number 381 * @exact: True if this needs to be an exact match 382 * 383 * Returns: The resource group, or NULL if not found 384 */ 385 386 struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk, bool exact) 387 { 388 struct rb_node *n, *next; 389 struct gfs2_rgrpd *cur; 390 391 spin_lock(&sdp->sd_rindex_spin); 392 n = sdp->sd_rindex_tree.rb_node; 393 while (n) { 394 cur = rb_entry(n, struct gfs2_rgrpd, rd_node); 395 next = NULL; 396 if (blk < cur->rd_addr) 397 next = n->rb_left; 398 else if (blk >= cur->rd_data0 + cur->rd_data) 399 next = n->rb_right; 400 if (next == NULL) { 401 spin_unlock(&sdp->sd_rindex_spin); 402 if (exact) { 403 if (blk < cur->rd_addr) 404 return NULL; 405 if (blk >= cur->rd_data0 + cur->rd_data) 406 return NULL; 407 } 408 return cur; 409 } 410 n = next; 411 } 412 spin_unlock(&sdp->sd_rindex_spin); 413 414 return NULL; 415 } 416 417 /** 418 * gfs2_rgrpd_get_first - get the first Resource Group in the filesystem 419 * @sdp: The GFS2 superblock 420 * 421 * Returns: The first rgrp in the filesystem 422 */ 423 424 struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp) 425 { 426 const struct rb_node *n; 427 struct gfs2_rgrpd *rgd; 428 429 spin_lock(&sdp->sd_rindex_spin); 430 n = rb_first(&sdp->sd_rindex_tree); 431 rgd = rb_entry(n, struct gfs2_rgrpd, rd_node); 432 spin_unlock(&sdp->sd_rindex_spin); 433 434 return rgd; 435 } 436 437 /** 438 * gfs2_rgrpd_get_next - get the next RG 439 * @rgd: the resource group descriptor 440 * 441 * Returns: The next rgrp 442 */ 443 444 struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd) 445 { 446 struct gfs2_sbd *sdp = rgd->rd_sbd; 447 const struct rb_node *n; 448 449 spin_lock(&sdp->sd_rindex_spin); 450 n = rb_next(&rgd->rd_node); 451 if (n == NULL) 452 n = rb_first(&sdp->sd_rindex_tree); 453 454 if (unlikely(&rgd->rd_node == n)) { 455 spin_unlock(&sdp->sd_rindex_spin); 456 return NULL; 457 } 458 rgd = rb_entry(n, struct gfs2_rgrpd, rd_node); 459 spin_unlock(&sdp->sd_rindex_spin); 460 return rgd; 461 } 462 463 void gfs2_free_clones(struct gfs2_rgrpd *rgd) 464 { 465 int x; 466 467 for (x = 0; x < rgd->rd_length; x++) { 468 struct gfs2_bitmap *bi = rgd->rd_bits + x; 469 kfree(bi->bi_clone); 470 bi->bi_clone = NULL; 471 } 472 } 473 474 /** 475 * gfs2_rs_alloc - make sure we have a reservation assigned to the inode 476 * @ip: the inode for this reservation 477 */ 478 int gfs2_rs_alloc(struct gfs2_inode *ip) 479 { 480 int error = 0; 481 struct gfs2_blkreserv *res; 482 483 if (ip->i_res) 484 return 0; 485 486 res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS); 487 if (!res) 488 error = -ENOMEM; 489 490 down_write(&ip->i_rw_mutex); 491 if (ip->i_res) 492 kmem_cache_free(gfs2_rsrv_cachep, res); 493 else 494 ip->i_res = res; 495 up_write(&ip->i_rw_mutex); 496 return error; 497 } 498 499 static void dump_rs(struct seq_file *seq, struct gfs2_blkreserv *rs) 500 { 501 gfs2_print_dbg(seq, " r: %llu s:%llu b:%u f:%u\n", 502 rs->rs_rgd->rd_addr, gfs2_rs_startblk(rs), rs->rs_biblk, 503 rs->rs_free); 504 } 505 506 /** 507 * __rs_deltree - remove a multi-block reservation from the rgd tree 508 * @rs: The reservation to remove 509 * 510 */ 511 static void __rs_deltree(struct gfs2_blkreserv *rs) 512 { 513 struct gfs2_rgrpd *rgd; 514 515 if (!gfs2_rs_active(rs)) 516 return; 517 518 rgd = rs->rs_rgd; 519 /* We can't do this: The reason is that when the rgrp is invalidated, 520 it's in the "middle" of acquiring the glock, but the HOLDER bit 521 isn't set yet: 522 BUG_ON(!gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl));*/ 523 trace_gfs2_rs(NULL, rs, TRACE_RS_TREEDEL); 524 525 if (!RB_EMPTY_ROOT(&rgd->rd_rstree)) 526 rb_erase(&rs->rs_node, &rgd->rd_rstree); 527 BUG_ON(!rgd->rd_rs_cnt); 528 rgd->rd_rs_cnt--; 529 530 if (rs->rs_free) { 531 /* return reserved blocks to the rgrp and the ip */ 532 BUG_ON(rs->rs_rgd->rd_reserved < rs->rs_free); 533 rs->rs_rgd->rd_reserved -= rs->rs_free; 534 rs->rs_free = 0; 535 clear_bit(GBF_FULL, &rs->rs_bi->bi_flags); 536 smp_mb__after_clear_bit(); 537 } 538 /* We can't change any of the step 1 or step 2 components of the rs. 539 E.g. We can't set rs_rgd to NULL because the rgd glock is held and 540 dequeued through this pointer. 541 Can't: atomic_set(&rs->rs_sizehint, 0); 542 Can't: rs->rs_requested = 0; 543 Can't: rs->rs_rgd = NULL;*/ 544 rs->rs_bi = NULL; 545 rs->rs_biblk = 0; 546 } 547 548 /** 549 * gfs2_rs_deltree - remove a multi-block reservation from the rgd tree 550 * @rs: The reservation to remove 551 * 552 */ 553 void gfs2_rs_deltree(struct gfs2_blkreserv *rs) 554 { 555 struct gfs2_rgrpd *rgd; 556 557 if (!gfs2_rs_active(rs)) 558 return; 559 560 rgd = rs->rs_rgd; 561 spin_lock(&rgd->rd_rsspin); 562 __rs_deltree(rs); 563 spin_unlock(&rgd->rd_rsspin); 564 } 565 566 /** 567 * gfs2_rs_delete - delete a multi-block reservation 568 * @ip: The inode for this reservation 569 * 570 */ 571 void gfs2_rs_delete(struct gfs2_inode *ip) 572 { 573 down_write(&ip->i_rw_mutex); 574 if (ip->i_res) { 575 gfs2_rs_deltree(ip->i_res); 576 trace_gfs2_rs(ip, ip->i_res, TRACE_RS_DELETE); 577 BUG_ON(ip->i_res->rs_free); 578 kmem_cache_free(gfs2_rsrv_cachep, ip->i_res); 579 ip->i_res = NULL; 580 } 581 up_write(&ip->i_rw_mutex); 582 } 583 584 /** 585 * return_all_reservations - return all reserved blocks back to the rgrp. 586 * @rgd: the rgrp that needs its space back 587 * 588 * We previously reserved a bunch of blocks for allocation. Now we need to 589 * give them back. This leave the reservation structures in tact, but removes 590 * all of their corresponding "no-fly zones". 591 */ 592 static void return_all_reservations(struct gfs2_rgrpd *rgd) 593 { 594 struct rb_node *n; 595 struct gfs2_blkreserv *rs; 596 597 spin_lock(&rgd->rd_rsspin); 598 while ((n = rb_first(&rgd->rd_rstree))) { 599 rs = rb_entry(n, struct gfs2_blkreserv, rs_node); 600 __rs_deltree(rs); 601 } 602 spin_unlock(&rgd->rd_rsspin); 603 } 604 605 void gfs2_clear_rgrpd(struct gfs2_sbd *sdp) 606 { 607 struct rb_node *n; 608 struct gfs2_rgrpd *rgd; 609 struct gfs2_glock *gl; 610 611 while ((n = rb_first(&sdp->sd_rindex_tree))) { 612 rgd = rb_entry(n, struct gfs2_rgrpd, rd_node); 613 gl = rgd->rd_gl; 614 615 rb_erase(n, &sdp->sd_rindex_tree); 616 617 if (gl) { 618 spin_lock(&gl->gl_spin); 619 gl->gl_object = NULL; 620 spin_unlock(&gl->gl_spin); 621 gfs2_glock_add_to_lru(gl); 622 gfs2_glock_put(gl); 623 } 624 625 gfs2_free_clones(rgd); 626 kfree(rgd->rd_bits); 627 return_all_reservations(rgd); 628 kmem_cache_free(gfs2_rgrpd_cachep, rgd); 629 } 630 } 631 632 static void gfs2_rindex_print(const struct gfs2_rgrpd *rgd) 633 { 634 printk(KERN_INFO " ri_addr = %llu\n", (unsigned long long)rgd->rd_addr); 635 printk(KERN_INFO " ri_length = %u\n", rgd->rd_length); 636 printk(KERN_INFO " ri_data0 = %llu\n", (unsigned long long)rgd->rd_data0); 637 printk(KERN_INFO " ri_data = %u\n", rgd->rd_data); 638 printk(KERN_INFO " ri_bitbytes = %u\n", rgd->rd_bitbytes); 639 } 640 641 /** 642 * gfs2_compute_bitstructs - Compute the bitmap sizes 643 * @rgd: The resource group descriptor 644 * 645 * Calculates bitmap descriptors, one for each block that contains bitmap data 646 * 647 * Returns: errno 648 */ 649 650 static int compute_bitstructs(struct gfs2_rgrpd *rgd) 651 { 652 struct gfs2_sbd *sdp = rgd->rd_sbd; 653 struct gfs2_bitmap *bi; 654 u32 length = rgd->rd_length; /* # blocks in hdr & bitmap */ 655 u32 bytes_left, bytes; 656 int x; 657 658 if (!length) 659 return -EINVAL; 660 661 rgd->rd_bits = kcalloc(length, sizeof(struct gfs2_bitmap), GFP_NOFS); 662 if (!rgd->rd_bits) 663 return -ENOMEM; 664 665 bytes_left = rgd->rd_bitbytes; 666 667 for (x = 0; x < length; x++) { 668 bi = rgd->rd_bits + x; 669 670 bi->bi_flags = 0; 671 /* small rgrp; bitmap stored completely in header block */ 672 if (length == 1) { 673 bytes = bytes_left; 674 bi->bi_offset = sizeof(struct gfs2_rgrp); 675 bi->bi_start = 0; 676 bi->bi_len = bytes; 677 /* header block */ 678 } else if (x == 0) { 679 bytes = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_rgrp); 680 bi->bi_offset = sizeof(struct gfs2_rgrp); 681 bi->bi_start = 0; 682 bi->bi_len = bytes; 683 /* last block */ 684 } else if (x + 1 == length) { 685 bytes = bytes_left; 686 bi->bi_offset = sizeof(struct gfs2_meta_header); 687 bi->bi_start = rgd->rd_bitbytes - bytes_left; 688 bi->bi_len = bytes; 689 /* other blocks */ 690 } else { 691 bytes = sdp->sd_sb.sb_bsize - 692 sizeof(struct gfs2_meta_header); 693 bi->bi_offset = sizeof(struct gfs2_meta_header); 694 bi->bi_start = rgd->rd_bitbytes - bytes_left; 695 bi->bi_len = bytes; 696 } 697 698 bytes_left -= bytes; 699 } 700 701 if (bytes_left) { 702 gfs2_consist_rgrpd(rgd); 703 return -EIO; 704 } 705 bi = rgd->rd_bits + (length - 1); 706 if ((bi->bi_start + bi->bi_len) * GFS2_NBBY != rgd->rd_data) { 707 if (gfs2_consist_rgrpd(rgd)) { 708 gfs2_rindex_print(rgd); 709 fs_err(sdp, "start=%u len=%u offset=%u\n", 710 bi->bi_start, bi->bi_len, bi->bi_offset); 711 } 712 return -EIO; 713 } 714 715 return 0; 716 } 717 718 /** 719 * gfs2_ri_total - Total up the file system space, according to the rindex. 720 * @sdp: the filesystem 721 * 722 */ 723 u64 gfs2_ri_total(struct gfs2_sbd *sdp) 724 { 725 u64 total_data = 0; 726 struct inode *inode = sdp->sd_rindex; 727 struct gfs2_inode *ip = GFS2_I(inode); 728 char buf[sizeof(struct gfs2_rindex)]; 729 int error, rgrps; 730 731 for (rgrps = 0;; rgrps++) { 732 loff_t pos = rgrps * sizeof(struct gfs2_rindex); 733 734 if (pos + sizeof(struct gfs2_rindex) > i_size_read(inode)) 735 break; 736 error = gfs2_internal_read(ip, buf, &pos, 737 sizeof(struct gfs2_rindex)); 738 if (error != sizeof(struct gfs2_rindex)) 739 break; 740 total_data += be32_to_cpu(((struct gfs2_rindex *)buf)->ri_data); 741 } 742 return total_data; 743 } 744 745 static int rgd_insert(struct gfs2_rgrpd *rgd) 746 { 747 struct gfs2_sbd *sdp = rgd->rd_sbd; 748 struct rb_node **newn = &sdp->sd_rindex_tree.rb_node, *parent = NULL; 749 750 /* Figure out where to put new node */ 751 while (*newn) { 752 struct gfs2_rgrpd *cur = rb_entry(*newn, struct gfs2_rgrpd, 753 rd_node); 754 755 parent = *newn; 756 if (rgd->rd_addr < cur->rd_addr) 757 newn = &((*newn)->rb_left); 758 else if (rgd->rd_addr > cur->rd_addr) 759 newn = &((*newn)->rb_right); 760 else 761 return -EEXIST; 762 } 763 764 rb_link_node(&rgd->rd_node, parent, newn); 765 rb_insert_color(&rgd->rd_node, &sdp->sd_rindex_tree); 766 sdp->sd_rgrps++; 767 return 0; 768 } 769 770 /** 771 * read_rindex_entry - Pull in a new resource index entry from the disk 772 * @ip: Pointer to the rindex inode 773 * 774 * Returns: 0 on success, > 0 on EOF, error code otherwise 775 */ 776 777 static int read_rindex_entry(struct gfs2_inode *ip) 778 { 779 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 780 loff_t pos = sdp->sd_rgrps * sizeof(struct gfs2_rindex); 781 struct gfs2_rindex buf; 782 int error; 783 struct gfs2_rgrpd *rgd; 784 785 if (pos >= i_size_read(&ip->i_inode)) 786 return 1; 787 788 error = gfs2_internal_read(ip, (char *)&buf, &pos, 789 sizeof(struct gfs2_rindex)); 790 791 if (error != sizeof(struct gfs2_rindex)) 792 return (error == 0) ? 1 : error; 793 794 rgd = kmem_cache_zalloc(gfs2_rgrpd_cachep, GFP_NOFS); 795 error = -ENOMEM; 796 if (!rgd) 797 return error; 798 799 rgd->rd_sbd = sdp; 800 rgd->rd_addr = be64_to_cpu(buf.ri_addr); 801 rgd->rd_length = be32_to_cpu(buf.ri_length); 802 rgd->rd_data0 = be64_to_cpu(buf.ri_data0); 803 rgd->rd_data = be32_to_cpu(buf.ri_data); 804 rgd->rd_bitbytes = be32_to_cpu(buf.ri_bitbytes); 805 spin_lock_init(&rgd->rd_rsspin); 806 807 error = compute_bitstructs(rgd); 808 if (error) 809 goto fail; 810 811 error = gfs2_glock_get(sdp, rgd->rd_addr, 812 &gfs2_rgrp_glops, CREATE, &rgd->rd_gl); 813 if (error) 814 goto fail; 815 816 rgd->rd_gl->gl_object = rgd; 817 rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lvb; 818 rgd->rd_flags &= ~GFS2_RDF_UPTODATE; 819 if (rgd->rd_data > sdp->sd_max_rg_data) 820 sdp->sd_max_rg_data = rgd->rd_data; 821 spin_lock(&sdp->sd_rindex_spin); 822 error = rgd_insert(rgd); 823 spin_unlock(&sdp->sd_rindex_spin); 824 if (!error) 825 return 0; 826 827 error = 0; /* someone else read in the rgrp; free it and ignore it */ 828 gfs2_glock_put(rgd->rd_gl); 829 830 fail: 831 kfree(rgd->rd_bits); 832 kmem_cache_free(gfs2_rgrpd_cachep, rgd); 833 return error; 834 } 835 836 /** 837 * gfs2_ri_update - Pull in a new resource index from the disk 838 * @ip: pointer to the rindex inode 839 * 840 * Returns: 0 on successful update, error code otherwise 841 */ 842 843 static int gfs2_ri_update(struct gfs2_inode *ip) 844 { 845 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 846 int error; 847 848 do { 849 error = read_rindex_entry(ip); 850 } while (error == 0); 851 852 if (error < 0) 853 return error; 854 855 sdp->sd_rindex_uptodate = 1; 856 return 0; 857 } 858 859 /** 860 * gfs2_rindex_update - Update the rindex if required 861 * @sdp: The GFS2 superblock 862 * 863 * We grab a lock on the rindex inode to make sure that it doesn't 864 * change whilst we are performing an operation. We keep this lock 865 * for quite long periods of time compared to other locks. This 866 * doesn't matter, since it is shared and it is very, very rarely 867 * accessed in the exclusive mode (i.e. only when expanding the filesystem). 868 * 869 * This makes sure that we're using the latest copy of the resource index 870 * special file, which might have been updated if someone expanded the 871 * filesystem (via gfs2_grow utility), which adds new resource groups. 872 * 873 * Returns: 0 on succeess, error code otherwise 874 */ 875 876 int gfs2_rindex_update(struct gfs2_sbd *sdp) 877 { 878 struct gfs2_inode *ip = GFS2_I(sdp->sd_rindex); 879 struct gfs2_glock *gl = ip->i_gl; 880 struct gfs2_holder ri_gh; 881 int error = 0; 882 int unlock_required = 0; 883 884 /* Read new copy from disk if we don't have the latest */ 885 if (!sdp->sd_rindex_uptodate) { 886 if (!gfs2_glock_is_locked_by_me(gl)) { 887 error = gfs2_glock_nq_init(gl, LM_ST_SHARED, 0, &ri_gh); 888 if (error) 889 return error; 890 unlock_required = 1; 891 } 892 if (!sdp->sd_rindex_uptodate) 893 error = gfs2_ri_update(ip); 894 if (unlock_required) 895 gfs2_glock_dq_uninit(&ri_gh); 896 } 897 898 return error; 899 } 900 901 static void gfs2_rgrp_in(struct gfs2_rgrpd *rgd, const void *buf) 902 { 903 const struct gfs2_rgrp *str = buf; 904 u32 rg_flags; 905 906 rg_flags = be32_to_cpu(str->rg_flags); 907 rg_flags &= ~GFS2_RDF_MASK; 908 rgd->rd_flags &= GFS2_RDF_MASK; 909 rgd->rd_flags |= rg_flags; 910 rgd->rd_free = be32_to_cpu(str->rg_free); 911 rgd->rd_dinodes = be32_to_cpu(str->rg_dinodes); 912 rgd->rd_igeneration = be64_to_cpu(str->rg_igeneration); 913 } 914 915 static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf) 916 { 917 struct gfs2_rgrp *str = buf; 918 919 str->rg_flags = cpu_to_be32(rgd->rd_flags & ~GFS2_RDF_MASK); 920 str->rg_free = cpu_to_be32(rgd->rd_free); 921 str->rg_dinodes = cpu_to_be32(rgd->rd_dinodes); 922 str->__pad = cpu_to_be32(0); 923 str->rg_igeneration = cpu_to_be64(rgd->rd_igeneration); 924 memset(&str->rg_reserved, 0, sizeof(str->rg_reserved)); 925 } 926 927 static int gfs2_rgrp_lvb_valid(struct gfs2_rgrpd *rgd) 928 { 929 struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl; 930 struct gfs2_rgrp *str = (struct gfs2_rgrp *)rgd->rd_bits[0].bi_bh->b_data; 931 932 if (rgl->rl_flags != str->rg_flags || rgl->rl_free != str->rg_free || 933 rgl->rl_dinodes != str->rg_dinodes || 934 rgl->rl_igeneration != str->rg_igeneration) 935 return 0; 936 return 1; 937 } 938 939 static void gfs2_rgrp_ondisk2lvb(struct gfs2_rgrp_lvb *rgl, const void *buf) 940 { 941 const struct gfs2_rgrp *str = buf; 942 943 rgl->rl_magic = cpu_to_be32(GFS2_MAGIC); 944 rgl->rl_flags = str->rg_flags; 945 rgl->rl_free = str->rg_free; 946 rgl->rl_dinodes = str->rg_dinodes; 947 rgl->rl_igeneration = str->rg_igeneration; 948 rgl->__pad = 0UL; 949 } 950 951 static void update_rgrp_lvb_unlinked(struct gfs2_rgrpd *rgd, u32 change) 952 { 953 struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl; 954 u32 unlinked = be32_to_cpu(rgl->rl_unlinked) + change; 955 rgl->rl_unlinked = cpu_to_be32(unlinked); 956 } 957 958 static u32 count_unlinked(struct gfs2_rgrpd *rgd) 959 { 960 struct gfs2_bitmap *bi; 961 const u32 length = rgd->rd_length; 962 const u8 *buffer = NULL; 963 u32 i, goal, count = 0; 964 965 for (i = 0, bi = rgd->rd_bits; i < length; i++, bi++) { 966 goal = 0; 967 buffer = bi->bi_bh->b_data + bi->bi_offset; 968 WARN_ON(!buffer_uptodate(bi->bi_bh)); 969 while (goal < bi->bi_len * GFS2_NBBY) { 970 goal = gfs2_bitfit(buffer, bi->bi_len, goal, 971 GFS2_BLKST_UNLINKED); 972 if (goal == BFITNOENT) 973 break; 974 count++; 975 goal++; 976 } 977 } 978 979 return count; 980 } 981 982 983 /** 984 * gfs2_rgrp_bh_get - Read in a RG's header and bitmaps 985 * @rgd: the struct gfs2_rgrpd describing the RG to read in 986 * 987 * Read in all of a Resource Group's header and bitmap blocks. 988 * Caller must eventually call gfs2_rgrp_relse() to free the bitmaps. 989 * 990 * Returns: errno 991 */ 992 993 int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd) 994 { 995 struct gfs2_sbd *sdp = rgd->rd_sbd; 996 struct gfs2_glock *gl = rgd->rd_gl; 997 unsigned int length = rgd->rd_length; 998 struct gfs2_bitmap *bi; 999 unsigned int x, y; 1000 int error; 1001 1002 if (rgd->rd_bits[0].bi_bh != NULL) 1003 return 0; 1004 1005 for (x = 0; x < length; x++) { 1006 bi = rgd->rd_bits + x; 1007 error = gfs2_meta_read(gl, rgd->rd_addr + x, 0, &bi->bi_bh); 1008 if (error) 1009 goto fail; 1010 } 1011 1012 for (y = length; y--;) { 1013 bi = rgd->rd_bits + y; 1014 error = gfs2_meta_wait(sdp, bi->bi_bh); 1015 if (error) 1016 goto fail; 1017 if (gfs2_metatype_check(sdp, bi->bi_bh, y ? GFS2_METATYPE_RB : 1018 GFS2_METATYPE_RG)) { 1019 error = -EIO; 1020 goto fail; 1021 } 1022 } 1023 1024 if (!(rgd->rd_flags & GFS2_RDF_UPTODATE)) { 1025 for (x = 0; x < length; x++) 1026 clear_bit(GBF_FULL, &rgd->rd_bits[x].bi_flags); 1027 gfs2_rgrp_in(rgd, (rgd->rd_bits[0].bi_bh)->b_data); 1028 rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK); 1029 rgd->rd_free_clone = rgd->rd_free; 1030 } 1031 if (be32_to_cpu(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) { 1032 rgd->rd_rgl->rl_unlinked = cpu_to_be32(count_unlinked(rgd)); 1033 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, 1034 rgd->rd_bits[0].bi_bh->b_data); 1035 } 1036 else if (sdp->sd_args.ar_rgrplvb) { 1037 if (!gfs2_rgrp_lvb_valid(rgd)){ 1038 gfs2_consist_rgrpd(rgd); 1039 error = -EIO; 1040 goto fail; 1041 } 1042 if (rgd->rd_rgl->rl_unlinked == 0) 1043 rgd->rd_flags &= ~GFS2_RDF_CHECK; 1044 } 1045 return 0; 1046 1047 fail: 1048 while (x--) { 1049 bi = rgd->rd_bits + x; 1050 brelse(bi->bi_bh); 1051 bi->bi_bh = NULL; 1052 gfs2_assert_warn(sdp, !bi->bi_clone); 1053 } 1054 1055 return error; 1056 } 1057 1058 int update_rgrp_lvb(struct gfs2_rgrpd *rgd) 1059 { 1060 u32 rl_flags; 1061 1062 if (rgd->rd_flags & GFS2_RDF_UPTODATE) 1063 return 0; 1064 1065 if (be32_to_cpu(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) 1066 return gfs2_rgrp_bh_get(rgd); 1067 1068 rl_flags = be32_to_cpu(rgd->rd_rgl->rl_flags); 1069 rl_flags &= ~GFS2_RDF_MASK; 1070 rgd->rd_flags &= GFS2_RDF_MASK; 1071 rgd->rd_flags |= (rl_flags | GFS2_RDF_UPTODATE | GFS2_RDF_CHECK); 1072 if (rgd->rd_rgl->rl_unlinked == 0) 1073 rgd->rd_flags &= ~GFS2_RDF_CHECK; 1074 rgd->rd_free = be32_to_cpu(rgd->rd_rgl->rl_free); 1075 rgd->rd_free_clone = rgd->rd_free; 1076 rgd->rd_dinodes = be32_to_cpu(rgd->rd_rgl->rl_dinodes); 1077 rgd->rd_igeneration = be64_to_cpu(rgd->rd_rgl->rl_igeneration); 1078 return 0; 1079 } 1080 1081 int gfs2_rgrp_go_lock(struct gfs2_holder *gh) 1082 { 1083 struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object; 1084 struct gfs2_sbd *sdp = rgd->rd_sbd; 1085 1086 if (gh->gh_flags & GL_SKIP && sdp->sd_args.ar_rgrplvb) 1087 return 0; 1088 return gfs2_rgrp_bh_get((struct gfs2_rgrpd *)gh->gh_gl->gl_object); 1089 } 1090 1091 /** 1092 * gfs2_rgrp_go_unlock - Release RG bitmaps read in with gfs2_rgrp_bh_get() 1093 * @gh: The glock holder for the resource group 1094 * 1095 */ 1096 1097 void gfs2_rgrp_go_unlock(struct gfs2_holder *gh) 1098 { 1099 struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object; 1100 int x, length = rgd->rd_length; 1101 1102 for (x = 0; x < length; x++) { 1103 struct gfs2_bitmap *bi = rgd->rd_bits + x; 1104 if (bi->bi_bh) { 1105 brelse(bi->bi_bh); 1106 bi->bi_bh = NULL; 1107 } 1108 } 1109 1110 } 1111 1112 int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, 1113 struct buffer_head *bh, 1114 const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed) 1115 { 1116 struct super_block *sb = sdp->sd_vfs; 1117 struct block_device *bdev = sb->s_bdev; 1118 const unsigned int sects_per_blk = sdp->sd_sb.sb_bsize / 1119 bdev_logical_block_size(sb->s_bdev); 1120 u64 blk; 1121 sector_t start = 0; 1122 sector_t nr_sects = 0; 1123 int rv; 1124 unsigned int x; 1125 u32 trimmed = 0; 1126 u8 diff; 1127 1128 for (x = 0; x < bi->bi_len; x++) { 1129 const u8 *clone = bi->bi_clone ? bi->bi_clone : bi->bi_bh->b_data; 1130 clone += bi->bi_offset; 1131 clone += x; 1132 if (bh) { 1133 const u8 *orig = bh->b_data + bi->bi_offset + x; 1134 diff = ~(*orig | (*orig >> 1)) & (*clone | (*clone >> 1)); 1135 } else { 1136 diff = ~(*clone | (*clone >> 1)); 1137 } 1138 diff &= 0x55; 1139 if (diff == 0) 1140 continue; 1141 blk = offset + ((bi->bi_start + x) * GFS2_NBBY); 1142 blk *= sects_per_blk; /* convert to sectors */ 1143 while(diff) { 1144 if (diff & 1) { 1145 if (nr_sects == 0) 1146 goto start_new_extent; 1147 if ((start + nr_sects) != blk) { 1148 if (nr_sects >= minlen) { 1149 rv = blkdev_issue_discard(bdev, 1150 start, nr_sects, 1151 GFP_NOFS, 0); 1152 if (rv) 1153 goto fail; 1154 trimmed += nr_sects; 1155 } 1156 nr_sects = 0; 1157 start_new_extent: 1158 start = blk; 1159 } 1160 nr_sects += sects_per_blk; 1161 } 1162 diff >>= 2; 1163 blk += sects_per_blk; 1164 } 1165 } 1166 if (nr_sects >= minlen) { 1167 rv = blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS, 0); 1168 if (rv) 1169 goto fail; 1170 trimmed += nr_sects; 1171 } 1172 if (ptrimmed) 1173 *ptrimmed = trimmed; 1174 return 0; 1175 1176 fail: 1177 if (sdp->sd_args.ar_discard) 1178 fs_warn(sdp, "error %d on discard request, turning discards off for this filesystem", rv); 1179 sdp->sd_args.ar_discard = 0; 1180 return -EIO; 1181 } 1182 1183 /** 1184 * gfs2_fitrim - Generate discard requests for unused bits of the filesystem 1185 * @filp: Any file on the filesystem 1186 * @argp: Pointer to the arguments (also used to pass result) 1187 * 1188 * Returns: 0 on success, otherwise error code 1189 */ 1190 1191 int gfs2_fitrim(struct file *filp, void __user *argp) 1192 { 1193 struct inode *inode = filp->f_dentry->d_inode; 1194 struct gfs2_sbd *sdp = GFS2_SB(inode); 1195 struct request_queue *q = bdev_get_queue(sdp->sd_vfs->s_bdev); 1196 struct buffer_head *bh; 1197 struct gfs2_rgrpd *rgd; 1198 struct gfs2_rgrpd *rgd_end; 1199 struct gfs2_holder gh; 1200 struct fstrim_range r; 1201 int ret = 0; 1202 u64 amt; 1203 u64 trimmed = 0; 1204 unsigned int x; 1205 1206 if (!capable(CAP_SYS_ADMIN)) 1207 return -EPERM; 1208 1209 if (!blk_queue_discard(q)) 1210 return -EOPNOTSUPP; 1211 1212 if (argp == NULL) { 1213 r.start = 0; 1214 r.len = ULLONG_MAX; 1215 r.minlen = 0; 1216 } else if (copy_from_user(&r, argp, sizeof(r))) 1217 return -EFAULT; 1218 1219 ret = gfs2_rindex_update(sdp); 1220 if (ret) 1221 return ret; 1222 1223 rgd = gfs2_blk2rgrpd(sdp, r.start, 0); 1224 rgd_end = gfs2_blk2rgrpd(sdp, r.start + r.len, 0); 1225 1226 while (1) { 1227 1228 ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &gh); 1229 if (ret) 1230 goto out; 1231 1232 if (!(rgd->rd_flags & GFS2_RGF_TRIMMED)) { 1233 /* Trim each bitmap in the rgrp */ 1234 for (x = 0; x < rgd->rd_length; x++) { 1235 struct gfs2_bitmap *bi = rgd->rd_bits + x; 1236 ret = gfs2_rgrp_send_discards(sdp, rgd->rd_data0, NULL, bi, r.minlen, &amt); 1237 if (ret) { 1238 gfs2_glock_dq_uninit(&gh); 1239 goto out; 1240 } 1241 trimmed += amt; 1242 } 1243 1244 /* Mark rgrp as having been trimmed */ 1245 ret = gfs2_trans_begin(sdp, RES_RG_HDR, 0); 1246 if (ret == 0) { 1247 bh = rgd->rd_bits[0].bi_bh; 1248 rgd->rd_flags |= GFS2_RGF_TRIMMED; 1249 gfs2_trans_add_bh(rgd->rd_gl, bh, 1); 1250 gfs2_rgrp_out(rgd, bh->b_data); 1251 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, bh->b_data); 1252 gfs2_trans_end(sdp); 1253 } 1254 } 1255 gfs2_glock_dq_uninit(&gh); 1256 1257 if (rgd == rgd_end) 1258 break; 1259 1260 rgd = gfs2_rgrpd_get_next(rgd); 1261 } 1262 1263 out: 1264 r.len = trimmed << 9; 1265 if (argp && copy_to_user(argp, &r, sizeof(r))) 1266 return -EFAULT; 1267 1268 return ret; 1269 } 1270 1271 /** 1272 * rs_insert - insert a new multi-block reservation into the rgrp's rb_tree 1273 * @bi: the bitmap with the blocks 1274 * @ip: the inode structure 1275 * @biblk: the 32-bit block number relative to the start of the bitmap 1276 * @amount: the number of blocks to reserve 1277 * 1278 * Returns: NULL - reservation was already taken, so not inserted 1279 * pointer to the inserted reservation 1280 */ 1281 static struct gfs2_blkreserv *rs_insert(struct gfs2_bitmap *bi, 1282 struct gfs2_inode *ip, u32 biblk, 1283 int amount) 1284 { 1285 struct rb_node **newn, *parent = NULL; 1286 int rc; 1287 struct gfs2_blkreserv *rs = ip->i_res; 1288 struct gfs2_rgrpd *rgd = rs->rs_rgd; 1289 u64 fsblock = gfs2_bi2rgd_blk(bi, biblk) + rgd->rd_data0; 1290 1291 spin_lock(&rgd->rd_rsspin); 1292 newn = &rgd->rd_rstree.rb_node; 1293 BUG_ON(!ip->i_res); 1294 BUG_ON(gfs2_rs_active(rs)); 1295 /* Figure out where to put new node */ 1296 /*BUG_ON(!gfs2_glock_is_locked_by_me(rgd->rd_gl));*/ 1297 while (*newn) { 1298 struct gfs2_blkreserv *cur = 1299 rb_entry(*newn, struct gfs2_blkreserv, rs_node); 1300 1301 parent = *newn; 1302 rc = rs_cmp(fsblock, amount, cur); 1303 if (rc > 0) 1304 newn = &((*newn)->rb_right); 1305 else if (rc < 0) 1306 newn = &((*newn)->rb_left); 1307 else { 1308 spin_unlock(&rgd->rd_rsspin); 1309 return NULL; /* reservation already in use */ 1310 } 1311 } 1312 1313 /* Do our reservation work */ 1314 rs = ip->i_res; 1315 rs->rs_free = amount; 1316 rs->rs_biblk = biblk; 1317 rs->rs_bi = bi; 1318 rb_link_node(&rs->rs_node, parent, newn); 1319 rb_insert_color(&rs->rs_node, &rgd->rd_rstree); 1320 1321 /* Do our inode accounting for the reservation */ 1322 /*BUG_ON(!gfs2_glock_is_locked_by_me(ip->i_gl));*/ 1323 1324 /* Do our rgrp accounting for the reservation */ 1325 rgd->rd_reserved += amount; /* blocks reserved */ 1326 rgd->rd_rs_cnt++; /* number of in-tree reservations */ 1327 spin_unlock(&rgd->rd_rsspin); 1328 trace_gfs2_rs(ip, rs, TRACE_RS_INSERT); 1329 return rs; 1330 } 1331 1332 /** 1333 * unclaimed_blocks - return number of blocks that aren't spoken for 1334 */ 1335 static u32 unclaimed_blocks(struct gfs2_rgrpd *rgd) 1336 { 1337 return rgd->rd_free_clone - rgd->rd_reserved; 1338 } 1339 1340 /** 1341 * rg_mblk_search - find a group of multiple free blocks 1342 * @rgd: the resource group descriptor 1343 * @rs: the block reservation 1344 * @ip: pointer to the inode for which we're reserving blocks 1345 * 1346 * This is very similar to rgblk_search, except we're looking for whole 1347 * 64-bit words that represent a chunk of 32 free blocks. I'm only focusing 1348 * on aligned dwords for speed's sake. 1349 * 1350 * Returns: 0 if successful or BFITNOENT if there isn't enough free space 1351 */ 1352 1353 static int rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) 1354 { 1355 struct gfs2_bitmap *bi = rgd->rd_bits; 1356 const u32 length = rgd->rd_length; 1357 u32 blk; 1358 unsigned int buf, x, search_bytes; 1359 u8 *buffer = NULL; 1360 u8 *ptr, *end, *nonzero; 1361 u32 goal, rsv_bytes; 1362 struct gfs2_blkreserv *rs; 1363 u32 best_rs_bytes, unclaimed; 1364 int best_rs_blocks; 1365 1366 /* Find bitmap block that contains bits for goal block */ 1367 if (rgrp_contains_block(rgd, ip->i_goal)) 1368 goal = ip->i_goal - rgd->rd_data0; 1369 else 1370 goal = rgd->rd_last_alloc; 1371 for (buf = 0; buf < length; buf++) { 1372 bi = rgd->rd_bits + buf; 1373 /* Convert scope of "goal" from rgrp-wide to within 1374 found bit block */ 1375 if (goal < (bi->bi_start + bi->bi_len) * GFS2_NBBY) { 1376 goal -= bi->bi_start * GFS2_NBBY; 1377 goto do_search; 1378 } 1379 } 1380 buf = 0; 1381 goal = 0; 1382 1383 do_search: 1384 best_rs_blocks = max_t(int, atomic_read(&ip->i_res->rs_sizehint), 1385 (RGRP_RSRV_MINBLKS * rgd->rd_length)); 1386 best_rs_bytes = (best_rs_blocks * 1387 (1 + (RSRV_CONTENTION_FACTOR * rgd->rd_rs_cnt))) / 1388 GFS2_NBBY; /* 1 + is for our not-yet-created reservation */ 1389 best_rs_bytes = ALIGN(best_rs_bytes, sizeof(u64)); 1390 unclaimed = unclaimed_blocks(rgd); 1391 if (best_rs_bytes * GFS2_NBBY > unclaimed) 1392 best_rs_bytes = unclaimed >> GFS2_BIT_SIZE; 1393 1394 for (x = 0; x <= length; x++) { 1395 bi = rgd->rd_bits + buf; 1396 1397 if (test_bit(GBF_FULL, &bi->bi_flags)) 1398 goto skip; 1399 1400 WARN_ON(!buffer_uptodate(bi->bi_bh)); 1401 if (bi->bi_clone) 1402 buffer = bi->bi_clone + bi->bi_offset; 1403 else 1404 buffer = bi->bi_bh->b_data + bi->bi_offset; 1405 1406 /* We have to keep the reservations aligned on u64 boundaries 1407 otherwise we could get situations where a byte can't be 1408 used because it's after a reservation, but a free bit still 1409 is within the reservation's area. */ 1410 ptr = buffer + ALIGN(goal >> GFS2_BIT_SIZE, sizeof(u64)); 1411 end = (buffer + bi->bi_len); 1412 while (ptr < end) { 1413 rsv_bytes = 0; 1414 if ((ptr + best_rs_bytes) <= end) 1415 search_bytes = best_rs_bytes; 1416 else 1417 search_bytes = end - ptr; 1418 BUG_ON(!search_bytes); 1419 nonzero = memchr_inv(ptr, 0, search_bytes); 1420 /* If the lot is all zeroes, reserve the whole size. If 1421 there's enough zeroes to satisfy the request, use 1422 what we can. If there's not enough, keep looking. */ 1423 if (nonzero == NULL) 1424 rsv_bytes = search_bytes; 1425 else if ((nonzero - ptr) * GFS2_NBBY >= 1426 ip->i_res->rs_requested) 1427 rsv_bytes = (nonzero - ptr); 1428 1429 if (rsv_bytes) { 1430 blk = ((ptr - buffer) * GFS2_NBBY); 1431 BUG_ON(blk >= bi->bi_len * GFS2_NBBY); 1432 rs = rs_insert(bi, ip, blk, 1433 rsv_bytes * GFS2_NBBY); 1434 if (IS_ERR(rs)) 1435 return PTR_ERR(rs); 1436 if (rs) 1437 return 0; 1438 } 1439 ptr += ALIGN(search_bytes, sizeof(u64)); 1440 } 1441 skip: 1442 /* Try next bitmap block (wrap back to rgrp header 1443 if at end) */ 1444 buf++; 1445 buf %= length; 1446 goal = 0; 1447 } 1448 1449 return BFITNOENT; 1450 } 1451 1452 /** 1453 * try_rgrp_fit - See if a given reservation will fit in a given RG 1454 * @rgd: the RG data 1455 * @ip: the inode 1456 * 1457 * If there's room for the requested blocks to be allocated from the RG: 1458 * This will try to get a multi-block reservation first, and if that doesn't 1459 * fit, it will take what it can. 1460 * 1461 * Returns: 1 on success (it fits), 0 on failure (it doesn't fit) 1462 */ 1463 1464 static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) 1465 { 1466 struct gfs2_blkreserv *rs = ip->i_res; 1467 1468 if (rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR)) 1469 return 0; 1470 /* Look for a multi-block reservation. */ 1471 if (unclaimed_blocks(rgd) >= RGRP_RSRV_MINBLKS && 1472 rg_mblk_search(rgd, ip) != BFITNOENT) 1473 return 1; 1474 if (unclaimed_blocks(rgd) >= rs->rs_requested) 1475 return 1; 1476 1477 return 0; 1478 } 1479 1480 /** 1481 * try_rgrp_unlink - Look for any unlinked, allocated, but unused inodes 1482 * @rgd: The rgrp 1483 * @last_unlinked: block address of the last dinode we unlinked 1484 * @skip: block address we should explicitly not unlink 1485 * 1486 * Returns: 0 if no error 1487 * The inode, if one has been found, in inode. 1488 */ 1489 1490 static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip) 1491 { 1492 u32 goal = 0, block; 1493 u64 no_addr; 1494 struct gfs2_sbd *sdp = rgd->rd_sbd; 1495 struct gfs2_glock *gl; 1496 struct gfs2_inode *ip; 1497 int error; 1498 int found = 0; 1499 struct gfs2_bitmap *bi; 1500 1501 while (goal < rgd->rd_data) { 1502 down_write(&sdp->sd_log_flush_lock); 1503 block = rgblk_search(rgd, goal, GFS2_BLKST_UNLINKED, &bi); 1504 up_write(&sdp->sd_log_flush_lock); 1505 if (block == BFITNOENT) 1506 break; 1507 1508 block = gfs2_bi2rgd_blk(bi, block); 1509 /* rgblk_search can return a block < goal, so we need to 1510 keep it marching forward. */ 1511 no_addr = block + rgd->rd_data0; 1512 goal = max(block + 1, goal + 1); 1513 if (*last_unlinked != NO_BLOCK && no_addr <= *last_unlinked) 1514 continue; 1515 if (no_addr == skip) 1516 continue; 1517 *last_unlinked = no_addr; 1518 1519 error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &gl); 1520 if (error) 1521 continue; 1522 1523 /* If the inode is already in cache, we can ignore it here 1524 * because the existing inode disposal code will deal with 1525 * it when all refs have gone away. Accessing gl_object like 1526 * this is not safe in general. Here it is ok because we do 1527 * not dereference the pointer, and we only need an approx 1528 * answer to whether it is NULL or not. 1529 */ 1530 ip = gl->gl_object; 1531 1532 if (ip || queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0) 1533 gfs2_glock_put(gl); 1534 else 1535 found++; 1536 1537 /* Limit reclaim to sensible number of tasks */ 1538 if (found > NR_CPUS) 1539 return; 1540 } 1541 1542 rgd->rd_flags &= ~GFS2_RDF_CHECK; 1543 return; 1544 } 1545 1546 /** 1547 * gfs2_inplace_reserve - Reserve space in the filesystem 1548 * @ip: the inode to reserve space for 1549 * @requested: the number of blocks to be reserved 1550 * 1551 * Returns: errno 1552 */ 1553 1554 int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) 1555 { 1556 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1557 struct gfs2_rgrpd *begin = NULL; 1558 struct gfs2_blkreserv *rs = ip->i_res; 1559 int error = 0, rg_locked, flags = LM_FLAG_TRY; 1560 u64 last_unlinked = NO_BLOCK; 1561 int loops = 0; 1562 1563 if (sdp->sd_args.ar_rgrplvb) 1564 flags |= GL_SKIP; 1565 rs->rs_requested = requested; 1566 if (gfs2_assert_warn(sdp, requested)) { 1567 error = -EINVAL; 1568 goto out; 1569 } 1570 if (gfs2_rs_active(rs)) { 1571 begin = rs->rs_rgd; 1572 flags = 0; /* Yoda: Do or do not. There is no try */ 1573 } else if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) { 1574 rs->rs_rgd = begin = ip->i_rgd; 1575 } else { 1576 rs->rs_rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1); 1577 } 1578 if (rs->rs_rgd == NULL) 1579 return -EBADSLT; 1580 1581 while (loops < 3) { 1582 rg_locked = 0; 1583 1584 if (gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl)) { 1585 rg_locked = 1; 1586 error = 0; 1587 } else if (!loops && !gfs2_rs_active(rs) && 1588 rs->rs_rgd->rd_rs_cnt > RGRP_RSRV_MAX_CONTENDERS) { 1589 /* If the rgrp already is maxed out for contenders, 1590 we can eliminate it as a "first pass" without even 1591 requesting the rgrp glock. */ 1592 error = GLR_TRYFAILED; 1593 } else { 1594 error = gfs2_glock_nq_init(rs->rs_rgd->rd_gl, 1595 LM_ST_EXCLUSIVE, flags, 1596 &rs->rs_rgd_gh); 1597 if (!error && sdp->sd_args.ar_rgrplvb) { 1598 error = update_rgrp_lvb(rs->rs_rgd); 1599 if (error) { 1600 gfs2_glock_dq_uninit(&rs->rs_rgd_gh); 1601 return error; 1602 } 1603 } 1604 } 1605 switch (error) { 1606 case 0: 1607 if (gfs2_rs_active(rs)) { 1608 if (unclaimed_blocks(rs->rs_rgd) + 1609 rs->rs_free >= rs->rs_requested) { 1610 ip->i_rgd = rs->rs_rgd; 1611 return 0; 1612 } 1613 /* We have a multi-block reservation, but the 1614 rgrp doesn't have enough free blocks to 1615 satisfy the request. Free the reservation 1616 and look for a suitable rgrp. */ 1617 gfs2_rs_deltree(rs); 1618 } 1619 if (try_rgrp_fit(rs->rs_rgd, ip)) { 1620 if (sdp->sd_args.ar_rgrplvb) 1621 gfs2_rgrp_bh_get(rs->rs_rgd); 1622 ip->i_rgd = rs->rs_rgd; 1623 return 0; 1624 } 1625 if (rs->rs_rgd->rd_flags & GFS2_RDF_CHECK) { 1626 if (sdp->sd_args.ar_rgrplvb) 1627 gfs2_rgrp_bh_get(rs->rs_rgd); 1628 try_rgrp_unlink(rs->rs_rgd, &last_unlinked, 1629 ip->i_no_addr); 1630 } 1631 if (!rg_locked) 1632 gfs2_glock_dq_uninit(&rs->rs_rgd_gh); 1633 /* fall through */ 1634 case GLR_TRYFAILED: 1635 rs->rs_rgd = gfs2_rgrpd_get_next(rs->rs_rgd); 1636 rs->rs_rgd = rs->rs_rgd ? : begin; /* if NULL, wrap */ 1637 if (rs->rs_rgd != begin) /* If we didn't wrap */ 1638 break; 1639 1640 flags &= ~LM_FLAG_TRY; 1641 loops++; 1642 /* Check that fs hasn't grown if writing to rindex */ 1643 if (ip == GFS2_I(sdp->sd_rindex) && 1644 !sdp->sd_rindex_uptodate) { 1645 error = gfs2_ri_update(ip); 1646 if (error) 1647 goto out; 1648 } else if (loops == 2) 1649 /* Flushing the log may release space */ 1650 gfs2_log_flush(sdp, NULL); 1651 break; 1652 default: 1653 goto out; 1654 } 1655 } 1656 error = -ENOSPC; 1657 1658 out: 1659 if (error) 1660 rs->rs_requested = 0; 1661 return error; 1662 } 1663 1664 /** 1665 * gfs2_inplace_release - release an inplace reservation 1666 * @ip: the inode the reservation was taken out on 1667 * 1668 * Release a reservation made by gfs2_inplace_reserve(). 1669 */ 1670 1671 void gfs2_inplace_release(struct gfs2_inode *ip) 1672 { 1673 struct gfs2_blkreserv *rs = ip->i_res; 1674 1675 if (!rs) 1676 return; 1677 1678 if (!rs->rs_free) 1679 gfs2_rs_deltree(rs); 1680 1681 if (rs->rs_rgd_gh.gh_gl) 1682 gfs2_glock_dq_uninit(&rs->rs_rgd_gh); 1683 rs->rs_requested = 0; 1684 } 1685 1686 /** 1687 * gfs2_get_block_type - Check a block in a RG is of given type 1688 * @rgd: the resource group holding the block 1689 * @block: the block number 1690 * 1691 * Returns: The block type (GFS2_BLKST_*) 1692 */ 1693 1694 static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block) 1695 { 1696 struct gfs2_bitmap *bi = NULL; 1697 u32 length, rgrp_block, buf_block; 1698 unsigned int buf; 1699 unsigned char type; 1700 1701 length = rgd->rd_length; 1702 rgrp_block = block - rgd->rd_data0; 1703 1704 for (buf = 0; buf < length; buf++) { 1705 bi = rgd->rd_bits + buf; 1706 if (rgrp_block < (bi->bi_start + bi->bi_len) * GFS2_NBBY) 1707 break; 1708 } 1709 1710 gfs2_assert(rgd->rd_sbd, buf < length); 1711 buf_block = rgrp_block - bi->bi_start * GFS2_NBBY; 1712 1713 type = gfs2_testbit(rgd, bi->bi_bh->b_data + bi->bi_offset, 1714 bi->bi_len, buf_block); 1715 1716 return type; 1717 } 1718 1719 /** 1720 * rgblk_search - find a block in @state 1721 * @rgd: the resource group descriptor 1722 * @goal: the goal block within the RG (start here to search for avail block) 1723 * @state: GFS2_BLKST_XXX the before-allocation state to find 1724 * @rbi: address of the pointer to the bitmap containing the block found 1725 * 1726 * Walk rgrp's bitmap to find bits that represent a block in @state. 1727 * 1728 * This function never fails, because we wouldn't call it unless we 1729 * know (from reservation results, etc.) that a block is available. 1730 * 1731 * Scope of @goal is just within rgrp, not the whole filesystem. 1732 * Scope of @returned block is just within bitmap, not the whole filesystem. 1733 * 1734 * Returns: the block number found relative to the bitmap rbi 1735 */ 1736 1737 static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, unsigned char state, 1738 struct gfs2_bitmap **rbi) 1739 { 1740 struct gfs2_bitmap *bi = NULL; 1741 const u32 length = rgd->rd_length; 1742 u32 biblk = BFITNOENT; 1743 unsigned int buf, x; 1744 const u8 *buffer = NULL; 1745 1746 *rbi = NULL; 1747 /* Find bitmap block that contains bits for goal block */ 1748 for (buf = 0; buf < length; buf++) { 1749 bi = rgd->rd_bits + buf; 1750 /* Convert scope of "goal" from rgrp-wide to within found bit block */ 1751 if (goal < (bi->bi_start + bi->bi_len) * GFS2_NBBY) { 1752 goal -= bi->bi_start * GFS2_NBBY; 1753 goto do_search; 1754 } 1755 } 1756 buf = 0; 1757 goal = 0; 1758 1759 do_search: 1760 /* Search (up to entire) bitmap in this rgrp for allocatable block. 1761 "x <= length", instead of "x < length", because we typically start 1762 the search in the middle of a bit block, but if we can't find an 1763 allocatable block anywhere else, we want to be able wrap around and 1764 search in the first part of our first-searched bit block. */ 1765 for (x = 0; x <= length; x++) { 1766 bi = rgd->rd_bits + buf; 1767 1768 if (test_bit(GBF_FULL, &bi->bi_flags) && 1769 (state == GFS2_BLKST_FREE)) 1770 goto skip; 1771 1772 /* The GFS2_BLKST_UNLINKED state doesn't apply to the clone 1773 bitmaps, so we must search the originals for that. */ 1774 buffer = bi->bi_bh->b_data + bi->bi_offset; 1775 WARN_ON(!buffer_uptodate(bi->bi_bh)); 1776 if (state != GFS2_BLKST_UNLINKED && bi->bi_clone) 1777 buffer = bi->bi_clone + bi->bi_offset; 1778 1779 while (1) { 1780 struct gfs2_blkreserv *rs; 1781 u32 rgblk; 1782 1783 biblk = gfs2_bitfit(buffer, bi->bi_len, goal, state); 1784 if (biblk == BFITNOENT) 1785 break; 1786 /* Check if this block is reserved() */ 1787 rgblk = gfs2_bi2rgd_blk(bi, biblk); 1788 rs = rs_find(rgd, rgblk); 1789 if (rs == NULL) 1790 break; 1791 1792 BUG_ON(rs->rs_bi != bi); 1793 biblk = BFITNOENT; 1794 /* This should jump to the first block after the 1795 reservation. */ 1796 goal = rs->rs_biblk + rs->rs_free; 1797 if (goal >= bi->bi_len * GFS2_NBBY) 1798 break; 1799 } 1800 if (biblk != BFITNOENT) 1801 break; 1802 1803 if ((goal == 0) && (state == GFS2_BLKST_FREE)) 1804 set_bit(GBF_FULL, &bi->bi_flags); 1805 1806 /* Try next bitmap block (wrap back to rgrp header if at end) */ 1807 skip: 1808 buf++; 1809 buf %= length; 1810 goal = 0; 1811 } 1812 1813 if (biblk != BFITNOENT) 1814 *rbi = bi; 1815 1816 return biblk; 1817 } 1818 1819 /** 1820 * gfs2_alloc_extent - allocate an extent from a given bitmap 1821 * @rgd: the resource group descriptor 1822 * @bi: the bitmap within the rgrp 1823 * @blk: the block within the bitmap 1824 * @dinode: TRUE if the first block we allocate is for a dinode 1825 * @n: The extent length 1826 * 1827 * Add the found bitmap buffer to the transaction. 1828 * Set the found bits to @new_state to change block's allocation state. 1829 * Returns: starting block number of the extent (fs scope) 1830 */ 1831 static u64 gfs2_alloc_extent(struct gfs2_rgrpd *rgd, struct gfs2_bitmap *bi, 1832 u32 blk, bool dinode, unsigned int *n) 1833 { 1834 const unsigned int elen = *n; 1835 u32 goal, rgblk; 1836 const u8 *buffer = NULL; 1837 struct gfs2_blkreserv *rs; 1838 1839 *n = 0; 1840 buffer = bi->bi_bh->b_data + bi->bi_offset; 1841 gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); 1842 gfs2_setbit(rgd, bi->bi_clone, bi, blk, 1843 dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED); 1844 (*n)++; 1845 goal = blk; 1846 while (*n < elen) { 1847 goal++; 1848 if (goal >= (bi->bi_len * GFS2_NBBY)) 1849 break; 1850 rgblk = gfs2_bi2rgd_blk(bi, goal); 1851 rs = rs_find(rgd, rgblk); 1852 if (rs) /* Oops, we bumped into someone's reservation */ 1853 break; 1854 if (gfs2_testbit(rgd, buffer, bi->bi_len, goal) != 1855 GFS2_BLKST_FREE) 1856 break; 1857 gfs2_setbit(rgd, bi->bi_clone, bi, goal, GFS2_BLKST_USED); 1858 (*n)++; 1859 } 1860 blk = gfs2_bi2rgd_blk(bi, blk); 1861 rgd->rd_last_alloc = blk + *n - 1; 1862 return rgd->rd_data0 + blk; 1863 } 1864 1865 /** 1866 * rgblk_free - Change alloc state of given block(s) 1867 * @sdp: the filesystem 1868 * @bstart: the start of a run of blocks to free 1869 * @blen: the length of the block run (all must lie within ONE RG!) 1870 * @new_state: GFS2_BLKST_XXX the after-allocation block state 1871 * 1872 * Returns: Resource group containing the block(s) 1873 */ 1874 1875 static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart, 1876 u32 blen, unsigned char new_state) 1877 { 1878 struct gfs2_rgrpd *rgd; 1879 struct gfs2_bitmap *bi = NULL; 1880 u32 length, rgrp_blk, buf_blk; 1881 unsigned int buf; 1882 1883 rgd = gfs2_blk2rgrpd(sdp, bstart, 1); 1884 if (!rgd) { 1885 if (gfs2_consist(sdp)) 1886 fs_err(sdp, "block = %llu\n", (unsigned long long)bstart); 1887 return NULL; 1888 } 1889 1890 length = rgd->rd_length; 1891 1892 rgrp_blk = bstart - rgd->rd_data0; 1893 1894 while (blen--) { 1895 for (buf = 0; buf < length; buf++) { 1896 bi = rgd->rd_bits + buf; 1897 if (rgrp_blk < (bi->bi_start + bi->bi_len) * GFS2_NBBY) 1898 break; 1899 } 1900 1901 gfs2_assert(rgd->rd_sbd, buf < length); 1902 1903 buf_blk = rgrp_blk - bi->bi_start * GFS2_NBBY; 1904 rgrp_blk++; 1905 1906 if (!bi->bi_clone) { 1907 bi->bi_clone = kmalloc(bi->bi_bh->b_size, 1908 GFP_NOFS | __GFP_NOFAIL); 1909 memcpy(bi->bi_clone + bi->bi_offset, 1910 bi->bi_bh->b_data + bi->bi_offset, 1911 bi->bi_len); 1912 } 1913 gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); 1914 gfs2_setbit(rgd, NULL, bi, buf_blk, new_state); 1915 } 1916 1917 return rgd; 1918 } 1919 1920 /** 1921 * gfs2_rgrp_dump - print out an rgrp 1922 * @seq: The iterator 1923 * @gl: The glock in question 1924 * 1925 */ 1926 1927 int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl) 1928 { 1929 struct gfs2_rgrpd *rgd = gl->gl_object; 1930 struct gfs2_blkreserv *trs; 1931 const struct rb_node *n; 1932 1933 if (rgd == NULL) 1934 return 0; 1935 gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u r:%u\n", 1936 (unsigned long long)rgd->rd_addr, rgd->rd_flags, 1937 rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes, 1938 rgd->rd_reserved); 1939 spin_lock(&rgd->rd_rsspin); 1940 for (n = rb_first(&rgd->rd_rstree); n; n = rb_next(&trs->rs_node)) { 1941 trs = rb_entry(n, struct gfs2_blkreserv, rs_node); 1942 dump_rs(seq, trs); 1943 } 1944 spin_unlock(&rgd->rd_rsspin); 1945 return 0; 1946 } 1947 1948 static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd) 1949 { 1950 struct gfs2_sbd *sdp = rgd->rd_sbd; 1951 fs_warn(sdp, "rgrp %llu has an error, marking it readonly until umount\n", 1952 (unsigned long long)rgd->rd_addr); 1953 fs_warn(sdp, "umount on all nodes and run fsck.gfs2 to fix the error\n"); 1954 gfs2_rgrp_dump(NULL, rgd->rd_gl); 1955 rgd->rd_flags |= GFS2_RDF_ERROR; 1956 } 1957 1958 /** 1959 * claim_reserved_blks - Claim previously reserved blocks 1960 * @ip: the inode that's claiming the reservation 1961 * @dinode: 1 if this block is a dinode block, otherwise data block 1962 * @nblocks: desired extent length 1963 * 1964 * Lay claim to previously allocated block reservation blocks. 1965 * Returns: Starting block number of the blocks claimed. 1966 * Sets *nblocks to the actual extent length allocated. 1967 */ 1968 static u64 claim_reserved_blks(struct gfs2_inode *ip, bool dinode, 1969 unsigned int *nblocks) 1970 { 1971 struct gfs2_blkreserv *rs = ip->i_res; 1972 struct gfs2_rgrpd *rgd = rs->rs_rgd; 1973 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1974 struct gfs2_bitmap *bi; 1975 u64 start_block = gfs2_rs_startblk(rs); 1976 const unsigned int elen = *nblocks; 1977 1978 /*BUG_ON(!gfs2_glock_is_locked_by_me(ip->i_gl));*/ 1979 gfs2_assert_withdraw(sdp, rgd); 1980 /*BUG_ON(!gfs2_glock_is_locked_by_me(rgd->rd_gl));*/ 1981 bi = rs->rs_bi; 1982 gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); 1983 1984 for (*nblocks = 0; *nblocks < elen && rs->rs_free; (*nblocks)++) { 1985 /* Make sure the bitmap hasn't changed */ 1986 gfs2_setbit(rgd, bi->bi_clone, bi, rs->rs_biblk, 1987 dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED); 1988 rs->rs_biblk++; 1989 rs->rs_free--; 1990 1991 BUG_ON(!rgd->rd_reserved); 1992 rgd->rd_reserved--; 1993 dinode = false; 1994 trace_gfs2_rs(ip, rs, TRACE_RS_CLAIM); 1995 } 1996 1997 if (!rs->rs_free) { 1998 struct gfs2_rgrpd *rgd = ip->i_res->rs_rgd; 1999 2000 gfs2_rs_deltree(rs); 2001 /* -nblocks because we haven't returned to do the math yet. 2002 I'm doing the math backwards to prevent negative numbers, 2003 but think of it as: 2004 if (unclaimed_blocks(rgd) - *nblocks >= RGRP_RSRV_MINBLKS */ 2005 if (unclaimed_blocks(rgd) >= RGRP_RSRV_MINBLKS + *nblocks) 2006 rg_mblk_search(rgd, ip); 2007 } 2008 return start_block; 2009 } 2010 2011 /** 2012 * gfs2_alloc_blocks - Allocate one or more blocks of data and/or a dinode 2013 * @ip: the inode to allocate the block for 2014 * @bn: Used to return the starting block number 2015 * @nblocks: requested number of blocks/extent length (value/result) 2016 * @dinode: 1 if we're allocating a dinode block, else 0 2017 * @generation: the generation number of the inode 2018 * 2019 * Returns: 0 or error 2020 */ 2021 2022 int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, 2023 bool dinode, u64 *generation) 2024 { 2025 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 2026 struct buffer_head *dibh; 2027 struct gfs2_rgrpd *rgd; 2028 unsigned int ndata; 2029 u32 goal, blk; /* block, within the rgrp scope */ 2030 u64 block; /* block, within the file system scope */ 2031 int error; 2032 struct gfs2_bitmap *bi; 2033 2034 /* Only happens if there is a bug in gfs2, return something distinctive 2035 * to ensure that it is noticed. 2036 */ 2037 if (ip->i_res->rs_requested == 0) 2038 return -ECANCELED; 2039 2040 /* Check if we have a multi-block reservation, and if so, claim the 2041 next free block from it. */ 2042 if (gfs2_rs_active(ip->i_res)) { 2043 BUG_ON(!ip->i_res->rs_free); 2044 rgd = ip->i_res->rs_rgd; 2045 block = claim_reserved_blks(ip, dinode, nblocks); 2046 } else { 2047 rgd = ip->i_rgd; 2048 2049 if (!dinode && rgrp_contains_block(rgd, ip->i_goal)) 2050 goal = ip->i_goal - rgd->rd_data0; 2051 else 2052 goal = rgd->rd_last_alloc; 2053 2054 blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, &bi); 2055 2056 /* Since all blocks are reserved in advance, this shouldn't 2057 happen */ 2058 if (blk == BFITNOENT) { 2059 printk(KERN_WARNING "BFITNOENT, nblocks=%u\n", 2060 *nblocks); 2061 printk(KERN_WARNING "FULL=%d\n", 2062 test_bit(GBF_FULL, &rgd->rd_bits->bi_flags)); 2063 goto rgrp_error; 2064 } 2065 2066 block = gfs2_alloc_extent(rgd, bi, blk, dinode, nblocks); 2067 } 2068 ndata = *nblocks; 2069 if (dinode) 2070 ndata--; 2071 2072 if (!dinode) { 2073 ip->i_goal = block + ndata - 1; 2074 error = gfs2_meta_inode_buffer(ip, &dibh); 2075 if (error == 0) { 2076 struct gfs2_dinode *di = 2077 (struct gfs2_dinode *)dibh->b_data; 2078 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 2079 di->di_goal_meta = di->di_goal_data = 2080 cpu_to_be64(ip->i_goal); 2081 brelse(dibh); 2082 } 2083 } 2084 if (rgd->rd_free < *nblocks) { 2085 printk(KERN_WARNING "nblocks=%u\n", *nblocks); 2086 goto rgrp_error; 2087 } 2088 2089 rgd->rd_free -= *nblocks; 2090 if (dinode) { 2091 rgd->rd_dinodes++; 2092 *generation = rgd->rd_igeneration++; 2093 if (*generation == 0) 2094 *generation = rgd->rd_igeneration++; 2095 } 2096 2097 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); 2098 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); 2099 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); 2100 2101 gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0); 2102 if (dinode) 2103 gfs2_trans_add_unrevoke(sdp, block, 1); 2104 2105 /* 2106 * This needs reviewing to see why we cannot do the quota change 2107 * at this point in the dinode case. 2108 */ 2109 if (ndata) 2110 gfs2_quota_change(ip, ndata, ip->i_inode.i_uid, 2111 ip->i_inode.i_gid); 2112 2113 rgd->rd_free_clone -= *nblocks; 2114 trace_gfs2_block_alloc(ip, rgd, block, *nblocks, 2115 dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED); 2116 *bn = block; 2117 return 0; 2118 2119 rgrp_error: 2120 gfs2_rgrp_error(rgd); 2121 return -EIO; 2122 } 2123 2124 /** 2125 * __gfs2_free_blocks - free a contiguous run of block(s) 2126 * @ip: the inode these blocks are being freed from 2127 * @bstart: first block of a run of contiguous blocks 2128 * @blen: the length of the block run 2129 * @meta: 1 if the blocks represent metadata 2130 * 2131 */ 2132 2133 void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta) 2134 { 2135 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 2136 struct gfs2_rgrpd *rgd; 2137 2138 rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE); 2139 if (!rgd) 2140 return; 2141 trace_gfs2_block_alloc(ip, rgd, bstart, blen, GFS2_BLKST_FREE); 2142 rgd->rd_free += blen; 2143 rgd->rd_flags &= ~GFS2_RGF_TRIMMED; 2144 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); 2145 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); 2146 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); 2147 2148 /* Directories keep their data in the metadata address space */ 2149 if (meta || ip->i_depth) 2150 gfs2_meta_wipe(ip, bstart, blen); 2151 } 2152 2153 /** 2154 * gfs2_free_meta - free a contiguous run of data block(s) 2155 * @ip: the inode these blocks are being freed from 2156 * @bstart: first block of a run of contiguous blocks 2157 * @blen: the length of the block run 2158 * 2159 */ 2160 2161 void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen) 2162 { 2163 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 2164 2165 __gfs2_free_blocks(ip, bstart, blen, 1); 2166 gfs2_statfs_change(sdp, 0, +blen, 0); 2167 gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid); 2168 } 2169 2170 void gfs2_unlink_di(struct inode *inode) 2171 { 2172 struct gfs2_inode *ip = GFS2_I(inode); 2173 struct gfs2_sbd *sdp = GFS2_SB(inode); 2174 struct gfs2_rgrpd *rgd; 2175 u64 blkno = ip->i_no_addr; 2176 2177 rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_UNLINKED); 2178 if (!rgd) 2179 return; 2180 trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED); 2181 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); 2182 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); 2183 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); 2184 update_rgrp_lvb_unlinked(rgd, 1); 2185 } 2186 2187 static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno) 2188 { 2189 struct gfs2_sbd *sdp = rgd->rd_sbd; 2190 struct gfs2_rgrpd *tmp_rgd; 2191 2192 tmp_rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_FREE); 2193 if (!tmp_rgd) 2194 return; 2195 gfs2_assert_withdraw(sdp, rgd == tmp_rgd); 2196 2197 if (!rgd->rd_dinodes) 2198 gfs2_consist_rgrpd(rgd); 2199 rgd->rd_dinodes--; 2200 rgd->rd_free++; 2201 2202 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); 2203 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); 2204 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); 2205 update_rgrp_lvb_unlinked(rgd, -1); 2206 2207 gfs2_statfs_change(sdp, 0, +1, -1); 2208 } 2209 2210 2211 void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) 2212 { 2213 gfs2_free_uninit_di(rgd, ip->i_no_addr); 2214 trace_gfs2_block_alloc(ip, rgd, ip->i_no_addr, 1, GFS2_BLKST_FREE); 2215 gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid); 2216 gfs2_meta_wipe(ip, ip->i_no_addr, 1); 2217 } 2218 2219 /** 2220 * gfs2_check_blk_type - Check the type of a block 2221 * @sdp: The superblock 2222 * @no_addr: The block number to check 2223 * @type: The block type we are looking for 2224 * 2225 * Returns: 0 if the block type matches the expected type 2226 * -ESTALE if it doesn't match 2227 * or -ve errno if something went wrong while checking 2228 */ 2229 2230 int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type) 2231 { 2232 struct gfs2_rgrpd *rgd; 2233 struct gfs2_holder rgd_gh; 2234 int error = -EINVAL; 2235 2236 rgd = gfs2_blk2rgrpd(sdp, no_addr, 1); 2237 if (!rgd) 2238 goto fail; 2239 2240 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_SHARED, 0, &rgd_gh); 2241 if (error) 2242 goto fail; 2243 2244 if (gfs2_get_block_type(rgd, no_addr) != type) 2245 error = -ESTALE; 2246 2247 gfs2_glock_dq_uninit(&rgd_gh); 2248 fail: 2249 return error; 2250 } 2251 2252 /** 2253 * gfs2_rlist_add - add a RG to a list of RGs 2254 * @ip: the inode 2255 * @rlist: the list of resource groups 2256 * @block: the block 2257 * 2258 * Figure out what RG a block belongs to and add that RG to the list 2259 * 2260 * FIXME: Don't use NOFAIL 2261 * 2262 */ 2263 2264 void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist, 2265 u64 block) 2266 { 2267 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 2268 struct gfs2_rgrpd *rgd; 2269 struct gfs2_rgrpd **tmp; 2270 unsigned int new_space; 2271 unsigned int x; 2272 2273 if (gfs2_assert_warn(sdp, !rlist->rl_ghs)) 2274 return; 2275 2276 if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, block)) 2277 rgd = ip->i_rgd; 2278 else 2279 rgd = gfs2_blk2rgrpd(sdp, block, 1); 2280 if (!rgd) { 2281 fs_err(sdp, "rlist_add: no rgrp for block %llu\n", (unsigned long long)block); 2282 return; 2283 } 2284 ip->i_rgd = rgd; 2285 2286 for (x = 0; x < rlist->rl_rgrps; x++) 2287 if (rlist->rl_rgd[x] == rgd) 2288 return; 2289 2290 if (rlist->rl_rgrps == rlist->rl_space) { 2291 new_space = rlist->rl_space + 10; 2292 2293 tmp = kcalloc(new_space, sizeof(struct gfs2_rgrpd *), 2294 GFP_NOFS | __GFP_NOFAIL); 2295 2296 if (rlist->rl_rgd) { 2297 memcpy(tmp, rlist->rl_rgd, 2298 rlist->rl_space * sizeof(struct gfs2_rgrpd *)); 2299 kfree(rlist->rl_rgd); 2300 } 2301 2302 rlist->rl_space = new_space; 2303 rlist->rl_rgd = tmp; 2304 } 2305 2306 rlist->rl_rgd[rlist->rl_rgrps++] = rgd; 2307 } 2308 2309 /** 2310 * gfs2_rlist_alloc - all RGs have been added to the rlist, now allocate 2311 * and initialize an array of glock holders for them 2312 * @rlist: the list of resource groups 2313 * @state: the lock state to acquire the RG lock in 2314 * 2315 * FIXME: Don't use NOFAIL 2316 * 2317 */ 2318 2319 void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state) 2320 { 2321 unsigned int x; 2322 2323 rlist->rl_ghs = kcalloc(rlist->rl_rgrps, sizeof(struct gfs2_holder), 2324 GFP_NOFS | __GFP_NOFAIL); 2325 for (x = 0; x < rlist->rl_rgrps; x++) 2326 gfs2_holder_init(rlist->rl_rgd[x]->rd_gl, 2327 state, 0, 2328 &rlist->rl_ghs[x]); 2329 } 2330 2331 /** 2332 * gfs2_rlist_free - free a resource group list 2333 * @list: the list of resource groups 2334 * 2335 */ 2336 2337 void gfs2_rlist_free(struct gfs2_rgrp_list *rlist) 2338 { 2339 unsigned int x; 2340 2341 kfree(rlist->rl_rgd); 2342 2343 if (rlist->rl_ghs) { 2344 for (x = 0; x < rlist->rl_rgrps; x++) 2345 gfs2_holder_uninit(&rlist->rl_ghs[x]); 2346 kfree(rlist->rl_ghs); 2347 rlist->rl_ghs = NULL; 2348 } 2349 } 2350 2351