1 /* 2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 3 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. 4 * 5 * This copyrighted material is made available to anyone wishing to use, 6 * modify, copy, or redistribute it subject to the terms and conditions 7 * of the GNU General Public License version 2. 8 */ 9 10 #include <linux/slab.h> 11 #include <linux/spinlock.h> 12 #include <linux/completion.h> 13 #include <linux/buffer_head.h> 14 #include <linux/fs.h> 15 #include <linux/gfs2_ondisk.h> 16 #include <linux/prefetch.h> 17 #include <linux/blkdev.h> 18 #include <linux/rbtree.h> 19 20 #include "gfs2.h" 21 #include "incore.h" 22 #include "glock.h" 23 #include "glops.h" 24 #include "lops.h" 25 #include "meta_io.h" 26 #include "quota.h" 27 #include "rgrp.h" 28 #include "super.h" 29 #include "trans.h" 30 #include "util.h" 31 #include "log.h" 32 #include "inode.h" 33 #include "trace_gfs2.h" 34 35 #define BFITNOENT ((u32)~0) 36 #define NO_BLOCK ((u64)~0) 37 38 #if BITS_PER_LONG == 32 39 #define LBITMASK (0x55555555UL) 40 #define LBITSKIP55 (0x55555555UL) 41 #define LBITSKIP00 (0x00000000UL) 42 #else 43 #define LBITMASK (0x5555555555555555UL) 44 #define LBITSKIP55 (0x5555555555555555UL) 45 #define LBITSKIP00 (0x0000000000000000UL) 46 #endif 47 48 /* 49 * These routines are used by the resource group routines (rgrp.c) 50 * to keep track of block allocation. Each block is represented by two 51 * bits. So, each byte represents GFS2_NBBY (i.e. 4) blocks. 52 * 53 * 0 = Free 54 * 1 = Used (not metadata) 55 * 2 = Unlinked (still in use) inode 56 * 3 = Used (metadata) 57 */ 58 59 static const char valid_change[16] = { 60 /* current */ 61 /* n */ 0, 1, 1, 1, 62 /* e */ 1, 0, 0, 0, 63 /* w */ 0, 0, 0, 1, 64 1, 0, 0, 0 65 }; 66 67 static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 minext, 68 const struct gfs2_inode *ip, bool nowrap); 69 70 71 /** 72 * gfs2_setbit - Set a bit in the bitmaps 73 * @rbm: The position of the bit to set 74 * @do_clone: Also set the clone bitmap, if it exists 75 * @new_state: the new state of the block 76 * 77 */ 78 79 static inline void gfs2_setbit(const struct gfs2_rbm *rbm, bool do_clone, 80 unsigned char new_state) 81 { 82 unsigned char *byte1, *byte2, *end, cur_state; 83 unsigned int buflen = rbm->bi->bi_len; 84 const unsigned int bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE; 85 86 byte1 = rbm->bi->bi_bh->b_data + rbm->bi->bi_offset + (rbm->offset / GFS2_NBBY); 87 end = rbm->bi->bi_bh->b_data + rbm->bi->bi_offset + buflen; 88 89 BUG_ON(byte1 >= end); 90 91 cur_state = (*byte1 >> bit) & GFS2_BIT_MASK; 92 93 if (unlikely(!valid_change[new_state * 4 + cur_state])) { 94 printk(KERN_WARNING "GFS2: buf_blk = 0x%x old_state=%d, " 95 "new_state=%d\n", rbm->offset, cur_state, new_state); 96 printk(KERN_WARNING "GFS2: rgrp=0x%llx bi_start=0x%x\n", 97 (unsigned long long)rbm->rgd->rd_addr, 98 rbm->bi->bi_start); 99 printk(KERN_WARNING "GFS2: bi_offset=0x%x bi_len=0x%x\n", 100 rbm->bi->bi_offset, rbm->bi->bi_len); 101 dump_stack(); 102 gfs2_consist_rgrpd(rbm->rgd); 103 return; 104 } 105 *byte1 ^= (cur_state ^ new_state) << bit; 106 107 if (do_clone && rbm->bi->bi_clone) { 108 byte2 = rbm->bi->bi_clone + rbm->bi->bi_offset + (rbm->offset / GFS2_NBBY); 109 cur_state = (*byte2 >> bit) & GFS2_BIT_MASK; 110 *byte2 ^= (cur_state ^ new_state) << bit; 111 } 112 } 113 114 /** 115 * gfs2_testbit - test a bit in the bitmaps 116 * @rbm: The bit to test 117 * 118 * Returns: The two bit block state of the requested bit 119 */ 120 121 static inline u8 gfs2_testbit(const struct gfs2_rbm *rbm) 122 { 123 const u8 *buffer = rbm->bi->bi_bh->b_data + rbm->bi->bi_offset; 124 const u8 *byte; 125 unsigned int bit; 126 127 byte = buffer + (rbm->offset / GFS2_NBBY); 128 bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE; 129 130 return (*byte >> bit) & GFS2_BIT_MASK; 131 } 132 133 /** 134 * gfs2_bit_search 135 * @ptr: Pointer to bitmap data 136 * @mask: Mask to use (normally 0x55555.... but adjusted for search start) 137 * @state: The state we are searching for 138 * 139 * We xor the bitmap data with a patter which is the bitwise opposite 140 * of what we are looking for, this gives rise to a pattern of ones 141 * wherever there is a match. Since we have two bits per entry, we 142 * take this pattern, shift it down by one place and then and it with 143 * the original. All the even bit positions (0,2,4, etc) then represent 144 * successful matches, so we mask with 0x55555..... to remove the unwanted 145 * odd bit positions. 146 * 147 * This allows searching of a whole u64 at once (32 blocks) with a 148 * single test (on 64 bit arches). 149 */ 150 151 static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state) 152 { 153 u64 tmp; 154 static const u64 search[] = { 155 [0] = 0xffffffffffffffffULL, 156 [1] = 0xaaaaaaaaaaaaaaaaULL, 157 [2] = 0x5555555555555555ULL, 158 [3] = 0x0000000000000000ULL, 159 }; 160 tmp = le64_to_cpu(*ptr) ^ search[state]; 161 tmp &= (tmp >> 1); 162 tmp &= mask; 163 return tmp; 164 } 165 166 /** 167 * rs_cmp - multi-block reservation range compare 168 * @blk: absolute file system block number of the new reservation 169 * @len: number of blocks in the new reservation 170 * @rs: existing reservation to compare against 171 * 172 * returns: 1 if the block range is beyond the reach of the reservation 173 * -1 if the block range is before the start of the reservation 174 * 0 if the block range overlaps with the reservation 175 */ 176 static inline int rs_cmp(u64 blk, u32 len, struct gfs2_blkreserv *rs) 177 { 178 u64 startblk = gfs2_rbm_to_block(&rs->rs_rbm); 179 180 if (blk >= startblk + rs->rs_free) 181 return 1; 182 if (blk + len - 1 < startblk) 183 return -1; 184 return 0; 185 } 186 187 /** 188 * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing 189 * a block in a given allocation state. 190 * @buf: the buffer that holds the bitmaps 191 * @len: the length (in bytes) of the buffer 192 * @goal: start search at this block's bit-pair (within @buffer) 193 * @state: GFS2_BLKST_XXX the state of the block we're looking for. 194 * 195 * Scope of @goal and returned block number is only within this bitmap buffer, 196 * not entire rgrp or filesystem. @buffer will be offset from the actual 197 * beginning of a bitmap block buffer, skipping any header structures, but 198 * headers are always a multiple of 64 bits long so that the buffer is 199 * always aligned to a 64 bit boundary. 200 * 201 * The size of the buffer is in bytes, but is it assumed that it is 202 * always ok to read a complete multiple of 64 bits at the end 203 * of the block in case the end is no aligned to a natural boundary. 204 * 205 * Return: the block number (bitmap buffer scope) that was found 206 */ 207 208 static u32 gfs2_bitfit(const u8 *buf, const unsigned int len, 209 u32 goal, u8 state) 210 { 211 u32 spoint = (goal << 1) & ((8*sizeof(u64)) - 1); 212 const __le64 *ptr = ((__le64 *)buf) + (goal >> 5); 213 const __le64 *end = (__le64 *)(buf + ALIGN(len, sizeof(u64))); 214 u64 tmp; 215 u64 mask = 0x5555555555555555ULL; 216 u32 bit; 217 218 /* Mask off bits we don't care about at the start of the search */ 219 mask <<= spoint; 220 tmp = gfs2_bit_search(ptr, mask, state); 221 ptr++; 222 while(tmp == 0 && ptr < end) { 223 tmp = gfs2_bit_search(ptr, 0x5555555555555555ULL, state); 224 ptr++; 225 } 226 /* Mask off any bits which are more than len bytes from the start */ 227 if (ptr == end && (len & (sizeof(u64) - 1))) 228 tmp &= (((u64)~0) >> (64 - 8*(len & (sizeof(u64) - 1)))); 229 /* Didn't find anything, so return */ 230 if (tmp == 0) 231 return BFITNOENT; 232 ptr--; 233 bit = __ffs64(tmp); 234 bit /= 2; /* two bits per entry in the bitmap */ 235 return (((const unsigned char *)ptr - buf) * GFS2_NBBY) + bit; 236 } 237 238 /** 239 * gfs2_rbm_from_block - Set the rbm based upon rgd and block number 240 * @rbm: The rbm with rgd already set correctly 241 * @block: The block number (filesystem relative) 242 * 243 * This sets the bi and offset members of an rbm based on a 244 * resource group and a filesystem relative block number. The 245 * resource group must be set in the rbm on entry, the bi and 246 * offset members will be set by this function. 247 * 248 * Returns: 0 on success, or an error code 249 */ 250 251 static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block) 252 { 253 u64 rblock = block - rbm->rgd->rd_data0; 254 u32 goal = (u32)rblock; 255 int x; 256 257 if (WARN_ON_ONCE(rblock > UINT_MAX)) 258 return -EINVAL; 259 if (block >= rbm->rgd->rd_data0 + rbm->rgd->rd_data) 260 return -E2BIG; 261 262 for (x = 0; x < rbm->rgd->rd_length; x++) { 263 rbm->bi = rbm->rgd->rd_bits + x; 264 if (goal < (rbm->bi->bi_start + rbm->bi->bi_len) * GFS2_NBBY) { 265 rbm->offset = goal - (rbm->bi->bi_start * GFS2_NBBY); 266 break; 267 } 268 } 269 270 return 0; 271 } 272 273 /** 274 * gfs2_unaligned_extlen - Look for free blocks which are not byte aligned 275 * @rbm: Position to search (value/result) 276 * @n_unaligned: Number of unaligned blocks to check 277 * @len: Decremented for each block found (terminate on zero) 278 * 279 * Returns: true if a non-free block is encountered 280 */ 281 282 static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *len) 283 { 284 u64 block; 285 u32 n; 286 u8 res; 287 288 for (n = 0; n < n_unaligned; n++) { 289 res = gfs2_testbit(rbm); 290 if (res != GFS2_BLKST_FREE) 291 return true; 292 (*len)--; 293 if (*len == 0) 294 return true; 295 block = gfs2_rbm_to_block(rbm); 296 if (gfs2_rbm_from_block(rbm, block + 1)) 297 return true; 298 } 299 300 return false; 301 } 302 303 /** 304 * gfs2_free_extlen - Return extent length of free blocks 305 * @rbm: Starting position 306 * @len: Max length to check 307 * 308 * Starting at the block specified by the rbm, see how many free blocks 309 * there are, not reading more than len blocks ahead. This can be done 310 * using memchr_inv when the blocks are byte aligned, but has to be done 311 * on a block by block basis in case of unaligned blocks. Also this 312 * function can cope with bitmap boundaries (although it must stop on 313 * a resource group boundary) 314 * 315 * Returns: Number of free blocks in the extent 316 */ 317 318 static u32 gfs2_free_extlen(const struct gfs2_rbm *rrbm, u32 len) 319 { 320 struct gfs2_rbm rbm = *rrbm; 321 u32 n_unaligned = rbm.offset & 3; 322 u32 size = len; 323 u32 bytes; 324 u32 chunk_size; 325 u8 *ptr, *start, *end; 326 u64 block; 327 328 if (n_unaligned && 329 gfs2_unaligned_extlen(&rbm, 4 - n_unaligned, &len)) 330 goto out; 331 332 n_unaligned = len & 3; 333 /* Start is now byte aligned */ 334 while (len > 3) { 335 start = rbm.bi->bi_bh->b_data; 336 if (rbm.bi->bi_clone) 337 start = rbm.bi->bi_clone; 338 end = start + rbm.bi->bi_bh->b_size; 339 start += rbm.bi->bi_offset; 340 BUG_ON(rbm.offset & 3); 341 start += (rbm.offset / GFS2_NBBY); 342 bytes = min_t(u32, len / GFS2_NBBY, (end - start)); 343 ptr = memchr_inv(start, 0, bytes); 344 chunk_size = ((ptr == NULL) ? bytes : (ptr - start)); 345 chunk_size *= GFS2_NBBY; 346 BUG_ON(len < chunk_size); 347 len -= chunk_size; 348 block = gfs2_rbm_to_block(&rbm); 349 gfs2_rbm_from_block(&rbm, block + chunk_size); 350 n_unaligned = 3; 351 if (ptr) 352 break; 353 n_unaligned = len & 3; 354 } 355 356 /* Deal with any bits left over at the end */ 357 if (n_unaligned) 358 gfs2_unaligned_extlen(&rbm, n_unaligned, &len); 359 out: 360 return size - len; 361 } 362 363 /** 364 * gfs2_bitcount - count the number of bits in a certain state 365 * @rgd: the resource group descriptor 366 * @buffer: the buffer that holds the bitmaps 367 * @buflen: the length (in bytes) of the buffer 368 * @state: the state of the block we're looking for 369 * 370 * Returns: The number of bits 371 */ 372 373 static u32 gfs2_bitcount(struct gfs2_rgrpd *rgd, const u8 *buffer, 374 unsigned int buflen, u8 state) 375 { 376 const u8 *byte = buffer; 377 const u8 *end = buffer + buflen; 378 const u8 state1 = state << 2; 379 const u8 state2 = state << 4; 380 const u8 state3 = state << 6; 381 u32 count = 0; 382 383 for (; byte < end; byte++) { 384 if (((*byte) & 0x03) == state) 385 count++; 386 if (((*byte) & 0x0C) == state1) 387 count++; 388 if (((*byte) & 0x30) == state2) 389 count++; 390 if (((*byte) & 0xC0) == state3) 391 count++; 392 } 393 394 return count; 395 } 396 397 /** 398 * gfs2_rgrp_verify - Verify that a resource group is consistent 399 * @rgd: the rgrp 400 * 401 */ 402 403 void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd) 404 { 405 struct gfs2_sbd *sdp = rgd->rd_sbd; 406 struct gfs2_bitmap *bi = NULL; 407 u32 length = rgd->rd_length; 408 u32 count[4], tmp; 409 int buf, x; 410 411 memset(count, 0, 4 * sizeof(u32)); 412 413 /* Count # blocks in each of 4 possible allocation states */ 414 for (buf = 0; buf < length; buf++) { 415 bi = rgd->rd_bits + buf; 416 for (x = 0; x < 4; x++) 417 count[x] += gfs2_bitcount(rgd, 418 bi->bi_bh->b_data + 419 bi->bi_offset, 420 bi->bi_len, x); 421 } 422 423 if (count[0] != rgd->rd_free) { 424 if (gfs2_consist_rgrpd(rgd)) 425 fs_err(sdp, "free data mismatch: %u != %u\n", 426 count[0], rgd->rd_free); 427 return; 428 } 429 430 tmp = rgd->rd_data - rgd->rd_free - rgd->rd_dinodes; 431 if (count[1] != tmp) { 432 if (gfs2_consist_rgrpd(rgd)) 433 fs_err(sdp, "used data mismatch: %u != %u\n", 434 count[1], tmp); 435 return; 436 } 437 438 if (count[2] + count[3] != rgd->rd_dinodes) { 439 if (gfs2_consist_rgrpd(rgd)) 440 fs_err(sdp, "used metadata mismatch: %u != %u\n", 441 count[2] + count[3], rgd->rd_dinodes); 442 return; 443 } 444 } 445 446 static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block) 447 { 448 u64 first = rgd->rd_data0; 449 u64 last = first + rgd->rd_data; 450 return first <= block && block < last; 451 } 452 453 /** 454 * gfs2_blk2rgrpd - Find resource group for a given data/meta block number 455 * @sdp: The GFS2 superblock 456 * @blk: The data block number 457 * @exact: True if this needs to be an exact match 458 * 459 * Returns: The resource group, or NULL if not found 460 */ 461 462 struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk, bool exact) 463 { 464 struct rb_node *n, *next; 465 struct gfs2_rgrpd *cur; 466 467 spin_lock(&sdp->sd_rindex_spin); 468 n = sdp->sd_rindex_tree.rb_node; 469 while (n) { 470 cur = rb_entry(n, struct gfs2_rgrpd, rd_node); 471 next = NULL; 472 if (blk < cur->rd_addr) 473 next = n->rb_left; 474 else if (blk >= cur->rd_data0 + cur->rd_data) 475 next = n->rb_right; 476 if (next == NULL) { 477 spin_unlock(&sdp->sd_rindex_spin); 478 if (exact) { 479 if (blk < cur->rd_addr) 480 return NULL; 481 if (blk >= cur->rd_data0 + cur->rd_data) 482 return NULL; 483 } 484 return cur; 485 } 486 n = next; 487 } 488 spin_unlock(&sdp->sd_rindex_spin); 489 490 return NULL; 491 } 492 493 /** 494 * gfs2_rgrpd_get_first - get the first Resource Group in the filesystem 495 * @sdp: The GFS2 superblock 496 * 497 * Returns: The first rgrp in the filesystem 498 */ 499 500 struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp) 501 { 502 const struct rb_node *n; 503 struct gfs2_rgrpd *rgd; 504 505 spin_lock(&sdp->sd_rindex_spin); 506 n = rb_first(&sdp->sd_rindex_tree); 507 rgd = rb_entry(n, struct gfs2_rgrpd, rd_node); 508 spin_unlock(&sdp->sd_rindex_spin); 509 510 return rgd; 511 } 512 513 /** 514 * gfs2_rgrpd_get_next - get the next RG 515 * @rgd: the resource group descriptor 516 * 517 * Returns: The next rgrp 518 */ 519 520 struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd) 521 { 522 struct gfs2_sbd *sdp = rgd->rd_sbd; 523 const struct rb_node *n; 524 525 spin_lock(&sdp->sd_rindex_spin); 526 n = rb_next(&rgd->rd_node); 527 if (n == NULL) 528 n = rb_first(&sdp->sd_rindex_tree); 529 530 if (unlikely(&rgd->rd_node == n)) { 531 spin_unlock(&sdp->sd_rindex_spin); 532 return NULL; 533 } 534 rgd = rb_entry(n, struct gfs2_rgrpd, rd_node); 535 spin_unlock(&sdp->sd_rindex_spin); 536 return rgd; 537 } 538 539 void gfs2_free_clones(struct gfs2_rgrpd *rgd) 540 { 541 int x; 542 543 for (x = 0; x < rgd->rd_length; x++) { 544 struct gfs2_bitmap *bi = rgd->rd_bits + x; 545 kfree(bi->bi_clone); 546 bi->bi_clone = NULL; 547 } 548 } 549 550 /** 551 * gfs2_rs_alloc - make sure we have a reservation assigned to the inode 552 * @ip: the inode for this reservation 553 */ 554 int gfs2_rs_alloc(struct gfs2_inode *ip) 555 { 556 struct gfs2_blkreserv *res; 557 558 if (ip->i_res) 559 return 0; 560 561 res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS); 562 if (!res) 563 return -ENOMEM; 564 565 RB_CLEAR_NODE(&res->rs_node); 566 567 down_write(&ip->i_rw_mutex); 568 if (ip->i_res) 569 kmem_cache_free(gfs2_rsrv_cachep, res); 570 else 571 ip->i_res = res; 572 up_write(&ip->i_rw_mutex); 573 return 0; 574 } 575 576 static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs) 577 { 578 gfs2_print_dbg(seq, " B: n:%llu s:%llu b:%u f:%u\n", 579 (unsigned long long)rs->rs_inum, 580 (unsigned long long)gfs2_rbm_to_block(&rs->rs_rbm), 581 rs->rs_rbm.offset, rs->rs_free); 582 } 583 584 /** 585 * __rs_deltree - remove a multi-block reservation from the rgd tree 586 * @rs: The reservation to remove 587 * 588 */ 589 static void __rs_deltree(struct gfs2_inode *ip, struct gfs2_blkreserv *rs) 590 { 591 struct gfs2_rgrpd *rgd; 592 593 if (!gfs2_rs_active(rs)) 594 return; 595 596 rgd = rs->rs_rbm.rgd; 597 trace_gfs2_rs(rs, TRACE_RS_TREEDEL); 598 rb_erase(&rs->rs_node, &rgd->rd_rstree); 599 RB_CLEAR_NODE(&rs->rs_node); 600 601 if (rs->rs_free) { 602 /* return reserved blocks to the rgrp and the ip */ 603 BUG_ON(rs->rs_rbm.rgd->rd_reserved < rs->rs_free); 604 rs->rs_rbm.rgd->rd_reserved -= rs->rs_free; 605 rs->rs_free = 0; 606 clear_bit(GBF_FULL, &rs->rs_rbm.bi->bi_flags); 607 smp_mb__after_clear_bit(); 608 } 609 } 610 611 /** 612 * gfs2_rs_deltree - remove a multi-block reservation from the rgd tree 613 * @rs: The reservation to remove 614 * 615 */ 616 void gfs2_rs_deltree(struct gfs2_inode *ip, struct gfs2_blkreserv *rs) 617 { 618 struct gfs2_rgrpd *rgd; 619 620 rgd = rs->rs_rbm.rgd; 621 if (rgd) { 622 spin_lock(&rgd->rd_rsspin); 623 __rs_deltree(ip, rs); 624 spin_unlock(&rgd->rd_rsspin); 625 } 626 } 627 628 /** 629 * gfs2_rs_delete - delete a multi-block reservation 630 * @ip: The inode for this reservation 631 * 632 */ 633 void gfs2_rs_delete(struct gfs2_inode *ip) 634 { 635 down_write(&ip->i_rw_mutex); 636 if (ip->i_res) { 637 gfs2_rs_deltree(ip, ip->i_res); 638 BUG_ON(ip->i_res->rs_free); 639 kmem_cache_free(gfs2_rsrv_cachep, ip->i_res); 640 ip->i_res = NULL; 641 } 642 up_write(&ip->i_rw_mutex); 643 } 644 645 /** 646 * return_all_reservations - return all reserved blocks back to the rgrp. 647 * @rgd: the rgrp that needs its space back 648 * 649 * We previously reserved a bunch of blocks for allocation. Now we need to 650 * give them back. This leave the reservation structures in tact, but removes 651 * all of their corresponding "no-fly zones". 652 */ 653 static void return_all_reservations(struct gfs2_rgrpd *rgd) 654 { 655 struct rb_node *n; 656 struct gfs2_blkreserv *rs; 657 658 spin_lock(&rgd->rd_rsspin); 659 while ((n = rb_first(&rgd->rd_rstree))) { 660 rs = rb_entry(n, struct gfs2_blkreserv, rs_node); 661 __rs_deltree(NULL, rs); 662 } 663 spin_unlock(&rgd->rd_rsspin); 664 } 665 666 void gfs2_clear_rgrpd(struct gfs2_sbd *sdp) 667 { 668 struct rb_node *n; 669 struct gfs2_rgrpd *rgd; 670 struct gfs2_glock *gl; 671 672 while ((n = rb_first(&sdp->sd_rindex_tree))) { 673 rgd = rb_entry(n, struct gfs2_rgrpd, rd_node); 674 gl = rgd->rd_gl; 675 676 rb_erase(n, &sdp->sd_rindex_tree); 677 678 if (gl) { 679 spin_lock(&gl->gl_spin); 680 gl->gl_object = NULL; 681 spin_unlock(&gl->gl_spin); 682 gfs2_glock_add_to_lru(gl); 683 gfs2_glock_put(gl); 684 } 685 686 gfs2_free_clones(rgd); 687 kfree(rgd->rd_bits); 688 return_all_reservations(rgd); 689 kmem_cache_free(gfs2_rgrpd_cachep, rgd); 690 } 691 } 692 693 static void gfs2_rindex_print(const struct gfs2_rgrpd *rgd) 694 { 695 printk(KERN_INFO " ri_addr = %llu\n", (unsigned long long)rgd->rd_addr); 696 printk(KERN_INFO " ri_length = %u\n", rgd->rd_length); 697 printk(KERN_INFO " ri_data0 = %llu\n", (unsigned long long)rgd->rd_data0); 698 printk(KERN_INFO " ri_data = %u\n", rgd->rd_data); 699 printk(KERN_INFO " ri_bitbytes = %u\n", rgd->rd_bitbytes); 700 } 701 702 /** 703 * gfs2_compute_bitstructs - Compute the bitmap sizes 704 * @rgd: The resource group descriptor 705 * 706 * Calculates bitmap descriptors, one for each block that contains bitmap data 707 * 708 * Returns: errno 709 */ 710 711 static int compute_bitstructs(struct gfs2_rgrpd *rgd) 712 { 713 struct gfs2_sbd *sdp = rgd->rd_sbd; 714 struct gfs2_bitmap *bi; 715 u32 length = rgd->rd_length; /* # blocks in hdr & bitmap */ 716 u32 bytes_left, bytes; 717 int x; 718 719 if (!length) 720 return -EINVAL; 721 722 rgd->rd_bits = kcalloc(length, sizeof(struct gfs2_bitmap), GFP_NOFS); 723 if (!rgd->rd_bits) 724 return -ENOMEM; 725 726 bytes_left = rgd->rd_bitbytes; 727 728 for (x = 0; x < length; x++) { 729 bi = rgd->rd_bits + x; 730 731 bi->bi_flags = 0; 732 /* small rgrp; bitmap stored completely in header block */ 733 if (length == 1) { 734 bytes = bytes_left; 735 bi->bi_offset = sizeof(struct gfs2_rgrp); 736 bi->bi_start = 0; 737 bi->bi_len = bytes; 738 /* header block */ 739 } else if (x == 0) { 740 bytes = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_rgrp); 741 bi->bi_offset = sizeof(struct gfs2_rgrp); 742 bi->bi_start = 0; 743 bi->bi_len = bytes; 744 /* last block */ 745 } else if (x + 1 == length) { 746 bytes = bytes_left; 747 bi->bi_offset = sizeof(struct gfs2_meta_header); 748 bi->bi_start = rgd->rd_bitbytes - bytes_left; 749 bi->bi_len = bytes; 750 /* other blocks */ 751 } else { 752 bytes = sdp->sd_sb.sb_bsize - 753 sizeof(struct gfs2_meta_header); 754 bi->bi_offset = sizeof(struct gfs2_meta_header); 755 bi->bi_start = rgd->rd_bitbytes - bytes_left; 756 bi->bi_len = bytes; 757 } 758 759 bytes_left -= bytes; 760 } 761 762 if (bytes_left) { 763 gfs2_consist_rgrpd(rgd); 764 return -EIO; 765 } 766 bi = rgd->rd_bits + (length - 1); 767 if ((bi->bi_start + bi->bi_len) * GFS2_NBBY != rgd->rd_data) { 768 if (gfs2_consist_rgrpd(rgd)) { 769 gfs2_rindex_print(rgd); 770 fs_err(sdp, "start=%u len=%u offset=%u\n", 771 bi->bi_start, bi->bi_len, bi->bi_offset); 772 } 773 return -EIO; 774 } 775 776 return 0; 777 } 778 779 /** 780 * gfs2_ri_total - Total up the file system space, according to the rindex. 781 * @sdp: the filesystem 782 * 783 */ 784 u64 gfs2_ri_total(struct gfs2_sbd *sdp) 785 { 786 u64 total_data = 0; 787 struct inode *inode = sdp->sd_rindex; 788 struct gfs2_inode *ip = GFS2_I(inode); 789 char buf[sizeof(struct gfs2_rindex)]; 790 int error, rgrps; 791 792 for (rgrps = 0;; rgrps++) { 793 loff_t pos = rgrps * sizeof(struct gfs2_rindex); 794 795 if (pos + sizeof(struct gfs2_rindex) > i_size_read(inode)) 796 break; 797 error = gfs2_internal_read(ip, buf, &pos, 798 sizeof(struct gfs2_rindex)); 799 if (error != sizeof(struct gfs2_rindex)) 800 break; 801 total_data += be32_to_cpu(((struct gfs2_rindex *)buf)->ri_data); 802 } 803 return total_data; 804 } 805 806 static int rgd_insert(struct gfs2_rgrpd *rgd) 807 { 808 struct gfs2_sbd *sdp = rgd->rd_sbd; 809 struct rb_node **newn = &sdp->sd_rindex_tree.rb_node, *parent = NULL; 810 811 /* Figure out where to put new node */ 812 while (*newn) { 813 struct gfs2_rgrpd *cur = rb_entry(*newn, struct gfs2_rgrpd, 814 rd_node); 815 816 parent = *newn; 817 if (rgd->rd_addr < cur->rd_addr) 818 newn = &((*newn)->rb_left); 819 else if (rgd->rd_addr > cur->rd_addr) 820 newn = &((*newn)->rb_right); 821 else 822 return -EEXIST; 823 } 824 825 rb_link_node(&rgd->rd_node, parent, newn); 826 rb_insert_color(&rgd->rd_node, &sdp->sd_rindex_tree); 827 sdp->sd_rgrps++; 828 return 0; 829 } 830 831 /** 832 * read_rindex_entry - Pull in a new resource index entry from the disk 833 * @ip: Pointer to the rindex inode 834 * 835 * Returns: 0 on success, > 0 on EOF, error code otherwise 836 */ 837 838 static int read_rindex_entry(struct gfs2_inode *ip) 839 { 840 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 841 loff_t pos = sdp->sd_rgrps * sizeof(struct gfs2_rindex); 842 struct gfs2_rindex buf; 843 int error; 844 struct gfs2_rgrpd *rgd; 845 846 if (pos >= i_size_read(&ip->i_inode)) 847 return 1; 848 849 error = gfs2_internal_read(ip, (char *)&buf, &pos, 850 sizeof(struct gfs2_rindex)); 851 852 if (error != sizeof(struct gfs2_rindex)) 853 return (error == 0) ? 1 : error; 854 855 rgd = kmem_cache_zalloc(gfs2_rgrpd_cachep, GFP_NOFS); 856 error = -ENOMEM; 857 if (!rgd) 858 return error; 859 860 rgd->rd_sbd = sdp; 861 rgd->rd_addr = be64_to_cpu(buf.ri_addr); 862 rgd->rd_length = be32_to_cpu(buf.ri_length); 863 rgd->rd_data0 = be64_to_cpu(buf.ri_data0); 864 rgd->rd_data = be32_to_cpu(buf.ri_data); 865 rgd->rd_bitbytes = be32_to_cpu(buf.ri_bitbytes); 866 spin_lock_init(&rgd->rd_rsspin); 867 868 error = compute_bitstructs(rgd); 869 if (error) 870 goto fail; 871 872 error = gfs2_glock_get(sdp, rgd->rd_addr, 873 &gfs2_rgrp_glops, CREATE, &rgd->rd_gl); 874 if (error) 875 goto fail; 876 877 rgd->rd_gl->gl_object = rgd; 878 rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lvb; 879 rgd->rd_flags &= ~GFS2_RDF_UPTODATE; 880 if (rgd->rd_data > sdp->sd_max_rg_data) 881 sdp->sd_max_rg_data = rgd->rd_data; 882 spin_lock(&sdp->sd_rindex_spin); 883 error = rgd_insert(rgd); 884 spin_unlock(&sdp->sd_rindex_spin); 885 if (!error) 886 return 0; 887 888 error = 0; /* someone else read in the rgrp; free it and ignore it */ 889 gfs2_glock_put(rgd->rd_gl); 890 891 fail: 892 kfree(rgd->rd_bits); 893 kmem_cache_free(gfs2_rgrpd_cachep, rgd); 894 return error; 895 } 896 897 /** 898 * gfs2_ri_update - Pull in a new resource index from the disk 899 * @ip: pointer to the rindex inode 900 * 901 * Returns: 0 on successful update, error code otherwise 902 */ 903 904 static int gfs2_ri_update(struct gfs2_inode *ip) 905 { 906 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 907 int error; 908 909 do { 910 error = read_rindex_entry(ip); 911 } while (error == 0); 912 913 if (error < 0) 914 return error; 915 916 sdp->sd_rindex_uptodate = 1; 917 return 0; 918 } 919 920 /** 921 * gfs2_rindex_update - Update the rindex if required 922 * @sdp: The GFS2 superblock 923 * 924 * We grab a lock on the rindex inode to make sure that it doesn't 925 * change whilst we are performing an operation. We keep this lock 926 * for quite long periods of time compared to other locks. This 927 * doesn't matter, since it is shared and it is very, very rarely 928 * accessed in the exclusive mode (i.e. only when expanding the filesystem). 929 * 930 * This makes sure that we're using the latest copy of the resource index 931 * special file, which might have been updated if someone expanded the 932 * filesystem (via gfs2_grow utility), which adds new resource groups. 933 * 934 * Returns: 0 on succeess, error code otherwise 935 */ 936 937 int gfs2_rindex_update(struct gfs2_sbd *sdp) 938 { 939 struct gfs2_inode *ip = GFS2_I(sdp->sd_rindex); 940 struct gfs2_glock *gl = ip->i_gl; 941 struct gfs2_holder ri_gh; 942 int error = 0; 943 int unlock_required = 0; 944 945 /* Read new copy from disk if we don't have the latest */ 946 if (!sdp->sd_rindex_uptodate) { 947 if (!gfs2_glock_is_locked_by_me(gl)) { 948 error = gfs2_glock_nq_init(gl, LM_ST_SHARED, 0, &ri_gh); 949 if (error) 950 return error; 951 unlock_required = 1; 952 } 953 if (!sdp->sd_rindex_uptodate) 954 error = gfs2_ri_update(ip); 955 if (unlock_required) 956 gfs2_glock_dq_uninit(&ri_gh); 957 } 958 959 return error; 960 } 961 962 static void gfs2_rgrp_in(struct gfs2_rgrpd *rgd, const void *buf) 963 { 964 const struct gfs2_rgrp *str = buf; 965 u32 rg_flags; 966 967 rg_flags = be32_to_cpu(str->rg_flags); 968 rg_flags &= ~GFS2_RDF_MASK; 969 rgd->rd_flags &= GFS2_RDF_MASK; 970 rgd->rd_flags |= rg_flags; 971 rgd->rd_free = be32_to_cpu(str->rg_free); 972 rgd->rd_dinodes = be32_to_cpu(str->rg_dinodes); 973 rgd->rd_igeneration = be64_to_cpu(str->rg_igeneration); 974 } 975 976 static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf) 977 { 978 struct gfs2_rgrp *str = buf; 979 980 str->rg_flags = cpu_to_be32(rgd->rd_flags & ~GFS2_RDF_MASK); 981 str->rg_free = cpu_to_be32(rgd->rd_free); 982 str->rg_dinodes = cpu_to_be32(rgd->rd_dinodes); 983 str->__pad = cpu_to_be32(0); 984 str->rg_igeneration = cpu_to_be64(rgd->rd_igeneration); 985 memset(&str->rg_reserved, 0, sizeof(str->rg_reserved)); 986 } 987 988 static int gfs2_rgrp_lvb_valid(struct gfs2_rgrpd *rgd) 989 { 990 struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl; 991 struct gfs2_rgrp *str = (struct gfs2_rgrp *)rgd->rd_bits[0].bi_bh->b_data; 992 993 if (rgl->rl_flags != str->rg_flags || rgl->rl_free != str->rg_free || 994 rgl->rl_dinodes != str->rg_dinodes || 995 rgl->rl_igeneration != str->rg_igeneration) 996 return 0; 997 return 1; 998 } 999 1000 static void gfs2_rgrp_ondisk2lvb(struct gfs2_rgrp_lvb *rgl, const void *buf) 1001 { 1002 const struct gfs2_rgrp *str = buf; 1003 1004 rgl->rl_magic = cpu_to_be32(GFS2_MAGIC); 1005 rgl->rl_flags = str->rg_flags; 1006 rgl->rl_free = str->rg_free; 1007 rgl->rl_dinodes = str->rg_dinodes; 1008 rgl->rl_igeneration = str->rg_igeneration; 1009 rgl->__pad = 0UL; 1010 } 1011 1012 static void update_rgrp_lvb_unlinked(struct gfs2_rgrpd *rgd, u32 change) 1013 { 1014 struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl; 1015 u32 unlinked = be32_to_cpu(rgl->rl_unlinked) + change; 1016 rgl->rl_unlinked = cpu_to_be32(unlinked); 1017 } 1018 1019 static u32 count_unlinked(struct gfs2_rgrpd *rgd) 1020 { 1021 struct gfs2_bitmap *bi; 1022 const u32 length = rgd->rd_length; 1023 const u8 *buffer = NULL; 1024 u32 i, goal, count = 0; 1025 1026 for (i = 0, bi = rgd->rd_bits; i < length; i++, bi++) { 1027 goal = 0; 1028 buffer = bi->bi_bh->b_data + bi->bi_offset; 1029 WARN_ON(!buffer_uptodate(bi->bi_bh)); 1030 while (goal < bi->bi_len * GFS2_NBBY) { 1031 goal = gfs2_bitfit(buffer, bi->bi_len, goal, 1032 GFS2_BLKST_UNLINKED); 1033 if (goal == BFITNOENT) 1034 break; 1035 count++; 1036 goal++; 1037 } 1038 } 1039 1040 return count; 1041 } 1042 1043 1044 /** 1045 * gfs2_rgrp_bh_get - Read in a RG's header and bitmaps 1046 * @rgd: the struct gfs2_rgrpd describing the RG to read in 1047 * 1048 * Read in all of a Resource Group's header and bitmap blocks. 1049 * Caller must eventually call gfs2_rgrp_relse() to free the bitmaps. 1050 * 1051 * Returns: errno 1052 */ 1053 1054 int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd) 1055 { 1056 struct gfs2_sbd *sdp = rgd->rd_sbd; 1057 struct gfs2_glock *gl = rgd->rd_gl; 1058 unsigned int length = rgd->rd_length; 1059 struct gfs2_bitmap *bi; 1060 unsigned int x, y; 1061 int error; 1062 1063 if (rgd->rd_bits[0].bi_bh != NULL) 1064 return 0; 1065 1066 for (x = 0; x < length; x++) { 1067 bi = rgd->rd_bits + x; 1068 error = gfs2_meta_read(gl, rgd->rd_addr + x, 0, &bi->bi_bh); 1069 if (error) 1070 goto fail; 1071 } 1072 1073 for (y = length; y--;) { 1074 bi = rgd->rd_bits + y; 1075 error = gfs2_meta_wait(sdp, bi->bi_bh); 1076 if (error) 1077 goto fail; 1078 if (gfs2_metatype_check(sdp, bi->bi_bh, y ? GFS2_METATYPE_RB : 1079 GFS2_METATYPE_RG)) { 1080 error = -EIO; 1081 goto fail; 1082 } 1083 } 1084 1085 if (!(rgd->rd_flags & GFS2_RDF_UPTODATE)) { 1086 for (x = 0; x < length; x++) 1087 clear_bit(GBF_FULL, &rgd->rd_bits[x].bi_flags); 1088 gfs2_rgrp_in(rgd, (rgd->rd_bits[0].bi_bh)->b_data); 1089 rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK); 1090 rgd->rd_free_clone = rgd->rd_free; 1091 } 1092 if (be32_to_cpu(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) { 1093 rgd->rd_rgl->rl_unlinked = cpu_to_be32(count_unlinked(rgd)); 1094 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, 1095 rgd->rd_bits[0].bi_bh->b_data); 1096 } 1097 else if (sdp->sd_args.ar_rgrplvb) { 1098 if (!gfs2_rgrp_lvb_valid(rgd)){ 1099 gfs2_consist_rgrpd(rgd); 1100 error = -EIO; 1101 goto fail; 1102 } 1103 if (rgd->rd_rgl->rl_unlinked == 0) 1104 rgd->rd_flags &= ~GFS2_RDF_CHECK; 1105 } 1106 return 0; 1107 1108 fail: 1109 while (x--) { 1110 bi = rgd->rd_bits + x; 1111 brelse(bi->bi_bh); 1112 bi->bi_bh = NULL; 1113 gfs2_assert_warn(sdp, !bi->bi_clone); 1114 } 1115 1116 return error; 1117 } 1118 1119 int update_rgrp_lvb(struct gfs2_rgrpd *rgd) 1120 { 1121 u32 rl_flags; 1122 1123 if (rgd->rd_flags & GFS2_RDF_UPTODATE) 1124 return 0; 1125 1126 if (be32_to_cpu(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) 1127 return gfs2_rgrp_bh_get(rgd); 1128 1129 rl_flags = be32_to_cpu(rgd->rd_rgl->rl_flags); 1130 rl_flags &= ~GFS2_RDF_MASK; 1131 rgd->rd_flags &= GFS2_RDF_MASK; 1132 rgd->rd_flags |= (rl_flags | GFS2_RDF_UPTODATE | GFS2_RDF_CHECK); 1133 if (rgd->rd_rgl->rl_unlinked == 0) 1134 rgd->rd_flags &= ~GFS2_RDF_CHECK; 1135 rgd->rd_free = be32_to_cpu(rgd->rd_rgl->rl_free); 1136 rgd->rd_free_clone = rgd->rd_free; 1137 rgd->rd_dinodes = be32_to_cpu(rgd->rd_rgl->rl_dinodes); 1138 rgd->rd_igeneration = be64_to_cpu(rgd->rd_rgl->rl_igeneration); 1139 return 0; 1140 } 1141 1142 int gfs2_rgrp_go_lock(struct gfs2_holder *gh) 1143 { 1144 struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object; 1145 struct gfs2_sbd *sdp = rgd->rd_sbd; 1146 1147 if (gh->gh_flags & GL_SKIP && sdp->sd_args.ar_rgrplvb) 1148 return 0; 1149 return gfs2_rgrp_bh_get((struct gfs2_rgrpd *)gh->gh_gl->gl_object); 1150 } 1151 1152 /** 1153 * gfs2_rgrp_go_unlock - Release RG bitmaps read in with gfs2_rgrp_bh_get() 1154 * @gh: The glock holder for the resource group 1155 * 1156 */ 1157 1158 void gfs2_rgrp_go_unlock(struct gfs2_holder *gh) 1159 { 1160 struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object; 1161 int x, length = rgd->rd_length; 1162 1163 for (x = 0; x < length; x++) { 1164 struct gfs2_bitmap *bi = rgd->rd_bits + x; 1165 if (bi->bi_bh) { 1166 brelse(bi->bi_bh); 1167 bi->bi_bh = NULL; 1168 } 1169 } 1170 1171 } 1172 1173 int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, 1174 struct buffer_head *bh, 1175 const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed) 1176 { 1177 struct super_block *sb = sdp->sd_vfs; 1178 struct block_device *bdev = sb->s_bdev; 1179 const unsigned int sects_per_blk = sdp->sd_sb.sb_bsize / 1180 bdev_logical_block_size(sb->s_bdev); 1181 u64 blk; 1182 sector_t start = 0; 1183 sector_t nr_sects = 0; 1184 int rv; 1185 unsigned int x; 1186 u32 trimmed = 0; 1187 u8 diff; 1188 1189 for (x = 0; x < bi->bi_len; x++) { 1190 const u8 *clone = bi->bi_clone ? bi->bi_clone : bi->bi_bh->b_data; 1191 clone += bi->bi_offset; 1192 clone += x; 1193 if (bh) { 1194 const u8 *orig = bh->b_data + bi->bi_offset + x; 1195 diff = ~(*orig | (*orig >> 1)) & (*clone | (*clone >> 1)); 1196 } else { 1197 diff = ~(*clone | (*clone >> 1)); 1198 } 1199 diff &= 0x55; 1200 if (diff == 0) 1201 continue; 1202 blk = offset + ((bi->bi_start + x) * GFS2_NBBY); 1203 blk *= sects_per_blk; /* convert to sectors */ 1204 while(diff) { 1205 if (diff & 1) { 1206 if (nr_sects == 0) 1207 goto start_new_extent; 1208 if ((start + nr_sects) != blk) { 1209 if (nr_sects >= minlen) { 1210 rv = blkdev_issue_discard(bdev, 1211 start, nr_sects, 1212 GFP_NOFS, 0); 1213 if (rv) 1214 goto fail; 1215 trimmed += nr_sects; 1216 } 1217 nr_sects = 0; 1218 start_new_extent: 1219 start = blk; 1220 } 1221 nr_sects += sects_per_blk; 1222 } 1223 diff >>= 2; 1224 blk += sects_per_blk; 1225 } 1226 } 1227 if (nr_sects >= minlen) { 1228 rv = blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS, 0); 1229 if (rv) 1230 goto fail; 1231 trimmed += nr_sects; 1232 } 1233 if (ptrimmed) 1234 *ptrimmed = trimmed; 1235 return 0; 1236 1237 fail: 1238 if (sdp->sd_args.ar_discard) 1239 fs_warn(sdp, "error %d on discard request, turning discards off for this filesystem", rv); 1240 sdp->sd_args.ar_discard = 0; 1241 return -EIO; 1242 } 1243 1244 /** 1245 * gfs2_fitrim - Generate discard requests for unused bits of the filesystem 1246 * @filp: Any file on the filesystem 1247 * @argp: Pointer to the arguments (also used to pass result) 1248 * 1249 * Returns: 0 on success, otherwise error code 1250 */ 1251 1252 int gfs2_fitrim(struct file *filp, void __user *argp) 1253 { 1254 struct inode *inode = filp->f_dentry->d_inode; 1255 struct gfs2_sbd *sdp = GFS2_SB(inode); 1256 struct request_queue *q = bdev_get_queue(sdp->sd_vfs->s_bdev); 1257 struct buffer_head *bh; 1258 struct gfs2_rgrpd *rgd; 1259 struct gfs2_rgrpd *rgd_end; 1260 struct gfs2_holder gh; 1261 struct fstrim_range r; 1262 int ret = 0; 1263 u64 amt; 1264 u64 trimmed = 0; 1265 u64 start, end, minlen; 1266 unsigned int x; 1267 unsigned bs_shift = sdp->sd_sb.sb_bsize_shift; 1268 1269 if (!capable(CAP_SYS_ADMIN)) 1270 return -EPERM; 1271 1272 if (!blk_queue_discard(q)) 1273 return -EOPNOTSUPP; 1274 1275 if (copy_from_user(&r, argp, sizeof(r))) 1276 return -EFAULT; 1277 1278 ret = gfs2_rindex_update(sdp); 1279 if (ret) 1280 return ret; 1281 1282 start = r.start >> bs_shift; 1283 end = start + (r.len >> bs_shift); 1284 minlen = max_t(u64, r.minlen, 1285 q->limits.discard_granularity) >> bs_shift; 1286 1287 rgd = gfs2_blk2rgrpd(sdp, start, 0); 1288 rgd_end = gfs2_blk2rgrpd(sdp, end - 1, 0); 1289 1290 if (end <= start || 1291 minlen > sdp->sd_max_rg_data || 1292 start > rgd_end->rd_data0 + rgd_end->rd_data) 1293 return -EINVAL; 1294 1295 while (1) { 1296 1297 ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &gh); 1298 if (ret) 1299 goto out; 1300 1301 if (!(rgd->rd_flags & GFS2_RGF_TRIMMED)) { 1302 /* Trim each bitmap in the rgrp */ 1303 for (x = 0; x < rgd->rd_length; x++) { 1304 struct gfs2_bitmap *bi = rgd->rd_bits + x; 1305 ret = gfs2_rgrp_send_discards(sdp, 1306 rgd->rd_data0, NULL, bi, minlen, 1307 &amt); 1308 if (ret) { 1309 gfs2_glock_dq_uninit(&gh); 1310 goto out; 1311 } 1312 trimmed += amt; 1313 } 1314 1315 /* Mark rgrp as having been trimmed */ 1316 ret = gfs2_trans_begin(sdp, RES_RG_HDR, 0); 1317 if (ret == 0) { 1318 bh = rgd->rd_bits[0].bi_bh; 1319 rgd->rd_flags |= GFS2_RGF_TRIMMED; 1320 gfs2_trans_add_bh(rgd->rd_gl, bh, 1); 1321 gfs2_rgrp_out(rgd, bh->b_data); 1322 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, bh->b_data); 1323 gfs2_trans_end(sdp); 1324 } 1325 } 1326 gfs2_glock_dq_uninit(&gh); 1327 1328 if (rgd == rgd_end) 1329 break; 1330 1331 rgd = gfs2_rgrpd_get_next(rgd); 1332 } 1333 1334 out: 1335 r.len = trimmed << 9; 1336 if (copy_to_user(argp, &r, sizeof(r))) 1337 return -EFAULT; 1338 1339 return ret; 1340 } 1341 1342 /** 1343 * rs_insert - insert a new multi-block reservation into the rgrp's rb_tree 1344 * @ip: the inode structure 1345 * 1346 */ 1347 static void rs_insert(struct gfs2_inode *ip) 1348 { 1349 struct rb_node **newn, *parent = NULL; 1350 int rc; 1351 struct gfs2_blkreserv *rs = ip->i_res; 1352 struct gfs2_rgrpd *rgd = rs->rs_rbm.rgd; 1353 u64 fsblock = gfs2_rbm_to_block(&rs->rs_rbm); 1354 1355 BUG_ON(gfs2_rs_active(rs)); 1356 1357 spin_lock(&rgd->rd_rsspin); 1358 newn = &rgd->rd_rstree.rb_node; 1359 while (*newn) { 1360 struct gfs2_blkreserv *cur = 1361 rb_entry(*newn, struct gfs2_blkreserv, rs_node); 1362 1363 parent = *newn; 1364 rc = rs_cmp(fsblock, rs->rs_free, cur); 1365 if (rc > 0) 1366 newn = &((*newn)->rb_right); 1367 else if (rc < 0) 1368 newn = &((*newn)->rb_left); 1369 else { 1370 spin_unlock(&rgd->rd_rsspin); 1371 WARN_ON(1); 1372 return; 1373 } 1374 } 1375 1376 rb_link_node(&rs->rs_node, parent, newn); 1377 rb_insert_color(&rs->rs_node, &rgd->rd_rstree); 1378 1379 /* Do our rgrp accounting for the reservation */ 1380 rgd->rd_reserved += rs->rs_free; /* blocks reserved */ 1381 spin_unlock(&rgd->rd_rsspin); 1382 trace_gfs2_rs(rs, TRACE_RS_INSERT); 1383 } 1384 1385 /** 1386 * rg_mblk_search - find a group of multiple free blocks to form a reservation 1387 * @rgd: the resource group descriptor 1388 * @ip: pointer to the inode for which we're reserving blocks 1389 * @requested: number of blocks required for this allocation 1390 * 1391 */ 1392 1393 static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, 1394 unsigned requested) 1395 { 1396 struct gfs2_rbm rbm = { .rgd = rgd, }; 1397 u64 goal; 1398 struct gfs2_blkreserv *rs = ip->i_res; 1399 u32 extlen; 1400 u32 free_blocks = rgd->rd_free_clone - rgd->rd_reserved; 1401 int ret; 1402 1403 extlen = max_t(u32, atomic_read(&rs->rs_sizehint), requested); 1404 extlen = clamp(extlen, RGRP_RSRV_MINBLKS, free_blocks); 1405 if ((rgd->rd_free_clone < rgd->rd_reserved) || (free_blocks < extlen)) 1406 return; 1407 1408 /* Find bitmap block that contains bits for goal block */ 1409 if (rgrp_contains_block(rgd, ip->i_goal)) 1410 goal = ip->i_goal; 1411 else 1412 goal = rgd->rd_last_alloc + rgd->rd_data0; 1413 1414 if (WARN_ON(gfs2_rbm_from_block(&rbm, goal))) 1415 return; 1416 1417 ret = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, extlen, ip, true); 1418 if (ret == 0) { 1419 rs->rs_rbm = rbm; 1420 rs->rs_free = extlen; 1421 rs->rs_inum = ip->i_no_addr; 1422 rs_insert(ip); 1423 } 1424 } 1425 1426 /** 1427 * gfs2_next_unreserved_block - Return next block that is not reserved 1428 * @rgd: The resource group 1429 * @block: The starting block 1430 * @length: The required length 1431 * @ip: Ignore any reservations for this inode 1432 * 1433 * If the block does not appear in any reservation, then return the 1434 * block number unchanged. If it does appear in the reservation, then 1435 * keep looking through the tree of reservations in order to find the 1436 * first block number which is not reserved. 1437 */ 1438 1439 static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block, 1440 u32 length, 1441 const struct gfs2_inode *ip) 1442 { 1443 struct gfs2_blkreserv *rs; 1444 struct rb_node *n; 1445 int rc; 1446 1447 spin_lock(&rgd->rd_rsspin); 1448 n = rgd->rd_rstree.rb_node; 1449 while (n) { 1450 rs = rb_entry(n, struct gfs2_blkreserv, rs_node); 1451 rc = rs_cmp(block, length, rs); 1452 if (rc < 0) 1453 n = n->rb_left; 1454 else if (rc > 0) 1455 n = n->rb_right; 1456 else 1457 break; 1458 } 1459 1460 if (n) { 1461 while ((rs_cmp(block, length, rs) == 0) && (ip->i_res != rs)) { 1462 block = gfs2_rbm_to_block(&rs->rs_rbm) + rs->rs_free; 1463 n = n->rb_right; 1464 if (n == NULL) 1465 break; 1466 rs = rb_entry(n, struct gfs2_blkreserv, rs_node); 1467 } 1468 } 1469 1470 spin_unlock(&rgd->rd_rsspin); 1471 return block; 1472 } 1473 1474 /** 1475 * gfs2_reservation_check_and_update - Check for reservations during block alloc 1476 * @rbm: The current position in the resource group 1477 * @ip: The inode for which we are searching for blocks 1478 * @minext: The minimum extent length 1479 * 1480 * This checks the current position in the rgrp to see whether there is 1481 * a reservation covering this block. If not then this function is a 1482 * no-op. If there is, then the position is moved to the end of the 1483 * contiguous reservation(s) so that we are pointing at the first 1484 * non-reserved block. 1485 * 1486 * Returns: 0 if no reservation, 1 if @rbm has changed, otherwise an error 1487 */ 1488 1489 static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm, 1490 const struct gfs2_inode *ip, 1491 u32 minext) 1492 { 1493 u64 block = gfs2_rbm_to_block(rbm); 1494 u32 extlen = 1; 1495 u64 nblock; 1496 int ret; 1497 1498 /* 1499 * If we have a minimum extent length, then skip over any extent 1500 * which is less than the min extent length in size. 1501 */ 1502 if (minext) { 1503 extlen = gfs2_free_extlen(rbm, minext); 1504 nblock = block + extlen; 1505 if (extlen < minext) 1506 goto fail; 1507 } 1508 1509 /* 1510 * Check the extent which has been found against the reservations 1511 * and skip if parts of it are already reserved 1512 */ 1513 nblock = gfs2_next_unreserved_block(rbm->rgd, block, extlen, ip); 1514 if (nblock == block) 1515 return 0; 1516 fail: 1517 ret = gfs2_rbm_from_block(rbm, nblock); 1518 if (ret < 0) 1519 return ret; 1520 return 1; 1521 } 1522 1523 /** 1524 * gfs2_rbm_find - Look for blocks of a particular state 1525 * @rbm: Value/result starting position and final position 1526 * @state: The state which we want to find 1527 * @minext: The requested extent length (0 for a single block) 1528 * @ip: If set, check for reservations 1529 * @nowrap: Stop looking at the end of the rgrp, rather than wrapping 1530 * around until we've reached the starting point. 1531 * 1532 * Side effects: 1533 * - If looking for free blocks, we set GBF_FULL on each bitmap which 1534 * has no free blocks in it. 1535 * 1536 * Returns: 0 on success, -ENOSPC if there is no block of the requested state 1537 */ 1538 1539 static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 minext, 1540 const struct gfs2_inode *ip, bool nowrap) 1541 { 1542 struct buffer_head *bh; 1543 struct gfs2_bitmap *initial_bi; 1544 u32 initial_offset; 1545 u32 offset; 1546 u8 *buffer; 1547 int index; 1548 int n = 0; 1549 int iters = rbm->rgd->rd_length; 1550 int ret; 1551 1552 /* If we are not starting at the beginning of a bitmap, then we 1553 * need to add one to the bitmap count to ensure that we search 1554 * the starting bitmap twice. 1555 */ 1556 if (rbm->offset != 0) 1557 iters++; 1558 1559 while(1) { 1560 if (test_bit(GBF_FULL, &rbm->bi->bi_flags) && 1561 (state == GFS2_BLKST_FREE)) 1562 goto next_bitmap; 1563 1564 bh = rbm->bi->bi_bh; 1565 buffer = bh->b_data + rbm->bi->bi_offset; 1566 WARN_ON(!buffer_uptodate(bh)); 1567 if (state != GFS2_BLKST_UNLINKED && rbm->bi->bi_clone) 1568 buffer = rbm->bi->bi_clone + rbm->bi->bi_offset; 1569 initial_offset = rbm->offset; 1570 offset = gfs2_bitfit(buffer, rbm->bi->bi_len, rbm->offset, state); 1571 if (offset == BFITNOENT) 1572 goto bitmap_full; 1573 rbm->offset = offset; 1574 if (ip == NULL) 1575 return 0; 1576 1577 initial_bi = rbm->bi; 1578 ret = gfs2_reservation_check_and_update(rbm, ip, minext); 1579 if (ret == 0) 1580 return 0; 1581 if (ret > 0) { 1582 n += (rbm->bi - initial_bi); 1583 goto next_iter; 1584 } 1585 if (ret == -E2BIG) { 1586 index = 0; 1587 rbm->offset = 0; 1588 n += (rbm->bi - initial_bi); 1589 goto res_covered_end_of_rgrp; 1590 } 1591 return ret; 1592 1593 bitmap_full: /* Mark bitmap as full and fall through */ 1594 if ((state == GFS2_BLKST_FREE) && initial_offset == 0) 1595 set_bit(GBF_FULL, &rbm->bi->bi_flags); 1596 1597 next_bitmap: /* Find next bitmap in the rgrp */ 1598 rbm->offset = 0; 1599 index = rbm->bi - rbm->rgd->rd_bits; 1600 index++; 1601 if (index == rbm->rgd->rd_length) 1602 index = 0; 1603 res_covered_end_of_rgrp: 1604 rbm->bi = &rbm->rgd->rd_bits[index]; 1605 if ((index == 0) && nowrap) 1606 break; 1607 n++; 1608 next_iter: 1609 if (n >= iters) 1610 break; 1611 } 1612 1613 return -ENOSPC; 1614 } 1615 1616 /** 1617 * try_rgrp_unlink - Look for any unlinked, allocated, but unused inodes 1618 * @rgd: The rgrp 1619 * @last_unlinked: block address of the last dinode we unlinked 1620 * @skip: block address we should explicitly not unlink 1621 * 1622 * Returns: 0 if no error 1623 * The inode, if one has been found, in inode. 1624 */ 1625 1626 static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip) 1627 { 1628 u64 block; 1629 struct gfs2_sbd *sdp = rgd->rd_sbd; 1630 struct gfs2_glock *gl; 1631 struct gfs2_inode *ip; 1632 int error; 1633 int found = 0; 1634 struct gfs2_rbm rbm = { .rgd = rgd, .bi = rgd->rd_bits, .offset = 0 }; 1635 1636 while (1) { 1637 down_write(&sdp->sd_log_flush_lock); 1638 error = gfs2_rbm_find(&rbm, GFS2_BLKST_UNLINKED, 0, NULL, true); 1639 up_write(&sdp->sd_log_flush_lock); 1640 if (error == -ENOSPC) 1641 break; 1642 if (WARN_ON_ONCE(error)) 1643 break; 1644 1645 block = gfs2_rbm_to_block(&rbm); 1646 if (gfs2_rbm_from_block(&rbm, block + 1)) 1647 break; 1648 if (*last_unlinked != NO_BLOCK && block <= *last_unlinked) 1649 continue; 1650 if (block == skip) 1651 continue; 1652 *last_unlinked = block; 1653 1654 error = gfs2_glock_get(sdp, block, &gfs2_inode_glops, CREATE, &gl); 1655 if (error) 1656 continue; 1657 1658 /* If the inode is already in cache, we can ignore it here 1659 * because the existing inode disposal code will deal with 1660 * it when all refs have gone away. Accessing gl_object like 1661 * this is not safe in general. Here it is ok because we do 1662 * not dereference the pointer, and we only need an approx 1663 * answer to whether it is NULL or not. 1664 */ 1665 ip = gl->gl_object; 1666 1667 if (ip || queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0) 1668 gfs2_glock_put(gl); 1669 else 1670 found++; 1671 1672 /* Limit reclaim to sensible number of tasks */ 1673 if (found > NR_CPUS) 1674 return; 1675 } 1676 1677 rgd->rd_flags &= ~GFS2_RDF_CHECK; 1678 return; 1679 } 1680 1681 static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *begin) 1682 { 1683 struct gfs2_rgrpd *rgd = *pos; 1684 1685 rgd = gfs2_rgrpd_get_next(rgd); 1686 if (rgd == NULL) 1687 rgd = gfs2_rgrpd_get_next(NULL); 1688 *pos = rgd; 1689 if (rgd != begin) /* If we didn't wrap */ 1690 return true; 1691 return false; 1692 } 1693 1694 /** 1695 * gfs2_inplace_reserve - Reserve space in the filesystem 1696 * @ip: the inode to reserve space for 1697 * @requested: the number of blocks to be reserved 1698 * 1699 * Returns: errno 1700 */ 1701 1702 int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) 1703 { 1704 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1705 struct gfs2_rgrpd *begin = NULL; 1706 struct gfs2_blkreserv *rs = ip->i_res; 1707 int error = 0, rg_locked, flags = LM_FLAG_TRY; 1708 u64 last_unlinked = NO_BLOCK; 1709 int loops = 0; 1710 1711 if (sdp->sd_args.ar_rgrplvb) 1712 flags |= GL_SKIP; 1713 if (gfs2_assert_warn(sdp, requested)) 1714 return -EINVAL; 1715 if (gfs2_rs_active(rs)) { 1716 begin = rs->rs_rbm.rgd; 1717 flags = 0; /* Yoda: Do or do not. There is no try */ 1718 } else if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) { 1719 rs->rs_rbm.rgd = begin = ip->i_rgd; 1720 } else { 1721 rs->rs_rbm.rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1); 1722 } 1723 if (rs->rs_rbm.rgd == NULL) 1724 return -EBADSLT; 1725 1726 while (loops < 3) { 1727 rg_locked = 1; 1728 1729 if (!gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) { 1730 rg_locked = 0; 1731 error = gfs2_glock_nq_init(rs->rs_rbm.rgd->rd_gl, 1732 LM_ST_EXCLUSIVE, flags, 1733 &rs->rs_rgd_gh); 1734 if (error == GLR_TRYFAILED) 1735 goto next_rgrp; 1736 if (unlikely(error)) 1737 return error; 1738 if (sdp->sd_args.ar_rgrplvb) { 1739 error = update_rgrp_lvb(rs->rs_rbm.rgd); 1740 if (unlikely(error)) { 1741 gfs2_glock_dq_uninit(&rs->rs_rgd_gh); 1742 return error; 1743 } 1744 } 1745 } 1746 1747 /* Skip unuseable resource groups */ 1748 if (rs->rs_rbm.rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR)) 1749 goto skip_rgrp; 1750 1751 if (sdp->sd_args.ar_rgrplvb) 1752 gfs2_rgrp_bh_get(rs->rs_rbm.rgd); 1753 1754 /* Get a reservation if we don't already have one */ 1755 if (!gfs2_rs_active(rs)) 1756 rg_mblk_search(rs->rs_rbm.rgd, ip, requested); 1757 1758 /* Skip rgrps when we can't get a reservation on first pass */ 1759 if (!gfs2_rs_active(rs) && (loops < 1)) 1760 goto check_rgrp; 1761 1762 /* If rgrp has enough free space, use it */ 1763 if (rs->rs_rbm.rgd->rd_free_clone >= requested) { 1764 ip->i_rgd = rs->rs_rbm.rgd; 1765 return 0; 1766 } 1767 1768 /* Drop reservation, if we couldn't use reserved rgrp */ 1769 if (gfs2_rs_active(rs)) 1770 gfs2_rs_deltree(ip, rs); 1771 check_rgrp: 1772 /* Check for unlinked inodes which can be reclaimed */ 1773 if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK) 1774 try_rgrp_unlink(rs->rs_rbm.rgd, &last_unlinked, 1775 ip->i_no_addr); 1776 skip_rgrp: 1777 /* Unlock rgrp if required */ 1778 if (!rg_locked) 1779 gfs2_glock_dq_uninit(&rs->rs_rgd_gh); 1780 next_rgrp: 1781 /* Find the next rgrp, and continue looking */ 1782 if (gfs2_select_rgrp(&rs->rs_rbm.rgd, begin)) 1783 continue; 1784 1785 /* If we've scanned all the rgrps, but found no free blocks 1786 * then this checks for some less likely conditions before 1787 * trying again. 1788 */ 1789 flags &= ~LM_FLAG_TRY; 1790 loops++; 1791 /* Check that fs hasn't grown if writing to rindex */ 1792 if (ip == GFS2_I(sdp->sd_rindex) && !sdp->sd_rindex_uptodate) { 1793 error = gfs2_ri_update(ip); 1794 if (error) 1795 return error; 1796 } 1797 /* Flushing the log may release space */ 1798 if (loops == 2) 1799 gfs2_log_flush(sdp, NULL); 1800 } 1801 1802 return -ENOSPC; 1803 } 1804 1805 /** 1806 * gfs2_inplace_release - release an inplace reservation 1807 * @ip: the inode the reservation was taken out on 1808 * 1809 * Release a reservation made by gfs2_inplace_reserve(). 1810 */ 1811 1812 void gfs2_inplace_release(struct gfs2_inode *ip) 1813 { 1814 struct gfs2_blkreserv *rs = ip->i_res; 1815 1816 if (rs->rs_rgd_gh.gh_gl) 1817 gfs2_glock_dq_uninit(&rs->rs_rgd_gh); 1818 } 1819 1820 /** 1821 * gfs2_get_block_type - Check a block in a RG is of given type 1822 * @rgd: the resource group holding the block 1823 * @block: the block number 1824 * 1825 * Returns: The block type (GFS2_BLKST_*) 1826 */ 1827 1828 static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block) 1829 { 1830 struct gfs2_rbm rbm = { .rgd = rgd, }; 1831 int ret; 1832 1833 ret = gfs2_rbm_from_block(&rbm, block); 1834 WARN_ON_ONCE(ret != 0); 1835 1836 return gfs2_testbit(&rbm); 1837 } 1838 1839 1840 /** 1841 * gfs2_alloc_extent - allocate an extent from a given bitmap 1842 * @rbm: the resource group information 1843 * @dinode: TRUE if the first block we allocate is for a dinode 1844 * @n: The extent length (value/result) 1845 * 1846 * Add the bitmap buffer to the transaction. 1847 * Set the found bits to @new_state to change block's allocation state. 1848 */ 1849 static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode, 1850 unsigned int *n) 1851 { 1852 struct gfs2_rbm pos = { .rgd = rbm->rgd, }; 1853 const unsigned int elen = *n; 1854 u64 block; 1855 int ret; 1856 1857 *n = 1; 1858 block = gfs2_rbm_to_block(rbm); 1859 gfs2_trans_add_bh(rbm->rgd->rd_gl, rbm->bi->bi_bh, 1); 1860 gfs2_setbit(rbm, true, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED); 1861 block++; 1862 while (*n < elen) { 1863 ret = gfs2_rbm_from_block(&pos, block); 1864 if (ret || gfs2_testbit(&pos) != GFS2_BLKST_FREE) 1865 break; 1866 gfs2_trans_add_bh(pos.rgd->rd_gl, pos.bi->bi_bh, 1); 1867 gfs2_setbit(&pos, true, GFS2_BLKST_USED); 1868 (*n)++; 1869 block++; 1870 } 1871 } 1872 1873 /** 1874 * rgblk_free - Change alloc state of given block(s) 1875 * @sdp: the filesystem 1876 * @bstart: the start of a run of blocks to free 1877 * @blen: the length of the block run (all must lie within ONE RG!) 1878 * @new_state: GFS2_BLKST_XXX the after-allocation block state 1879 * 1880 * Returns: Resource group containing the block(s) 1881 */ 1882 1883 static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart, 1884 u32 blen, unsigned char new_state) 1885 { 1886 struct gfs2_rbm rbm; 1887 1888 rbm.rgd = gfs2_blk2rgrpd(sdp, bstart, 1); 1889 if (!rbm.rgd) { 1890 if (gfs2_consist(sdp)) 1891 fs_err(sdp, "block = %llu\n", (unsigned long long)bstart); 1892 return NULL; 1893 } 1894 1895 while (blen--) { 1896 gfs2_rbm_from_block(&rbm, bstart); 1897 bstart++; 1898 if (!rbm.bi->bi_clone) { 1899 rbm.bi->bi_clone = kmalloc(rbm.bi->bi_bh->b_size, 1900 GFP_NOFS | __GFP_NOFAIL); 1901 memcpy(rbm.bi->bi_clone + rbm.bi->bi_offset, 1902 rbm.bi->bi_bh->b_data + rbm.bi->bi_offset, 1903 rbm.bi->bi_len); 1904 } 1905 gfs2_trans_add_bh(rbm.rgd->rd_gl, rbm.bi->bi_bh, 1); 1906 gfs2_setbit(&rbm, false, new_state); 1907 } 1908 1909 return rbm.rgd; 1910 } 1911 1912 /** 1913 * gfs2_rgrp_dump - print out an rgrp 1914 * @seq: The iterator 1915 * @gl: The glock in question 1916 * 1917 */ 1918 1919 int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl) 1920 { 1921 struct gfs2_rgrpd *rgd = gl->gl_object; 1922 struct gfs2_blkreserv *trs; 1923 const struct rb_node *n; 1924 1925 if (rgd == NULL) 1926 return 0; 1927 gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u r:%u\n", 1928 (unsigned long long)rgd->rd_addr, rgd->rd_flags, 1929 rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes, 1930 rgd->rd_reserved); 1931 spin_lock(&rgd->rd_rsspin); 1932 for (n = rb_first(&rgd->rd_rstree); n; n = rb_next(&trs->rs_node)) { 1933 trs = rb_entry(n, struct gfs2_blkreserv, rs_node); 1934 dump_rs(seq, trs); 1935 } 1936 spin_unlock(&rgd->rd_rsspin); 1937 return 0; 1938 } 1939 1940 static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd) 1941 { 1942 struct gfs2_sbd *sdp = rgd->rd_sbd; 1943 fs_warn(sdp, "rgrp %llu has an error, marking it readonly until umount\n", 1944 (unsigned long long)rgd->rd_addr); 1945 fs_warn(sdp, "umount on all nodes and run fsck.gfs2 to fix the error\n"); 1946 gfs2_rgrp_dump(NULL, rgd->rd_gl); 1947 rgd->rd_flags |= GFS2_RDF_ERROR; 1948 } 1949 1950 /** 1951 * gfs2_adjust_reservation - Adjust (or remove) a reservation after allocation 1952 * @ip: The inode we have just allocated blocks for 1953 * @rbm: The start of the allocated blocks 1954 * @len: The extent length 1955 * 1956 * Adjusts a reservation after an allocation has taken place. If the 1957 * reservation does not match the allocation, or if it is now empty 1958 * then it is removed. 1959 */ 1960 1961 static void gfs2_adjust_reservation(struct gfs2_inode *ip, 1962 const struct gfs2_rbm *rbm, unsigned len) 1963 { 1964 struct gfs2_blkreserv *rs = ip->i_res; 1965 struct gfs2_rgrpd *rgd = rbm->rgd; 1966 unsigned rlen; 1967 u64 block; 1968 int ret; 1969 1970 spin_lock(&rgd->rd_rsspin); 1971 if (gfs2_rs_active(rs)) { 1972 if (gfs2_rbm_eq(&rs->rs_rbm, rbm)) { 1973 block = gfs2_rbm_to_block(rbm); 1974 ret = gfs2_rbm_from_block(&rs->rs_rbm, block + len); 1975 rlen = min(rs->rs_free, len); 1976 rs->rs_free -= rlen; 1977 rgd->rd_reserved -= rlen; 1978 trace_gfs2_rs(rs, TRACE_RS_CLAIM); 1979 if (rs->rs_free && !ret) 1980 goto out; 1981 } 1982 __rs_deltree(ip, rs); 1983 } 1984 out: 1985 spin_unlock(&rgd->rd_rsspin); 1986 } 1987 1988 /** 1989 * gfs2_alloc_blocks - Allocate one or more blocks of data and/or a dinode 1990 * @ip: the inode to allocate the block for 1991 * @bn: Used to return the starting block number 1992 * @nblocks: requested number of blocks/extent length (value/result) 1993 * @dinode: 1 if we're allocating a dinode block, else 0 1994 * @generation: the generation number of the inode 1995 * 1996 * Returns: 0 or error 1997 */ 1998 1999 int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, 2000 bool dinode, u64 *generation) 2001 { 2002 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 2003 struct buffer_head *dibh; 2004 struct gfs2_rbm rbm = { .rgd = ip->i_rgd, }; 2005 unsigned int ndata; 2006 u64 goal; 2007 u64 block; /* block, within the file system scope */ 2008 int error; 2009 2010 if (gfs2_rs_active(ip->i_res)) 2011 goal = gfs2_rbm_to_block(&ip->i_res->rs_rbm); 2012 else if (!dinode && rgrp_contains_block(rbm.rgd, ip->i_goal)) 2013 goal = ip->i_goal; 2014 else 2015 goal = rbm.rgd->rd_last_alloc + rbm.rgd->rd_data0; 2016 2017 gfs2_rbm_from_block(&rbm, goal); 2018 error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, 0, ip, false); 2019 2020 if (error == -ENOSPC) { 2021 gfs2_rbm_from_block(&rbm, goal); 2022 error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, 0, NULL, false); 2023 } 2024 2025 /* Since all blocks are reserved in advance, this shouldn't happen */ 2026 if (error) { 2027 fs_warn(sdp, "inum=%llu error=%d, nblocks=%u, full=%d\n", 2028 (unsigned long long)ip->i_no_addr, error, *nblocks, 2029 test_bit(GBF_FULL, &rbm.rgd->rd_bits->bi_flags)); 2030 goto rgrp_error; 2031 } 2032 2033 gfs2_alloc_extent(&rbm, dinode, nblocks); 2034 block = gfs2_rbm_to_block(&rbm); 2035 rbm.rgd->rd_last_alloc = block - rbm.rgd->rd_data0; 2036 if (gfs2_rs_active(ip->i_res)) 2037 gfs2_adjust_reservation(ip, &rbm, *nblocks); 2038 ndata = *nblocks; 2039 if (dinode) 2040 ndata--; 2041 2042 if (!dinode) { 2043 ip->i_goal = block + ndata - 1; 2044 error = gfs2_meta_inode_buffer(ip, &dibh); 2045 if (error == 0) { 2046 struct gfs2_dinode *di = 2047 (struct gfs2_dinode *)dibh->b_data; 2048 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 2049 di->di_goal_meta = di->di_goal_data = 2050 cpu_to_be64(ip->i_goal); 2051 brelse(dibh); 2052 } 2053 } 2054 if (rbm.rgd->rd_free < *nblocks) { 2055 printk(KERN_WARNING "nblocks=%u\n", *nblocks); 2056 goto rgrp_error; 2057 } 2058 2059 rbm.rgd->rd_free -= *nblocks; 2060 if (dinode) { 2061 rbm.rgd->rd_dinodes++; 2062 *generation = rbm.rgd->rd_igeneration++; 2063 if (*generation == 0) 2064 *generation = rbm.rgd->rd_igeneration++; 2065 } 2066 2067 gfs2_trans_add_bh(rbm.rgd->rd_gl, rbm.rgd->rd_bits[0].bi_bh, 1); 2068 gfs2_rgrp_out(rbm.rgd, rbm.rgd->rd_bits[0].bi_bh->b_data); 2069 gfs2_rgrp_ondisk2lvb(rbm.rgd->rd_rgl, rbm.rgd->rd_bits[0].bi_bh->b_data); 2070 2071 gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0); 2072 if (dinode) 2073 gfs2_trans_add_unrevoke(sdp, block, 1); 2074 2075 /* 2076 * This needs reviewing to see why we cannot do the quota change 2077 * at this point in the dinode case. 2078 */ 2079 if (ndata) 2080 gfs2_quota_change(ip, ndata, ip->i_inode.i_uid, 2081 ip->i_inode.i_gid); 2082 2083 rbm.rgd->rd_free_clone -= *nblocks; 2084 trace_gfs2_block_alloc(ip, rbm.rgd, block, *nblocks, 2085 dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED); 2086 *bn = block; 2087 return 0; 2088 2089 rgrp_error: 2090 gfs2_rgrp_error(rbm.rgd); 2091 return -EIO; 2092 } 2093 2094 /** 2095 * __gfs2_free_blocks - free a contiguous run of block(s) 2096 * @ip: the inode these blocks are being freed from 2097 * @bstart: first block of a run of contiguous blocks 2098 * @blen: the length of the block run 2099 * @meta: 1 if the blocks represent metadata 2100 * 2101 */ 2102 2103 void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta) 2104 { 2105 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 2106 struct gfs2_rgrpd *rgd; 2107 2108 rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE); 2109 if (!rgd) 2110 return; 2111 trace_gfs2_block_alloc(ip, rgd, bstart, blen, GFS2_BLKST_FREE); 2112 rgd->rd_free += blen; 2113 rgd->rd_flags &= ~GFS2_RGF_TRIMMED; 2114 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); 2115 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); 2116 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); 2117 2118 /* Directories keep their data in the metadata address space */ 2119 if (meta || ip->i_depth) 2120 gfs2_meta_wipe(ip, bstart, blen); 2121 } 2122 2123 /** 2124 * gfs2_free_meta - free a contiguous run of data block(s) 2125 * @ip: the inode these blocks are being freed from 2126 * @bstart: first block of a run of contiguous blocks 2127 * @blen: the length of the block run 2128 * 2129 */ 2130 2131 void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen) 2132 { 2133 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 2134 2135 __gfs2_free_blocks(ip, bstart, blen, 1); 2136 gfs2_statfs_change(sdp, 0, +blen, 0); 2137 gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid); 2138 } 2139 2140 void gfs2_unlink_di(struct inode *inode) 2141 { 2142 struct gfs2_inode *ip = GFS2_I(inode); 2143 struct gfs2_sbd *sdp = GFS2_SB(inode); 2144 struct gfs2_rgrpd *rgd; 2145 u64 blkno = ip->i_no_addr; 2146 2147 rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_UNLINKED); 2148 if (!rgd) 2149 return; 2150 trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED); 2151 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); 2152 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); 2153 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); 2154 update_rgrp_lvb_unlinked(rgd, 1); 2155 } 2156 2157 static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno) 2158 { 2159 struct gfs2_sbd *sdp = rgd->rd_sbd; 2160 struct gfs2_rgrpd *tmp_rgd; 2161 2162 tmp_rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_FREE); 2163 if (!tmp_rgd) 2164 return; 2165 gfs2_assert_withdraw(sdp, rgd == tmp_rgd); 2166 2167 if (!rgd->rd_dinodes) 2168 gfs2_consist_rgrpd(rgd); 2169 rgd->rd_dinodes--; 2170 rgd->rd_free++; 2171 2172 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); 2173 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); 2174 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); 2175 update_rgrp_lvb_unlinked(rgd, -1); 2176 2177 gfs2_statfs_change(sdp, 0, +1, -1); 2178 } 2179 2180 2181 void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) 2182 { 2183 gfs2_free_uninit_di(rgd, ip->i_no_addr); 2184 trace_gfs2_block_alloc(ip, rgd, ip->i_no_addr, 1, GFS2_BLKST_FREE); 2185 gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid); 2186 gfs2_meta_wipe(ip, ip->i_no_addr, 1); 2187 } 2188 2189 /** 2190 * gfs2_check_blk_type - Check the type of a block 2191 * @sdp: The superblock 2192 * @no_addr: The block number to check 2193 * @type: The block type we are looking for 2194 * 2195 * Returns: 0 if the block type matches the expected type 2196 * -ESTALE if it doesn't match 2197 * or -ve errno if something went wrong while checking 2198 */ 2199 2200 int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type) 2201 { 2202 struct gfs2_rgrpd *rgd; 2203 struct gfs2_holder rgd_gh; 2204 int error = -EINVAL; 2205 2206 rgd = gfs2_blk2rgrpd(sdp, no_addr, 1); 2207 if (!rgd) 2208 goto fail; 2209 2210 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_SHARED, 0, &rgd_gh); 2211 if (error) 2212 goto fail; 2213 2214 if (gfs2_get_block_type(rgd, no_addr) != type) 2215 error = -ESTALE; 2216 2217 gfs2_glock_dq_uninit(&rgd_gh); 2218 fail: 2219 return error; 2220 } 2221 2222 /** 2223 * gfs2_rlist_add - add a RG to a list of RGs 2224 * @ip: the inode 2225 * @rlist: the list of resource groups 2226 * @block: the block 2227 * 2228 * Figure out what RG a block belongs to and add that RG to the list 2229 * 2230 * FIXME: Don't use NOFAIL 2231 * 2232 */ 2233 2234 void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist, 2235 u64 block) 2236 { 2237 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 2238 struct gfs2_rgrpd *rgd; 2239 struct gfs2_rgrpd **tmp; 2240 unsigned int new_space; 2241 unsigned int x; 2242 2243 if (gfs2_assert_warn(sdp, !rlist->rl_ghs)) 2244 return; 2245 2246 if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, block)) 2247 rgd = ip->i_rgd; 2248 else 2249 rgd = gfs2_blk2rgrpd(sdp, block, 1); 2250 if (!rgd) { 2251 fs_err(sdp, "rlist_add: no rgrp for block %llu\n", (unsigned long long)block); 2252 return; 2253 } 2254 ip->i_rgd = rgd; 2255 2256 for (x = 0; x < rlist->rl_rgrps; x++) 2257 if (rlist->rl_rgd[x] == rgd) 2258 return; 2259 2260 if (rlist->rl_rgrps == rlist->rl_space) { 2261 new_space = rlist->rl_space + 10; 2262 2263 tmp = kcalloc(new_space, sizeof(struct gfs2_rgrpd *), 2264 GFP_NOFS | __GFP_NOFAIL); 2265 2266 if (rlist->rl_rgd) { 2267 memcpy(tmp, rlist->rl_rgd, 2268 rlist->rl_space * sizeof(struct gfs2_rgrpd *)); 2269 kfree(rlist->rl_rgd); 2270 } 2271 2272 rlist->rl_space = new_space; 2273 rlist->rl_rgd = tmp; 2274 } 2275 2276 rlist->rl_rgd[rlist->rl_rgrps++] = rgd; 2277 } 2278 2279 /** 2280 * gfs2_rlist_alloc - all RGs have been added to the rlist, now allocate 2281 * and initialize an array of glock holders for them 2282 * @rlist: the list of resource groups 2283 * @state: the lock state to acquire the RG lock in 2284 * 2285 * FIXME: Don't use NOFAIL 2286 * 2287 */ 2288 2289 void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state) 2290 { 2291 unsigned int x; 2292 2293 rlist->rl_ghs = kcalloc(rlist->rl_rgrps, sizeof(struct gfs2_holder), 2294 GFP_NOFS | __GFP_NOFAIL); 2295 for (x = 0; x < rlist->rl_rgrps; x++) 2296 gfs2_holder_init(rlist->rl_rgd[x]->rd_gl, 2297 state, 0, 2298 &rlist->rl_ghs[x]); 2299 } 2300 2301 /** 2302 * gfs2_rlist_free - free a resource group list 2303 * @list: the list of resource groups 2304 * 2305 */ 2306 2307 void gfs2_rlist_free(struct gfs2_rgrp_list *rlist) 2308 { 2309 unsigned int x; 2310 2311 kfree(rlist->rl_rgd); 2312 2313 if (rlist->rl_ghs) { 2314 for (x = 0; x < rlist->rl_rgrps; x++) 2315 gfs2_holder_uninit(&rlist->rl_ghs[x]); 2316 kfree(rlist->rl_ghs); 2317 rlist->rl_ghs = NULL; 2318 } 2319 } 2320 2321