1 /* 2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 3 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. 4 * 5 * This copyrighted material is made available to anyone wishing to use, 6 * modify, copy, or redistribute it subject to the terms and conditions 7 * of the GNU General Public License version 2. 8 */ 9 10 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 11 12 #include <linux/slab.h> 13 #include <linux/spinlock.h> 14 #include <linux/completion.h> 15 #include <linux/buffer_head.h> 16 #include <linux/fs.h> 17 #include <linux/gfs2_ondisk.h> 18 #include <linux/prefetch.h> 19 #include <linux/blkdev.h> 20 #include <linux/rbtree.h> 21 #include <linux/random.h> 22 23 #include "gfs2.h" 24 #include "incore.h" 25 #include "glock.h" 26 #include "glops.h" 27 #include "lops.h" 28 #include "meta_io.h" 29 #include "quota.h" 30 #include "rgrp.h" 31 #include "super.h" 32 #include "trans.h" 33 #include "util.h" 34 #include "log.h" 35 #include "inode.h" 36 #include "trace_gfs2.h" 37 #include "dir.h" 38 39 #define BFITNOENT ((u32)~0) 40 #define NO_BLOCK ((u64)~0) 41 42 #if BITS_PER_LONG == 32 43 #define LBITMASK (0x55555555UL) 44 #define LBITSKIP55 (0x55555555UL) 45 #define LBITSKIP00 (0x00000000UL) 46 #else 47 #define LBITMASK (0x5555555555555555UL) 48 #define LBITSKIP55 (0x5555555555555555UL) 49 #define LBITSKIP00 (0x0000000000000000UL) 50 #endif 51 52 /* 53 * These routines are used by the resource group routines (rgrp.c) 54 * to keep track of block allocation. Each block is represented by two 55 * bits. So, each byte represents GFS2_NBBY (i.e. 4) blocks. 56 * 57 * 0 = Free 58 * 1 = Used (not metadata) 59 * 2 = Unlinked (still in use) inode 60 * 3 = Used (metadata) 61 */ 62 63 struct gfs2_extent { 64 struct gfs2_rbm rbm; 65 u32 len; 66 }; 67 68 static const char valid_change[16] = { 69 /* current */ 70 /* n */ 0, 1, 1, 1, 71 /* e */ 1, 0, 0, 0, 72 /* w */ 0, 0, 0, 1, 73 1, 0, 0, 0 74 }; 75 76 static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext, 77 const struct gfs2_inode *ip, bool nowrap); 78 79 80 /** 81 * gfs2_setbit - Set a bit in the bitmaps 82 * @rbm: The position of the bit to set 83 * @do_clone: Also set the clone bitmap, if it exists 84 * @new_state: the new state of the block 85 * 86 */ 87 88 static inline void gfs2_setbit(const struct gfs2_rbm *rbm, bool do_clone, 89 unsigned char new_state) 90 { 91 unsigned char *byte1, *byte2, *end, cur_state; 92 struct gfs2_bitmap *bi = rbm_bi(rbm); 93 unsigned int buflen = bi->bi_len; 94 const unsigned int bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE; 95 96 byte1 = bi->bi_bh->b_data + bi->bi_offset + (rbm->offset / GFS2_NBBY); 97 end = bi->bi_bh->b_data + bi->bi_offset + buflen; 98 99 BUG_ON(byte1 >= end); 100 101 cur_state = (*byte1 >> bit) & GFS2_BIT_MASK; 102 103 if (unlikely(!valid_change[new_state * 4 + cur_state])) { 104 pr_warn("buf_blk = 0x%x old_state=%d, new_state=%d\n", 105 rbm->offset, cur_state, new_state); 106 pr_warn("rgrp=0x%llx bi_start=0x%x\n", 107 (unsigned long long)rbm->rgd->rd_addr, bi->bi_start); 108 pr_warn("bi_offset=0x%x bi_len=0x%x\n", 109 bi->bi_offset, bi->bi_len); 110 dump_stack(); 111 gfs2_consist_rgrpd(rbm->rgd); 112 return; 113 } 114 *byte1 ^= (cur_state ^ new_state) << bit; 115 116 if (do_clone && bi->bi_clone) { 117 byte2 = bi->bi_clone + bi->bi_offset + (rbm->offset / GFS2_NBBY); 118 cur_state = (*byte2 >> bit) & GFS2_BIT_MASK; 119 *byte2 ^= (cur_state ^ new_state) << bit; 120 } 121 } 122 123 /** 124 * gfs2_testbit - test a bit in the bitmaps 125 * @rbm: The bit to 
test
 * @use_clone: If true, test the clone bitmap, not the official bitmap.
 *
 * Some callers like gfs2_unaligned_extlen need to test the clone bitmaps,
 * not the "real" bitmaps, to avoid allocating recently freed blocks.
 *
 * Returns: The two bit block state of the requested bit
 */

static inline u8 gfs2_testbit(const struct gfs2_rbm *rbm, bool use_clone)
{
        struct gfs2_bitmap *bi = rbm_bi(rbm);
        const u8 *buffer;
        const u8 *byte;
        unsigned int bit;

        if (use_clone && bi->bi_clone)
                buffer = bi->bi_clone;
        else
                buffer = bi->bi_bh->b_data;
        buffer += bi->bi_offset;
        byte = buffer + (rbm->offset / GFS2_NBBY);
        bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE;

        return (*byte >> bit) & GFS2_BIT_MASK;
}

/**
 * gfs2_bit_search - search bitmap data for a given block state
 * @ptr: Pointer to bitmap data
 * @mask: Mask to use (normally 0x55555.... but adjusted for search start)
 * @state: The state we are searching for
 *
 * We xor the bitmap data with a pattern which is the bitwise opposite
 * of what we are looking for; this gives rise to a pattern of ones
 * wherever there is a match. Since we have two bits per entry, we
 * take this pattern, shift it down by one place and then and it with
 * the original. All the even bit positions (0,2,4, etc) then represent
 * successful matches, so we mask with 0x55555..... to remove the unwanted
 * odd bit positions.
 *
 * This allows searching of a whole u64 at once (32 blocks) with a
 * single test (on 64 bit arches).
 */

static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state)
{
        u64 tmp;
        static const u64 search[] = {
                [0] = 0xffffffffffffffffULL,
                [1] = 0xaaaaaaaaaaaaaaaaULL,
                [2] = 0x5555555555555555ULL,
                [3] = 0x0000000000000000ULL,
        };
        tmp = le64_to_cpu(*ptr) ^ search[state];
        tmp &= (tmp >> 1);
        tmp &= mask;
        return tmp;
}

/**
 * rs_cmp - multi-block reservation range compare
 * @blk: absolute file system block number of the new reservation
 * @len: number of blocks in the new reservation
 * @rs: existing reservation to compare against
 *
 * returns: 1 if the block range is beyond the reach of the reservation
 *         -1 if the block range is before the start of the reservation
 *          0 if the block range overlaps with the reservation
 */
static inline int rs_cmp(u64 blk, u32 len, struct gfs2_blkreserv *rs)
{
        u64 startblk = gfs2_rbm_to_block(&rs->rs_rbm);

        if (blk >= startblk + rs->rs_free)
                return 1;
        if (blk + len - 1 < startblk)
                return -1;
        return 0;
}

/**
 * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing
 *               a block in a given allocation state.
 * @buf: the buffer that holds the bitmaps
 * @len: the length (in bytes) of the buffer
 * @goal: start search at this block's bit-pair (within @buf)
 * @state: GFS2_BLKST_XXX the state of the block we're looking for.
 *
 * Scope of @goal and returned block number is only within this bitmap buffer,
 * not entire rgrp or filesystem.  @buf will be offset from the actual
 * beginning of a bitmap block buffer, skipping any header structures, but
 * headers are always a multiple of 64 bits long so that the buffer is
 * always aligned to a 64 bit boundary.
 *
 * The size of the buffer is in bytes, but it is assumed that it is
 * always ok to read a complete multiple of 64 bits at the end
 * of the block in case the end is not aligned to a natural boundary.
 *
 * Return: the block number (bitmap buffer scope) that was found
 */

static u32 gfs2_bitfit(const u8 *buf, const unsigned int len,
                       u32 goal, u8 state)
{
        u32 spoint = (goal << 1) & ((8 * sizeof(u64)) - 1);
        const __le64 *ptr = ((__le64 *)buf) + (goal >> 5);
        const __le64 *end = (__le64 *)(buf + ALIGN(len, sizeof(u64)));
        u64 tmp;
        u64 mask = 0x5555555555555555ULL;
        u32 bit;

        /* Mask off bits we don't care about at the start of the search */
        mask <<= spoint;
        tmp = gfs2_bit_search(ptr, mask, state);
        ptr++;
        while (tmp == 0 && ptr < end) {
                tmp = gfs2_bit_search(ptr, 0x5555555555555555ULL, state);
                ptr++;
        }
        /* Mask off any bits which are more than len bytes from the start */
        if (ptr == end && (len & (sizeof(u64) - 1)))
                tmp &= (((u64)~0) >> (64 - 8 * (len & (sizeof(u64) - 1))));
        /* Didn't find anything, so return */
        if (tmp == 0)
                return BFITNOENT;
        ptr--;
        bit = __ffs64(tmp);
        bit /= 2;       /* two bits per entry in the bitmap */
        return (((const unsigned char *)ptr - buf) * GFS2_NBBY) + bit;
}

/**
 * gfs2_rbm_from_block - Set the rbm based upon rgd and block number
 * @rbm: The rbm with rgd already set correctly
 * @block: The block number (filesystem relative)
 *
 * This sets the bi and offset members of an rbm based on a
 * resource group and a filesystem relative block number. The
 * resource group must be set in the rbm on entry, the bi and
 * offset members will be set by this function.
 *
 * Returns: 0 on success, or an error code
 */

static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block)
{
        u64 rblock = block - rbm->rgd->rd_data0;

        if (WARN_ON_ONCE(rblock > UINT_MAX))
                return -EINVAL;
        if (block >= rbm->rgd->rd_data0 + rbm->rgd->rd_data)
                return -E2BIG;

        rbm->bii = 0;
        rbm->offset = (u32)(rblock);
        /* Check if the block is within the first block */
        if (rbm->offset < rbm_bi(rbm)->bi_blocks)
                return 0;

        /* Adjust for the size diff between gfs2_meta_header and gfs2_rgrp */
        rbm->offset += (sizeof(struct gfs2_rgrp) -
                        sizeof(struct gfs2_meta_header)) * GFS2_NBBY;
        rbm->bii = rbm->offset / rbm->rgd->rd_sbd->sd_blocks_per_bitmap;
        rbm->offset -= rbm->bii * rbm->rgd->rd_sbd->sd_blocks_per_bitmap;
        return 0;
}

/**
 * gfs2_rbm_incr - increment an rbm structure
 * @rbm: The rbm with rgd already set correctly
 *
 * This function takes an existing rbm structure and increments it to the next
 * viable block offset.
 *
 * Returns: If incrementing the offset would cause the rbm to go past the
 *          end of the rgrp, true is returned, otherwise false.
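 * Within an rbm, the pair (bii, offset) addresses a block in the rgrp:
 * bii selects one of the rgrp's bitmap buffers and offset is the block
 * index within that bitmap, so stepping past bi_blocks means moving on
 * to offset 0 of the next bitmap (compare gfs2_rbm_from_block() above).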
302 * 303 */ 304 305 static bool gfs2_rbm_incr(struct gfs2_rbm *rbm) 306 { 307 if (rbm->offset + 1 < rbm_bi(rbm)->bi_blocks) { /* in the same bitmap */ 308 rbm->offset++; 309 return false; 310 } 311 if (rbm->bii == rbm->rgd->rd_length - 1) /* at the last bitmap */ 312 return true; 313 314 rbm->offset = 0; 315 rbm->bii++; 316 return false; 317 } 318 319 /** 320 * gfs2_unaligned_extlen - Look for free blocks which are not byte aligned 321 * @rbm: Position to search (value/result) 322 * @n_unaligned: Number of unaligned blocks to check 323 * @len: Decremented for each block found (terminate on zero) 324 * 325 * Returns: true if a non-free block is encountered 326 */ 327 328 static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *len) 329 { 330 u32 n; 331 u8 res; 332 333 for (n = 0; n < n_unaligned; n++) { 334 res = gfs2_testbit(rbm, true); 335 if (res != GFS2_BLKST_FREE) 336 return true; 337 (*len)--; 338 if (*len == 0) 339 return true; 340 if (gfs2_rbm_incr(rbm)) 341 return true; 342 } 343 344 return false; 345 } 346 347 /** 348 * gfs2_free_extlen - Return extent length of free blocks 349 * @rrbm: Starting position 350 * @len: Max length to check 351 * 352 * Starting at the block specified by the rbm, see how many free blocks 353 * there are, not reading more than len blocks ahead. This can be done 354 * using memchr_inv when the blocks are byte aligned, but has to be done 355 * on a block by block basis in case of unaligned blocks. Also this 356 * function can cope with bitmap boundaries (although it must stop on 357 * a resource group boundary) 358 * 359 * Returns: Number of free blocks in the extent 360 */ 361 362 static u32 gfs2_free_extlen(const struct gfs2_rbm *rrbm, u32 len) 363 { 364 struct gfs2_rbm rbm = *rrbm; 365 u32 n_unaligned = rbm.offset & 3; 366 u32 size = len; 367 u32 bytes; 368 u32 chunk_size; 369 u8 *ptr, *start, *end; 370 u64 block; 371 struct gfs2_bitmap *bi; 372 373 if (n_unaligned && 374 gfs2_unaligned_extlen(&rbm, 4 - n_unaligned, &len)) 375 goto out; 376 377 n_unaligned = len & 3; 378 /* Start is now byte aligned */ 379 while (len > 3) { 380 bi = rbm_bi(&rbm); 381 start = bi->bi_bh->b_data; 382 if (bi->bi_clone) 383 start = bi->bi_clone; 384 start += bi->bi_offset; 385 end = start + bi->bi_len; 386 BUG_ON(rbm.offset & 3); 387 start += (rbm.offset / GFS2_NBBY); 388 bytes = min_t(u32, len / GFS2_NBBY, (end - start)); 389 ptr = memchr_inv(start, 0, bytes); 390 chunk_size = ((ptr == NULL) ? 
bytes : (ptr - start)); 391 chunk_size *= GFS2_NBBY; 392 BUG_ON(len < chunk_size); 393 len -= chunk_size; 394 block = gfs2_rbm_to_block(&rbm); 395 if (gfs2_rbm_from_block(&rbm, block + chunk_size)) { 396 n_unaligned = 0; 397 break; 398 } 399 if (ptr) { 400 n_unaligned = 3; 401 break; 402 } 403 n_unaligned = len & 3; 404 } 405 406 /* Deal with any bits left over at the end */ 407 if (n_unaligned) 408 gfs2_unaligned_extlen(&rbm, n_unaligned, &len); 409 out: 410 return size - len; 411 } 412 413 /** 414 * gfs2_bitcount - count the number of bits in a certain state 415 * @rgd: the resource group descriptor 416 * @buffer: the buffer that holds the bitmaps 417 * @buflen: the length (in bytes) of the buffer 418 * @state: the state of the block we're looking for 419 * 420 * Returns: The number of bits 421 */ 422 423 static u32 gfs2_bitcount(struct gfs2_rgrpd *rgd, const u8 *buffer, 424 unsigned int buflen, u8 state) 425 { 426 const u8 *byte = buffer; 427 const u8 *end = buffer + buflen; 428 const u8 state1 = state << 2; 429 const u8 state2 = state << 4; 430 const u8 state3 = state << 6; 431 u32 count = 0; 432 433 for (; byte < end; byte++) { 434 if (((*byte) & 0x03) == state) 435 count++; 436 if (((*byte) & 0x0C) == state1) 437 count++; 438 if (((*byte) & 0x30) == state2) 439 count++; 440 if (((*byte) & 0xC0) == state3) 441 count++; 442 } 443 444 return count; 445 } 446 447 /** 448 * gfs2_rgrp_verify - Verify that a resource group is consistent 449 * @rgd: the rgrp 450 * 451 */ 452 453 void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd) 454 { 455 struct gfs2_sbd *sdp = rgd->rd_sbd; 456 struct gfs2_bitmap *bi = NULL; 457 u32 length = rgd->rd_length; 458 u32 count[4], tmp; 459 int buf, x; 460 461 memset(count, 0, 4 * sizeof(u32)); 462 463 /* Count # blocks in each of 4 possible allocation states */ 464 for (buf = 0; buf < length; buf++) { 465 bi = rgd->rd_bits + buf; 466 for (x = 0; x < 4; x++) 467 count[x] += gfs2_bitcount(rgd, 468 bi->bi_bh->b_data + 469 bi->bi_offset, 470 bi->bi_len, x); 471 } 472 473 if (count[0] != rgd->rd_free) { 474 if (gfs2_consist_rgrpd(rgd)) 475 fs_err(sdp, "free data mismatch: %u != %u\n", 476 count[0], rgd->rd_free); 477 return; 478 } 479 480 tmp = rgd->rd_data - rgd->rd_free - rgd->rd_dinodes; 481 if (count[1] != tmp) { 482 if (gfs2_consist_rgrpd(rgd)) 483 fs_err(sdp, "used data mismatch: %u != %u\n", 484 count[1], tmp); 485 return; 486 } 487 488 if (count[2] + count[3] != rgd->rd_dinodes) { 489 if (gfs2_consist_rgrpd(rgd)) 490 fs_err(sdp, "used metadata mismatch: %u != %u\n", 491 count[2] + count[3], rgd->rd_dinodes); 492 return; 493 } 494 } 495 496 /** 497 * gfs2_blk2rgrpd - Find resource group for a given data/meta block number 498 * @sdp: The GFS2 superblock 499 * @blk: The data block number 500 * @exact: True if this needs to be an exact match 501 * 502 * The @exact argument should be set to true by most callers. The exception 503 * is when we need to match blocks which are not represented by the rgrp 504 * bitmap, but which are part of the rgrp (i.e. padding blocks) which are 505 * there for alignment purposes. Another way of looking at it is that @exact 506 * matches only valid data/metadata blocks, but with @exact false, it will 507 * match any block within the extent of the rgrp. 
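 * For instance, a padding block lying past rd_data0 + rd_data but before
 * the next rgrp's rd_addr is matched only when @exact is false; with
 * @exact true, such a block makes this function return NULL.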
508 * 509 * Returns: The resource group, or NULL if not found 510 */ 511 512 struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk, bool exact) 513 { 514 struct rb_node *n, *next; 515 struct gfs2_rgrpd *cur; 516 517 spin_lock(&sdp->sd_rindex_spin); 518 n = sdp->sd_rindex_tree.rb_node; 519 while (n) { 520 cur = rb_entry(n, struct gfs2_rgrpd, rd_node); 521 next = NULL; 522 if (blk < cur->rd_addr) 523 next = n->rb_left; 524 else if (blk >= cur->rd_data0 + cur->rd_data) 525 next = n->rb_right; 526 if (next == NULL) { 527 spin_unlock(&sdp->sd_rindex_spin); 528 if (exact) { 529 if (blk < cur->rd_addr) 530 return NULL; 531 if (blk >= cur->rd_data0 + cur->rd_data) 532 return NULL; 533 } 534 return cur; 535 } 536 n = next; 537 } 538 spin_unlock(&sdp->sd_rindex_spin); 539 540 return NULL; 541 } 542 543 /** 544 * gfs2_rgrpd_get_first - get the first Resource Group in the filesystem 545 * @sdp: The GFS2 superblock 546 * 547 * Returns: The first rgrp in the filesystem 548 */ 549 550 struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp) 551 { 552 const struct rb_node *n; 553 struct gfs2_rgrpd *rgd; 554 555 spin_lock(&sdp->sd_rindex_spin); 556 n = rb_first(&sdp->sd_rindex_tree); 557 rgd = rb_entry(n, struct gfs2_rgrpd, rd_node); 558 spin_unlock(&sdp->sd_rindex_spin); 559 560 return rgd; 561 } 562 563 /** 564 * gfs2_rgrpd_get_next - get the next RG 565 * @rgd: the resource group descriptor 566 * 567 * Returns: The next rgrp 568 */ 569 570 struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd) 571 { 572 struct gfs2_sbd *sdp = rgd->rd_sbd; 573 const struct rb_node *n; 574 575 spin_lock(&sdp->sd_rindex_spin); 576 n = rb_next(&rgd->rd_node); 577 if (n == NULL) 578 n = rb_first(&sdp->sd_rindex_tree); 579 580 if (unlikely(&rgd->rd_node == n)) { 581 spin_unlock(&sdp->sd_rindex_spin); 582 return NULL; 583 } 584 rgd = rb_entry(n, struct gfs2_rgrpd, rd_node); 585 spin_unlock(&sdp->sd_rindex_spin); 586 return rgd; 587 } 588 589 void check_and_update_goal(struct gfs2_inode *ip) 590 { 591 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 592 if (!ip->i_goal || gfs2_blk2rgrpd(sdp, ip->i_goal, 1) == NULL) 593 ip->i_goal = ip->i_no_addr; 594 } 595 596 void gfs2_free_clones(struct gfs2_rgrpd *rgd) 597 { 598 int x; 599 600 for (x = 0; x < rgd->rd_length; x++) { 601 struct gfs2_bitmap *bi = rgd->rd_bits + x; 602 kfree(bi->bi_clone); 603 bi->bi_clone = NULL; 604 } 605 } 606 607 /** 608 * gfs2_rsqa_alloc - make sure we have a reservation assigned to the inode 609 * plus a quota allocations data structure, if necessary 610 * @ip: the inode for this reservation 611 */ 612 int gfs2_rsqa_alloc(struct gfs2_inode *ip) 613 { 614 return gfs2_qa_alloc(ip); 615 } 616 617 static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs) 618 { 619 struct gfs2_inode *ip = container_of(rs, struct gfs2_inode, i_res); 620 621 gfs2_print_dbg(seq, " B: n:%llu s:%llu b:%u f:%u\n", 622 (unsigned long long)ip->i_no_addr, 623 (unsigned long long)gfs2_rbm_to_block(&rs->rs_rbm), 624 rs->rs_rbm.offset, rs->rs_free); 625 } 626 627 /** 628 * __rs_deltree - remove a multi-block reservation from the rgd tree 629 * @rs: The reservation to remove 630 * 631 */ 632 static void __rs_deltree(struct gfs2_blkreserv *rs) 633 { 634 struct gfs2_rgrpd *rgd; 635 636 if (!gfs2_rs_active(rs)) 637 return; 638 639 rgd = rs->rs_rbm.rgd; 640 trace_gfs2_rs(rs, TRACE_RS_TREEDEL); 641 rb_erase(&rs->rs_node, &rgd->rd_rstree); 642 RB_CLEAR_NODE(&rs->rs_node); 643 644 if (rs->rs_free) { 645 struct gfs2_bitmap *bi = rbm_bi(&rs->rs_rbm); 646 647 /* 
return reserved blocks to the rgrp */
                BUG_ON(rs->rs_rbm.rgd->rd_reserved < rs->rs_free);
                rs->rs_rbm.rgd->rd_reserved -= rs->rs_free;
                /* The rgrp extent failure point is likely not to increase;
                   it will only do so if the freed blocks are somehow
                   contiguous with a span of free blocks that follows. Still,
                   it will force the number to be recalculated later. */
                rgd->rd_extfail_pt += rs->rs_free;
                rs->rs_free = 0;
                clear_bit(GBF_FULL, &bi->bi_flags);
        }
}

/**
 * gfs2_rs_deltree - remove a multi-block reservation from the rgd tree
 * @rs: The reservation to remove
 *
 */
void gfs2_rs_deltree(struct gfs2_blkreserv *rs)
{
        struct gfs2_rgrpd *rgd;

        rgd = rs->rs_rbm.rgd;
        if (rgd) {
                spin_lock(&rgd->rd_rsspin);
                __rs_deltree(rs);
                BUG_ON(rs->rs_free);
                spin_unlock(&rgd->rd_rsspin);
        }
}

/**
 * gfs2_rsqa_delete - delete a multi-block reservation and quota allocation
 * @ip: The inode for this reservation
 * @wcount: The inode's write count, or NULL
 *
 */
void gfs2_rsqa_delete(struct gfs2_inode *ip, atomic_t *wcount)
{
        down_write(&ip->i_rw_mutex);
        if ((wcount == NULL) || (atomic_read(wcount) <= 1))
                gfs2_rs_deltree(&ip->i_res);
        up_write(&ip->i_rw_mutex);
        gfs2_qa_delete(ip, wcount);
}

/**
 * return_all_reservations - return all reserved blocks back to the rgrp.
 * @rgd: the rgrp that needs its space back
 *
 * We previously reserved a bunch of blocks for allocation. Now we need to
 * give them back. This leaves the reservation structures intact, but removes
 * all of their corresponding "no-fly zones".
 */
static void return_all_reservations(struct gfs2_rgrpd *rgd)
{
        struct rb_node *n;
        struct gfs2_blkreserv *rs;

        spin_lock(&rgd->rd_rsspin);
        while ((n = rb_first(&rgd->rd_rstree))) {
                rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
                __rs_deltree(rs);
        }
        spin_unlock(&rgd->rd_rsspin);
}

void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
{
        struct rb_node *n;
        struct gfs2_rgrpd *rgd;
        struct gfs2_glock *gl;

        while ((n = rb_first(&sdp->sd_rindex_tree))) {
                rgd = rb_entry(n, struct gfs2_rgrpd, rd_node);
                gl = rgd->rd_gl;

                rb_erase(n, &sdp->sd_rindex_tree);

                if (gl) {
                        glock_clear_object(gl, rgd);
                        gfs2_glock_put(gl);
                }

                gfs2_free_clones(rgd);
                kfree(rgd->rd_bits);
                rgd->rd_bits = NULL;
                return_all_reservations(rgd);
                kmem_cache_free(gfs2_rgrpd_cachep, rgd);
        }
}

static void gfs2_rindex_print(const struct gfs2_rgrpd *rgd)
{
        pr_info("ri_addr = %llu\n", (unsigned long long)rgd->rd_addr);
        pr_info("ri_length = %u\n", rgd->rd_length);
        pr_info("ri_data0 = %llu\n", (unsigned long long)rgd->rd_data0);
        pr_info("ri_data = %u\n", rgd->rd_data);
        pr_info("ri_bitbytes = %u\n", rgd->rd_bitbytes);
}

/**
 * compute_bitstructs - Compute the bitmap sizes
 * @rgd: The resource group descriptor
 *
 * Calculates bitmap descriptors, one for each block that contains bitmap data
 *
 * Returns: errno
 */

static int compute_bitstructs(struct gfs2_rgrpd *rgd)
{
        struct gfs2_sbd *sdp = rgd->rd_sbd;
        struct gfs2_bitmap *bi;
        u32 length = rgd->rd_length; /* # blocks in hdr & bitmap */
        u32 bytes_left, bytes;
        int x;

        if (!length)
                return -EINVAL;

        rgd->rd_bits = kcalloc(length, sizeof(struct gfs2_bitmap), GFP_NOFS);
        if
(!rgd->rd_bits) 770 return -ENOMEM; 771 772 bytes_left = rgd->rd_bitbytes; 773 774 for (x = 0; x < length; x++) { 775 bi = rgd->rd_bits + x; 776 777 bi->bi_flags = 0; 778 /* small rgrp; bitmap stored completely in header block */ 779 if (length == 1) { 780 bytes = bytes_left; 781 bi->bi_offset = sizeof(struct gfs2_rgrp); 782 bi->bi_start = 0; 783 bi->bi_len = bytes; 784 bi->bi_blocks = bytes * GFS2_NBBY; 785 /* header block */ 786 } else if (x == 0) { 787 bytes = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_rgrp); 788 bi->bi_offset = sizeof(struct gfs2_rgrp); 789 bi->bi_start = 0; 790 bi->bi_len = bytes; 791 bi->bi_blocks = bytes * GFS2_NBBY; 792 /* last block */ 793 } else if (x + 1 == length) { 794 bytes = bytes_left; 795 bi->bi_offset = sizeof(struct gfs2_meta_header); 796 bi->bi_start = rgd->rd_bitbytes - bytes_left; 797 bi->bi_len = bytes; 798 bi->bi_blocks = bytes * GFS2_NBBY; 799 /* other blocks */ 800 } else { 801 bytes = sdp->sd_sb.sb_bsize - 802 sizeof(struct gfs2_meta_header); 803 bi->bi_offset = sizeof(struct gfs2_meta_header); 804 bi->bi_start = rgd->rd_bitbytes - bytes_left; 805 bi->bi_len = bytes; 806 bi->bi_blocks = bytes * GFS2_NBBY; 807 } 808 809 bytes_left -= bytes; 810 } 811 812 if (bytes_left) { 813 gfs2_consist_rgrpd(rgd); 814 return -EIO; 815 } 816 bi = rgd->rd_bits + (length - 1); 817 if ((bi->bi_start + bi->bi_len) * GFS2_NBBY != rgd->rd_data) { 818 if (gfs2_consist_rgrpd(rgd)) { 819 gfs2_rindex_print(rgd); 820 fs_err(sdp, "start=%u len=%u offset=%u\n", 821 bi->bi_start, bi->bi_len, bi->bi_offset); 822 } 823 return -EIO; 824 } 825 826 return 0; 827 } 828 829 /** 830 * gfs2_ri_total - Total up the file system space, according to the rindex. 831 * @sdp: the filesystem 832 * 833 */ 834 u64 gfs2_ri_total(struct gfs2_sbd *sdp) 835 { 836 u64 total_data = 0; 837 struct inode *inode = sdp->sd_rindex; 838 struct gfs2_inode *ip = GFS2_I(inode); 839 char buf[sizeof(struct gfs2_rindex)]; 840 int error, rgrps; 841 842 for (rgrps = 0;; rgrps++) { 843 loff_t pos = rgrps * sizeof(struct gfs2_rindex); 844 845 if (pos + sizeof(struct gfs2_rindex) > i_size_read(inode)) 846 break; 847 error = gfs2_internal_read(ip, buf, &pos, 848 sizeof(struct gfs2_rindex)); 849 if (error != sizeof(struct gfs2_rindex)) 850 break; 851 total_data += be32_to_cpu(((struct gfs2_rindex *)buf)->ri_data); 852 } 853 return total_data; 854 } 855 856 static int rgd_insert(struct gfs2_rgrpd *rgd) 857 { 858 struct gfs2_sbd *sdp = rgd->rd_sbd; 859 struct rb_node **newn = &sdp->sd_rindex_tree.rb_node, *parent = NULL; 860 861 /* Figure out where to put new node */ 862 while (*newn) { 863 struct gfs2_rgrpd *cur = rb_entry(*newn, struct gfs2_rgrpd, 864 rd_node); 865 866 parent = *newn; 867 if (rgd->rd_addr < cur->rd_addr) 868 newn = &((*newn)->rb_left); 869 else if (rgd->rd_addr > cur->rd_addr) 870 newn = &((*newn)->rb_right); 871 else 872 return -EEXIST; 873 } 874 875 rb_link_node(&rgd->rd_node, parent, newn); 876 rb_insert_color(&rgd->rd_node, &sdp->sd_rindex_tree); 877 sdp->sd_rgrps++; 878 return 0; 879 } 880 881 /** 882 * read_rindex_entry - Pull in a new resource index entry from the disk 883 * @ip: Pointer to the rindex inode 884 * 885 * Returns: 0 on success, > 0 on EOF, error code otherwise 886 */ 887 888 static int read_rindex_entry(struct gfs2_inode *ip) 889 { 890 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 891 const unsigned bsize = sdp->sd_sb.sb_bsize; 892 loff_t pos = sdp->sd_rgrps * sizeof(struct gfs2_rindex); 893 struct gfs2_rindex buf; 894 int error; 895 struct gfs2_rgrpd *rgd; 896 897 if (pos >= 
i_size_read(&ip->i_inode)) 898 return 1; 899 900 error = gfs2_internal_read(ip, (char *)&buf, &pos, 901 sizeof(struct gfs2_rindex)); 902 903 if (error != sizeof(struct gfs2_rindex)) 904 return (error == 0) ? 1 : error; 905 906 rgd = kmem_cache_zalloc(gfs2_rgrpd_cachep, GFP_NOFS); 907 error = -ENOMEM; 908 if (!rgd) 909 return error; 910 911 rgd->rd_sbd = sdp; 912 rgd->rd_addr = be64_to_cpu(buf.ri_addr); 913 rgd->rd_length = be32_to_cpu(buf.ri_length); 914 rgd->rd_data0 = be64_to_cpu(buf.ri_data0); 915 rgd->rd_data = be32_to_cpu(buf.ri_data); 916 rgd->rd_bitbytes = be32_to_cpu(buf.ri_bitbytes); 917 spin_lock_init(&rgd->rd_rsspin); 918 919 error = compute_bitstructs(rgd); 920 if (error) 921 goto fail; 922 923 error = gfs2_glock_get(sdp, rgd->rd_addr, 924 &gfs2_rgrp_glops, CREATE, &rgd->rd_gl); 925 if (error) 926 goto fail; 927 928 rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lksb.sb_lvbptr; 929 rgd->rd_flags &= ~(GFS2_RDF_UPTODATE | GFS2_RDF_PREFERRED); 930 if (rgd->rd_data > sdp->sd_max_rg_data) 931 sdp->sd_max_rg_data = rgd->rd_data; 932 spin_lock(&sdp->sd_rindex_spin); 933 error = rgd_insert(rgd); 934 spin_unlock(&sdp->sd_rindex_spin); 935 if (!error) { 936 glock_set_object(rgd->rd_gl, rgd); 937 rgd->rd_gl->gl_vm.start = (rgd->rd_addr * bsize) & PAGE_MASK; 938 rgd->rd_gl->gl_vm.end = PAGE_ALIGN((rgd->rd_addr + 939 rgd->rd_length) * bsize) - 1; 940 return 0; 941 } 942 943 error = 0; /* someone else read in the rgrp; free it and ignore it */ 944 gfs2_glock_put(rgd->rd_gl); 945 946 fail: 947 kfree(rgd->rd_bits); 948 rgd->rd_bits = NULL; 949 kmem_cache_free(gfs2_rgrpd_cachep, rgd); 950 return error; 951 } 952 953 /** 954 * set_rgrp_preferences - Run all the rgrps, selecting some we prefer to use 955 * @sdp: the GFS2 superblock 956 * 957 * The purpose of this function is to select a subset of the resource groups 958 * and mark them as PREFERRED. We do it in such a way that each node prefers 959 * to use a unique set of rgrps to minimize glock contention. 960 */ 961 static void set_rgrp_preferences(struct gfs2_sbd *sdp) 962 { 963 struct gfs2_rgrpd *rgd, *first; 964 int i; 965 966 /* Skip an initial number of rgrps, based on this node's journal ID. 967 That should start each node out on its own set. */ 968 rgd = gfs2_rgrpd_get_first(sdp); 969 for (i = 0; i < sdp->sd_lockstruct.ls_jid; i++) 970 rgd = gfs2_rgrpd_get_next(rgd); 971 first = rgd; 972 973 do { 974 rgd->rd_flags |= GFS2_RDF_PREFERRED; 975 for (i = 0; i < sdp->sd_journals; i++) { 976 rgd = gfs2_rgrpd_get_next(rgd); 977 if (!rgd || rgd == first) 978 break; 979 } 980 } while (rgd && rgd != first); 981 } 982 983 /** 984 * gfs2_ri_update - Pull in a new resource index from the disk 985 * @ip: pointer to the rindex inode 986 * 987 * Returns: 0 on successful update, error code otherwise 988 */ 989 990 static int gfs2_ri_update(struct gfs2_inode *ip) 991 { 992 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 993 int error; 994 995 do { 996 error = read_rindex_entry(ip); 997 } while (error == 0); 998 999 if (error < 0) 1000 return error; 1001 1002 set_rgrp_preferences(sdp); 1003 1004 sdp->sd_rindex_uptodate = 1; 1005 return 0; 1006 } 1007 1008 /** 1009 * gfs2_rindex_update - Update the rindex if required 1010 * @sdp: The GFS2 superblock 1011 * 1012 * We grab a lock on the rindex inode to make sure that it doesn't 1013 * change whilst we are performing an operation. We keep this lock 1014 * for quite long periods of time compared to other locks. 
This 1015 * doesn't matter, since it is shared and it is very, very rarely 1016 * accessed in the exclusive mode (i.e. only when expanding the filesystem). 1017 * 1018 * This makes sure that we're using the latest copy of the resource index 1019 * special file, which might have been updated if someone expanded the 1020 * filesystem (via gfs2_grow utility), which adds new resource groups. 1021 * 1022 * Returns: 0 on succeess, error code otherwise 1023 */ 1024 1025 int gfs2_rindex_update(struct gfs2_sbd *sdp) 1026 { 1027 struct gfs2_inode *ip = GFS2_I(sdp->sd_rindex); 1028 struct gfs2_glock *gl = ip->i_gl; 1029 struct gfs2_holder ri_gh; 1030 int error = 0; 1031 int unlock_required = 0; 1032 1033 /* Read new copy from disk if we don't have the latest */ 1034 if (!sdp->sd_rindex_uptodate) { 1035 if (!gfs2_glock_is_locked_by_me(gl)) { 1036 error = gfs2_glock_nq_init(gl, LM_ST_SHARED, 0, &ri_gh); 1037 if (error) 1038 return error; 1039 unlock_required = 1; 1040 } 1041 if (!sdp->sd_rindex_uptodate) 1042 error = gfs2_ri_update(ip); 1043 if (unlock_required) 1044 gfs2_glock_dq_uninit(&ri_gh); 1045 } 1046 1047 return error; 1048 } 1049 1050 static void gfs2_rgrp_in(struct gfs2_rgrpd *rgd, const void *buf) 1051 { 1052 const struct gfs2_rgrp *str = buf; 1053 u32 rg_flags; 1054 1055 rg_flags = be32_to_cpu(str->rg_flags); 1056 rg_flags &= ~GFS2_RDF_MASK; 1057 rgd->rd_flags &= GFS2_RDF_MASK; 1058 rgd->rd_flags |= rg_flags; 1059 rgd->rd_free = be32_to_cpu(str->rg_free); 1060 rgd->rd_dinodes = be32_to_cpu(str->rg_dinodes); 1061 rgd->rd_igeneration = be64_to_cpu(str->rg_igeneration); 1062 /* rd_data0, rd_data and rd_bitbytes already set from rindex */ 1063 } 1064 1065 static void gfs2_rgrp_ondisk2lvb(struct gfs2_rgrp_lvb *rgl, const void *buf) 1066 { 1067 const struct gfs2_rgrp *str = buf; 1068 1069 rgl->rl_magic = cpu_to_be32(GFS2_MAGIC); 1070 rgl->rl_flags = str->rg_flags; 1071 rgl->rl_free = str->rg_free; 1072 rgl->rl_dinodes = str->rg_dinodes; 1073 rgl->rl_igeneration = str->rg_igeneration; 1074 rgl->__pad = 0UL; 1075 } 1076 1077 static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf) 1078 { 1079 struct gfs2_rgrpd *next = gfs2_rgrpd_get_next(rgd); 1080 struct gfs2_rgrp *str = buf; 1081 u32 crc; 1082 1083 str->rg_flags = cpu_to_be32(rgd->rd_flags & ~GFS2_RDF_MASK); 1084 str->rg_free = cpu_to_be32(rgd->rd_free); 1085 str->rg_dinodes = cpu_to_be32(rgd->rd_dinodes); 1086 if (next == NULL) 1087 str->rg_skip = 0; 1088 else if (next->rd_addr > rgd->rd_addr) 1089 str->rg_skip = cpu_to_be32(next->rd_addr - rgd->rd_addr); 1090 str->rg_igeneration = cpu_to_be64(rgd->rd_igeneration); 1091 str->rg_data0 = cpu_to_be64(rgd->rd_data0); 1092 str->rg_data = cpu_to_be32(rgd->rd_data); 1093 str->rg_bitbytes = cpu_to_be32(rgd->rd_bitbytes); 1094 str->rg_crc = 0; 1095 crc = gfs2_disk_hash(buf, sizeof(struct gfs2_rgrp)); 1096 str->rg_crc = cpu_to_be32(crc); 1097 1098 memset(&str->rg_reserved, 0, sizeof(str->rg_reserved)); 1099 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, buf); 1100 } 1101 1102 static int gfs2_rgrp_lvb_valid(struct gfs2_rgrpd *rgd) 1103 { 1104 struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl; 1105 struct gfs2_rgrp *str = (struct gfs2_rgrp *)rgd->rd_bits[0].bi_bh->b_data; 1106 1107 if (rgl->rl_flags != str->rg_flags || rgl->rl_free != str->rg_free || 1108 rgl->rl_dinodes != str->rg_dinodes || 1109 rgl->rl_igeneration != str->rg_igeneration) 1110 return 0; 1111 return 1; 1112 } 1113 1114 static u32 count_unlinked(struct gfs2_rgrpd *rgd) 1115 { 1116 struct gfs2_bitmap *bi; 1117 const u32 length = rgd->rd_length; 1118 const u8 
*buffer = NULL; 1119 u32 i, goal, count = 0; 1120 1121 for (i = 0, bi = rgd->rd_bits; i < length; i++, bi++) { 1122 goal = 0; 1123 buffer = bi->bi_bh->b_data + bi->bi_offset; 1124 WARN_ON(!buffer_uptodate(bi->bi_bh)); 1125 while (goal < bi->bi_len * GFS2_NBBY) { 1126 goal = gfs2_bitfit(buffer, bi->bi_len, goal, 1127 GFS2_BLKST_UNLINKED); 1128 if (goal == BFITNOENT) 1129 break; 1130 count++; 1131 goal++; 1132 } 1133 } 1134 1135 return count; 1136 } 1137 1138 1139 /** 1140 * gfs2_rgrp_bh_get - Read in a RG's header and bitmaps 1141 * @rgd: the struct gfs2_rgrpd describing the RG to read in 1142 * 1143 * Read in all of a Resource Group's header and bitmap blocks. 1144 * Caller must eventually call gfs2_rgrp_relse() to free the bitmaps. 1145 * 1146 * Returns: errno 1147 */ 1148 1149 static int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd) 1150 { 1151 struct gfs2_sbd *sdp = rgd->rd_sbd; 1152 struct gfs2_glock *gl = rgd->rd_gl; 1153 unsigned int length = rgd->rd_length; 1154 struct gfs2_bitmap *bi; 1155 unsigned int x, y; 1156 int error; 1157 1158 if (rgd->rd_bits[0].bi_bh != NULL) 1159 return 0; 1160 1161 for (x = 0; x < length; x++) { 1162 bi = rgd->rd_bits + x; 1163 error = gfs2_meta_read(gl, rgd->rd_addr + x, 0, 0, &bi->bi_bh); 1164 if (error) 1165 goto fail; 1166 } 1167 1168 for (y = length; y--;) { 1169 bi = rgd->rd_bits + y; 1170 error = gfs2_meta_wait(sdp, bi->bi_bh); 1171 if (error) 1172 goto fail; 1173 if (gfs2_metatype_check(sdp, bi->bi_bh, y ? GFS2_METATYPE_RB : 1174 GFS2_METATYPE_RG)) { 1175 error = -EIO; 1176 goto fail; 1177 } 1178 } 1179 1180 if (!(rgd->rd_flags & GFS2_RDF_UPTODATE)) { 1181 for (x = 0; x < length; x++) 1182 clear_bit(GBF_FULL, &rgd->rd_bits[x].bi_flags); 1183 gfs2_rgrp_in(rgd, (rgd->rd_bits[0].bi_bh)->b_data); 1184 rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK); 1185 rgd->rd_free_clone = rgd->rd_free; 1186 /* max out the rgrp allocation failure point */ 1187 rgd->rd_extfail_pt = rgd->rd_free; 1188 } 1189 if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) { 1190 rgd->rd_rgl->rl_unlinked = cpu_to_be32(count_unlinked(rgd)); 1191 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, 1192 rgd->rd_bits[0].bi_bh->b_data); 1193 } 1194 else if (sdp->sd_args.ar_rgrplvb) { 1195 if (!gfs2_rgrp_lvb_valid(rgd)){ 1196 gfs2_consist_rgrpd(rgd); 1197 error = -EIO; 1198 goto fail; 1199 } 1200 if (rgd->rd_rgl->rl_unlinked == 0) 1201 rgd->rd_flags &= ~GFS2_RDF_CHECK; 1202 } 1203 return 0; 1204 1205 fail: 1206 while (x--) { 1207 bi = rgd->rd_bits + x; 1208 brelse(bi->bi_bh); 1209 bi->bi_bh = NULL; 1210 gfs2_assert_warn(sdp, !bi->bi_clone); 1211 } 1212 1213 return error; 1214 } 1215 1216 static int update_rgrp_lvb(struct gfs2_rgrpd *rgd) 1217 { 1218 u32 rl_flags; 1219 1220 if (rgd->rd_flags & GFS2_RDF_UPTODATE) 1221 return 0; 1222 1223 if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) 1224 return gfs2_rgrp_bh_get(rgd); 1225 1226 rl_flags = be32_to_cpu(rgd->rd_rgl->rl_flags); 1227 rl_flags &= ~GFS2_RDF_MASK; 1228 rgd->rd_flags &= GFS2_RDF_MASK; 1229 rgd->rd_flags |= (rl_flags | GFS2_RDF_UPTODATE | GFS2_RDF_CHECK); 1230 if (rgd->rd_rgl->rl_unlinked == 0) 1231 rgd->rd_flags &= ~GFS2_RDF_CHECK; 1232 rgd->rd_free = be32_to_cpu(rgd->rd_rgl->rl_free); 1233 rgd->rd_free_clone = rgd->rd_free; 1234 rgd->rd_dinodes = be32_to_cpu(rgd->rd_rgl->rl_dinodes); 1235 rgd->rd_igeneration = be64_to_cpu(rgd->rd_rgl->rl_igeneration); 1236 return 0; 1237 } 1238 1239 int gfs2_rgrp_go_lock(struct gfs2_holder *gh) 1240 { 1241 struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object; 1242 struct gfs2_sbd *sdp = rgd->rd_sbd; 1243 1244 
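        /*
         * With the rgrplvb mount option, a GL_SKIP holder relies on the
         * rgrp glock's lock value block rather than reading the header and
         * bitmap buffers here; they are read later only if actually needed.
         */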
if (gh->gh_flags & GL_SKIP && sdp->sd_args.ar_rgrplvb) 1245 return 0; 1246 return gfs2_rgrp_bh_get(rgd); 1247 } 1248 1249 /** 1250 * gfs2_rgrp_brelse - Release RG bitmaps read in with gfs2_rgrp_bh_get() 1251 * @rgd: The resource group 1252 * 1253 */ 1254 1255 void gfs2_rgrp_brelse(struct gfs2_rgrpd *rgd) 1256 { 1257 int x, length = rgd->rd_length; 1258 1259 for (x = 0; x < length; x++) { 1260 struct gfs2_bitmap *bi = rgd->rd_bits + x; 1261 if (bi->bi_bh) { 1262 brelse(bi->bi_bh); 1263 bi->bi_bh = NULL; 1264 } 1265 } 1266 1267 } 1268 1269 /** 1270 * gfs2_rgrp_go_unlock - Unlock a rgrp glock 1271 * @gh: The glock holder for the resource group 1272 * 1273 */ 1274 1275 void gfs2_rgrp_go_unlock(struct gfs2_holder *gh) 1276 { 1277 struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object; 1278 int demote_requested = test_bit(GLF_DEMOTE, &gh->gh_gl->gl_flags) | 1279 test_bit(GLF_PENDING_DEMOTE, &gh->gh_gl->gl_flags); 1280 1281 if (rgd && demote_requested) 1282 gfs2_rgrp_brelse(rgd); 1283 } 1284 1285 int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, 1286 struct buffer_head *bh, 1287 const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed) 1288 { 1289 struct super_block *sb = sdp->sd_vfs; 1290 u64 blk; 1291 sector_t start = 0; 1292 sector_t nr_blks = 0; 1293 int rv; 1294 unsigned int x; 1295 u32 trimmed = 0; 1296 u8 diff; 1297 1298 for (x = 0; x < bi->bi_len; x++) { 1299 const u8 *clone = bi->bi_clone ? bi->bi_clone : bi->bi_bh->b_data; 1300 clone += bi->bi_offset; 1301 clone += x; 1302 if (bh) { 1303 const u8 *orig = bh->b_data + bi->bi_offset + x; 1304 diff = ~(*orig | (*orig >> 1)) & (*clone | (*clone >> 1)); 1305 } else { 1306 diff = ~(*clone | (*clone >> 1)); 1307 } 1308 diff &= 0x55; 1309 if (diff == 0) 1310 continue; 1311 blk = offset + ((bi->bi_start + x) * GFS2_NBBY); 1312 while(diff) { 1313 if (diff & 1) { 1314 if (nr_blks == 0) 1315 goto start_new_extent; 1316 if ((start + nr_blks) != blk) { 1317 if (nr_blks >= minlen) { 1318 rv = sb_issue_discard(sb, 1319 start, nr_blks, 1320 GFP_NOFS, 0); 1321 if (rv) 1322 goto fail; 1323 trimmed += nr_blks; 1324 } 1325 nr_blks = 0; 1326 start_new_extent: 1327 start = blk; 1328 } 1329 nr_blks++; 1330 } 1331 diff >>= 2; 1332 blk++; 1333 } 1334 } 1335 if (nr_blks >= minlen) { 1336 rv = sb_issue_discard(sb, start, nr_blks, GFP_NOFS, 0); 1337 if (rv) 1338 goto fail; 1339 trimmed += nr_blks; 1340 } 1341 if (ptrimmed) 1342 *ptrimmed = trimmed; 1343 return 0; 1344 1345 fail: 1346 if (sdp->sd_args.ar_discard) 1347 fs_warn(sdp, "error %d on discard request, turning discards off for this filesystem\n", rv); 1348 sdp->sd_args.ar_discard = 0; 1349 return -EIO; 1350 } 1351 1352 /** 1353 * gfs2_fitrim - Generate discard requests for unused bits of the filesystem 1354 * @filp: Any file on the filesystem 1355 * @argp: Pointer to the arguments (also used to pass result) 1356 * 1357 * Returns: 0 on success, otherwise error code 1358 */ 1359 1360 int gfs2_fitrim(struct file *filp, void __user *argp) 1361 { 1362 struct inode *inode = file_inode(filp); 1363 struct gfs2_sbd *sdp = GFS2_SB(inode); 1364 struct request_queue *q = bdev_get_queue(sdp->sd_vfs->s_bdev); 1365 struct buffer_head *bh; 1366 struct gfs2_rgrpd *rgd; 1367 struct gfs2_rgrpd *rgd_end; 1368 struct gfs2_holder gh; 1369 struct fstrim_range r; 1370 int ret = 0; 1371 u64 amt; 1372 u64 trimmed = 0; 1373 u64 start, end, minlen; 1374 unsigned int x; 1375 unsigned bs_shift = sdp->sd_sb.sb_bsize_shift; 1376 1377 if (!capable(CAP_SYS_ADMIN)) 1378 return -EPERM; 1379 1380 if (!blk_queue_discard(q)) 1381 return 
-EOPNOTSUPP; 1382 1383 if (copy_from_user(&r, argp, sizeof(r))) 1384 return -EFAULT; 1385 1386 ret = gfs2_rindex_update(sdp); 1387 if (ret) 1388 return ret; 1389 1390 start = r.start >> bs_shift; 1391 end = start + (r.len >> bs_shift); 1392 minlen = max_t(u64, r.minlen, 1393 q->limits.discard_granularity) >> bs_shift; 1394 1395 if (end <= start || minlen > sdp->sd_max_rg_data) 1396 return -EINVAL; 1397 1398 rgd = gfs2_blk2rgrpd(sdp, start, 0); 1399 rgd_end = gfs2_blk2rgrpd(sdp, end, 0); 1400 1401 if ((gfs2_rgrpd_get_first(sdp) == gfs2_rgrpd_get_next(rgd_end)) 1402 && (start > rgd_end->rd_data0 + rgd_end->rd_data)) 1403 return -EINVAL; /* start is beyond the end of the fs */ 1404 1405 while (1) { 1406 1407 ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &gh); 1408 if (ret) 1409 goto out; 1410 1411 if (!(rgd->rd_flags & GFS2_RGF_TRIMMED)) { 1412 /* Trim each bitmap in the rgrp */ 1413 for (x = 0; x < rgd->rd_length; x++) { 1414 struct gfs2_bitmap *bi = rgd->rd_bits + x; 1415 ret = gfs2_rgrp_send_discards(sdp, 1416 rgd->rd_data0, NULL, bi, minlen, 1417 &amt); 1418 if (ret) { 1419 gfs2_glock_dq_uninit(&gh); 1420 goto out; 1421 } 1422 trimmed += amt; 1423 } 1424 1425 /* Mark rgrp as having been trimmed */ 1426 ret = gfs2_trans_begin(sdp, RES_RG_HDR, 0); 1427 if (ret == 0) { 1428 bh = rgd->rd_bits[0].bi_bh; 1429 rgd->rd_flags |= GFS2_RGF_TRIMMED; 1430 gfs2_trans_add_meta(rgd->rd_gl, bh); 1431 gfs2_rgrp_out(rgd, bh->b_data); 1432 gfs2_trans_end(sdp); 1433 } 1434 } 1435 gfs2_glock_dq_uninit(&gh); 1436 1437 if (rgd == rgd_end) 1438 break; 1439 1440 rgd = gfs2_rgrpd_get_next(rgd); 1441 } 1442 1443 out: 1444 r.len = trimmed << bs_shift; 1445 if (copy_to_user(argp, &r, sizeof(r))) 1446 return -EFAULT; 1447 1448 return ret; 1449 } 1450 1451 /** 1452 * rs_insert - insert a new multi-block reservation into the rgrp's rb_tree 1453 * @ip: the inode structure 1454 * 1455 */ 1456 static void rs_insert(struct gfs2_inode *ip) 1457 { 1458 struct rb_node **newn, *parent = NULL; 1459 int rc; 1460 struct gfs2_blkreserv *rs = &ip->i_res; 1461 struct gfs2_rgrpd *rgd = rs->rs_rbm.rgd; 1462 u64 fsblock = gfs2_rbm_to_block(&rs->rs_rbm); 1463 1464 BUG_ON(gfs2_rs_active(rs)); 1465 1466 spin_lock(&rgd->rd_rsspin); 1467 newn = &rgd->rd_rstree.rb_node; 1468 while (*newn) { 1469 struct gfs2_blkreserv *cur = 1470 rb_entry(*newn, struct gfs2_blkreserv, rs_node); 1471 1472 parent = *newn; 1473 rc = rs_cmp(fsblock, rs->rs_free, cur); 1474 if (rc > 0) 1475 newn = &((*newn)->rb_right); 1476 else if (rc < 0) 1477 newn = &((*newn)->rb_left); 1478 else { 1479 spin_unlock(&rgd->rd_rsspin); 1480 WARN_ON(1); 1481 return; 1482 } 1483 } 1484 1485 rb_link_node(&rs->rs_node, parent, newn); 1486 rb_insert_color(&rs->rs_node, &rgd->rd_rstree); 1487 1488 /* Do our rgrp accounting for the reservation */ 1489 rgd->rd_reserved += rs->rs_free; /* blocks reserved */ 1490 spin_unlock(&rgd->rd_rsspin); 1491 trace_gfs2_rs(rs, TRACE_RS_INSERT); 1492 } 1493 1494 /** 1495 * rgd_free - return the number of free blocks we can allocate. 1496 * @rgd: the resource group 1497 * 1498 * This function returns the number of free blocks for an rgrp. 1499 * That's the clone-free blocks (blocks that are free, not including those 1500 * still being used for unlinked files that haven't been deleted.) 1501 * 1502 * It also subtracts any blocks reserved by someone else, but does not 1503 * include free blocks that are still part of our current reservation, 1504 * because obviously we can (and will) allocate them. 
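 * For example (illustrative numbers): with rd_free_clone = 100,
 * rd_reserved = 30 and rs->rs_free = 10, the blocks reserved by others
 * amount to 30 - 10 = 20, so this returns 100 - 20 = 80 allocatable blocks.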
1505 */ 1506 static inline u32 rgd_free(struct gfs2_rgrpd *rgd, struct gfs2_blkreserv *rs) 1507 { 1508 u32 tot_reserved, tot_free; 1509 1510 if (WARN_ON_ONCE(rgd->rd_reserved < rs->rs_free)) 1511 return 0; 1512 tot_reserved = rgd->rd_reserved - rs->rs_free; 1513 1514 if (rgd->rd_free_clone < tot_reserved) 1515 tot_reserved = 0; 1516 1517 tot_free = rgd->rd_free_clone - tot_reserved; 1518 1519 return tot_free; 1520 } 1521 1522 /** 1523 * rg_mblk_search - find a group of multiple free blocks to form a reservation 1524 * @rgd: the resource group descriptor 1525 * @ip: pointer to the inode for which we're reserving blocks 1526 * @ap: the allocation parameters 1527 * 1528 */ 1529 1530 static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, 1531 const struct gfs2_alloc_parms *ap) 1532 { 1533 struct gfs2_rbm rbm = { .rgd = rgd, }; 1534 u64 goal; 1535 struct gfs2_blkreserv *rs = &ip->i_res; 1536 u32 extlen; 1537 u32 free_blocks = rgd_free(rgd, rs); 1538 int ret; 1539 struct inode *inode = &ip->i_inode; 1540 1541 if (S_ISDIR(inode->i_mode)) 1542 extlen = 1; 1543 else { 1544 extlen = max_t(u32, atomic_read(&rs->rs_sizehint), ap->target); 1545 extlen = clamp(extlen, RGRP_RSRV_MINBLKS, free_blocks); 1546 } 1547 if ((rgd->rd_free_clone < rgd->rd_reserved) || (free_blocks < extlen)) 1548 return; 1549 1550 /* Find bitmap block that contains bits for goal block */ 1551 if (rgrp_contains_block(rgd, ip->i_goal)) 1552 goal = ip->i_goal; 1553 else 1554 goal = rgd->rd_last_alloc + rgd->rd_data0; 1555 1556 if (WARN_ON(gfs2_rbm_from_block(&rbm, goal))) 1557 return; 1558 1559 ret = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &extlen, ip, true); 1560 if (ret == 0) { 1561 rs->rs_rbm = rbm; 1562 rs->rs_free = extlen; 1563 rs_insert(ip); 1564 } else { 1565 if (goal == rgd->rd_last_alloc + rgd->rd_data0) 1566 rgd->rd_last_alloc = 0; 1567 } 1568 } 1569 1570 /** 1571 * gfs2_next_unreserved_block - Return next block that is not reserved 1572 * @rgd: The resource group 1573 * @block: The starting block 1574 * @length: The required length 1575 * @ip: Ignore any reservations for this inode 1576 * 1577 * If the block does not appear in any reservation, then return the 1578 * block number unchanged. If it does appear in the reservation, then 1579 * keep looking through the tree of reservations in order to find the 1580 * first block number which is not reserved. 
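 * For example, if the requested range starts inside a reservation whose
 * first block is S and which covers rs_free blocks, the search moves on
 * to S + rs_free and repeats for as long as the new position still
 * overlaps a following reservation (any reservation owned by @ip is
 * ignored).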
1581 */ 1582 1583 static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block, 1584 u32 length, 1585 const struct gfs2_inode *ip) 1586 { 1587 struct gfs2_blkreserv *rs; 1588 struct rb_node *n; 1589 int rc; 1590 1591 spin_lock(&rgd->rd_rsspin); 1592 n = rgd->rd_rstree.rb_node; 1593 while (n) { 1594 rs = rb_entry(n, struct gfs2_blkreserv, rs_node); 1595 rc = rs_cmp(block, length, rs); 1596 if (rc < 0) 1597 n = n->rb_left; 1598 else if (rc > 0) 1599 n = n->rb_right; 1600 else 1601 break; 1602 } 1603 1604 if (n) { 1605 while ((rs_cmp(block, length, rs) == 0) && (&ip->i_res != rs)) { 1606 block = gfs2_rbm_to_block(&rs->rs_rbm) + rs->rs_free; 1607 n = n->rb_right; 1608 if (n == NULL) 1609 break; 1610 rs = rb_entry(n, struct gfs2_blkreserv, rs_node); 1611 } 1612 } 1613 1614 spin_unlock(&rgd->rd_rsspin); 1615 return block; 1616 } 1617 1618 /** 1619 * gfs2_reservation_check_and_update - Check for reservations during block alloc 1620 * @rbm: The current position in the resource group 1621 * @ip: The inode for which we are searching for blocks 1622 * @minext: The minimum extent length 1623 * @maxext: A pointer to the maximum extent structure 1624 * 1625 * This checks the current position in the rgrp to see whether there is 1626 * a reservation covering this block. If not then this function is a 1627 * no-op. If there is, then the position is moved to the end of the 1628 * contiguous reservation(s) so that we are pointing at the first 1629 * non-reserved block. 1630 * 1631 * Returns: 0 if no reservation, 1 if @rbm has changed, otherwise an error 1632 */ 1633 1634 static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm, 1635 const struct gfs2_inode *ip, 1636 u32 minext, 1637 struct gfs2_extent *maxext) 1638 { 1639 u64 block = gfs2_rbm_to_block(rbm); 1640 u32 extlen = 1; 1641 u64 nblock; 1642 int ret; 1643 1644 /* 1645 * If we have a minimum extent length, then skip over any extent 1646 * which is less than the min extent length in size. 1647 */ 1648 if (minext) { 1649 extlen = gfs2_free_extlen(rbm, minext); 1650 if (extlen <= maxext->len) 1651 goto fail; 1652 } 1653 1654 /* 1655 * Check the extent which has been found against the reservations 1656 * and skip if parts of it are already reserved 1657 */ 1658 nblock = gfs2_next_unreserved_block(rbm->rgd, block, extlen, ip); 1659 if (nblock == block) { 1660 if (!minext || extlen >= minext) 1661 return 0; 1662 1663 if (extlen > maxext->len) { 1664 maxext->len = extlen; 1665 maxext->rbm = *rbm; 1666 } 1667 fail: 1668 nblock = block + extlen; 1669 } 1670 ret = gfs2_rbm_from_block(rbm, nblock); 1671 if (ret < 0) 1672 return ret; 1673 return 1; 1674 } 1675 1676 /** 1677 * gfs2_rbm_find - Look for blocks of a particular state 1678 * @rbm: Value/result starting position and final position 1679 * @state: The state which we want to find 1680 * @minext: Pointer to the requested extent length (NULL for a single block) 1681 * This is updated to be the actual reservation size. 1682 * @ip: If set, check for reservations 1683 * @nowrap: Stop looking at the end of the rgrp, rather than wrapping 1684 * around until we've reached the starting point. 1685 * 1686 * Side effects: 1687 * - If looking for free blocks, we set GBF_FULL on each bitmap which 1688 * has no free blocks in it. 1689 * - If looking for free blocks, we set rd_extfail_pt on each rgrp which 1690 * has come up short on a free block search. 
1691 * 1692 * Returns: 0 on success, -ENOSPC if there is no block of the requested state 1693 */ 1694 1695 static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext, 1696 const struct gfs2_inode *ip, bool nowrap) 1697 { 1698 struct buffer_head *bh; 1699 int initial_bii; 1700 u32 initial_offset; 1701 int first_bii = rbm->bii; 1702 u32 first_offset = rbm->offset; 1703 u32 offset; 1704 u8 *buffer; 1705 int n = 0; 1706 int iters = rbm->rgd->rd_length; 1707 int ret; 1708 struct gfs2_bitmap *bi; 1709 struct gfs2_extent maxext = { .rbm.rgd = rbm->rgd, }; 1710 1711 /* If we are not starting at the beginning of a bitmap, then we 1712 * need to add one to the bitmap count to ensure that we search 1713 * the starting bitmap twice. 1714 */ 1715 if (rbm->offset != 0) 1716 iters++; 1717 1718 while(1) { 1719 bi = rbm_bi(rbm); 1720 if ((ip == NULL || !gfs2_rs_active(&ip->i_res)) && 1721 test_bit(GBF_FULL, &bi->bi_flags) && 1722 (state == GFS2_BLKST_FREE)) 1723 goto next_bitmap; 1724 1725 bh = bi->bi_bh; 1726 buffer = bh->b_data + bi->bi_offset; 1727 WARN_ON(!buffer_uptodate(bh)); 1728 if (state != GFS2_BLKST_UNLINKED && bi->bi_clone) 1729 buffer = bi->bi_clone + bi->bi_offset; 1730 initial_offset = rbm->offset; 1731 offset = gfs2_bitfit(buffer, bi->bi_len, rbm->offset, state); 1732 if (offset == BFITNOENT) 1733 goto bitmap_full; 1734 rbm->offset = offset; 1735 if (ip == NULL) 1736 return 0; 1737 1738 initial_bii = rbm->bii; 1739 ret = gfs2_reservation_check_and_update(rbm, ip, 1740 minext ? *minext : 0, 1741 &maxext); 1742 if (ret == 0) 1743 return 0; 1744 if (ret > 0) { 1745 n += (rbm->bii - initial_bii); 1746 goto next_iter; 1747 } 1748 if (ret == -E2BIG) { 1749 rbm->bii = 0; 1750 rbm->offset = 0; 1751 n += (rbm->bii - initial_bii); 1752 goto res_covered_end_of_rgrp; 1753 } 1754 return ret; 1755 1756 bitmap_full: /* Mark bitmap as full and fall through */ 1757 if ((state == GFS2_BLKST_FREE) && initial_offset == 0) 1758 set_bit(GBF_FULL, &bi->bi_flags); 1759 1760 next_bitmap: /* Find next bitmap in the rgrp */ 1761 rbm->offset = 0; 1762 rbm->bii++; 1763 if (rbm->bii == rbm->rgd->rd_length) 1764 rbm->bii = 0; 1765 res_covered_end_of_rgrp: 1766 if ((rbm->bii == 0) && nowrap) 1767 break; 1768 n++; 1769 next_iter: 1770 if (n >= iters) 1771 break; 1772 } 1773 1774 if (minext == NULL || state != GFS2_BLKST_FREE) 1775 return -ENOSPC; 1776 1777 /* If the extent was too small, and it's smaller than the smallest 1778 to have failed before, remember for future reference that it's 1779 useless to search this rgrp again for this amount or more. */ 1780 if ((first_offset == 0) && (first_bii == 0) && 1781 (*minext < rbm->rgd->rd_extfail_pt)) 1782 rbm->rgd->rd_extfail_pt = *minext; 1783 1784 /* If the maximum extent we found is big enough to fulfill the 1785 minimum requirements, use it anyway. */ 1786 if (maxext.len) { 1787 *rbm = maxext.rbm; 1788 *minext = maxext.len; 1789 return 0; 1790 } 1791 1792 return -ENOSPC; 1793 } 1794 1795 /** 1796 * try_rgrp_unlink - Look for any unlinked, allocated, but unused inodes 1797 * @rgd: The rgrp 1798 * @last_unlinked: block address of the last dinode we unlinked 1799 * @skip: block address we should explicitly not unlink 1800 * 1801 * Returns: 0 if no error 1802 * The inode, if one has been found, in inode. 
 */

static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip)
{
        u64 block;
        struct gfs2_sbd *sdp = rgd->rd_sbd;
        struct gfs2_glock *gl;
        struct gfs2_inode *ip;
        int error;
        int found = 0;
        struct gfs2_rbm rbm = { .rgd = rgd, .bii = 0, .offset = 0 };

        while (1) {
                down_write(&sdp->sd_log_flush_lock);
                error = gfs2_rbm_find(&rbm, GFS2_BLKST_UNLINKED, NULL, NULL,
                                      true);
                up_write(&sdp->sd_log_flush_lock);
                if (error == -ENOSPC)
                        break;
                if (WARN_ON_ONCE(error))
                        break;

                block = gfs2_rbm_to_block(&rbm);
                if (gfs2_rbm_from_block(&rbm, block + 1))
                        break;
                if (*last_unlinked != NO_BLOCK && block <= *last_unlinked)
                        continue;
                if (block == skip)
                        continue;
                *last_unlinked = block;

                error = gfs2_glock_get(sdp, block, &gfs2_iopen_glops, CREATE, &gl);
                if (error)
                        continue;

                /* If the inode is already in cache, we can ignore it here
                 * because the existing inode disposal code will deal with
                 * it when all refs have gone away. Accessing gl_object like
                 * this is not safe in general. Here it is ok because we do
                 * not dereference the pointer, and we only need an approx
                 * answer to whether it is NULL or not.
                 */
                ip = gl->gl_object;

                if (ip || queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0)
                        gfs2_glock_put(gl);
                else
                        found++;

                /* Limit reclaim to sensible number of tasks */
                if (found > NR_CPUS)
                        return;
        }

        rgd->rd_flags &= ~GFS2_RDF_CHECK;
        return;
}

/**
 * gfs2_rgrp_congested - Use stats to figure out whether an rgrp is congested
 * @rgd: The rgrp in question
 * @loops: An indication of how picky we can be (0=very, 1=less so)
 *
 * This function uses the recently added glock statistics in order to
 * figure out whether a particular resource group is suffering from
 * contention from multiple nodes. This is done purely on the basis
 * of timings, since this is the only data we have to work with and
 * our aim here is to reject a resource group which is highly contended
 * but (very important) not to do this too often in order to ensure that
 * we do not end up introducing fragmentation by changing resource
 * groups when not actually required.
 *
 * The calculation is fairly simple: we want to know whether the SRTTB
 * (i.e. smoothed round trip time for blocking operations) to acquire
 * the lock for this rgrp's glock is significantly greater than the
 * time taken for resource groups on average. We introduce a margin in
 * the form of the variable @var which is computed as the sum of the two
 * respective variances, and multiplied by a factor depending on @loops
 * and whether we have a lot of data to base the decision on. This is
 * then tested against the square difference of the means in order to
 * decide whether the result is statistically significant or not.
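 * For instance (illustrative values): if the all-CPU average SRTTB is 100
 * and this rgrp's SRTTB is 150, then srttb_diff = -50 and sqr_diff = 2500;
 * the rgrp is only deemed congested if srttb_diff is negative and sqr_diff
 * exceeds the (scaled) sum of the variances, var.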
1884 * 1885 * Returns: A boolean verdict on the congestion status 1886 */ 1887 1888 static bool gfs2_rgrp_congested(const struct gfs2_rgrpd *rgd, int loops) 1889 { 1890 const struct gfs2_glock *gl = rgd->rd_gl; 1891 const struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; 1892 struct gfs2_lkstats *st; 1893 u64 r_dcount, l_dcount; 1894 u64 l_srttb, a_srttb = 0; 1895 s64 srttb_diff; 1896 u64 sqr_diff; 1897 u64 var; 1898 int cpu, nonzero = 0; 1899 1900 preempt_disable(); 1901 for_each_present_cpu(cpu) { 1902 st = &per_cpu_ptr(sdp->sd_lkstats, cpu)->lkstats[LM_TYPE_RGRP]; 1903 if (st->stats[GFS2_LKS_SRTTB]) { 1904 a_srttb += st->stats[GFS2_LKS_SRTTB]; 1905 nonzero++; 1906 } 1907 } 1908 st = &this_cpu_ptr(sdp->sd_lkstats)->lkstats[LM_TYPE_RGRP]; 1909 if (nonzero) 1910 do_div(a_srttb, nonzero); 1911 r_dcount = st->stats[GFS2_LKS_DCOUNT]; 1912 var = st->stats[GFS2_LKS_SRTTVARB] + 1913 gl->gl_stats.stats[GFS2_LKS_SRTTVARB]; 1914 preempt_enable(); 1915 1916 l_srttb = gl->gl_stats.stats[GFS2_LKS_SRTTB]; 1917 l_dcount = gl->gl_stats.stats[GFS2_LKS_DCOUNT]; 1918 1919 if ((l_dcount < 1) || (r_dcount < 1) || (a_srttb == 0)) 1920 return false; 1921 1922 srttb_diff = a_srttb - l_srttb; 1923 sqr_diff = srttb_diff * srttb_diff; 1924 1925 var *= 2; 1926 if (l_dcount < 8 || r_dcount < 8) 1927 var *= 2; 1928 if (loops == 1) 1929 var *= 2; 1930 1931 return ((srttb_diff < 0) && (sqr_diff > var)); 1932 } 1933 1934 /** 1935 * gfs2_rgrp_used_recently 1936 * @rs: The block reservation with the rgrp to test 1937 * @msecs: The time limit in milliseconds 1938 * 1939 * Returns: True if the rgrp glock has been used within the time limit 1940 */ 1941 static bool gfs2_rgrp_used_recently(const struct gfs2_blkreserv *rs, 1942 u64 msecs) 1943 { 1944 u64 tdiff; 1945 1946 tdiff = ktime_to_ns(ktime_sub(ktime_get_real(), 1947 rs->rs_rbm.rgd->rd_gl->gl_dstamp)); 1948 1949 return tdiff > (msecs * 1000 * 1000); 1950 } 1951 1952 static u32 gfs2_orlov_skip(const struct gfs2_inode *ip) 1953 { 1954 const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1955 u32 skip; 1956 1957 get_random_bytes(&skip, sizeof(skip)); 1958 return skip % sdp->sd_rgrps; 1959 } 1960 1961 static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *begin) 1962 { 1963 struct gfs2_rgrpd *rgd = *pos; 1964 struct gfs2_sbd *sdp = rgd->rd_sbd; 1965 1966 rgd = gfs2_rgrpd_get_next(rgd); 1967 if (rgd == NULL) 1968 rgd = gfs2_rgrpd_get_first(sdp); 1969 *pos = rgd; 1970 if (rgd != begin) /* If we didn't wrap */ 1971 return true; 1972 return false; 1973 } 1974 1975 /** 1976 * fast_to_acquire - determine if a resource group will be fast to acquire 1977 * 1978 * If this is one of our preferred rgrps, it should be quicker to acquire, 1979 * because we tried to set ourselves up as dlm lock master. 1980 */ 1981 static inline int fast_to_acquire(struct gfs2_rgrpd *rgd) 1982 { 1983 struct gfs2_glock *gl = rgd->rd_gl; 1984 1985 if (gl->gl_state != LM_ST_UNLOCKED && list_empty(&gl->gl_holders) && 1986 !test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags) && 1987 !test_bit(GLF_DEMOTE, &gl->gl_flags)) 1988 return 1; 1989 if (rgd->rd_flags & GFS2_RDF_PREFERRED) 1990 return 1; 1991 return 0; 1992 } 1993 1994 /** 1995 * gfs2_inplace_reserve - Reserve space in the filesystem 1996 * @ip: the inode to reserve space for 1997 * @ap: the allocation parameters 1998 * 1999 * We try our best to find an rgrp that has at least ap->target blocks 2000 * available. After a couple of passes (loops == 2), the prospects of finding 2001 * such an rgrp diminish. 
 * At this stage, we return the first rgrp that has at least
 * ap->min_target blocks available. Either way, we set ap->allowed to
 * the number of blocks available in the chosen rgrp.
 *
 * Returns: 0 on success,
 *          -ENOSPC if a suitable rgrp can't be found
 *          errno otherwise
 */

int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct gfs2_rgrpd *begin = NULL;
	struct gfs2_blkreserv *rs = &ip->i_res;
	int error = 0, rg_locked, flags = 0;
	u64 last_unlinked = NO_BLOCK;
	int loops = 0;
	u32 free_blocks, skip = 0;

	if (sdp->sd_args.ar_rgrplvb)
		flags |= GL_SKIP;
	if (gfs2_assert_warn(sdp, ap->target))
		return -EINVAL;
	if (gfs2_rs_active(rs)) {
		begin = rs->rs_rbm.rgd;
	} else if (rs->rs_rbm.rgd &&
		   rgrp_contains_block(rs->rs_rbm.rgd, ip->i_goal)) {
		begin = rs->rs_rbm.rgd;
	} else {
		check_and_update_goal(ip);
		rs->rs_rbm.rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1);
	}
	if (S_ISDIR(ip->i_inode.i_mode) && (ap->aflags & GFS2_AF_ORLOV))
		skip = gfs2_orlov_skip(ip);
	if (rs->rs_rbm.rgd == NULL)
		return -EBADSLT;

	while (loops < 3) {
		rg_locked = 1;

		if (!gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) {
			rg_locked = 0;
			if (skip && skip--)
				goto next_rgrp;
			if (!gfs2_rs_active(rs)) {
				if (loops == 0 &&
				    !fast_to_acquire(rs->rs_rbm.rgd))
					goto next_rgrp;
				if ((loops < 2) &&
				    gfs2_rgrp_used_recently(rs, 1000) &&
				    gfs2_rgrp_congested(rs->rs_rbm.rgd, loops))
					goto next_rgrp;
			}
			error = gfs2_glock_nq_init(rs->rs_rbm.rgd->rd_gl,
						   LM_ST_EXCLUSIVE, flags,
						   &rs->rs_rgd_gh);
			if (unlikely(error))
				return error;
			if (!gfs2_rs_active(rs) && (loops < 2) &&
			    gfs2_rgrp_congested(rs->rs_rbm.rgd, loops))
				goto skip_rgrp;
			if (sdp->sd_args.ar_rgrplvb) {
				error = update_rgrp_lvb(rs->rs_rbm.rgd);
				if (unlikely(error)) {
					gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
					return error;
				}
			}
		}

		/* Skip unusable resource groups */
		if ((rs->rs_rbm.rgd->rd_flags & (GFS2_RGF_NOALLOC |
						 GFS2_RDF_ERROR)) ||
		    (loops == 0 && ap->target > rs->rs_rbm.rgd->rd_extfail_pt))
			goto skip_rgrp;

		if (sdp->sd_args.ar_rgrplvb)
			gfs2_rgrp_bh_get(rs->rs_rbm.rgd);

		/* Get a reservation if we don't already have one */
		if (!gfs2_rs_active(rs))
			rg_mblk_search(rs->rs_rbm.rgd, ip, ap);

		/* Skip rgrps when we can't get a reservation on first pass */
		if (!gfs2_rs_active(rs) && (loops < 1))
			goto check_rgrp;

		/* If rgrp has enough free space, use it */
		free_blocks = rgd_free(rs->rs_rbm.rgd, rs);
		if (free_blocks >= ap->target ||
		    (loops == 2 && ap->min_target &&
		     free_blocks >= ap->min_target)) {
			ap->allowed = free_blocks;
			return 0;
		}
check_rgrp:
		/* Check for unlinked inodes which can be reclaimed */
		if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK)
			try_rgrp_unlink(rs->rs_rbm.rgd, &last_unlinked,
					ip->i_no_addr);
skip_rgrp:
		/* Drop reservation, if we couldn't use reserved rgrp */
		if (gfs2_rs_active(rs))
			gfs2_rs_deltree(rs);

		/* Unlock rgrp if required */
		if (!rg_locked)
			gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
next_rgrp:
		/* Find the next rgrp, and continue looking */
		if (gfs2_select_rgrp(&rs->rs_rbm.rgd, begin))
			continue;
		if (skip)
			continue;

		/* If we've scanned all the rgrps, but found no free blocks
		 * then this checks for some less likely conditions before
		 * trying again.
		 */
		loops++;
		/* Check that fs hasn't grown if writing to rindex */
		if (ip == GFS2_I(sdp->sd_rindex) && !sdp->sd_rindex_uptodate) {
			error = gfs2_ri_update(ip);
			if (error)
				return error;
		}
		/* Flushing the log may release space */
		if (loops == 2)
			gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
				       GFS2_LFC_INPLACE_RESERVE);
	}

	return -ENOSPC;
}

/**
 * gfs2_inplace_release - release an inplace reservation
 * @ip: the inode the reservation was taken out on
 *
 * Release a reservation made by gfs2_inplace_reserve().
 */

void gfs2_inplace_release(struct gfs2_inode *ip)
{
	struct gfs2_blkreserv *rs = &ip->i_res;

	if (gfs2_holder_initialized(&rs->rs_rgd_gh))
		gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
}

/**
 * gfs2_alloc_extent - allocate an extent from a given bitmap
 * @rbm: the resource group information
 * @dinode: TRUE if the first block we allocate is for a dinode
 * @n: The extent length (value/result)
 *
 * Add the bitmap buffer to the transaction.
 * Set the found bits to GFS2_BLKST_DINODE (first block only, if @dinode is
 * true) or GFS2_BLKST_USED to change the blocks' allocation state.
 */
static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode,
			      unsigned int *n)
{
	struct gfs2_rbm pos = { .rgd = rbm->rgd, };
	const unsigned int elen = *n;
	u64 block;
	int ret;

	*n = 1;
	block = gfs2_rbm_to_block(rbm);
	gfs2_trans_add_meta(rbm->rgd->rd_gl, rbm_bi(rbm)->bi_bh);
	gfs2_setbit(rbm, true, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
	block++;
	while (*n < elen) {
		ret = gfs2_rbm_from_block(&pos, block);
		if (ret || gfs2_testbit(&pos, true) != GFS2_BLKST_FREE)
			break;
		gfs2_trans_add_meta(pos.rgd->rd_gl, rbm_bi(&pos)->bi_bh);
		gfs2_setbit(&pos, true, GFS2_BLKST_USED);
		(*n)++;
		block++;
	}
}

/**
 * rgblk_free - Change alloc state of given block(s)
 * @sdp: the filesystem
 * @bstart: the start of a run of blocks to free
 * @blen: the length of the block run (all must lie within ONE RG!)
2189 * @new_state: GFS2_BLKST_XXX the after-allocation block state 2190 * 2191 * Returns: Resource group containing the block(s) 2192 */ 2193 2194 static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart, 2195 u32 blen, unsigned char new_state) 2196 { 2197 struct gfs2_rbm rbm; 2198 struct gfs2_bitmap *bi, *bi_prev = NULL; 2199 2200 rbm.rgd = gfs2_blk2rgrpd(sdp, bstart, 1); 2201 if (!rbm.rgd) { 2202 if (gfs2_consist(sdp)) 2203 fs_err(sdp, "block = %llu\n", (unsigned long long)bstart); 2204 return NULL; 2205 } 2206 2207 gfs2_rbm_from_block(&rbm, bstart); 2208 while (blen--) { 2209 bi = rbm_bi(&rbm); 2210 if (bi != bi_prev) { 2211 if (!bi->bi_clone) { 2212 bi->bi_clone = kmalloc(bi->bi_bh->b_size, 2213 GFP_NOFS | __GFP_NOFAIL); 2214 memcpy(bi->bi_clone + bi->bi_offset, 2215 bi->bi_bh->b_data + bi->bi_offset, 2216 bi->bi_len); 2217 } 2218 gfs2_trans_add_meta(rbm.rgd->rd_gl, bi->bi_bh); 2219 bi_prev = bi; 2220 } 2221 gfs2_setbit(&rbm, false, new_state); 2222 gfs2_rbm_incr(&rbm); 2223 } 2224 2225 return rbm.rgd; 2226 } 2227 2228 /** 2229 * gfs2_rgrp_dump - print out an rgrp 2230 * @seq: The iterator 2231 * @gl: The glock in question 2232 * 2233 */ 2234 2235 void gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl) 2236 { 2237 struct gfs2_rgrpd *rgd = gl->gl_object; 2238 struct gfs2_blkreserv *trs; 2239 const struct rb_node *n; 2240 2241 if (rgd == NULL) 2242 return; 2243 gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u r:%u e:%u\n", 2244 (unsigned long long)rgd->rd_addr, rgd->rd_flags, 2245 rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes, 2246 rgd->rd_reserved, rgd->rd_extfail_pt); 2247 spin_lock(&rgd->rd_rsspin); 2248 for (n = rb_first(&rgd->rd_rstree); n; n = rb_next(&trs->rs_node)) { 2249 trs = rb_entry(n, struct gfs2_blkreserv, rs_node); 2250 dump_rs(seq, trs); 2251 } 2252 spin_unlock(&rgd->rd_rsspin); 2253 } 2254 2255 static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd) 2256 { 2257 struct gfs2_sbd *sdp = rgd->rd_sbd; 2258 fs_warn(sdp, "rgrp %llu has an error, marking it readonly until umount\n", 2259 (unsigned long long)rgd->rd_addr); 2260 fs_warn(sdp, "umount on all nodes and run fsck.gfs2 to fix the error\n"); 2261 gfs2_rgrp_dump(NULL, rgd->rd_gl); 2262 rgd->rd_flags |= GFS2_RDF_ERROR; 2263 } 2264 2265 /** 2266 * gfs2_adjust_reservation - Adjust (or remove) a reservation after allocation 2267 * @ip: The inode we have just allocated blocks for 2268 * @rbm: The start of the allocated blocks 2269 * @len: The extent length 2270 * 2271 * Adjusts a reservation after an allocation has taken place. If the 2272 * reservation does not match the allocation, or if it is now empty 2273 * then it is removed. 2274 */ 2275 2276 static void gfs2_adjust_reservation(struct gfs2_inode *ip, 2277 const struct gfs2_rbm *rbm, unsigned len) 2278 { 2279 struct gfs2_blkreserv *rs = &ip->i_res; 2280 struct gfs2_rgrpd *rgd = rbm->rgd; 2281 unsigned rlen; 2282 u64 block; 2283 int ret; 2284 2285 spin_lock(&rgd->rd_rsspin); 2286 if (gfs2_rs_active(rs)) { 2287 if (gfs2_rbm_eq(&rs->rs_rbm, rbm)) { 2288 block = gfs2_rbm_to_block(rbm); 2289 ret = gfs2_rbm_from_block(&rs->rs_rbm, block + len); 2290 rlen = min(rs->rs_free, len); 2291 rs->rs_free -= rlen; 2292 rgd->rd_reserved -= rlen; 2293 trace_gfs2_rs(rs, TRACE_RS_CLAIM); 2294 if (rs->rs_free && !ret) 2295 goto out; 2296 /* We used up our block reservation, so we should 2297 reserve more blocks next time. 
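			   (Adding RGRP_RSRV_ADDBLKS to rs_sizehint below is
			   intended to make the next reservation request
			   correspondingly larger.)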
*/ 2298 atomic_add(RGRP_RSRV_ADDBLKS, &rs->rs_sizehint); 2299 } 2300 __rs_deltree(rs); 2301 } 2302 out: 2303 spin_unlock(&rgd->rd_rsspin); 2304 } 2305 2306 /** 2307 * gfs2_set_alloc_start - Set starting point for block allocation 2308 * @rbm: The rbm which will be set to the required location 2309 * @ip: The gfs2 inode 2310 * @dinode: Flag to say if allocation includes a new inode 2311 * 2312 * This sets the starting point from the reservation if one is active 2313 * otherwise it falls back to guessing a start point based on the 2314 * inode's goal block or the last allocation point in the rgrp. 2315 */ 2316 2317 static void gfs2_set_alloc_start(struct gfs2_rbm *rbm, 2318 const struct gfs2_inode *ip, bool dinode) 2319 { 2320 u64 goal; 2321 2322 if (gfs2_rs_active(&ip->i_res)) { 2323 *rbm = ip->i_res.rs_rbm; 2324 return; 2325 } 2326 2327 if (!dinode && rgrp_contains_block(rbm->rgd, ip->i_goal)) 2328 goal = ip->i_goal; 2329 else 2330 goal = rbm->rgd->rd_last_alloc + rbm->rgd->rd_data0; 2331 2332 gfs2_rbm_from_block(rbm, goal); 2333 } 2334 2335 /** 2336 * gfs2_alloc_blocks - Allocate one or more blocks of data and/or a dinode 2337 * @ip: the inode to allocate the block for 2338 * @bn: Used to return the starting block number 2339 * @nblocks: requested number of blocks/extent length (value/result) 2340 * @dinode: 1 if we're allocating a dinode block, else 0 2341 * @generation: the generation number of the inode 2342 * 2343 * Returns: 0 or error 2344 */ 2345 2346 int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, 2347 bool dinode, u64 *generation) 2348 { 2349 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 2350 struct buffer_head *dibh; 2351 struct gfs2_rbm rbm = { .rgd = ip->i_res.rs_rbm.rgd, }; 2352 unsigned int ndata; 2353 u64 block; /* block, within the file system scope */ 2354 int error; 2355 2356 gfs2_set_alloc_start(&rbm, ip, dinode); 2357 error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, ip, false); 2358 2359 if (error == -ENOSPC) { 2360 gfs2_set_alloc_start(&rbm, ip, dinode); 2361 error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, NULL, false); 2362 } 2363 2364 /* Since all blocks are reserved in advance, this shouldn't happen */ 2365 if (error) { 2366 fs_warn(sdp, "inum=%llu error=%d, nblocks=%u, full=%d fail_pt=%d\n", 2367 (unsigned long long)ip->i_no_addr, error, *nblocks, 2368 test_bit(GBF_FULL, &rbm.rgd->rd_bits->bi_flags), 2369 rbm.rgd->rd_extfail_pt); 2370 goto rgrp_error; 2371 } 2372 2373 gfs2_alloc_extent(&rbm, dinode, nblocks); 2374 block = gfs2_rbm_to_block(&rbm); 2375 rbm.rgd->rd_last_alloc = block - rbm.rgd->rd_data0; 2376 if (gfs2_rs_active(&ip->i_res)) 2377 gfs2_adjust_reservation(ip, &rbm, *nblocks); 2378 ndata = *nblocks; 2379 if (dinode) 2380 ndata--; 2381 2382 if (!dinode) { 2383 ip->i_goal = block + ndata - 1; 2384 error = gfs2_meta_inode_buffer(ip, &dibh); 2385 if (error == 0) { 2386 struct gfs2_dinode *di = 2387 (struct gfs2_dinode *)dibh->b_data; 2388 gfs2_trans_add_meta(ip->i_gl, dibh); 2389 di->di_goal_meta = di->di_goal_data = 2390 cpu_to_be64(ip->i_goal); 2391 brelse(dibh); 2392 } 2393 } 2394 if (rbm.rgd->rd_free < *nblocks) { 2395 pr_warn("nblocks=%u\n", *nblocks); 2396 goto rgrp_error; 2397 } 2398 2399 rbm.rgd->rd_free -= *nblocks; 2400 if (dinode) { 2401 rbm.rgd->rd_dinodes++; 2402 *generation = rbm.rgd->rd_igeneration++; 2403 if (*generation == 0) 2404 *generation = rbm.rgd->rd_igeneration++; 2405 } 2406 2407 gfs2_trans_add_meta(rbm.rgd->rd_gl, rbm.rgd->rd_bits[0].bi_bh); 2408 gfs2_rgrp_out(rbm.rgd, 
		      rbm.rgd->rd_bits[0].bi_bh->b_data);

	gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0);
	if (dinode)
		gfs2_trans_add_unrevoke(sdp, block, *nblocks);

	gfs2_quota_change(ip, *nblocks, ip->i_inode.i_uid, ip->i_inode.i_gid);

	rbm.rgd->rd_free_clone -= *nblocks;
	trace_gfs2_block_alloc(ip, rbm.rgd, block, *nblocks,
			       dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
	*bn = block;
	return 0;

rgrp_error:
	gfs2_rgrp_error(rbm.rgd);
	return -EIO;
}

/**
 * __gfs2_free_blocks - free a contiguous run of block(s)
 * @ip: the inode these blocks are being freed from
 * @bstart: first block of a run of contiguous blocks
 * @blen: the length of the block run
 * @meta: 1 if the blocks represent metadata
 *
 */

void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct gfs2_rgrpd *rgd;

	rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE);
	if (!rgd)
		return;
	trace_gfs2_block_alloc(ip, rgd, bstart, blen, GFS2_BLKST_FREE);
	rgd->rd_free += blen;
	rgd->rd_flags &= ~GFS2_RGF_TRIMMED;
	gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);

	/* Directories keep their data in the metadata address space */
	if (meta || ip->i_depth)
		gfs2_meta_wipe(ip, bstart, blen);
}

/**
 * gfs2_free_meta - free a contiguous run of metadata block(s)
 * @ip: the inode these blocks are being freed from
 * @bstart: first block of a run of contiguous blocks
 * @blen: the length of the block run
 *
 */

void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);

	__gfs2_free_blocks(ip, bstart, blen, 1);
	gfs2_statfs_change(sdp, 0, +blen, 0);
	gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid);
}

void gfs2_unlink_di(struct inode *inode)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct gfs2_rgrpd *rgd;
	u64 blkno = ip->i_no_addr;

	rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_UNLINKED);
	if (!rgd)
		return;
	trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED);
	gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
	be32_add_cpu(&rgd->rd_rgl->rl_unlinked, 1);
}

void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
{
	struct gfs2_sbd *sdp = rgd->rd_sbd;
	struct gfs2_rgrpd *tmp_rgd;

	tmp_rgd = rgblk_free(sdp, ip->i_no_addr, 1, GFS2_BLKST_FREE);
	if (!tmp_rgd)
		return;
	gfs2_assert_withdraw(sdp, rgd == tmp_rgd);

	if (!rgd->rd_dinodes)
		gfs2_consist_rgrpd(rgd);
	rgd->rd_dinodes--;
	rgd->rd_free++;

	gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
	be32_add_cpu(&rgd->rd_rgl->rl_unlinked, -1);

	gfs2_statfs_change(sdp, 0, +1, -1);
	trace_gfs2_block_alloc(ip, rgd, ip->i_no_addr, 1, GFS2_BLKST_FREE);
	gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid);
	gfs2_meta_wipe(ip, ip->i_no_addr, 1);
}

/**
 * gfs2_check_blk_type - Check the type of a block
 * @sdp: The superblock
 * @no_addr: The block number to
check 2517 * @type: The block type we are looking for 2518 * 2519 * Returns: 0 if the block type matches the expected type 2520 * -ESTALE if it doesn't match 2521 * or -ve errno if something went wrong while checking 2522 */ 2523 2524 int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type) 2525 { 2526 struct gfs2_rgrpd *rgd; 2527 struct gfs2_holder rgd_gh; 2528 struct gfs2_rbm rbm; 2529 int error = -EINVAL; 2530 2531 rgd = gfs2_blk2rgrpd(sdp, no_addr, 1); 2532 if (!rgd) 2533 goto fail; 2534 2535 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_SHARED, 0, &rgd_gh); 2536 if (error) 2537 goto fail; 2538 2539 rbm.rgd = rgd; 2540 error = gfs2_rbm_from_block(&rbm, no_addr); 2541 WARN_ON_ONCE(error != 0); 2542 2543 if (gfs2_testbit(&rbm, false) != type) 2544 error = -ESTALE; 2545 2546 gfs2_glock_dq_uninit(&rgd_gh); 2547 fail: 2548 return error; 2549 } 2550 2551 /** 2552 * gfs2_rlist_add - add a RG to a list of RGs 2553 * @ip: the inode 2554 * @rlist: the list of resource groups 2555 * @block: the block 2556 * 2557 * Figure out what RG a block belongs to and add that RG to the list 2558 * 2559 * FIXME: Don't use NOFAIL 2560 * 2561 */ 2562 2563 void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist, 2564 u64 block) 2565 { 2566 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 2567 struct gfs2_rgrpd *rgd; 2568 struct gfs2_rgrpd **tmp; 2569 unsigned int new_space; 2570 unsigned int x; 2571 2572 if (gfs2_assert_warn(sdp, !rlist->rl_ghs)) 2573 return; 2574 2575 /* 2576 * The resource group last accessed is kept in the last position. 2577 */ 2578 2579 if (rlist->rl_rgrps) { 2580 rgd = rlist->rl_rgd[rlist->rl_rgrps - 1]; 2581 if (rgrp_contains_block(rgd, block)) 2582 return; 2583 rgd = gfs2_blk2rgrpd(sdp, block, 1); 2584 } else { 2585 rgd = ip->i_res.rs_rbm.rgd; 2586 if (!rgd || !rgrp_contains_block(rgd, block)) 2587 rgd = gfs2_blk2rgrpd(sdp, block, 1); 2588 } 2589 2590 if (!rgd) { 2591 fs_err(sdp, "rlist_add: no rgrp for block %llu\n", 2592 (unsigned long long)block); 2593 return; 2594 } 2595 2596 for (x = 0; x < rlist->rl_rgrps; x++) { 2597 if (rlist->rl_rgd[x] == rgd) { 2598 swap(rlist->rl_rgd[x], 2599 rlist->rl_rgd[rlist->rl_rgrps - 1]); 2600 return; 2601 } 2602 } 2603 2604 if (rlist->rl_rgrps == rlist->rl_space) { 2605 new_space = rlist->rl_space + 10; 2606 2607 tmp = kcalloc(new_space, sizeof(struct gfs2_rgrpd *), 2608 GFP_NOFS | __GFP_NOFAIL); 2609 2610 if (rlist->rl_rgd) { 2611 memcpy(tmp, rlist->rl_rgd, 2612 rlist->rl_space * sizeof(struct gfs2_rgrpd *)); 2613 kfree(rlist->rl_rgd); 2614 } 2615 2616 rlist->rl_space = new_space; 2617 rlist->rl_rgd = tmp; 2618 } 2619 2620 rlist->rl_rgd[rlist->rl_rgrps++] = rgd; 2621 } 2622 2623 /** 2624 * gfs2_rlist_alloc - all RGs have been added to the rlist, now allocate 2625 * and initialize an array of glock holders for them 2626 * @rlist: the list of resource groups 2627 * @state: the lock state to acquire the RG lock in 2628 * 2629 * FIXME: Don't use NOFAIL 2630 * 2631 */ 2632 2633 void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state) 2634 { 2635 unsigned int x; 2636 2637 rlist->rl_ghs = kmalloc_array(rlist->rl_rgrps, 2638 sizeof(struct gfs2_holder), 2639 GFP_NOFS | __GFP_NOFAIL); 2640 for (x = 0; x < rlist->rl_rgrps; x++) 2641 gfs2_holder_init(rlist->rl_rgd[x]->rd_gl, 2642 state, 0, 2643 &rlist->rl_ghs[x]); 2644 } 2645 2646 /** 2647 * gfs2_rlist_free - free a resource group list 2648 * @rlist: the list of resource groups 2649 * 2650 */ 2651 2652 void gfs2_rlist_free(struct gfs2_rgrp_list *rlist) 2653 { 2654 
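	/* Only the arrays built up by gfs2_rlist_add() and gfs2_rlist_alloc()
	 * are released here; the rgrp structures themselves are not owned by
	 * the list. Any initialized holders are uninitialized before their
	 * array is freed.
	 */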
	unsigned int x;

	kfree(rlist->rl_rgd);

	if (rlist->rl_ghs) {
		for (x = 0; x < rlist->rl_rgrps; x++)
			gfs2_holder_uninit(&rlist->rl_ghs[x]);
		kfree(rlist->rl_ghs);
		rlist->rl_ghs = NULL;
	}
}
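/*
 * A minimal sketch of how the rlist interface above is typically driven by
 * its callers (for example the directory and metadata deallocation paths).
 * This is illustrative only; error handling and the surrounding transaction,
 * quota and journal setup are omitted:
 *
 *	struct gfs2_rgrp_list rlist;
 *	int error;
 *
 *	memset(&rlist, 0, sizeof(struct gfs2_rgrp_list));
 *	(for each block @blk to be freed)
 *		gfs2_rlist_add(ip, &rlist, blk);
 *	gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE);
 *	error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
 *	if (!error) {
 *		(free the blocks with __gfs2_free_blocks() / gfs2_free_meta())
 *		gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs);
 *	}
 *	gfs2_rlist_free(&rlist);
 */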