/*
 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
 * Copyright (C) 2004-2008 Red Hat, Inc.  All rights reserved.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
 * of the GNU General Public License version 2.
 */

#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/fs.h>
#include <linux/gfs2_ondisk.h>
#include <linux/prefetch.h>
#include <linux/blkdev.h>

#include "gfs2.h"
#include "incore.h"
#include "glock.h"
#include "glops.h"
#include "lops.h"
#include "meta_io.h"
#include "quota.h"
#include "rgrp.h"
#include "super.h"
#include "trans.h"
#include "util.h"
#include "log.h"
#include "inode.h"
#include "trace_gfs2.h"

#define BFITNOENT ((u32)~0)	/* bitmap search failed: no matching entry */
#define NO_BLOCK ((u64)~0)	/* sentinel: "no block number" */

/*
 * 0x55... is the pattern with the low bit of every 2-bit bitmap entry
 * set ("used, not metadata" in every slot); 0x00 is "all free".  These
 * let the search skip whole words that cannot contain a match.
 */
#if BITS_PER_LONG == 32
#define LBITMASK   (0x55555555UL)
#define LBITSKIP55 (0x55555555UL)
#define LBITSKIP00 (0x00000000UL)
#else
#define LBITMASK   (0x5555555555555555UL)
#define LBITSKIP55 (0x5555555555555555UL)
#define LBITSKIP00 (0x0000000000000000UL)
#endif

/*
 * These routines are used by the resource group routines (rgrp.c)
 * to keep track of block allocation.  Each block is represented by two
 * bits.  So, each byte represents GFS2_NBBY (i.e. 4) blocks.
 *
 * 0 = Free
 * 1 = Used (not metadata)
 * 2 = Unlinked (still in use) inode
 * 3 = Used (metadata)
 */

/*
 * valid_change[new_state * 4 + cur_state] is nonzero iff changing a
 * bitmap entry from cur_state to new_state is a legal transition
 * (e.g. free->free or used->unlinked are not).
 */
static const char valid_change[16] = {
	        /* current */
	/* n */ 0, 1, 1, 1,
	/* e */ 1, 0, 0, 0,
	/* w */ 0, 0, 0, 1,
	        1, 0, 0, 0
};

static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
                        unsigned char old_state, unsigned char new_state,
			unsigned int *n);

/**
 * gfs2_setbit - Set a bit in the bitmaps
 * @rgd: the resource group descriptor (used only for error reporting)
 * @buf1: the primary bitmap buffer (usually the buffer_head data)
 * @buf2: optional second buffer to update identically (the "clone"
 *        bitmap used for deferred deallocation), may be NULL
 * @offset: byte offset of the bitmap within the buffers
 * @bi: the bitmap descriptor (supplies the length and debug info)
 * @block: the block (bitmap-relative) whose 2-bit entry is set
 * @new_state: the new state of the block (GFS2_BLKST_...)
 *
 * If the requested transition is invalid per valid_change[], the rgrp
 * is flagged inconsistent and the bitmap is left untouched.
 */

static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf1,
			       unsigned char *buf2, unsigned int offset,
			       struct gfs2_bitmap *bi, u32 block,
			       unsigned char new_state)
{
	unsigned char *byte1, *byte2, *end, cur_state;
	unsigned int buflen = bi->bi_len;
	/* bit position of this block's 2-bit entry within its byte */
	const unsigned int bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE;

	byte1 = buf1 + offset + (block / GFS2_NBBY);
	end = buf1 + offset + buflen;

	BUG_ON(byte1 >= end);

	cur_state = (*byte1 >> bit) & GFS2_BIT_MASK;

	if (unlikely(!valid_change[new_state * 4 + cur_state])) {
		printk(KERN_WARNING "GFS2: buf_blk = 0x%llx old_state=%d, "
		       "new_state=%d\n",
		       (unsigned long long)block, cur_state, new_state);
		printk(KERN_WARNING "GFS2: rgrp=0x%llx bi_start=0x%lx\n",
		       (unsigned long long)rgd->rd_addr,
		       (unsigned long)bi->bi_start);
		printk(KERN_WARNING "GFS2: bi_offset=0x%lx bi_len=0x%lx\n",
		       (unsigned long)bi->bi_offset,
		       (unsigned long)bi->bi_len);
		dump_stack();
		gfs2_consist_rgrpd(rgd);
		return;
	}
	/* XOR with (cur ^ new) flips exactly the bits that differ */
	*byte1 ^= (cur_state ^ new_state) << bit;

	if (buf2) {
		byte2 = buf2 + offset + (block / GFS2_NBBY);
		cur_state = (*byte2 >> bit) & GFS2_BIT_MASK;
		*byte2 ^= (cur_state ^ new_state) << bit;
	}
}
/**
 * gfs2_testbit - test a bit in the bitmaps
 * @rgd: the resource group descriptor (for the consistency assert)
 * @buffer: the buffer that holds the bitmaps
 * @buflen: the length (in bytes) of the buffer
 * @block: the block (bitmap-relative) to read
 *
 * Returns: the 2-bit allocation state (GFS2_BLKST_...) of @block
 */

static inline unsigned char gfs2_testbit(struct gfs2_rgrpd *rgd,
					 const unsigned char *buffer,
					 unsigned int buflen, u32 block)
{
	const unsigned char *byte, *end;
	unsigned char cur_state;
	unsigned int bit;

	byte = buffer + (block / GFS2_NBBY);
	bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE;
	end = buffer + buflen;

	gfs2_assert(rgd->rd_sbd, byte < end);

	cur_state = (*byte >> bit) & GFS2_BIT_MASK;

	return cur_state;
}

/**
 * gfs2_bit_search
 * @ptr: Pointer to bitmap data
 * @mask: Mask to use (normally 0x55555.... but adjusted for search start)
 * @state: The state we are searching for
 *
 * We xor the bitmap data with a pattern which is the bitwise opposite
 * of what we are looking for, this gives rise to a pattern of ones
 * wherever there is a match. Since we have two bits per entry, we
 * take this pattern, shift it down by one place and then and it with
 * the original. All the even bit positions (0,2,4, etc) then represent
 * successful matches, so we mask with 0x55555..... to remove the unwanted
 * odd bit positions.
 *
 * This allows searching of a whole u64 at once (32 blocks) with a
 * single test (on 64 bit arches).
 */

static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state)
{
	u64 tmp;
	/* search[state] is the bitwise complement of state repeated in
	 * every 2-bit slot, so the xor above yields 11 per matching slot */
	static const u64 search[] = {
		[0] = 0xffffffffffffffffULL,
		[1] = 0xaaaaaaaaaaaaaaaaULL,
		[2] = 0x5555555555555555ULL,
		[3] = 0x0000000000000000ULL,
	};
	tmp = le64_to_cpu(*ptr) ^ search[state];
	tmp &= (tmp >> 1);
	tmp &= mask;
	return tmp;
}

/**
 * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing
 * a block in a given allocation state.
 * @buf: the buffer that holds the bitmaps
 * @len: the length (in bytes) of the buffer
 * @goal: start search at this block's bit-pair (within @buf)
 * @state: GFS2_BLKST_XXX the state of the block we're looking for.
 *
 * Scope of @goal and returned block number is only within this bitmap buffer,
 * not entire rgrp or filesystem.  @buf will be offset from the actual
 * beginning of a bitmap block buffer, skipping any header structures, but
 * headers are always a multiple of 64 bits long so that the buffer is
 * always aligned to a 64 bit boundary.
 *
 * The size of the buffer is in bytes, but it is assumed that it is
 * always ok to read a complete multiple of 64 bits at the end
 * of the block in case the end is not aligned to a natural boundary.
 *
 * Return: the block number (bitmap buffer scope) that was found, or
 * BFITNOENT if no bit-pair in state @state exists at or after @goal
 */

static u32 gfs2_bitfit(const u8 *buf, const unsigned int len,
		       u32 goal, u8 state)
{
	/* bit offset of @goal's entry within its 64-bit word */
	u32 spoint = (goal << 1) & ((8*sizeof(u64)) - 1);
	const __le64 *ptr = ((__le64 *)buf) + (goal >> 5);
	const __le64 *end = (__le64 *)(buf + ALIGN(len, sizeof(u64)));
	u64 tmp;
	u64 mask = 0x5555555555555555ULL;
	u32 bit;

	BUG_ON(state > 3);

	/* Mask off bits we don't care about at the start of the search */
	mask <<= spoint;
	tmp = gfs2_bit_search(ptr, mask, state);
	ptr++;
	while(tmp == 0 && ptr < end) {
		tmp = gfs2_bit_search(ptr, 0x5555555555555555ULL, state);
		ptr++;
	}
	/* Mask off any bits which are more than len bytes from the start */
	if (ptr == end && (len & (sizeof(u64) - 1)))
		tmp &= (((u64)~0) >> (64 - 8*(len & (sizeof(u64) - 1))));
	/* Didn't find anything, so return */
	if (tmp == 0)
		return BFITNOENT;
	ptr--;
	bit = __ffs64(tmp);
	bit /= 2;	/* two bits per entry in the bitmap */
	return (((const unsigned char *)ptr - buf) * GFS2_NBBY) + bit;
}
/**
 * gfs2_bitcount - count the number of bits in a certain state
 * @rgd: the resource group descriptor (unused here; kept for symmetry
 *       with the other bitmap helpers)
 * @buffer: the buffer that holds the bitmaps
 * @buflen: the length (in bytes) of the buffer
 * @state: the state of the block we're looking for
 *
 * Returns: The number of bits
 */

static u32 gfs2_bitcount(struct gfs2_rgrpd *rgd, const u8 *buffer,
			 unsigned int buflen, u8 state)
{
	const u8 *byte = buffer;
	const u8 *end = buffer + buflen;
	/* @state pre-shifted into each of the four 2-bit slots of a byte */
	const u8 state1 = state << 2;
	const u8 state2 = state << 4;
	const u8 state3 = state << 6;
	u32 count = 0;

	for (; byte < end; byte++) {
		if (((*byte) & 0x03) == state)
			count++;
		if (((*byte) & 0x0C) == state1)
			count++;
		if (((*byte) & 0x30) == state2)
			count++;
		if (((*byte) & 0xC0) == state3)
			count++;
	}

	return count;
}

/**
 * gfs2_rgrp_verify - Verify that a resource group is consistent
 * @rgd: the rgrp
 *
 * Cross-checks the per-state bitmap counts against the counters held
 * in the rgrp header; flags the rgrp inconsistent on any mismatch.
 */

void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd)
{
	struct gfs2_sbd *sdp = rgd->rd_sbd;
	struct gfs2_bitmap *bi = NULL;
	u32 length = rgd->rd_length;
	u32 count[4], tmp;
	int buf, x;

	memset(count, 0, 4 * sizeof(u32));

	/* Count # blocks in each of 4 possible allocation states */
	for (buf = 0; buf < length; buf++) {
		bi = rgd->rd_bits + buf;
		for (x = 0; x < 4; x++)
			count[x] += gfs2_bitcount(rgd,
						  bi->bi_bh->b_data +
						  bi->bi_offset,
						  bi->bi_len, x);
	}

	if (count[0] != rgd->rd_free) {
		if (gfs2_consist_rgrpd(rgd))
			fs_err(sdp, "free data mismatch:  %u != %u\n",
			       count[0], rgd->rd_free);
		return;
	}

	tmp = rgd->rd_data - rgd->rd_free - rgd->rd_dinodes;
	if (count[1] != tmp) {
		if (gfs2_consist_rgrpd(rgd))
			fs_err(sdp, "used data mismatch:  %u != %u\n",
			       count[1], tmp);
		return;
	}

	/* unlinked (2) and used-metadata (3) blocks are both dinodes */
	if (count[2] + count[3] != rgd->rd_dinodes) {
		if (gfs2_consist_rgrpd(rgd))
			fs_err(sdp, "used metadata mismatch:  %u != %u\n",
			       count[2] + count[3], rgd->rd_dinodes);
		return;
	}
}

/* Returns nonzero iff @block lies in @rgd's data range [rd_data0, rd_data0 + rd_data) */
static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block)
{
	u64 first = rgd->rd_data0;
	u64 last = first + rgd->rd_data;
	return first <= block && block < last;
}

/**
 * gfs2_blk2rgrpd - Find resource group for a given data/meta block number
 * @sdp: The GFS2 superblock
 * @blk: The data block number
 *
 * Walks the MRU list under sd_rindex_spin and moves a hit to the front,
 * so repeated lookups in the same rgrp stay fast.
 *
 * Returns: The resource group, or NULL if not found
 */

struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk)
{
	struct gfs2_rgrpd *rgd;

	spin_lock(&sdp->sd_rindex_spin);

	list_for_each_entry(rgd, &sdp->sd_rindex_mru_list, rd_list_mru) {
		if (rgrp_contains_block(rgd, blk)) {
			list_move(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list);
			spin_unlock(&sdp->sd_rindex_spin);
			return rgd;
		}
	}

	spin_unlock(&sdp->sd_rindex_spin);

	return NULL;
}

/**
 * gfs2_rgrpd_get_first - get the first Resource Group in the filesystem
 * @sdp: The GFS2 superblock
 *
 * Returns: The first rgrp in the filesystem
 */

struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp)
{
	gfs2_assert(sdp, !list_empty(&sdp->sd_rindex_list));
	return list_entry(sdp->sd_rindex_list.next, struct gfs2_rgrpd, rd_list);
}

/**
 * gfs2_rgrpd_get_next - get the next RG
 * @rgd: A RG
 *
 * Returns: The next rgrp, or NULL if @rgd was the last one
 */

struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd)
{
	if (rgd->rd_list.next == &rgd->rd_sbd->sd_rindex_list)
		return NULL;
	return list_entry(rgd->rd_list.next, struct gfs2_rgrpd, rd_list);
}

/*
 * clear_rgrpdi - tear down every in-core rgrp descriptor.
 * Caller must hold sd_rindex_mutex (see gfs2_clear_rgrpd); the forward
 * pointer is cleared first so concurrent readers can't pick up an rgrp
 * that is about to be freed.
 */
static void clear_rgrpdi(struct gfs2_sbd *sdp)
{
	struct list_head *head;
	struct gfs2_rgrpd *rgd;
	struct gfs2_glock *gl;

	spin_lock(&sdp->sd_rindex_spin);
	sdp->sd_rindex_forward = NULL;
	spin_unlock(&sdp->sd_rindex_spin);

	head = &sdp->sd_rindex_list;
	while (!list_empty(head)) {
		rgd = list_entry(head->next, struct gfs2_rgrpd, rd_list);
		gl = rgd->rd_gl;

		list_del(&rgd->rd_list);
		list_del(&rgd->rd_list_mru);

		/* rgd may not have a glock if read_rindex_entry failed early */
		if (gl) {
			gl->gl_object = NULL;
			gfs2_glock_add_to_lru(gl);
			gfs2_glock_put(gl);
		}

		kfree(rgd->rd_bits);
		kmem_cache_free(gfs2_rgrpd_cachep, rgd);
	}
}

void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
{
	mutex_lock(&sdp->sd_rindex_mutex);
	clear_rgrpdi(sdp);
	mutex_unlock(&sdp->sd_rindex_mutex);
}

/* Dump an rgrp's rindex fields to the kernel log (debug aid for
 * consistency errors). */
static void gfs2_rindex_print(const struct gfs2_rgrpd *rgd)
{
	printk(KERN_INFO "  ri_addr = %llu\n", (unsigned long long)rgd->rd_addr);
	printk(KERN_INFO "  ri_length = %u\n", rgd->rd_length);
	printk(KERN_INFO "  ri_data0 = %llu\n", (unsigned long long)rgd->rd_data0);
	printk(KERN_INFO "  ri_data = %u\n", rgd->rd_data);
	printk(KERN_INFO "  ri_bitbytes = %u\n", rgd->rd_bitbytes);
}

/**
 * compute_bitstructs - Compute the bitmap sizes
 * @rgd: The resource group descriptor
 *
 * Calculates bitmap descriptors, one for each block that contains bitmap data
 *
 * The first block carries a struct gfs2_rgrp header, subsequent blocks
 * only a struct gfs2_meta_header; bi_offset skips whichever applies.
 *
 * Returns: errno
 */

static int compute_bitstructs(struct gfs2_rgrpd *rgd)
{
	struct gfs2_sbd *sdp = rgd->rd_sbd;
	struct gfs2_bitmap *bi;
	u32 length = rgd->rd_length; /* # blocks in hdr & bitmap */
	u32 bytes_left, bytes;
	int x;

	if (!length)
		return -EINVAL;

	rgd->rd_bits = kcalloc(length, sizeof(struct gfs2_bitmap), GFP_NOFS);
	if (!rgd->rd_bits)
		return -ENOMEM;

	bytes_left = rgd->rd_bitbytes;

	for (x = 0; x < length; x++) {
		bi = rgd->rd_bits + x;

		bi->bi_flags = 0;
		/* small rgrp; bitmap stored completely in header block */
		if (length == 1) {
			bytes = bytes_left;
			bi->bi_offset = sizeof(struct gfs2_rgrp);
			bi->bi_start = 0;
			bi->bi_len = bytes;
		/* header block */
		} else if (x == 0) {
			bytes = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_rgrp);
			bi->bi_offset = sizeof(struct gfs2_rgrp);
			bi->bi_start = 0;
			bi->bi_len = bytes;
		/* last block */
		} else if (x + 1 == length) {
			bytes = bytes_left;
			bi->bi_offset = sizeof(struct gfs2_meta_header);
			bi->bi_start = rgd->rd_bitbytes - bytes_left;
			bi->bi_len = bytes;
		/* other blocks */
		} else {
			bytes = sdp->sd_sb.sb_bsize -
				sizeof(struct gfs2_meta_header);
			bi->bi_offset = sizeof(struct gfs2_meta_header);
			bi->bi_start = rgd->rd_bitbytes - bytes_left;
			bi->bi_len = bytes;
		}

		bytes_left -= bytes;
	}

	if (bytes_left) {
		gfs2_consist_rgrpd(rgd);
		return -EIO;
	}
	/* the last bitmap must account for exactly rd_data blocks */
	bi = rgd->rd_bits + (length - 1);
	if ((bi->bi_start + bi->bi_len) * GFS2_NBBY != rgd->rd_data) {
		if (gfs2_consist_rgrpd(rgd)) {
			gfs2_rindex_print(rgd);
			fs_err(sdp, "start=%u len=%u offset=%u\n",
			       bi->bi_start, bi->bi_len, bi->bi_offset);
		}
		return -EIO;
	}

	return 0;
}
/**
 * gfs2_ri_total - Total up the file system space, according to the rindex.
 * @sdp: the filesystem
 *
 * Reads rindex entries sequentially until EOF or a short read and sums
 * their ri_data (data blocks per rgrp).
 *
 * Returns: the total number of data blocks described by the rindex
 */
u64 gfs2_ri_total(struct gfs2_sbd *sdp)
{
	u64 total_data = 0;
	struct inode *inode = sdp->sd_rindex;
	struct gfs2_inode *ip = GFS2_I(inode);
	char buf[sizeof(struct gfs2_rindex)];
	struct file_ra_state ra_state;
	int error, rgrps;

	mutex_lock(&sdp->sd_rindex_mutex);
	file_ra_state_init(&ra_state, inode->i_mapping);
	for (rgrps = 0;; rgrps++) {
		loff_t pos = rgrps * sizeof(struct gfs2_rindex);

		/* stop before reading a partial trailing entry */
		if (pos + sizeof(struct gfs2_rindex) > i_size_read(inode))
			break;
		error = gfs2_internal_read(ip, &ra_state, buf, &pos,
					   sizeof(struct gfs2_rindex));
		if (error != sizeof(struct gfs2_rindex))
			break;
		total_data += be32_to_cpu(((struct gfs2_rindex *)buf)->ri_data);
	}
	mutex_unlock(&sdp->sd_rindex_mutex);
	return total_data;
}

/* Decode one on-disk (big-endian) rindex entry into the in-core rgrp. */
static void gfs2_rindex_in(struct gfs2_rgrpd *rgd, const void *buf)
{
	const struct gfs2_rindex *str = buf;

	rgd->rd_addr = be64_to_cpu(str->ri_addr);
	rgd->rd_length = be32_to_cpu(str->ri_length);
	rgd->rd_data0 = be64_to_cpu(str->ri_data0);
	rgd->rd_data = be32_to_cpu(str->ri_data);
	rgd->rd_bitbytes = be32_to_cpu(str->ri_bitbytes);
}

/**
 * read_rindex_entry - Pull in a new resource index entry from the disk
 * @ip: the rindex inode
 * @ra_state: readahead state for the sequential scan
 *
 * The new rgrp is linked onto sd_rindex_list/_mru before its glock is
 * acquired; on error the caller is expected to clean up via
 * clear_rgrpdi() (see gfs2_ri_update).
 *
 * Returns: 0 on success (including clean EOF, which reads 0 bytes),
 * error code otherwise
 */

static int read_rindex_entry(struct gfs2_inode *ip,
			     struct file_ra_state *ra_state)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	loff_t pos = sdp->sd_rgrps * sizeof(struct gfs2_rindex);
	char buf[sizeof(struct gfs2_rindex)];
	int error;
	struct gfs2_rgrpd *rgd;

	error = gfs2_internal_read(ip, ra_state, buf, &pos,
				   sizeof(struct gfs2_rindex));
	if (!error)
		return 0;	/* EOF: nothing more to read */
	if (error != sizeof(struct gfs2_rindex)) {
		if (error > 0)
			error = -EIO;	/* short read: treat as corruption */
		return error;
	}

	rgd = kmem_cache_zalloc(gfs2_rgrpd_cachep, GFP_NOFS);
	error = -ENOMEM;
	if (!rgd)
		return error;

	mutex_init(&rgd->rd_mutex);
	lops_init_le(&rgd->rd_le, &gfs2_rg_lops);
	rgd->rd_sbd = sdp;

	list_add_tail(&rgd->rd_list, &sdp->sd_rindex_list);
	list_add_tail(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list);

	gfs2_rindex_in(rgd, buf);
	error = compute_bitstructs(rgd);
	if (error)
		return error;

	error = gfs2_glock_get(sdp, rgd->rd_addr,
			       &gfs2_rgrp_glops, CREATE, &rgd->rd_gl);
	if (error)
		return error;

	rgd->rd_gl->gl_object = rgd;
	/* force a re-read of the rgrp header when it is first locked */
	rgd->rd_flags &= ~GFS2_RDF_UPTODATE;
	return error;
}

/**
 * gfs2_ri_update - Pull in a new resource index from the disk
 * @ip: pointer to the rindex inode
 *
 * Rebuilds the entire in-core rgrp list from the rindex file and
 * records the largest rgrp data size.  Caller must hold
 * sd_rindex_mutex.
 *
 * Returns: 0 on successful update, error code otherwise
 */

int gfs2_ri_update(struct gfs2_inode *ip)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct inode *inode = &ip->i_inode;
	struct file_ra_state ra_state;
	u64 rgrp_count = i_size_read(inode);
	struct gfs2_rgrpd *rgd;
	unsigned int max_data = 0;
	int error;

	/* number of whole rindex entries in the file */
	do_div(rgrp_count, sizeof(struct gfs2_rindex));
	clear_rgrpdi(sdp);

	file_ra_state_init(&ra_state, inode->i_mapping);
	for (sdp->sd_rgrps = 0; sdp->sd_rgrps < rgrp_count; sdp->sd_rgrps++) {
		error = read_rindex_entry(ip, &ra_state);
		if (error) {
			clear_rgrpdi(sdp);
			return error;
		}
	}

	list_for_each_entry(rgd, &sdp->sd_rindex_list, rd_list)
		if (rgd->rd_data > max_data)
			max_data = rgd->rd_data;
	sdp->sd_max_rg_data = max_data;
	sdp->sd_rindex_uptodate = 1;
	return 0;
}
This 636 * doesn't matter, since it is shared and it is very, very rarely 637 * accessed in the exclusive mode (i.e. only when expanding the filesystem). 638 * 639 * This makes sure that we're using the latest copy of the resource index 640 * special file, which might have been updated if someone expanded the 641 * filesystem (via gfs2_grow utility), which adds new resource groups. 642 * 643 * Returns: 0 on success, error code otherwise 644 */ 645 646 int gfs2_rindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ri_gh) 647 { 648 struct gfs2_inode *ip = GFS2_I(sdp->sd_rindex); 649 struct gfs2_glock *gl = ip->i_gl; 650 int error; 651 652 error = gfs2_glock_nq_init(gl, LM_ST_SHARED, 0, ri_gh); 653 if (error) 654 return error; 655 656 /* Read new copy from disk if we don't have the latest */ 657 if (!sdp->sd_rindex_uptodate) { 658 mutex_lock(&sdp->sd_rindex_mutex); 659 if (!sdp->sd_rindex_uptodate) { 660 error = gfs2_ri_update(ip); 661 if (error) 662 gfs2_glock_dq_uninit(ri_gh); 663 } 664 mutex_unlock(&sdp->sd_rindex_mutex); 665 } 666 667 return error; 668 } 669 670 static void gfs2_rgrp_in(struct gfs2_rgrpd *rgd, const void *buf) 671 { 672 const struct gfs2_rgrp *str = buf; 673 u32 rg_flags; 674 675 rg_flags = be32_to_cpu(str->rg_flags); 676 rg_flags &= ~GFS2_RDF_MASK; 677 rgd->rd_flags &= GFS2_RDF_MASK; 678 rgd->rd_flags |= rg_flags; 679 rgd->rd_free = be32_to_cpu(str->rg_free); 680 rgd->rd_dinodes = be32_to_cpu(str->rg_dinodes); 681 rgd->rd_igeneration = be64_to_cpu(str->rg_igeneration); 682 } 683 684 static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf) 685 { 686 struct gfs2_rgrp *str = buf; 687 688 str->rg_flags = cpu_to_be32(rgd->rd_flags & ~GFS2_RDF_MASK); 689 str->rg_free = cpu_to_be32(rgd->rd_free); 690 str->rg_dinodes = cpu_to_be32(rgd->rd_dinodes); 691 str->__pad = cpu_to_be32(0); 692 str->rg_igeneration = cpu_to_be64(rgd->rd_igeneration); 693 memset(&str->rg_reserved, 0, sizeof(str->rg_reserved)); 694 } 695 696 /** 697 * gfs2_rgrp_bh_get - Read in a 
/**
 * gfs2_rgrp_bh_get - Read in a RG's header and bitmaps
 * @rgd: the struct gfs2_rgrpd describing the RG to read in
 *
 * Read in all of a Resource Group's header and bitmap blocks.
 * Caller must eventually call gfs2_rgrp_bh_put() to free the bitmaps.
 *
 * The buffers are refcounted via rd_bh_count; only the first holder
 * performs the actual reads.
 *
 * Returns: errno
 */

int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
{
	struct gfs2_sbd *sdp = rgd->rd_sbd;
	struct gfs2_glock *gl = rgd->rd_gl;
	unsigned int length = rgd->rd_length;
	struct gfs2_bitmap *bi;
	unsigned int x, y;
	int error;

	mutex_lock(&rgd->rd_mutex);

	/* already read in by another holder: just bump the refcount */
	spin_lock(&sdp->sd_rindex_spin);
	if (rgd->rd_bh_count) {
		rgd->rd_bh_count++;
		spin_unlock(&sdp->sd_rindex_spin);
		mutex_unlock(&rgd->rd_mutex);
		return 0;
	}
	spin_unlock(&sdp->sd_rindex_spin);

	/* issue all the reads first, then wait, to overlap the I/O */
	for (x = 0; x < length; x++) {
		bi = rgd->rd_bits + x;
		error = gfs2_meta_read(gl, rgd->rd_addr + x, 0, &bi->bi_bh);
		if (error)
			goto fail;
	}

	for (y = length; y--;) {
		bi = rgd->rd_bits + y;
		error = gfs2_meta_wait(sdp, bi->bi_bh);
		if (error)
			goto fail;
		/* block 0 is the rgrp header; the rest are bitmap blocks */
		if (gfs2_metatype_check(sdp, bi->bi_bh, y ? GFS2_METATYPE_RB :
					      GFS2_METATYPE_RG)) {
			error = -EIO;
			goto fail;
		}
	}

	if (!(rgd->rd_flags & GFS2_RDF_UPTODATE)) {
		for (x = 0; x < length; x++)
			clear_bit(GBF_FULL, &rgd->rd_bits[x].bi_flags);
		gfs2_rgrp_in(rgd, (rgd->rd_bits[0].bi_bh)->b_data);
		rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK);
	}

	spin_lock(&sdp->sd_rindex_spin);
	rgd->rd_free_clone = rgd->rd_free;
	rgd->rd_bh_count++;
	spin_unlock(&sdp->sd_rindex_spin);

	mutex_unlock(&rgd->rd_mutex);

	return 0;

fail:
	/* x is the number of buffers successfully read (== length if we
	 * failed in the wait loop), so this releases exactly those */
	while (x--) {
		bi = rgd->rd_bits + x;
		brelse(bi->bi_bh);
		bi->bi_bh = NULL;
		gfs2_assert_warn(sdp, !bi->bi_clone);
	}
	mutex_unlock(&rgd->rd_mutex);

	return error;
}

/* Take an extra reference on already-read rgrp buffers. */
void gfs2_rgrp_bh_hold(struct gfs2_rgrpd *rgd)
{
	struct gfs2_sbd *sdp = rgd->rd_sbd;

	spin_lock(&sdp->sd_rindex_spin);
	gfs2_assert_warn(rgd->rd_sbd, rgd->rd_bh_count);
	rgd->rd_bh_count++;
	spin_unlock(&sdp->sd_rindex_spin);
}

/**
 * gfs2_rgrp_bh_put - Release RG bitmaps read in with gfs2_rgrp_bh_get()
 * @rgd: the struct gfs2_rgrpd describing the RG to read in
 *
 * Drops one reference; the buffers (and any clone bitmaps) are freed
 * when the last reference goes away.
 */

void gfs2_rgrp_bh_put(struct gfs2_rgrpd *rgd)
{
	struct gfs2_sbd *sdp = rgd->rd_sbd;
	int x, length = rgd->rd_length;

	spin_lock(&sdp->sd_rindex_spin);
	gfs2_assert_warn(rgd->rd_sbd, rgd->rd_bh_count);
	if (--rgd->rd_bh_count) {
		spin_unlock(&sdp->sd_rindex_spin);
		return;
	}

	for (x = 0; x < length; x++) {
		struct gfs2_bitmap *bi = rgd->rd_bits + x;
		kfree(bi->bi_clone);
		bi->bi_clone = NULL;
		brelse(bi->bi_bh);
		bi->bi_bh = NULL;
	}

	spin_unlock(&sdp->sd_rindex_spin);
}

/*
 * gfs2_rgrp_send_discards - issue discard (TRIM) requests for blocks
 * that the clone bitmap shows as free but the real bitmap did not,
 * i.e. blocks freed in the last transaction.  Adjacent runs are
 * coalesced into single discard extents; the "goto start_new_extent"
 * deliberately jumps into the if-block to begin a new extent without
 * flushing.  On any discard failure, discards are disabled for the
 * whole filesystem.
 */
static void gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
				    const struct gfs2_bitmap *bi)
{
	struct super_block *sb = sdp->sd_vfs;
	struct block_device *bdev = sb->s_bdev;
	const unsigned int sects_per_blk = sdp->sd_sb.sb_bsize /
					   bdev_logical_block_size(sb->s_bdev);
	u64 blk;
	sector_t start = 0;
	sector_t nr_sects = 0;
	int rv;
	unsigned int x;

	for (x = 0; x < bi->bi_len; x++) {
		const u8 *orig = bi->bi_bh->b_data + bi->bi_offset + x;
		const u8 *clone = bi->bi_clone + bi->bi_offset + x;
		/* set bit 0 of each 2-bit slot that is free in the clone
		 * but was not free in the original bitmap */
		u8 diff = ~(*orig | (*orig >> 1)) & (*clone | (*clone >> 1));
		diff &= 0x55;
		if (diff == 0)
			continue;
		blk = offset + ((bi->bi_start + x) * GFS2_NBBY);
		blk *= sects_per_blk; /* convert to sectors */
		while(diff) {
			if (diff & 1) {
				if (nr_sects == 0)
					goto start_new_extent;
				if ((start + nr_sects) != blk) {
					/* non-contiguous: flush the
					 * pending extent first */
					rv = blkdev_issue_discard(bdev, start,
							    nr_sects, GFP_NOFS,
							    0);
					if (rv)
						goto fail;
					nr_sects = 0;
start_new_extent:
					start = blk;
				}
				nr_sects += sects_per_blk;
			}
			diff >>= 2;
			blk += sects_per_blk;
		}
	}
	if (nr_sects) {
		rv = blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS, 0);
		if (rv)
			goto fail;
	}
	return;
fail:
	fs_warn(sdp, "error %d on discard request, turning discards off for this filesystem", rv);
	sdp->sd_args.ar_discard = 0;
}

/*
 * gfs2_rgrp_repolish_clones - resynchronize every clone bitmap with the
 * real bitmap after a log flush, sending discards for newly-freed
 * blocks first if enabled, and reset the cloned free-block count.
 */
void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd)
{
	struct gfs2_sbd *sdp = rgd->rd_sbd;
	unsigned int length = rgd->rd_length;
	unsigned int x;

	for (x = 0; x < length; x++) {
		struct gfs2_bitmap *bi = rgd->rd_bits + x;
		if (!bi->bi_clone)
			continue;
		if (sdp->sd_args.ar_discard)
			gfs2_rgrp_send_discards(sdp, rgd->rd_data0, bi);
		clear_bit(GBF_FULL, &bi->bi_flags);
		memcpy(bi->bi_clone + bi->bi_offset,
		       bi->bi_bh->b_data + bi->bi_offset, bi->bi_len);
	}

	spin_lock(&sdp->sd_rindex_spin);
	rgd->rd_free_clone = rgd->rd_free;
	spin_unlock(&sdp->sd_rindex_spin);
}
gfs2_alloc 892 */ 893 894 struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip) 895 { 896 BUG_ON(ip->i_alloc != NULL); 897 ip->i_alloc = kzalloc(sizeof(struct gfs2_alloc), GFP_NOFS); 898 return ip->i_alloc; 899 } 900 901 /** 902 * try_rgrp_fit - See if a given reservation will fit in a given RG 903 * @rgd: the RG data 904 * @al: the struct gfs2_alloc structure describing the reservation 905 * 906 * If there's room for the requested blocks to be allocated from the RG: 907 * Sets the $al_rgd field in @al. 908 * 909 * Returns: 1 on success (it fits), 0 on failure (it doesn't fit) 910 */ 911 912 static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_alloc *al) 913 { 914 struct gfs2_sbd *sdp = rgd->rd_sbd; 915 int ret = 0; 916 917 if (rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR)) 918 return 0; 919 920 spin_lock(&sdp->sd_rindex_spin); 921 if (rgd->rd_free_clone >= al->al_requested) { 922 al->al_rgd = rgd; 923 ret = 1; 924 } 925 spin_unlock(&sdp->sd_rindex_spin); 926 927 return ret; 928 } 929 930 /** 931 * try_rgrp_unlink - Look for any unlinked, allocated, but unused inodes 932 * @rgd: The rgrp 933 * 934 * Returns: 0 if no error 935 * The inode, if one has been found, in inode. 936 */ 937 938 static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip) 939 { 940 u32 goal = 0, block; 941 u64 no_addr; 942 struct gfs2_sbd *sdp = rgd->rd_sbd; 943 unsigned int n; 944 struct gfs2_glock *gl; 945 struct gfs2_inode *ip; 946 int error; 947 int found = 0; 948 949 while (goal < rgd->rd_data) { 950 down_write(&sdp->sd_log_flush_lock); 951 n = 1; 952 block = rgblk_search(rgd, goal, GFS2_BLKST_UNLINKED, 953 GFS2_BLKST_UNLINKED, &n); 954 up_write(&sdp->sd_log_flush_lock); 955 if (block == BFITNOENT) 956 break; 957 /* rgblk_search can return a block < goal, so we need to 958 keep it marching forward. 
*/ 959 no_addr = block + rgd->rd_data0; 960 goal = max(block + 1, goal + 1); 961 if (*last_unlinked != NO_BLOCK && no_addr <= *last_unlinked) 962 continue; 963 if (no_addr == skip) 964 continue; 965 *last_unlinked = no_addr; 966 967 error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &gl); 968 if (error) 969 continue; 970 971 /* If the inode is already in cache, we can ignore it here 972 * because the existing inode disposal code will deal with 973 * it when all refs have gone away. Accessing gl_object like 974 * this is not safe in general. Here it is ok because we do 975 * not dereference the pointer, and we only need an approx 976 * answer to whether it is NULL or not. 977 */ 978 ip = gl->gl_object; 979 980 if (ip || queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0) 981 gfs2_glock_put(gl); 982 else 983 found++; 984 985 /* Limit reclaim to sensible number of tasks */ 986 if (found > NR_CPUS) 987 return; 988 } 989 990 rgd->rd_flags &= ~GFS2_RDF_CHECK; 991 return; 992 } 993 994 /** 995 * recent_rgrp_next - get next RG from "recent" list 996 * @cur_rgd: current rgrp 997 * 998 * Returns: The next rgrp in the recent list 999 */ 1000 1001 static struct gfs2_rgrpd *recent_rgrp_next(struct gfs2_rgrpd *cur_rgd) 1002 { 1003 struct gfs2_sbd *sdp = cur_rgd->rd_sbd; 1004 struct list_head *head; 1005 struct gfs2_rgrpd *rgd; 1006 1007 spin_lock(&sdp->sd_rindex_spin); 1008 head = &sdp->sd_rindex_mru_list; 1009 if (unlikely(cur_rgd->rd_list_mru.next == head)) { 1010 spin_unlock(&sdp->sd_rindex_spin); 1011 return NULL; 1012 } 1013 rgd = list_entry(cur_rgd->rd_list_mru.next, struct gfs2_rgrpd, rd_list_mru); 1014 spin_unlock(&sdp->sd_rindex_spin); 1015 return rgd; 1016 } 1017 1018 /** 1019 * forward_rgrp_get - get an rgrp to try next from full list 1020 * @sdp: The GFS2 superblock 1021 * 1022 * Returns: The rgrp to try next 1023 */ 1024 1025 static struct gfs2_rgrpd *forward_rgrp_get(struct gfs2_sbd *sdp) 1026 { 1027 struct gfs2_rgrpd *rgd; 1028 unsigned int 
journals = gfs2_jindex_size(sdp); 1029 unsigned int rg = 0, x; 1030 1031 spin_lock(&sdp->sd_rindex_spin); 1032 1033 rgd = sdp->sd_rindex_forward; 1034 if (!rgd) { 1035 if (sdp->sd_rgrps >= journals) 1036 rg = sdp->sd_rgrps * sdp->sd_jdesc->jd_jid / journals; 1037 1038 for (x = 0, rgd = gfs2_rgrpd_get_first(sdp); x < rg; 1039 x++, rgd = gfs2_rgrpd_get_next(rgd)) 1040 /* Do Nothing */; 1041 1042 sdp->sd_rindex_forward = rgd; 1043 } 1044 1045 spin_unlock(&sdp->sd_rindex_spin); 1046 1047 return rgd; 1048 } 1049 1050 /** 1051 * forward_rgrp_set - set the forward rgrp pointer 1052 * @sdp: the filesystem 1053 * @rgd: The new forward rgrp 1054 * 1055 */ 1056 1057 static void forward_rgrp_set(struct gfs2_sbd *sdp, struct gfs2_rgrpd *rgd) 1058 { 1059 spin_lock(&sdp->sd_rindex_spin); 1060 sdp->sd_rindex_forward = rgd; 1061 spin_unlock(&sdp->sd_rindex_spin); 1062 } 1063 1064 /** 1065 * get_local_rgrp - Choose and lock a rgrp for allocation 1066 * @ip: the inode to reserve space for 1067 * @rgp: the chosen and locked rgrp 1068 * 1069 * Try to acquire rgrp in way which avoids contending with others. 
/**
 * get_local_rgrp - Choose and lock a rgrp for allocation
 * @ip: the inode to reserve space for
 * @last_unlinked: in/out: highest unlinked block seen (see try_rgrp_unlink)
 *
 * Try to acquire rgrp in way which avoids contending with others.
 *
 * Phase 1 walks the MRU list starting from the inode's goal rgrp using
 * LM_FLAG_TRY locks.  Phase 2 walks the full rgrp list starting from a
 * per-journal forward pointer; after one full lap the TRY flag is
 * dropped (blocking locks), after two laps the log is flushed, and
 * after three laps with no progress -ENOSPC is returned.
 *
 * Returns: errno (0 on success with al->al_rgd set and, unless the
 * glock was already held, al->al_rgd_gh acquired)
 */

static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct gfs2_rgrpd *rgd, *begin = NULL;
	struct gfs2_alloc *al = ip->i_alloc;
	int flags = LM_FLAG_TRY;
	int skipped = 0;
	int loops = 0;
	int error, rg_locked;

	rgd = gfs2_blk2rgrpd(sdp, ip->i_goal);

	/* Phase 1: recently-used rgrps, starting at the goal rgrp */
	while (rgd) {
		rg_locked = 0;

		if (gfs2_glock_is_locked_by_me(rgd->rd_gl)) {
			rg_locked = 1;
			error = 0;
		} else {
			error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
						   LM_FLAG_TRY, &al->al_rgd_gh);
		}
		switch (error) {
		case 0:
			if (try_rgrp_fit(rgd, al))
				goto out;
			if (rgd->rd_flags & GFS2_RDF_CHECK)
				try_rgrp_unlink(rgd, last_unlinked, ip->i_no_addr);
			if (!rg_locked)
				gfs2_glock_dq_uninit(&al->al_rgd_gh);
			/* fall through */
		case GLR_TRYFAILED:
			rgd = recent_rgrp_next(rgd);
			break;

		default:
			return error;
		}
	}

	/* Go through full list of rgrps */

	begin = rgd = forward_rgrp_get(sdp);

	for (;;) {
		rg_locked = 0;

		if (gfs2_glock_is_locked_by_me(rgd->rd_gl)) {
			rg_locked = 1;
			error = 0;
		} else {
			error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, flags,
						   &al->al_rgd_gh);
		}
		switch (error) {
		case 0:
			if (try_rgrp_fit(rgd, al))
				goto out;
			if (rgd->rd_flags & GFS2_RDF_CHECK)
				try_rgrp_unlink(rgd, last_unlinked, ip->i_no_addr);
			if (!rg_locked)
				gfs2_glock_dq_uninit(&al->al_rgd_gh);
			break;

		case GLR_TRYFAILED:
			skipped++;
			break;

		default:
			return error;
		}

		rgd = gfs2_rgrpd_get_next(rgd);
		if (!rgd)
			rgd = gfs2_rgrpd_get_first(sdp);

		if (rgd == begin) {
			if (++loops >= 3)
				return -ENOSPC;
			/* nothing was TRY-skipped: an extra lap won't help */
			if (!skipped)
				loops++;
			flags = 0;	/* switch to blocking lock requests */
			if (loops == 2)
				gfs2_log_flush(sdp, NULL);
		}
	}

out:
	if (begin) {
		/* found via the full scan: refresh MRU and advance the
		 * forward pointer past the rgrp we just used */
		spin_lock(&sdp->sd_rindex_spin);
		list_move(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list);
		spin_unlock(&sdp->sd_rindex_spin);
		rgd = gfs2_rgrpd_get_next(rgd);
		if (!rgd)
			rgd = gfs2_rgrpd_get_first(sdp);
		forward_rgrp_set(sdp, rgd);
	}

	return 0;
}

/**
 * gfs2_inplace_reserve_i - Reserve space in the filesystem
 * @ip: the inode to reserve space for
 * @hold_rindex: nonzero to take/refresh the rindex lock as needed
 * @file: source file of the caller (debugging)
 * @line: source line of the caller (debugging)
 *
 * Returns: errno
 */

int gfs2_inplace_reserve_i(struct gfs2_inode *ip, int hold_rindex,
			   char *file, unsigned int line)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct gfs2_alloc *al = ip->i_alloc;
	int error = 0;
	u64 last_unlinked = NO_BLOCK;
	int tries = 0;

	if (gfs2_assert_warn(sdp, al->al_requested))
		return -EINVAL;

	if (hold_rindex) {
		/* We need to hold the rindex unless the inode we're using is
		   the rindex itself, in which case it's already held. */
		if (ip != GFS2_I(sdp->sd_rindex))
			error = gfs2_rindex_hold(sdp, &al->al_ri_gh);
		else if (!sdp->sd_rgrps) /* We may not have the rindex read
					    in, so: */
			error = gfs2_ri_update(ip);
		if (error)
			return error;
	}

try_again:
	do {
		error = get_local_rgrp(ip, &last_unlinked);
		/* If there is no space, flushing the log may release some */
		if (error) {
			/* allocating for the rindex itself during grow:
			 * the rindex may have changed under us */
			if (ip == GFS2_I(sdp->sd_rindex) &&
			    !sdp->sd_rindex_uptodate) {
				error = gfs2_ri_update(ip);
				if (error)
					return error;
				goto try_again;
			}
			gfs2_log_flush(sdp, NULL);
		}
	} while (error && tries++ < 3);

	if (error) {
		if (hold_rindex && ip != GFS2_I(sdp->sd_rindex))
			gfs2_glock_dq_uninit(&al->al_ri_gh);
		return error;
	}

	/* no error, so we have the rgrp set in the inode's allocation. */
	al->al_file = file;
	al->al_line = line;

	return 0;
}

/**
 * gfs2_inplace_release - release an inplace reservation
 * @ip: the inode the reservation was taken out on
 *
 * Release a reservation made by gfs2_inplace_reserve().
 */

void gfs2_inplace_release(struct gfs2_inode *ip)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct gfs2_alloc *al = ip->i_alloc;

	if (gfs2_assert_warn(sdp, al->al_alloced <= al->al_requested) == -1)
		fs_warn(sdp, "al_alloced = %u, al_requested = %u "
			     "al_file = %s, al_line = %u\n",
			     al->al_alloced, al->al_requested, al->al_file,
			     al->al_line);

	al->al_rgd = NULL;
	if (al->al_rgd_gh.gh_gl)
		gfs2_glock_dq_uninit(&al->al_rgd_gh);
	if (ip != GFS2_I(sdp->sd_rindex) && al->al_ri_gh.gh_gl)
		gfs2_glock_dq_uninit(&al->al_ri_gh);
}

/**
 * gfs2_get_block_type - Check a block in a RG is of given type
 * @rgd: the resource group holding the block
 * @block: the block number
 *
 * Returns: The block type (GFS2_BLKST_*)
 */

static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block)
{
	struct gfs2_bitmap *bi = NULL;
	u32 length, rgrp_block, buf_block;
	unsigned int buf;
	unsigned char type;

	length = rgd->rd_length;
	rgrp_block = block - rgd->rd_data0;

	/* find the bitmap block that covers this rgrp-relative block */
	for (buf = 0; buf < length; buf++) {
		bi = rgd->rd_bits + buf;
		if (rgrp_block < (bi->bi_start + bi->bi_len) * GFS2_NBBY)
			break;
	}

	gfs2_assert(rgd->rd_sbd, buf < length);
	buf_block = rgrp_block - bi->bi_start * GFS2_NBBY;

	type = gfs2_testbit(rgd, bi->bi_bh->b_data + bi->bi_offset,
			    bi->bi_len, buf_block);

	return type;
}

/**
 * rgblk_search - find a block in @old_state, change allocation
 * state to @new_state
 * @rgd: the resource group descriptor
 * @goal: the goal block within
the RG (start here to search for avail block) 1298 * @old_state: GFS2_BLKST_XXX the before-allocation state to find 1299 * @new_state: GFS2_BLKST_XXX the after-allocation block state 1300 * @n: The extent length 1301 * 1302 * Walk rgrp's bitmap to find bits that represent a block in @old_state. 1303 * Add the found bitmap buffer to the transaction. 1304 * Set the found bits to @new_state to change block's allocation state. 1305 * 1306 * This function never fails, because we wouldn't call it unless we 1307 * know (from reservation results, etc.) that a block is available. 1308 * 1309 * Scope of @goal and returned block is just within rgrp, not the whole 1310 * filesystem. 1311 * 1312 * Returns: the block number allocated 1313 */ 1314 1315 static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, 1316 unsigned char old_state, unsigned char new_state, 1317 unsigned int *n) 1318 { 1319 struct gfs2_bitmap *bi = NULL; 1320 const u32 length = rgd->rd_length; 1321 u32 blk = BFITNOENT; 1322 unsigned int buf, x; 1323 const unsigned int elen = *n; 1324 const u8 *buffer = NULL; 1325 1326 *n = 0; 1327 /* Find bitmap block that contains bits for goal block */ 1328 for (buf = 0; buf < length; buf++) { 1329 bi = rgd->rd_bits + buf; 1330 /* Convert scope of "goal" from rgrp-wide to within found bit block */ 1331 if (goal < (bi->bi_start + bi->bi_len) * GFS2_NBBY) { 1332 goal -= bi->bi_start * GFS2_NBBY; 1333 goto do_search; 1334 } 1335 } 1336 buf = 0; 1337 goal = 0; 1338 1339 do_search: 1340 /* Search (up to entire) bitmap in this rgrp for allocatable block. 1341 "x <= length", instead of "x < length", because we typically start 1342 the search in the middle of a bit block, but if we can't find an 1343 allocatable block anywhere else, we want to be able wrap around and 1344 search in the first part of our first-searched bit block. 
*/ 1345 for (x = 0; x <= length; x++) { 1346 bi = rgd->rd_bits + buf; 1347 1348 if (test_bit(GBF_FULL, &bi->bi_flags) && 1349 (old_state == GFS2_BLKST_FREE)) 1350 goto skip; 1351 1352 /* The GFS2_BLKST_UNLINKED state doesn't apply to the clone 1353 bitmaps, so we must search the originals for that. */ 1354 buffer = bi->bi_bh->b_data + bi->bi_offset; 1355 if (old_state != GFS2_BLKST_UNLINKED && bi->bi_clone) 1356 buffer = bi->bi_clone + bi->bi_offset; 1357 1358 blk = gfs2_bitfit(buffer, bi->bi_len, goal, old_state); 1359 if (blk != BFITNOENT) 1360 break; 1361 1362 if ((goal == 0) && (old_state == GFS2_BLKST_FREE)) 1363 set_bit(GBF_FULL, &bi->bi_flags); 1364 1365 /* Try next bitmap block (wrap back to rgrp header if at end) */ 1366 skip: 1367 buf++; 1368 buf %= length; 1369 goal = 0; 1370 } 1371 1372 if (blk == BFITNOENT) 1373 return blk; 1374 *n = 1; 1375 if (old_state == new_state) 1376 goto out; 1377 1378 gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); 1379 gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset, 1380 bi, blk, new_state); 1381 goal = blk; 1382 while (*n < elen) { 1383 goal++; 1384 if (goal >= (bi->bi_len * GFS2_NBBY)) 1385 break; 1386 if (gfs2_testbit(rgd, buffer, bi->bi_len, goal) != 1387 GFS2_BLKST_FREE) 1388 break; 1389 gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset, 1390 bi, goal, new_state); 1391 (*n)++; 1392 } 1393 out: 1394 return (bi->bi_start * GFS2_NBBY) + blk; 1395 } 1396 1397 /** 1398 * rgblk_free - Change alloc state of given block(s) 1399 * @sdp: the filesystem 1400 * @bstart: the start of a run of blocks to free 1401 * @blen: the length of the block run (all must lie within ONE RG!) 
1402 * @new_state: GFS2_BLKST_XXX the after-allocation block state 1403 * 1404 * Returns: Resource group containing the block(s) 1405 */ 1406 1407 static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart, 1408 u32 blen, unsigned char new_state) 1409 { 1410 struct gfs2_rgrpd *rgd; 1411 struct gfs2_bitmap *bi = NULL; 1412 u32 length, rgrp_blk, buf_blk; 1413 unsigned int buf; 1414 1415 rgd = gfs2_blk2rgrpd(sdp, bstart); 1416 if (!rgd) { 1417 if (gfs2_consist(sdp)) 1418 fs_err(sdp, "block = %llu\n", (unsigned long long)bstart); 1419 return NULL; 1420 } 1421 1422 length = rgd->rd_length; 1423 1424 rgrp_blk = bstart - rgd->rd_data0; 1425 1426 while (blen--) { 1427 for (buf = 0; buf < length; buf++) { 1428 bi = rgd->rd_bits + buf; 1429 if (rgrp_blk < (bi->bi_start + bi->bi_len) * GFS2_NBBY) 1430 break; 1431 } 1432 1433 gfs2_assert(rgd->rd_sbd, buf < length); 1434 1435 buf_blk = rgrp_blk - bi->bi_start * GFS2_NBBY; 1436 rgrp_blk++; 1437 1438 if (!bi->bi_clone) { 1439 bi->bi_clone = kmalloc(bi->bi_bh->b_size, 1440 GFP_NOFS | __GFP_NOFAIL); 1441 memcpy(bi->bi_clone + bi->bi_offset, 1442 bi->bi_bh->b_data + bi->bi_offset, 1443 bi->bi_len); 1444 } 1445 gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); 1446 gfs2_setbit(rgd, bi->bi_bh->b_data, NULL, bi->bi_offset, 1447 bi, buf_blk, new_state); 1448 } 1449 1450 return rgd; 1451 } 1452 1453 /** 1454 * gfs2_rgrp_dump - print out an rgrp 1455 * @seq: The iterator 1456 * @gl: The glock in question 1457 * 1458 */ 1459 1460 int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl) 1461 { 1462 const struct gfs2_rgrpd *rgd = gl->gl_object; 1463 if (rgd == NULL) 1464 return 0; 1465 gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u\n", 1466 (unsigned long long)rgd->rd_addr, rgd->rd_flags, 1467 rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes); 1468 return 0; 1469 } 1470 1471 static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd) 1472 { 1473 struct gfs2_sbd *sdp = rgd->rd_sbd; 1474 fs_warn(sdp, "rgrp %llu has an error, 
marking it readonly until umount\n", 1475 (unsigned long long)rgd->rd_addr); 1476 fs_warn(sdp, "umount on all nodes and run fsck.gfs2 to fix the error\n"); 1477 gfs2_rgrp_dump(NULL, rgd->rd_gl); 1478 rgd->rd_flags |= GFS2_RDF_ERROR; 1479 } 1480 1481 /** 1482 * gfs2_alloc_block - Allocate one or more blocks 1483 * @ip: the inode to allocate the block for 1484 * @bn: Used to return the starting block number 1485 * @n: requested number of blocks/extent length (value/result) 1486 * 1487 * Returns: 0 or error 1488 */ 1489 1490 int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n) 1491 { 1492 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1493 struct buffer_head *dibh; 1494 struct gfs2_alloc *al = ip->i_alloc; 1495 struct gfs2_rgrpd *rgd; 1496 u32 goal, blk; 1497 u64 block; 1498 int error; 1499 1500 /* Only happens if there is a bug in gfs2, return something distinctive 1501 * to ensure that it is noticed. 1502 */ 1503 if (al == NULL) 1504 return -ECANCELED; 1505 1506 rgd = al->al_rgd; 1507 1508 if (rgrp_contains_block(rgd, ip->i_goal)) 1509 goal = ip->i_goal - rgd->rd_data0; 1510 else 1511 goal = rgd->rd_last_alloc; 1512 1513 blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, GFS2_BLKST_USED, n); 1514 1515 /* Since all blocks are reserved in advance, this shouldn't happen */ 1516 if (blk == BFITNOENT) 1517 goto rgrp_error; 1518 1519 rgd->rd_last_alloc = blk; 1520 block = rgd->rd_data0 + blk; 1521 ip->i_goal = block; 1522 error = gfs2_meta_inode_buffer(ip, &dibh); 1523 if (error == 0) { 1524 struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data; 1525 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 1526 di->di_goal_meta = di->di_goal_data = cpu_to_be64(ip->i_goal); 1527 brelse(dibh); 1528 } 1529 if (rgd->rd_free < *n) 1530 goto rgrp_error; 1531 1532 rgd->rd_free -= *n; 1533 1534 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); 1535 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); 1536 1537 al->al_alloced += *n; 1538 1539 gfs2_statfs_change(sdp, 0, 
-(s64)*n, 0); 1540 gfs2_quota_change(ip, *n, ip->i_inode.i_uid, ip->i_inode.i_gid); 1541 1542 spin_lock(&sdp->sd_rindex_spin); 1543 rgd->rd_free_clone -= *n; 1544 spin_unlock(&sdp->sd_rindex_spin); 1545 trace_gfs2_block_alloc(ip, block, *n, GFS2_BLKST_USED); 1546 *bn = block; 1547 return 0; 1548 1549 rgrp_error: 1550 gfs2_rgrp_error(rgd); 1551 return -EIO; 1552 } 1553 1554 /** 1555 * gfs2_alloc_di - Allocate a dinode 1556 * @dip: the directory that the inode is going in 1557 * @bn: the block number which is allocated 1558 * @generation: the generation number of the inode 1559 * 1560 * Returns: 0 on success or error 1561 */ 1562 1563 int gfs2_alloc_di(struct gfs2_inode *dip, u64 *bn, u64 *generation) 1564 { 1565 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 1566 struct gfs2_alloc *al = dip->i_alloc; 1567 struct gfs2_rgrpd *rgd = al->al_rgd; 1568 u32 blk; 1569 u64 block; 1570 unsigned int n = 1; 1571 1572 blk = rgblk_search(rgd, rgd->rd_last_alloc, 1573 GFS2_BLKST_FREE, GFS2_BLKST_DINODE, &n); 1574 1575 /* Since all blocks are reserved in advance, this shouldn't happen */ 1576 if (blk == BFITNOENT) 1577 goto rgrp_error; 1578 1579 rgd->rd_last_alloc = blk; 1580 block = rgd->rd_data0 + blk; 1581 if (rgd->rd_free == 0) 1582 goto rgrp_error; 1583 1584 rgd->rd_free--; 1585 rgd->rd_dinodes++; 1586 *generation = rgd->rd_igeneration++; 1587 if (*generation == 0) 1588 *generation = rgd->rd_igeneration++; 1589 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); 1590 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); 1591 1592 al->al_alloced++; 1593 1594 gfs2_statfs_change(sdp, 0, -1, +1); 1595 gfs2_trans_add_unrevoke(sdp, block, 1); 1596 1597 spin_lock(&sdp->sd_rindex_spin); 1598 rgd->rd_free_clone--; 1599 spin_unlock(&sdp->sd_rindex_spin); 1600 trace_gfs2_block_alloc(dip, block, 1, GFS2_BLKST_DINODE); 1601 *bn = block; 1602 return 0; 1603 1604 rgrp_error: 1605 gfs2_rgrp_error(rgd); 1606 return -EIO; 1607 } 1608 1609 /** 1610 * __gfs2_free_blocks - free a contiguous run 
of block(s) 1611 * @ip: the inode these blocks are being freed from 1612 * @bstart: first block of a run of contiguous blocks 1613 * @blen: the length of the block run 1614 * @meta: 1 if the blocks represent metadata 1615 * 1616 */ 1617 1618 void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta) 1619 { 1620 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1621 struct gfs2_rgrpd *rgd; 1622 1623 rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE); 1624 if (!rgd) 1625 return; 1626 trace_gfs2_block_alloc(ip, bstart, blen, GFS2_BLKST_FREE); 1627 rgd->rd_free += blen; 1628 1629 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); 1630 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); 1631 1632 gfs2_trans_add_rg(rgd); 1633 1634 /* Directories keep their data in the metadata address space */ 1635 if (meta || ip->i_depth) 1636 gfs2_meta_wipe(ip, bstart, blen); 1637 } 1638 1639 /** 1640 * gfs2_free_meta - free a contiguous run of data block(s) 1641 * @ip: the inode these blocks are being freed from 1642 * @bstart: first block of a run of contiguous blocks 1643 * @blen: the length of the block run 1644 * 1645 */ 1646 1647 void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen) 1648 { 1649 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1650 1651 __gfs2_free_blocks(ip, bstart, blen, 1); 1652 gfs2_statfs_change(sdp, 0, +blen, 0); 1653 gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid); 1654 } 1655 1656 void gfs2_unlink_di(struct inode *inode) 1657 { 1658 struct gfs2_inode *ip = GFS2_I(inode); 1659 struct gfs2_sbd *sdp = GFS2_SB(inode); 1660 struct gfs2_rgrpd *rgd; 1661 u64 blkno = ip->i_no_addr; 1662 1663 rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_UNLINKED); 1664 if (!rgd) 1665 return; 1666 trace_gfs2_block_alloc(ip, blkno, 1, GFS2_BLKST_UNLINKED); 1667 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); 1668 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); 1669 gfs2_trans_add_rg(rgd); 1670 } 1671 1672 static void 
gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno) 1673 { 1674 struct gfs2_sbd *sdp = rgd->rd_sbd; 1675 struct gfs2_rgrpd *tmp_rgd; 1676 1677 tmp_rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_FREE); 1678 if (!tmp_rgd) 1679 return; 1680 gfs2_assert_withdraw(sdp, rgd == tmp_rgd); 1681 1682 if (!rgd->rd_dinodes) 1683 gfs2_consist_rgrpd(rgd); 1684 rgd->rd_dinodes--; 1685 rgd->rd_free++; 1686 1687 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); 1688 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); 1689 1690 gfs2_statfs_change(sdp, 0, +1, -1); 1691 gfs2_trans_add_rg(rgd); 1692 } 1693 1694 1695 void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) 1696 { 1697 gfs2_free_uninit_di(rgd, ip->i_no_addr); 1698 trace_gfs2_block_alloc(ip, ip->i_no_addr, 1, GFS2_BLKST_FREE); 1699 gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid); 1700 gfs2_meta_wipe(ip, ip->i_no_addr, 1); 1701 } 1702 1703 /** 1704 * gfs2_check_blk_type - Check the type of a block 1705 * @sdp: The superblock 1706 * @no_addr: The block number to check 1707 * @type: The block type we are looking for 1708 * 1709 * Returns: 0 if the block type matches the expected type 1710 * -ESTALE if it doesn't match 1711 * or -ve errno if something went wrong while checking 1712 */ 1713 1714 int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type) 1715 { 1716 struct gfs2_rgrpd *rgd; 1717 struct gfs2_holder ri_gh, rgd_gh; 1718 struct gfs2_inode *ip = GFS2_I(sdp->sd_rindex); 1719 int ri_locked = 0; 1720 int error; 1721 1722 if (!gfs2_glock_is_locked_by_me(ip->i_gl)) { 1723 error = gfs2_rindex_hold(sdp, &ri_gh); 1724 if (error) 1725 goto fail; 1726 ri_locked = 1; 1727 } 1728 1729 error = -EINVAL; 1730 rgd = gfs2_blk2rgrpd(sdp, no_addr); 1731 if (!rgd) 1732 goto fail_rindex; 1733 1734 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_SHARED, 0, &rgd_gh); 1735 if (error) 1736 goto fail_rindex; 1737 1738 if (gfs2_get_block_type(rgd, no_addr) != type) 1739 error = -ESTALE; 1740 1741 
gfs2_glock_dq_uninit(&rgd_gh); 1742 fail_rindex: 1743 if (ri_locked) 1744 gfs2_glock_dq_uninit(&ri_gh); 1745 fail: 1746 return error; 1747 } 1748 1749 /** 1750 * gfs2_rlist_add - add a RG to a list of RGs 1751 * @sdp: the filesystem 1752 * @rlist: the list of resource groups 1753 * @block: the block 1754 * 1755 * Figure out what RG a block belongs to and add that RG to the list 1756 * 1757 * FIXME: Don't use NOFAIL 1758 * 1759 */ 1760 1761 void gfs2_rlist_add(struct gfs2_sbd *sdp, struct gfs2_rgrp_list *rlist, 1762 u64 block) 1763 { 1764 struct gfs2_rgrpd *rgd; 1765 struct gfs2_rgrpd **tmp; 1766 unsigned int new_space; 1767 unsigned int x; 1768 1769 if (gfs2_assert_warn(sdp, !rlist->rl_ghs)) 1770 return; 1771 1772 rgd = gfs2_blk2rgrpd(sdp, block); 1773 if (!rgd) { 1774 if (gfs2_consist(sdp)) 1775 fs_err(sdp, "block = %llu\n", (unsigned long long)block); 1776 return; 1777 } 1778 1779 for (x = 0; x < rlist->rl_rgrps; x++) 1780 if (rlist->rl_rgd[x] == rgd) 1781 return; 1782 1783 if (rlist->rl_rgrps == rlist->rl_space) { 1784 new_space = rlist->rl_space + 10; 1785 1786 tmp = kcalloc(new_space, sizeof(struct gfs2_rgrpd *), 1787 GFP_NOFS | __GFP_NOFAIL); 1788 1789 if (rlist->rl_rgd) { 1790 memcpy(tmp, rlist->rl_rgd, 1791 rlist->rl_space * sizeof(struct gfs2_rgrpd *)); 1792 kfree(rlist->rl_rgd); 1793 } 1794 1795 rlist->rl_space = new_space; 1796 rlist->rl_rgd = tmp; 1797 } 1798 1799 rlist->rl_rgd[rlist->rl_rgrps++] = rgd; 1800 } 1801 1802 /** 1803 * gfs2_rlist_alloc - all RGs have been added to the rlist, now allocate 1804 * and initialize an array of glock holders for them 1805 * @rlist: the list of resource groups 1806 * @state: the lock state to acquire the RG lock in 1807 * @flags: the modifier flags for the holder structures 1808 * 1809 * FIXME: Don't use NOFAIL 1810 * 1811 */ 1812 1813 void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state) 1814 { 1815 unsigned int x; 1816 1817 rlist->rl_ghs = kcalloc(rlist->rl_rgrps, sizeof(struct gfs2_holder), 1818 
GFP_NOFS | __GFP_NOFAIL); 1819 for (x = 0; x < rlist->rl_rgrps; x++) 1820 gfs2_holder_init(rlist->rl_rgd[x]->rd_gl, 1821 state, 0, 1822 &rlist->rl_ghs[x]); 1823 } 1824 1825 /** 1826 * gfs2_rlist_free - free a resource group list 1827 * @list: the list of resource groups 1828 * 1829 */ 1830 1831 void gfs2_rlist_free(struct gfs2_rgrp_list *rlist) 1832 { 1833 unsigned int x; 1834 1835 kfree(rlist->rl_rgd); 1836 1837 if (rlist->rl_ghs) { 1838 for (x = 0; x < rlist->rl_rgrps; x++) 1839 gfs2_holder_uninit(&rlist->rl_ghs[x]); 1840 kfree(rlist->rl_ghs); 1841 } 1842 } 1843 1844