/*
 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
 * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
 * of the GNU General Public License version 2.
 */

#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/blkdev.h>
#include <linux/gfs2_ondisk.h>
#include <linux/crc32.h>

#include "gfs2.h"
#include "incore.h"
#include "bmap.h"
#include "glock.h"
#include "inode.h"
#include "meta_io.h"
#include "quota.h"
#include "rgrp.h"
#include "log.h"
#include "super.h"
#include "trans.h"
#include "dir.h"
#include "util.h"
#include "trace_gfs2.h"

/* This doesn't need to be that large as max 64 bit pointers in a 4k
 * block is 512, so __u16 is fine for that. It saves stack space to
 * keep it small.
 *
 * A metapath describes one route through an inode's metadata tree:
 * one buffer and one pointer index per tree height.
 */
struct metapath {
	/* Buffer at each height; slot 0 holds the dinode buffer */
	struct buffer_head *mp_bh[GFS2_MAX_META_HEIGHT];
	/* Index of the pointer to follow within the block at each height */
	__u16 mp_list[GFS2_MAX_META_HEIGHT];
};

/* Per-pass state handed to do_strip() while deallocating a file's blocks */
struct strip_mine {
	/* Non-zero until the first pointer of the pass has been skipped */
	int sm_first;
	/* The only tree height that do_strip() acts on during this pass */
	unsigned int sm_height;
};

/**
 * gfs2_unstuffer_page - unstuff a stuffed inode into a block cached by a page
 * @ip: the inode
 * @dibh: the dinode buffer
 * @block: the block number that was allocated
 * @page: The (optional) page.
This is looked up if @page is NULL
 *        or if the supplied page is not the one at index 0.
 *
 * Copies the stuffed data that follows the dinode header in @dibh into
 * page 0 of the inode's mapping and maps that page's first buffer to
 * @block.
 *
 * Returns: errno
 */

static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
			       u64 block, struct page *page)
{
	struct inode *inode = &ip->i_inode;
	struct buffer_head *bh;
	int release = 0;

	/* Only page 0 can hold stuffed data; fetch it if we were not given it */
	if (!page || page->index) {
		page = find_or_create_page(inode->i_mapping, 0, GFP_NOFS);
		if (!page)
			return -ENOMEM;
		/* We took our own locked reference, so drop it on the way out */
		release = 1;
	}

	if (!PageUptodate(page)) {
		void *kaddr = kmap(page);
		u64 dsize = i_size_read(inode);
 
		/* Never copy more than the dinode block can hold after its header */
		if (dsize > (dibh->b_size - sizeof(struct gfs2_dinode)))
			dsize = dibh->b_size - sizeof(struct gfs2_dinode);

		memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize);
		/* Zero the remainder of the page beyond the copied data */
		memset(kaddr + dsize, 0, PAGE_CACHE_SIZE - dsize);
		kunmap(page);

		SetPageUptodate(page);
	}

	if (!page_has_buffers(page))
		create_empty_buffers(page, 1 << inode->i_blkbits,
				     (1 << BH_Uptodate));

	bh = page_buffers(page);

	/* Point the first buffer of the page at the newly allocated block */
	if (!buffer_mapped(bh))
		map_bh(bh, inode->i_sb, block);

	set_buffer_uptodate(bh);
	if (!gfs2_is_jdata(ip))
		mark_buffer_dirty(bh);
	if (!gfs2_is_writeback(ip))
		gfs2_trans_add_data(ip->i_gl, bh);

	if (release) {
		unlock_page(page);
		page_cache_release(page);
	}

	return 0;
}

/**
 * gfs2_unstuff_dinode - Unstuff a dinode when the data has grown too big
 * @ip: The GFS2 inode to unstuff
 * @page: The (optional) page. This is looked up if the @page is NULL
 *
 * This routine unstuffs a dinode and returns it to a "normal" state such
 * that the height can be grown in the traditional way.
*
 * Takes ip->i_rw_mutex for writing for the duration of the call.
 *
 * Returns: errno
 */

int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page)
{
	struct buffer_head *bh, *dibh;
	struct gfs2_dinode *di;
	u64 block = 0;
	int isdir = gfs2_is_dir(ip);
	int error;

	down_write(&ip->i_rw_mutex);

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		goto out;

	/* A zero-length file has no stuffed data to move anywhere */
	if (i_size_read(&ip->i_inode)) {
		/* Get a free block, fill it with the stuffed data,
		   and write it out to disk */

		unsigned int n = 1;
		error = gfs2_alloc_blocks(ip, &block, &n, 0, NULL);
		if (error)
			goto out_brelse;
		if (isdir) {
			/* Directories: copy the data into a new metadata buffer */
			gfs2_trans_add_unrevoke(GFS2_SB(&ip->i_inode), block, 1);
			error = gfs2_dir_get_new_buffer(ip, block, &bh);
			if (error)
				goto out_brelse;
			gfs2_buffer_copy_tail(bh, sizeof(struct gfs2_meta_header),
					      dibh, sizeof(struct gfs2_dinode));
			brelse(bh);
		} else {
			/* Everything else: move the data via the page cache */
			error = gfs2_unstuffer_page(ip, dibh, block, page);
			if (error)
				goto out_brelse;
		}
	}

	/* Set up the pointer to the new block */

	gfs2_trans_add_meta(ip->i_gl, dibh);
	di = (struct gfs2_dinode *)dibh->b_data;
	/* Wipe the old stuffed data that followed the dinode header */
	gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));

	if (i_size_read(&ip->i_inode)) {
		/* The first pointer slot sits immediately after the dinode header */
		*(__be64 *)(di + 1) = cpu_to_be64(block);
		gfs2_add_inode_blocks(&ip->i_inode, 1);
		di->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode));
	}

	ip->i_height = 1;
	di->di_height = cpu_to_be16(1);

out_brelse:
	brelse(dibh);
out:
	up_write(&ip->i_rw_mutex);
	return error;
}


/**
 * find_metapath - Find path through the metadata tree
 * @sdp: The superblock
 * @block: The logical block number to look up
 * @mp: The metapath to return the result in
 * @height: The pre-calculated height of the metadata tree
 *
 * This routine returns a struct metapath structure that defines a path
 * through the metadata of inode "ip" to get to block "block".
187 * 188 * Example: 189 * Given: "ip" is a height 3 file, "offset" is 101342453, and this is a 190 * filesystem with a blocksize of 4096. 191 * 192 * find_metapath() would return a struct metapath structure set to: 193 * mp_offset = 101342453, mp_height = 3, mp_list[0] = 0, mp_list[1] = 48, 194 * and mp_list[2] = 165. 195 * 196 * That means that in order to get to the block containing the byte at 197 * offset 101342453, we would load the indirect block pointed to by pointer 198 * 0 in the dinode. We would then load the indirect block pointed to by 199 * pointer 48 in that indirect block. We would then load the data block 200 * pointed to by pointer 165 in that indirect block. 201 * 202 * ---------------------------------------- 203 * | Dinode | | 204 * | | 4| 205 * | |0 1 2 3 4 5 9| 206 * | | 6| 207 * ---------------------------------------- 208 * | 209 * | 210 * V 211 * ---------------------------------------- 212 * | Indirect Block | 213 * | 5| 214 * | 4 4 4 4 4 5 5 1| 215 * |0 5 6 7 8 9 0 1 2| 216 * ---------------------------------------- 217 * | 218 * | 219 * V 220 * ---------------------------------------- 221 * | Indirect Block | 222 * | 1 1 1 1 1 5| 223 * | 6 6 6 6 6 1| 224 * |0 3 4 5 6 7 2| 225 * ---------------------------------------- 226 * | 227 * | 228 * V 229 * ---------------------------------------- 230 * | Data block containing offset | 231 * | 101342453 | 232 * | | 233 * | | 234 * ---------------------------------------- 235 * 236 */ 237 238 static void find_metapath(const struct gfs2_sbd *sdp, u64 block, 239 struct metapath *mp, unsigned int height) 240 { 241 unsigned int i; 242 243 for (i = height; i--;) 244 mp->mp_list[i] = do_div(block, sdp->sd_inptrs); 245 246 } 247 248 static inline unsigned int metapath_branch_start(const struct metapath *mp) 249 { 250 if (mp->mp_list[0] == 0) 251 return 2; 252 return 1; 253 } 254 255 /** 256 * metapointer - Return pointer to start of metadata in a buffer 257 * @height: The metadata height (0 = dinode) 258 
* @mp: The metapath 259 * 260 * Return a pointer to the block number of the next height of the metadata 261 * tree given a buffer containing the pointer to the current height of the 262 * metadata tree. 263 */ 264 265 static inline __be64 *metapointer(unsigned int height, const struct metapath *mp) 266 { 267 struct buffer_head *bh = mp->mp_bh[height]; 268 unsigned int head_size = (height > 0) ? 269 sizeof(struct gfs2_meta_header) : sizeof(struct gfs2_dinode); 270 return ((__be64 *)(bh->b_data + head_size)) + mp->mp_list[height]; 271 } 272 273 static void gfs2_metapath_ra(struct gfs2_glock *gl, 274 const struct buffer_head *bh, const __be64 *pos) 275 { 276 struct buffer_head *rabh; 277 const __be64 *endp = (const __be64 *)(bh->b_data + bh->b_size); 278 const __be64 *t; 279 280 for (t = pos; t < endp; t++) { 281 if (!*t) 282 continue; 283 284 rabh = gfs2_getbuf(gl, be64_to_cpu(*t), CREATE); 285 if (trylock_buffer(rabh)) { 286 if (!buffer_uptodate(rabh)) { 287 rabh->b_end_io = end_buffer_read_sync; 288 submit_bh(READA | REQ_META, rabh); 289 continue; 290 } 291 unlock_buffer(rabh); 292 } 293 brelse(rabh); 294 } 295 } 296 297 /** 298 * lookup_metapath - Walk the metadata tree to a specific point 299 * @ip: The inode 300 * @mp: The metapath 301 * 302 * Assumes that the inode's buffer has already been looked up and 303 * hooked onto mp->mp_bh[0] and that the metapath has been initialised 304 * by find_metapath(). 305 * 306 * If this function encounters part of the tree which has not been 307 * allocated, it returns the current height of the tree at the point 308 * at which it found the unallocated block. Blocks which are found are 309 * added to the mp->mp_bh[] list. 
310 * 311 * Returns: error or height of metadata tree 312 */ 313 314 static int lookup_metapath(struct gfs2_inode *ip, struct metapath *mp) 315 { 316 unsigned int end_of_metadata = ip->i_height - 1; 317 unsigned int x; 318 __be64 *ptr; 319 u64 dblock; 320 int ret; 321 322 for (x = 0; x < end_of_metadata; x++) { 323 ptr = metapointer(x, mp); 324 dblock = be64_to_cpu(*ptr); 325 if (!dblock) 326 return x + 1; 327 328 ret = gfs2_meta_indirect_buffer(ip, x+1, dblock, &mp->mp_bh[x+1]); 329 if (ret) 330 return ret; 331 } 332 333 return ip->i_height; 334 } 335 336 static inline void release_metapath(struct metapath *mp) 337 { 338 int i; 339 340 for (i = 0; i < GFS2_MAX_META_HEIGHT; i++) { 341 if (mp->mp_bh[i] == NULL) 342 break; 343 brelse(mp->mp_bh[i]); 344 } 345 } 346 347 /** 348 * gfs2_extent_length - Returns length of an extent of blocks 349 * @start: Start of the buffer 350 * @len: Length of the buffer in bytes 351 * @ptr: Current position in the buffer 352 * @limit: Max extent length to return (0 = unlimited) 353 * @eob: Set to 1 if we hit "end of block" 354 * 355 * If the first block is zero (unallocated) it will return the number of 356 * unallocated blocks in the extent, otherwise it will return the number 357 * of contiguous blocks in the extent. 
358 * 359 * Returns: The length of the extent (minimum of one block) 360 */ 361 362 static inline unsigned int gfs2_extent_length(void *start, unsigned int len, __be64 *ptr, unsigned limit, int *eob) 363 { 364 const __be64 *end = (start + len); 365 const __be64 *first = ptr; 366 u64 d = be64_to_cpu(*ptr); 367 368 *eob = 0; 369 do { 370 ptr++; 371 if (ptr >= end) 372 break; 373 if (limit && --limit == 0) 374 break; 375 if (d) 376 d++; 377 } while(be64_to_cpu(*ptr) == d); 378 if (ptr >= end) 379 *eob = 1; 380 return (ptr - first); 381 } 382 383 static inline void bmap_lock(struct gfs2_inode *ip, int create) 384 { 385 if (create) 386 down_write(&ip->i_rw_mutex); 387 else 388 down_read(&ip->i_rw_mutex); 389 } 390 391 static inline void bmap_unlock(struct gfs2_inode *ip, int create) 392 { 393 if (create) 394 up_write(&ip->i_rw_mutex); 395 else 396 up_read(&ip->i_rw_mutex); 397 } 398 399 static inline __be64 *gfs2_indirect_init(struct metapath *mp, 400 struct gfs2_glock *gl, unsigned int i, 401 unsigned offset, u64 bn) 402 { 403 __be64 *ptr = (__be64 *)(mp->mp_bh[i - 1]->b_data + 404 ((i > 1) ? sizeof(struct gfs2_meta_header) : 405 sizeof(struct gfs2_dinode))); 406 BUG_ON(i < 1); 407 BUG_ON(mp->mp_bh[i] != NULL); 408 mp->mp_bh[i] = gfs2_meta_new(gl, bn); 409 gfs2_trans_add_meta(gl, mp->mp_bh[i]); 410 gfs2_metatype_set(mp->mp_bh[i], GFS2_METATYPE_IN, GFS2_FORMAT_IN); 411 gfs2_buffer_clear_tail(mp->mp_bh[i], sizeof(struct gfs2_meta_header)); 412 ptr += offset; 413 *ptr = cpu_to_be64(bn); 414 return ptr; 415 } 416 417 enum alloc_state { 418 ALLOC_DATA = 0, 419 ALLOC_GROW_DEPTH = 1, 420 ALLOC_GROW_HEIGHT = 2, 421 /* ALLOC_UNSTUFF = 3, TBD and rather complicated */ 422 }; 423 424 /** 425 * gfs2_bmap_alloc - Build a metadata tree of the requested height 426 * @inode: The GFS2 inode 427 * @lblock: The logical starting block of the extent 428 * @bh_map: This is used to return the mapping details 429 * @mp: The metapath 430 * @sheight: The starting height (i.e. 
what's already mapped)
 * @height: The height to build to
 * @maxlen: The max number of data blocks to alloc
 *
 * In this routine we may have to alloc:
 *   i) Indirect blocks to grow the metadata tree height
 *  ii) Indirect blocks to fill in lower part of the metadata tree
 * iii) Data blocks
 *
 * The function is in two parts. The first part works out the total
 * number of blocks which we need. The second part does the actual
 * allocation asking for an extent at a time (if enough contiguous free
 * blocks are available, there will only be one request per bmap call)
 * and uses the state machine to initialise the blocks in order.
 *
 * Returns: errno on error
 */

static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
			   struct buffer_head *bh_map, struct metapath *mp,
			   const unsigned int sheight,
			   const unsigned int height,
			   const unsigned int maxlen)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct super_block *sb = sdp->sd_vfs;
	struct buffer_head *dibh = mp->mp_bh[0];
	u64 bn, dblock = 0;
	unsigned n, i, blks, alloced = 0, iblks = 0, branch_start = 0;
	unsigned dblks = 0;
	unsigned ptrs_per_blk;
	const unsigned end_of_metadata = height - 1;
	int ret;
	int eob = 0;
	enum alloc_state state;
	__be64 *ptr;
	__be64 zero_bn = 0;

	BUG_ON(sheight < 1);
	BUG_ON(dibh == NULL);

	gfs2_trans_add_meta(ip->i_gl, dibh);

	/* Part one: work out how many data (dblks) and indirect (iblks) blocks */
	if (height == sheight) {
		struct buffer_head *bh;
		/* Bottom indirect block exists, find unalloced extent size */
		ptr = metapointer(end_of_metadata, mp);
		bh = mp->mp_bh[end_of_metadata];
		dblks = gfs2_extent_length(bh->b_data, bh->b_size, ptr, maxlen,
					   &eob);
		BUG_ON(dblks < 1);
		state = ALLOC_DATA;
	} else {
		/* Need to allocate indirect blocks */
		ptrs_per_blk = height > 1 ? sdp->sd_inptrs : sdp->sd_diptrs;
		/* Data blocks are limited to the slots left in the bottom block */
		dblks = min(maxlen, ptrs_per_blk - mp->mp_list[end_of_metadata]);
		if (height == ip->i_height) {
			/* Writing into existing tree, extend tree down */
			iblks = height - sheight;
			state = ALLOC_GROW_DEPTH;
		} else {
			/* Building up tree height */
			state = ALLOC_GROW_HEIGHT;
			iblks = height - ip->i_height;
			branch_start = metapath_branch_start(mp);
			iblks += (height - branch_start);
		}
	}

	/* start of the second part of the function (state machine) */

	blks = dblks + iblks;
	i = sheight;
	do {
		int error;
		/* Ask for everything still outstanding; n returns what we got */
		n = blks - alloced;
		error = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL);
		if (error)
			return error;
		alloced += n;
		if (state != ALLOC_DATA || gfs2_is_jdata(ip))
			gfs2_trans_add_unrevoke(sdp, bn, n);
		switch (state) {
		/* Growing height of tree */
		case ALLOC_GROW_HEIGHT:
			if (i == 1) {
				/* Remember the dinode's first pointer before it is replaced */
				ptr = (__be64 *)(dibh->b_data +
						 sizeof(struct gfs2_dinode));
				zero_bn = *ptr;
			}
			for (; i - 1 < height - ip->i_height && n > 0; i++, n--)
				gfs2_indirect_init(mp, ip->i_gl, i, 0, bn++);
			if (i - 1 == height - ip->i_height) {
				i--;
				/* Move the dinode's old pointers into the new indirect block */
				gfs2_buffer_copy_tail(mp->mp_bh[i],
						sizeof(struct gfs2_meta_header),
						dibh, sizeof(struct gfs2_dinode));
				gfs2_buffer_clear_tail(dibh,
						sizeof(struct gfs2_dinode) +
						sizeof(__be64));
				ptr = (__be64 *)(mp->mp_bh[i]->b_data +
					sizeof(struct gfs2_meta_header));
				*ptr = zero_bn;
				state = ALLOC_GROW_DEPTH;
				/* Drop buffers from the branch point down */
				for(i = branch_start; i < height; i++) {
					if (mp->mp_bh[i] == NULL)
						break;
					brelse(mp->mp_bh[i]);
					mp->mp_bh[i] = NULL;
				}
				i = branch_start;
			}
			if (n == 0)
				break;
			/* fall through - blocks remain, continue with the next state */
		/* Branching from existing tree */
		case ALLOC_GROW_DEPTH:
			if (i > 1 && i < height)
				gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[i-1]);
			for (; i < height && n > 0; i++, n--)
				gfs2_indirect_init(mp, ip->i_gl, i,
						   mp->mp_list[i-1], bn++);
			if (i == height)
				state = ALLOC_DATA;
			if (n == 0)
				break;
			/* fall through - blocks remain, continue with the next state */
		/* Tree complete, adding data blocks */
		case ALLOC_DATA:
			BUG_ON(n > dblks);
			BUG_ON(mp->mp_bh[end_of_metadata] == NULL);
			gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[end_of_metadata]);
			/* Only the blocks obtained in this extent get mapped */
			dblks = n;
			ptr = metapointer(end_of_metadata, mp);
			dblock = bn;
			while (n-- > 0)
				*ptr++ = cpu_to_be64(bn++);
			if (buffer_zeronew(bh_map)) {
				ret = sb_issue_zeroout(sb, dblock, dblks,
						       GFP_NOFS);
				if (ret) {
					fs_err(sdp,
					       "Failed to zero data buffers\n");
					clear_buffer_zeronew(bh_map);
				}
			}
			break;
		}
	/* Loop until the data state has been reached and data blocks assigned */
	} while ((state != ALLOC_DATA) || !dblock);

	ip->i_height = height;
	gfs2_add_inode_blocks(&ip->i_inode, alloced);
	gfs2_dinode_out(ip, mp->mp_bh[0]->b_data);
	map_bh(bh_map, inode->i_sb, dblock);
	bh_map->b_size = dblks << inode->i_blkbits;
	set_buffer_new(bh_map);
	return 0;
}

/**
 * gfs2_block_map - Map a block from an inode to a disk block
 * @inode: The inode
 * @lblock: The logical block number
 * @bh_map: The bh to be mapped
 * @create: True if it's ok to alloc blocks to satisfy the request
 *
 * Sets buffer_mapped() if successful, sets buffer_boundary() if a
 * read of metadata will be required before the next block can be
 * mapped. Sets buffer_new() if new blocks were allocated.
*
 * Returns: errno
 */

int gfs2_block_map(struct inode *inode, sector_t lblock,
		   struct buffer_head *bh_map, int create)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	unsigned int bsize = sdp->sd_sb.sb_bsize;
	/* The caller passes the wanted mapping length in bh_map->b_size */
	const unsigned int maxlen = bh_map->b_size >> inode->i_blkbits;
	const u64 *arr = sdp->sd_heightsize;
	__be64 *ptr;
	u64 size;
	struct metapath mp;
	int ret;
	int eob;
	unsigned int len;
	struct buffer_head *bh;
	u8 height;

	BUG_ON(maxlen == 0);

	/* release_metapath() stops at the first NULL, so start with all NULL */
	memset(mp.mp_bh, 0, sizeof(mp.mp_bh));
	bmap_lock(ip, create);
	clear_buffer_mapped(bh_map);
	clear_buffer_new(bh_map);
	clear_buffer_boundary(bh_map);
	trace_gfs2_bmap(ip, bh_map, lblock, create, 1);
	/* Directories use the journaled block size and height table */
	if (gfs2_is_dir(ip)) {
		bsize = sdp->sd_jbsize;
		arr = sdp->sd_jheightsize;
	}

	ret = gfs2_meta_inode_buffer(ip, &mp.mp_bh[0]);
	if (ret)
		goto out;

	/* Find the smallest tree height able to address @lblock */
	height = ip->i_height;
	size = (lblock + 1) * bsize;
	while (size > arr[height])
		height++;
	find_metapath(sdp, lblock, &mp, height);
	/* ret doubles as the already-mapped height handed to do_alloc */
	ret = 1;
	if (height > ip->i_height || gfs2_is_stuffed(ip))
		goto do_alloc;
	ret = lookup_metapath(ip, &mp);
	if (ret < 0)
		goto out;
	/* A shorter walk than the tree height means part of the path is missing */
	if (ret != ip->i_height)
		goto do_alloc;
	ptr = metapointer(ip->i_height - 1, &mp);
	if (*ptr == 0)
		goto do_alloc;
	map_bh(bh_map, inode->i_sb, be64_to_cpu(*ptr));
	bh = mp.mp_bh[ip->i_height - 1];
	len = gfs2_extent_length(bh->b_data, bh->b_size, ptr, maxlen, &eob);
	bh_map->b_size = (len << inode->i_blkbits);
	if (eob)
		set_buffer_boundary(bh_map);
	ret = 0;
out:
	release_metapath(&mp);
	trace_gfs2_bmap(ip, bh_map, lblock, create, ret);
	bmap_unlock(ip, create);
	return ret;

do_alloc:
	/* All allocations are done here, firstly check create flag */
	if (!create) {
		BUG_ON(gfs2_is_stuffed(ip));
		/* Not allowed to allocate: report success with bh_map unmapped */
		ret = 0;
		goto out;
	}

	/* At this
point ret is the tree depth of already allocated blocks */ 674 ret = gfs2_bmap_alloc(inode, lblock, bh_map, &mp, ret, height, maxlen); 675 goto out; 676 } 677 678 /* 679 * Deprecated: do not use in new code 680 */ 681 int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsigned *extlen) 682 { 683 struct buffer_head bh = { .b_state = 0, .b_blocknr = 0 }; 684 int ret; 685 int create = *new; 686 687 BUG_ON(!extlen); 688 BUG_ON(!dblock); 689 BUG_ON(!new); 690 691 bh.b_size = 1 << (inode->i_blkbits + (create ? 0 : 5)); 692 ret = gfs2_block_map(inode, lblock, &bh, create); 693 *extlen = bh.b_size >> inode->i_blkbits; 694 *dblock = bh.b_blocknr; 695 if (buffer_new(&bh)) 696 *new = 1; 697 else 698 *new = 0; 699 return ret; 700 } 701 702 /** 703 * do_strip - Look for a layer a particular layer of the file and strip it off 704 * @ip: the inode 705 * @dibh: the dinode buffer 706 * @bh: A buffer of pointers 707 * @top: The first pointer in the buffer 708 * @bottom: One more than the last pointer 709 * @height: the height this buffer is at 710 * @data: a pointer to a struct strip_mine 711 * 712 * Returns: errno 713 */ 714 715 static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, 716 struct buffer_head *bh, __be64 *top, __be64 *bottom, 717 unsigned int height, struct strip_mine *sm) 718 { 719 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 720 struct gfs2_rgrp_list rlist; 721 u64 bn, bstart; 722 u32 blen, btotal; 723 __be64 *p; 724 unsigned int rg_blocks = 0; 725 int metadata; 726 unsigned int revokes = 0; 727 int x; 728 int error; 729 730 error = gfs2_rindex_update(sdp); 731 if (error) 732 return error; 733 734 if (!*top) 735 sm->sm_first = 0; 736 737 if (height != sm->sm_height) 738 return 0; 739 740 if (sm->sm_first) { 741 top++; 742 sm->sm_first = 0; 743 } 744 745 metadata = (height != ip->i_height - 1); 746 if (metadata) 747 revokes = (height) ? 
sdp->sd_inptrs : sdp->sd_diptrs; 748 else if (ip->i_depth) 749 revokes = sdp->sd_inptrs; 750 751 memset(&rlist, 0, sizeof(struct gfs2_rgrp_list)); 752 bstart = 0; 753 blen = 0; 754 755 for (p = top; p < bottom; p++) { 756 if (!*p) 757 continue; 758 759 bn = be64_to_cpu(*p); 760 761 if (bstart + blen == bn) 762 blen++; 763 else { 764 if (bstart) 765 gfs2_rlist_add(ip, &rlist, bstart); 766 767 bstart = bn; 768 blen = 1; 769 } 770 } 771 772 if (bstart) 773 gfs2_rlist_add(ip, &rlist, bstart); 774 else 775 goto out; /* Nothing to do */ 776 777 gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE); 778 779 for (x = 0; x < rlist.rl_rgrps; x++) { 780 struct gfs2_rgrpd *rgd; 781 rgd = rlist.rl_ghs[x].gh_gl->gl_object; 782 rg_blocks += rgd->rd_length; 783 } 784 785 error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs); 786 if (error) 787 goto out_rlist; 788 789 if (gfs2_rs_active(ip->i_res)) /* needs to be done with the rgrp glock held */ 790 gfs2_rs_deltree(ip->i_res); 791 792 error = gfs2_trans_begin(sdp, rg_blocks + RES_DINODE + 793 RES_INDIRECT + RES_STATFS + RES_QUOTA, 794 revokes); 795 if (error) 796 goto out_rg_gunlock; 797 798 down_write(&ip->i_rw_mutex); 799 800 gfs2_trans_add_meta(ip->i_gl, dibh); 801 gfs2_trans_add_meta(ip->i_gl, bh); 802 803 bstart = 0; 804 blen = 0; 805 btotal = 0; 806 807 for (p = top; p < bottom; p++) { 808 if (!*p) 809 continue; 810 811 bn = be64_to_cpu(*p); 812 813 if (bstart + blen == bn) 814 blen++; 815 else { 816 if (bstart) { 817 __gfs2_free_blocks(ip, bstart, blen, metadata); 818 btotal += blen; 819 } 820 821 bstart = bn; 822 blen = 1; 823 } 824 825 *p = 0; 826 gfs2_add_inode_blocks(&ip->i_inode, -1); 827 } 828 if (bstart) { 829 __gfs2_free_blocks(ip, bstart, blen, metadata); 830 btotal += blen; 831 } 832 833 gfs2_statfs_change(sdp, 0, +btotal, 0); 834 gfs2_quota_change(ip, -(s64)btotal, ip->i_inode.i_uid, 835 ip->i_inode.i_gid); 836 837 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; 838 839 gfs2_dinode_out(ip, dibh->b_data); 840 841 
up_write(&ip->i_rw_mutex); 842 843 gfs2_trans_end(sdp); 844 845 out_rg_gunlock: 846 gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs); 847 out_rlist: 848 gfs2_rlist_free(&rlist); 849 out: 850 return error; 851 } 852 853 /** 854 * recursive_scan - recursively scan through the end of a file 855 * @ip: the inode 856 * @dibh: the dinode buffer 857 * @mp: the path through the metadata to the point to start 858 * @height: the height the recursion is at 859 * @block: the indirect block to look at 860 * @first: 1 if this is the first block 861 * @sm: data opaque to this function to pass to @bc 862 * 863 * When this is first called @height and @block should be zero and 864 * @first should be 1. 865 * 866 * Returns: errno 867 */ 868 869 static int recursive_scan(struct gfs2_inode *ip, struct buffer_head *dibh, 870 struct metapath *mp, unsigned int height, 871 u64 block, int first, struct strip_mine *sm) 872 { 873 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 874 struct buffer_head *bh = NULL; 875 __be64 *top, *bottom; 876 u64 bn; 877 int error; 878 int mh_size = sizeof(struct gfs2_meta_header); 879 880 if (!height) { 881 error = gfs2_meta_inode_buffer(ip, &bh); 882 if (error) 883 return error; 884 dibh = bh; 885 886 top = (__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)) + mp->mp_list[0]; 887 bottom = (__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)) + sdp->sd_diptrs; 888 } else { 889 error = gfs2_meta_indirect_buffer(ip, height, block, &bh); 890 if (error) 891 return error; 892 893 top = (__be64 *)(bh->b_data + mh_size) + 894 (first ? 
mp->mp_list[height] : 0); 895 896 bottom = (__be64 *)(bh->b_data + mh_size) + sdp->sd_inptrs; 897 } 898 899 error = do_strip(ip, dibh, bh, top, bottom, height, sm); 900 if (error) 901 goto out; 902 903 if (height < ip->i_height - 1) { 904 905 gfs2_metapath_ra(ip->i_gl, bh, top); 906 907 for (; top < bottom; top++, first = 0) { 908 if (!*top) 909 continue; 910 911 bn = be64_to_cpu(*top); 912 913 error = recursive_scan(ip, dibh, mp, height + 1, bn, 914 first, sm); 915 if (error) 916 break; 917 } 918 } 919 out: 920 brelse(bh); 921 return error; 922 } 923 924 925 /** 926 * gfs2_block_truncate_page - Deal with zeroing out data for truncate 927 * 928 * This is partly borrowed from ext3. 929 */ 930 static int gfs2_block_truncate_page(struct address_space *mapping, loff_t from) 931 { 932 struct inode *inode = mapping->host; 933 struct gfs2_inode *ip = GFS2_I(inode); 934 unsigned long index = from >> PAGE_CACHE_SHIFT; 935 unsigned offset = from & (PAGE_CACHE_SIZE-1); 936 unsigned blocksize, iblock, length, pos; 937 struct buffer_head *bh; 938 struct page *page; 939 int err; 940 941 page = find_or_create_page(mapping, index, GFP_NOFS); 942 if (!page) 943 return 0; 944 945 blocksize = inode->i_sb->s_blocksize; 946 length = blocksize - (offset & (blocksize - 1)); 947 iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); 948 949 if (!page_has_buffers(page)) 950 create_empty_buffers(page, blocksize, 0); 951 952 /* Find the buffer that contains "offset" */ 953 bh = page_buffers(page); 954 pos = blocksize; 955 while (offset >= pos) { 956 bh = bh->b_this_page; 957 iblock++; 958 pos += blocksize; 959 } 960 961 err = 0; 962 963 if (!buffer_mapped(bh)) { 964 gfs2_block_map(inode, iblock, bh, 0); 965 /* unmapped? It's a hole - nothing to do */ 966 if (!buffer_mapped(bh)) 967 goto unlock; 968 } 969 970 /* Ok, it's mapped. 
Make sure it's up-to-date */ 971 if (PageUptodate(page)) 972 set_buffer_uptodate(bh); 973 974 if (!buffer_uptodate(bh)) { 975 err = -EIO; 976 ll_rw_block(READ, 1, &bh); 977 wait_on_buffer(bh); 978 /* Uhhuh. Read error. Complain and punt. */ 979 if (!buffer_uptodate(bh)) 980 goto unlock; 981 err = 0; 982 } 983 984 if (!gfs2_is_writeback(ip)) 985 gfs2_trans_add_data(ip->i_gl, bh); 986 987 zero_user(page, offset, length); 988 mark_buffer_dirty(bh); 989 unlock: 990 unlock_page(page); 991 page_cache_release(page); 992 return err; 993 } 994 995 /** 996 * gfs2_journaled_truncate - Wrapper for truncate_pagecache for jdata files 997 * @inode: The inode being truncated 998 * @oldsize: The original (larger) size 999 * @newsize: The new smaller size 1000 * 1001 * With jdata files, we have to journal a revoke for each block which is 1002 * truncated. As a result, we need to split this into separate transactions 1003 * if the number of pages being truncated gets too large. 1004 */ 1005 1006 #define GFS2_JTRUNC_REVOKES 8192 1007 1008 static int gfs2_journaled_truncate(struct inode *inode, u64 oldsize, u64 newsize) 1009 { 1010 struct gfs2_sbd *sdp = GFS2_SB(inode); 1011 u64 max_chunk = GFS2_JTRUNC_REVOKES * sdp->sd_vfs->s_blocksize; 1012 u64 chunk; 1013 int error; 1014 1015 while (oldsize != newsize) { 1016 chunk = oldsize - newsize; 1017 if (chunk > max_chunk) 1018 chunk = max_chunk; 1019 truncate_pagecache(inode, oldsize - chunk); 1020 oldsize -= chunk; 1021 gfs2_trans_end(sdp); 1022 error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES); 1023 if (error) 1024 return error; 1025 } 1026 1027 return 0; 1028 } 1029 1030 static int trunc_start(struct inode *inode, u64 oldsize, u64 newsize) 1031 { 1032 struct gfs2_inode *ip = GFS2_I(inode); 1033 struct gfs2_sbd *sdp = GFS2_SB(inode); 1034 struct address_space *mapping = inode->i_mapping; 1035 struct buffer_head *dibh; 1036 int journaled = gfs2_is_jdata(ip); 1037 int error; 1038 1039 if (journaled) 1040 error = 
gfs2_trans_begin(sdp, RES_DINODE + RES_JDATA, GFS2_JTRUNC_REVOKES);
	else
		error = gfs2_trans_begin(sdp, RES_DINODE, 0);
	if (error)
		return error;

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		goto out;

	gfs2_trans_add_meta(ip->i_gl, dibh);

	if (gfs2_is_stuffed(ip)) {
		/* Stuffed file: just clear the dinode data beyond the new size */
		gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + newsize);
	} else {
		/* Zero the rest of the last block if the new size is not block aligned */
		if (newsize & (u64)(sdp->sd_sb.sb_bsize - 1)) {
			error = gfs2_block_truncate_page(mapping, newsize);
			if (error)
				goto out_brelse;
		}
		/* Cleared again by trunc_end() */
		ip->i_diskflags |= GFS2_DIF_TRUNC_IN_PROG;
	}

	i_size_write(inode, newsize);
	ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
	gfs2_dinode_out(ip, dibh->b_data);

	if (journaled)
		error = gfs2_journaled_truncate(inode, oldsize, newsize);
	else
		truncate_pagecache(inode, newsize);

	/*
	 * gfs2_journaled_truncate() only fails when gfs2_trans_begin() does,
	 * after it has ended the previous transaction, so there is nothing
	 * to end here.
	 */
	if (error) {
		brelse(dibh);
		return error;
	}

out_brelse:
	brelse(dibh);
out:
	gfs2_trans_end(sdp);
	return error;
}

/*
 * trunc_dealloc - free the blocks beyond a given size
 * @ip: the inode
 * @size: the size to keep; blocks past it are stripped
 *
 * Runs one recursive_scan() pass per tree height, from the bottom of the
 * tree upwards.
 *
 * Returns: errno
 */
static int trunc_dealloc(struct gfs2_inode *ip, u64 size)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	unsigned int height = ip->i_height;
	u64 lblock;
	struct metapath mp;
	int error;

	/* Path to the last block that is kept (block 0 when nothing is kept) */
	if (!size)
		lblock = 0;
	else
		lblock = (size - 1) >> sdp->sd_sb.sb_bsize_shift;

	find_metapath(sdp, lblock, &mp, ip->i_height);
	error = gfs2_rindex_update(sdp);
	if (error)
		return error;

	error = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
	if (error)
		return error;

	while (height--) {
		struct strip_mine sm;
		/* With a non-zero size the first pointer on the path is kept */
		sm.sm_first = !!size;
		sm.sm_height = height;

		error = recursive_scan(ip, NULL, &mp, 0, 0, 1, &sm);
		if (error)
			break;
	}

	gfs2_quota_unhold(ip);

	return error;
}

/*
 * trunc_end - final stage of shrinking a file
 * @ip: the inode
 *
 * Clears GFS2_DIF_TRUNC_IN_PROG and, when the file is now empty, resets
 * the tree height to 0.
 *
 * Returns: errno
 */
static int trunc_end(struct gfs2_inode *ip)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct buffer_head *dibh;
	int error;

	error = gfs2_trans_begin(sdp, RES_DINODE, 0);
	if (error)
		return error;

	down_write(&ip->i_rw_mutex);

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		goto out;

	if (!i_size_read(&ip->i_inode)) {
		/* Empty file: no tree is left, so clear the pointer area */
		ip->i_height = 0;
		ip->i_goal = ip->i_no_addr;
		gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
		gfs2_ordered_del_inode(ip);
	}
	ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
	ip->i_diskflags &= ~GFS2_DIF_TRUNC_IN_PROG;

	gfs2_trans_add_meta(ip->i_gl, dibh);
	gfs2_dinode_out(ip, dibh->b_data);
	brelse(dibh);

out:
	up_write(&ip->i_rw_mutex);
	gfs2_trans_end(sdp);
	return error;
}

/**
 * do_shrink - make a file smaller
 * @inode: the inode
 * @oldsize: the current inode size
 * @newsize: the size to make the file
 *
 * Called with an exclusive lock on @inode. The @newsize must
 * be equal to or smaller than the current inode size.
 *
 * Returns: errno
 */

static int do_shrink(struct inode *inode, u64 oldsize, u64 newsize)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	int error;

	error = trunc_start(inode, oldsize, newsize);
	if (error < 0)
		return error;
	/* A stuffed file has no blocks to free; trunc_start() did all the work */
	if (gfs2_is_stuffed(ip))
		return 0;

	error = trunc_dealloc(ip, newsize);
	if (error == 0)
		error = trunc_end(ip);

	return error;
}

/*
 * gfs2_trim_blocks - run the shrink path at the inode's current size
 * @inode: the inode
 *
 * Warns if the shrink fails; the error itself is not returned.
 */
void gfs2_trim_blocks(struct inode *inode)
{
	u64 size = inode->i_size;
	int ret;

	ret = do_shrink(inode, size, size);
	WARN_ON(ret != 0);
}

/**
 * do_grow - Touch and update inode size
 * @inode: The inode
 * @size: The new size
 *
 * This function updates the timestamps on the inode and
 * may also increase the size of the inode.
This function 1202 * must not be called with @size any smaller than the current 1203 * inode size. 1204 * 1205 * Although it is not strictly required to unstuff files here, 1206 * earlier versions of GFS2 have a bug in the stuffed file reading 1207 * code which will result in a buffer overrun if the size is larger 1208 * than the max stuffed file size. In order to prevent this from 1209 * occurring, such files are unstuffed, but in other cases we can 1210 * just update the inode size directly. 1211 * 1212 * Returns: 0 on success, or -ve on error 1213 */ 1214 1215 static int do_grow(struct inode *inode, u64 size) 1216 { 1217 struct gfs2_inode *ip = GFS2_I(inode); 1218 struct gfs2_sbd *sdp = GFS2_SB(inode); 1219 struct buffer_head *dibh; 1220 int error; 1221 int unstuff = 0; 1222 1223 if (gfs2_is_stuffed(ip) && 1224 (size > (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)))) { 1225 error = gfs2_quota_lock_check(ip); 1226 if (error) 1227 return error; 1228 1229 error = gfs2_inplace_reserve(ip, 1, 0); 1230 if (error) 1231 goto do_grow_qunlock; 1232 unstuff = 1; 1233 } 1234 1235 error = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS + RES_RG_BIT + 1236 (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF ? 
1237 0 : RES_QUOTA), 0); 1238 if (error) 1239 goto do_grow_release; 1240 1241 if (unstuff) { 1242 error = gfs2_unstuff_dinode(ip, NULL); 1243 if (error) 1244 goto do_end_trans; 1245 } 1246 1247 error = gfs2_meta_inode_buffer(ip, &dibh); 1248 if (error) 1249 goto do_end_trans; 1250 1251 i_size_write(inode, size); 1252 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; 1253 gfs2_trans_add_meta(ip->i_gl, dibh); 1254 gfs2_dinode_out(ip, dibh->b_data); 1255 brelse(dibh); 1256 1257 do_end_trans: 1258 gfs2_trans_end(sdp); 1259 do_grow_release: 1260 if (unstuff) { 1261 gfs2_inplace_release(ip); 1262 do_grow_qunlock: 1263 gfs2_quota_unlock(ip); 1264 } 1265 return error; 1266 } 1267 1268 /** 1269 * gfs2_setattr_size - make a file a given size 1270 * @inode: the inode 1271 * @newsize: the size to make the file 1272 * 1273 * The file size can grow, shrink, or stay the same size. This 1274 * is called holding i_mutex and an exclusive glock on the inode 1275 * in question. 1276 * 1277 * Returns: errno 1278 */ 1279 1280 int gfs2_setattr_size(struct inode *inode, u64 newsize) 1281 { 1282 int ret; 1283 u64 oldsize; 1284 1285 BUG_ON(!S_ISREG(inode->i_mode)); 1286 1287 ret = inode_newsize_ok(inode, newsize); 1288 if (ret) 1289 return ret; 1290 1291 ret = get_write_access(inode); 1292 if (ret) 1293 return ret; 1294 1295 inode_dio_wait(inode); 1296 1297 ret = gfs2_rs_alloc(GFS2_I(inode)); 1298 if (ret) 1299 goto out; 1300 1301 oldsize = inode->i_size; 1302 if (newsize >= oldsize) { 1303 ret = do_grow(inode, newsize); 1304 goto out; 1305 } 1306 1307 ret = do_shrink(inode, oldsize, newsize); 1308 out: 1309 put_write_access(inode); 1310 return ret; 1311 } 1312 1313 int gfs2_truncatei_resume(struct gfs2_inode *ip) 1314 { 1315 int error; 1316 error = trunc_dealloc(ip, i_size_read(&ip->i_inode)); 1317 if (!error) 1318 error = trunc_end(ip); 1319 return error; 1320 } 1321 1322 int gfs2_file_dealloc(struct gfs2_inode *ip) 1323 { 1324 return trunc_dealloc(ip, 0); 1325 } 1326 1327 /** 
1328 * gfs2_write_alloc_required - figure out if a write will require an allocation 1329 * @ip: the file being written to 1330 * @offset: the offset to write to 1331 * @len: the number of bytes being written 1332 * 1333 * Returns: 1 if an alloc is required, 0 otherwise 1334 */ 1335 1336 int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset, 1337 unsigned int len) 1338 { 1339 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1340 struct buffer_head bh; 1341 unsigned int shift; 1342 u64 lblock, lblock_stop, size; 1343 u64 end_of_file; 1344 1345 if (!len) 1346 return 0; 1347 1348 if (gfs2_is_stuffed(ip)) { 1349 if (offset + len > 1350 sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) 1351 return 1; 1352 return 0; 1353 } 1354 1355 shift = sdp->sd_sb.sb_bsize_shift; 1356 BUG_ON(gfs2_is_dir(ip)); 1357 end_of_file = (i_size_read(&ip->i_inode) + sdp->sd_sb.sb_bsize - 1) >> shift; 1358 lblock = offset >> shift; 1359 lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift; 1360 if (lblock_stop > end_of_file) 1361 return 1; 1362 1363 size = (lblock_stop - lblock) << shift; 1364 do { 1365 bh.b_state = 0; 1366 bh.b_size = size; 1367 gfs2_block_map(&ip->i_inode, lblock, &bh, 0); 1368 if (!buffer_mapped(&bh)) 1369 return 1; 1370 size -= bh.b_size; 1371 lblock += (bh.b_size >> ip->i_inode.i_blkbits); 1372 } while(size > 0); 1373 1374 return 0; 1375 } 1376 1377