1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 4 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. 5 */ 6 7 #include <linux/spinlock.h> 8 #include <linux/completion.h> 9 #include <linux/buffer_head.h> 10 #include <linux/blkdev.h> 11 #include <linux/gfs2_ondisk.h> 12 #include <linux/crc32.h> 13 #include <linux/iomap.h> 14 #include <linux/ktime.h> 15 16 #include "gfs2.h" 17 #include "incore.h" 18 #include "bmap.h" 19 #include "glock.h" 20 #include "inode.h" 21 #include "meta_io.h" 22 #include "quota.h" 23 #include "rgrp.h" 24 #include "log.h" 25 #include "super.h" 26 #include "trans.h" 27 #include "dir.h" 28 #include "util.h" 29 #include "aops.h" 30 #include "trace_gfs2.h" 31 32 /* This doesn't need to be that large as max 64 bit pointers in a 4k 33 * block is 512, so __u16 is fine for that. It saves stack space to 34 * keep it small. 35 */ 36 struct metapath { 37 struct buffer_head *mp_bh[GFS2_MAX_META_HEIGHT]; 38 __u16 mp_list[GFS2_MAX_META_HEIGHT]; 39 int mp_fheight; /* find_metapath height */ 40 int mp_aheight; /* actual height (lookup height) */ 41 }; 42 43 static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length); 44 45 /** 46 * gfs2_unstuffer_page - unstuff a stuffed inode into a block cached by a page 47 * @ip: the inode 48 * @dibh: the dinode buffer 49 * @block: the block number that was allocated 50 * @page: The (optional) page. This is looked up if @page is NULL 51 * 52 * Returns: errno 53 */ 54 55 static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh, 56 u64 block, struct page *page) 57 { 58 struct inode *inode = &ip->i_inode; 59 struct buffer_head *bh; 60 int release = 0; 61 62 if (!page || page->index) { 63 page = find_or_create_page(inode->i_mapping, 0, GFP_NOFS); 64 if (!page) 65 return -ENOMEM; 66 release = 1; 67 } 68 69 if (!PageUptodate(page)) { 70 void *kaddr = kmap(page); 71 u64 dsize = i_size_read(inode); 72 73 if (dsize > gfs2_max_stuffed_size(ip)) 74 dsize = gfs2_max_stuffed_size(ip); 75 76 memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize); 77 memset(kaddr + dsize, 0, PAGE_SIZE - dsize); 78 kunmap(page); 79 80 SetPageUptodate(page); 81 } 82 83 if (!page_has_buffers(page)) 84 create_empty_buffers(page, BIT(inode->i_blkbits), 85 BIT(BH_Uptodate)); 86 87 bh = page_buffers(page); 88 89 if (!buffer_mapped(bh)) 90 map_bh(bh, inode->i_sb, block); 91 92 set_buffer_uptodate(bh); 93 if (gfs2_is_jdata(ip)) 94 gfs2_trans_add_data(ip->i_gl, bh); 95 else { 96 mark_buffer_dirty(bh); 97 gfs2_ordered_add_inode(ip); 98 } 99 100 if (release) { 101 unlock_page(page); 102 put_page(page); 103 } 104 105 return 0; 106 } 107 108 /** 109 * gfs2_unstuff_dinode - Unstuff a dinode when the data has grown too big 110 * @ip: The GFS2 inode to unstuff 111 * @page: The (optional) page. This is looked up if the @page is NULL 112 * 113 * This routine unstuffs a dinode and returns it to a "normal" state such 114 * that the height can be grown in the traditional way. 115 * 116 * Returns: errno 117 */ 118 119 int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page) 120 { 121 struct buffer_head *bh, *dibh; 122 struct gfs2_dinode *di; 123 u64 block = 0; 124 int isdir = gfs2_is_dir(ip); 125 int error; 126 127 down_write(&ip->i_rw_mutex); 128 129 error = gfs2_meta_inode_buffer(ip, &dibh); 130 if (error) 131 goto out; 132 133 if (i_size_read(&ip->i_inode)) { 134 /* Get a free block, fill it with the stuffed data, 135 and write it out to disk */ 136 137 unsigned int n = 1; 138 error = gfs2_alloc_blocks(ip, &block, &n, 0, NULL); 139 if (error) 140 goto out_brelse; 141 if (isdir) { 142 gfs2_trans_remove_revoke(GFS2_SB(&ip->i_inode), block, 1); 143 error = gfs2_dir_get_new_buffer(ip, block, &bh); 144 if (error) 145 goto out_brelse; 146 gfs2_buffer_copy_tail(bh, sizeof(struct gfs2_meta_header), 147 dibh, sizeof(struct gfs2_dinode)); 148 brelse(bh); 149 } else { 150 error = gfs2_unstuffer_page(ip, dibh, block, page); 151 if (error) 152 goto out_brelse; 153 } 154 } 155 156 /* Set up the pointer to the new block */ 157 158 gfs2_trans_add_meta(ip->i_gl, dibh); 159 di = (struct gfs2_dinode *)dibh->b_data; 160 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); 161 162 if (i_size_read(&ip->i_inode)) { 163 *(__be64 *)(di + 1) = cpu_to_be64(block); 164 gfs2_add_inode_blocks(&ip->i_inode, 1); 165 di->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode)); 166 } 167 168 ip->i_height = 1; 169 di->di_height = cpu_to_be16(1); 170 171 out_brelse: 172 brelse(dibh); 173 out: 174 up_write(&ip->i_rw_mutex); 175 return error; 176 } 177 178 179 /** 180 * find_metapath - Find path through the metadata tree 181 * @sdp: The superblock 182 * @block: The disk block to look up 183 * @mp: The metapath to return the result in 184 * @height: The pre-calculated height of the metadata tree 185 * 186 * This routine returns a struct metapath structure that defines a path 187 * through the metadata of inode "ip" to get to block "block". 188 * 189 * Example: 190 * Given: "ip" is a height 3 file, "offset" is 101342453, and this is a 191 * filesystem with a blocksize of 4096. 192 * 193 * find_metapath() would return a struct metapath structure set to: 194 * mp_fheight = 3, mp_list[0] = 0, mp_list[1] = 48, and mp_list[2] = 165. 195 * 196 * That means that in order to get to the block containing the byte at 197 * offset 101342453, we would load the indirect block pointed to by pointer 198 * 0 in the dinode. We would then load the indirect block pointed to by 199 * pointer 48 in that indirect block. We would then load the data block 200 * pointed to by pointer 165 in that indirect block. 201 * 202 * ---------------------------------------- 203 * | Dinode | | 204 * | | 4| 205 * | |0 1 2 3 4 5 9| 206 * | | 6| 207 * ---------------------------------------- 208 * | 209 * | 210 * V 211 * ---------------------------------------- 212 * | Indirect Block | 213 * | 5| 214 * | 4 4 4 4 4 5 5 1| 215 * |0 5 6 7 8 9 0 1 2| 216 * ---------------------------------------- 217 * | 218 * | 219 * V 220 * ---------------------------------------- 221 * | Indirect Block | 222 * | 1 1 1 1 1 5| 223 * | 6 6 6 6 6 1| 224 * |0 3 4 5 6 7 2| 225 * ---------------------------------------- 226 * | 227 * | 228 * V 229 * ---------------------------------------- 230 * | Data block containing offset | 231 * | 101342453 | 232 * | | 233 * | | 234 * ---------------------------------------- 235 * 236 */ 237 238 static void find_metapath(const struct gfs2_sbd *sdp, u64 block, 239 struct metapath *mp, unsigned int height) 240 { 241 unsigned int i; 242 243 mp->mp_fheight = height; 244 for (i = height; i--;) 245 mp->mp_list[i] = do_div(block, sdp->sd_inptrs); 246 } 247 248 static inline unsigned int metapath_branch_start(const struct metapath *mp) 249 { 250 if (mp->mp_list[0] == 0) 251 return 2; 252 return 1; 253 } 254 255 /** 256 * metaptr1 - Return the first possible metadata pointer in a metapath buffer 257 * @height: The metadata height (0 = dinode) 258 * @mp: The metapath 259 */ 260 static inline __be64 *metaptr1(unsigned int height, const struct metapath *mp) 261 { 262 struct buffer_head *bh = mp->mp_bh[height]; 263 if (height == 0) 264 return ((__be64 *)(bh->b_data + sizeof(struct gfs2_dinode))); 265 return ((__be64 *)(bh->b_data + sizeof(struct gfs2_meta_header))); 266 } 267 268 /** 269 * metapointer - Return pointer to start of metadata in a buffer 270 * @height: The metadata height (0 = dinode) 271 * @mp: The metapath 272 * 273 * Return a pointer to the block number of the next height of the metadata 274 * tree given a buffer containing the pointer to the current height of the 275 * metadata tree. 276 */ 277 278 static inline __be64 *metapointer(unsigned int height, const struct metapath *mp) 279 { 280 __be64 *p = metaptr1(height, mp); 281 return p + mp->mp_list[height]; 282 } 283 284 static inline const __be64 *metaend(unsigned int height, const struct metapath *mp) 285 { 286 const struct buffer_head *bh = mp->mp_bh[height]; 287 return (const __be64 *)(bh->b_data + bh->b_size); 288 } 289 290 static void clone_metapath(struct metapath *clone, struct metapath *mp) 291 { 292 unsigned int hgt; 293 294 *clone = *mp; 295 for (hgt = 0; hgt < mp->mp_aheight; hgt++) 296 get_bh(clone->mp_bh[hgt]); 297 } 298 299 static void gfs2_metapath_ra(struct gfs2_glock *gl, __be64 *start, __be64 *end) 300 { 301 const __be64 *t; 302 303 for (t = start; t < end; t++) { 304 struct buffer_head *rabh; 305 306 if (!*t) 307 continue; 308 309 rabh = gfs2_getbuf(gl, be64_to_cpu(*t), CREATE); 310 if (trylock_buffer(rabh)) { 311 if (!buffer_uptodate(rabh)) { 312 rabh->b_end_io = end_buffer_read_sync; 313 submit_bh(REQ_OP_READ, 314 REQ_RAHEAD | REQ_META | REQ_PRIO, 315 rabh); 316 continue; 317 } 318 unlock_buffer(rabh); 319 } 320 brelse(rabh); 321 } 322 } 323 324 static int __fillup_metapath(struct gfs2_inode *ip, struct metapath *mp, 325 unsigned int x, unsigned int h) 326 { 327 for (; x < h; x++) { 328 __be64 *ptr = metapointer(x, mp); 329 u64 dblock = be64_to_cpu(*ptr); 330 int ret; 331 332 if (!dblock) 333 break; 334 ret = gfs2_meta_indirect_buffer(ip, x + 1, dblock, &mp->mp_bh[x + 1]); 335 if (ret) 336 return ret; 337 } 338 mp->mp_aheight = x + 1; 339 return 0; 340 } 341 342 /** 343 * lookup_metapath - Walk the metadata tree to a specific point 344 * @ip: The inode 345 * @mp: The metapath 346 * 347 * Assumes that the inode's buffer has already been looked up and 348 * hooked onto mp->mp_bh[0] and that the metapath has been initialised 349 * by find_metapath(). 350 * 351 * If this function encounters part of the tree which has not been 352 * allocated, it returns the current height of the tree at the point 353 * at which it found the unallocated block. Blocks which are found are 354 * added to the mp->mp_bh[] list. 355 * 356 * Returns: error 357 */ 358 359 static int lookup_metapath(struct gfs2_inode *ip, struct metapath *mp) 360 { 361 return __fillup_metapath(ip, mp, 0, ip->i_height - 1); 362 } 363 364 /** 365 * fillup_metapath - fill up buffers for the metadata path to a specific height 366 * @ip: The inode 367 * @mp: The metapath 368 * @h: The height to which it should be mapped 369 * 370 * Similar to lookup_metapath, but does lookups for a range of heights 371 * 372 * Returns: error or the number of buffers filled 373 */ 374 375 static int fillup_metapath(struct gfs2_inode *ip, struct metapath *mp, int h) 376 { 377 unsigned int x = 0; 378 int ret; 379 380 if (h) { 381 /* find the first buffer we need to look up. */ 382 for (x = h - 1; x > 0; x--) { 383 if (mp->mp_bh[x]) 384 break; 385 } 386 } 387 ret = __fillup_metapath(ip, mp, x, h); 388 if (ret) 389 return ret; 390 return mp->mp_aheight - x - 1; 391 } 392 393 static void release_metapath(struct metapath *mp) 394 { 395 int i; 396 397 for (i = 0; i < GFS2_MAX_META_HEIGHT; i++) { 398 if (mp->mp_bh[i] == NULL) 399 break; 400 brelse(mp->mp_bh[i]); 401 mp->mp_bh[i] = NULL; 402 } 403 } 404 405 /** 406 * gfs2_extent_length - Returns length of an extent of blocks 407 * @bh: The metadata block 408 * @ptr: Current position in @bh 409 * @limit: Max extent length to return 410 * @eob: Set to 1 if we hit "end of block" 411 * 412 * Returns: The length of the extent (minimum of one block) 413 */ 414 415 static inline unsigned int gfs2_extent_length(struct buffer_head *bh, __be64 *ptr, size_t limit, int *eob) 416 { 417 const __be64 *end = (__be64 *)(bh->b_data + bh->b_size); 418 const __be64 *first = ptr; 419 u64 d = be64_to_cpu(*ptr); 420 421 *eob = 0; 422 do { 423 ptr++; 424 if (ptr >= end) 425 break; 426 d++; 427 } while(be64_to_cpu(*ptr) == d); 428 if (ptr >= end) 429 *eob = 1; 430 return ptr - first; 431 } 432 433 typedef const __be64 *(*gfs2_metadata_walker)( 434 struct metapath *mp, 435 const __be64 *start, const __be64 *end, 436 u64 factor, void *data); 437 438 #define WALK_STOP ((__be64 *)0) 439 #define WALK_NEXT ((__be64 *)1) 440 441 static int gfs2_walk_metadata(struct inode *inode, sector_t lblock, 442 u64 len, struct metapath *mp, gfs2_metadata_walker walker, 443 void *data) 444 { 445 struct metapath clone; 446 struct gfs2_inode *ip = GFS2_I(inode); 447 struct gfs2_sbd *sdp = GFS2_SB(inode); 448 const __be64 *start, *end, *ptr; 449 u64 factor = 1; 450 unsigned int hgt; 451 int ret = 0; 452 453 for (hgt = ip->i_height - 1; hgt >= mp->mp_aheight; hgt--) 454 factor *= sdp->sd_inptrs; 455 456 for (;;) { 457 u64 step; 458 459 /* Walk indirect block. */ 460 start = metapointer(hgt, mp); 461 end = metaend(hgt, mp); 462 463 step = (end - start) * factor; 464 if (step > len) 465 end = start + DIV_ROUND_UP_ULL(len, factor); 466 467 ptr = walker(mp, start, end, factor, data); 468 if (ptr == WALK_STOP) 469 break; 470 if (step >= len) 471 break; 472 len -= step; 473 if (ptr != WALK_NEXT) { 474 BUG_ON(!*ptr); 475 mp->mp_list[hgt] += ptr - start; 476 goto fill_up_metapath; 477 } 478 479 lower_metapath: 480 /* Decrease height of metapath. */ 481 if (mp != &clone) { 482 clone_metapath(&clone, mp); 483 mp = &clone; 484 } 485 brelse(mp->mp_bh[hgt]); 486 mp->mp_bh[hgt] = NULL; 487 if (!hgt) 488 break; 489 hgt--; 490 factor *= sdp->sd_inptrs; 491 492 /* Advance in metadata tree. */ 493 (mp->mp_list[hgt])++; 494 start = metapointer(hgt, mp); 495 end = metaend(hgt, mp); 496 if (start >= end) { 497 mp->mp_list[hgt] = 0; 498 if (!hgt) 499 break; 500 goto lower_metapath; 501 } 502 503 fill_up_metapath: 504 /* Increase height of metapath. */ 505 if (mp != &clone) { 506 clone_metapath(&clone, mp); 507 mp = &clone; 508 } 509 ret = fillup_metapath(ip, mp, ip->i_height - 1); 510 if (ret < 0) 511 break; 512 hgt += ret; 513 for (; ret; ret--) 514 do_div(factor, sdp->sd_inptrs); 515 mp->mp_aheight = hgt + 1; 516 } 517 if (mp == &clone) 518 release_metapath(mp); 519 return ret; 520 } 521 522 struct gfs2_hole_walker_args { 523 u64 blocks; 524 }; 525 526 static const __be64 *gfs2_hole_walker(struct metapath *mp, 527 const __be64 *start, const __be64 *end, 528 u64 factor, void *data) 529 { 530 struct gfs2_hole_walker_args *args = data; 531 const __be64 *ptr; 532 533 for (ptr = start; ptr < end; ptr++) { 534 if (*ptr) { 535 args->blocks += (ptr - start) * factor; 536 if (mp->mp_aheight == mp->mp_fheight) 537 return WALK_STOP; 538 return ptr; /* increase height */ 539 } 540 } 541 args->blocks += (end - start) * factor; 542 return WALK_NEXT; 543 } 544 545 /** 546 * gfs2_hole_size - figure out the size of a hole 547 * @inode: The inode 548 * @lblock: The logical starting block number 549 * @len: How far to look (in blocks) 550 * @mp: The metapath at lblock 551 * @iomap: The iomap to store the hole size in 552 * 553 * This function modifies @mp. 554 * 555 * Returns: errno on error 556 */ 557 static int gfs2_hole_size(struct inode *inode, sector_t lblock, u64 len, 558 struct metapath *mp, struct iomap *iomap) 559 { 560 struct gfs2_hole_walker_args args = { }; 561 int ret = 0; 562 563 ret = gfs2_walk_metadata(inode, lblock, len, mp, gfs2_hole_walker, &args); 564 if (!ret) 565 iomap->length = args.blocks << inode->i_blkbits; 566 return ret; 567 } 568 569 static inline __be64 *gfs2_indirect_init(struct metapath *mp, 570 struct gfs2_glock *gl, unsigned int i, 571 unsigned offset, u64 bn) 572 { 573 __be64 *ptr = (__be64 *)(mp->mp_bh[i - 1]->b_data + 574 ((i > 1) ? sizeof(struct gfs2_meta_header) : 575 sizeof(struct gfs2_dinode))); 576 BUG_ON(i < 1); 577 BUG_ON(mp->mp_bh[i] != NULL); 578 mp->mp_bh[i] = gfs2_meta_new(gl, bn); 579 gfs2_trans_add_meta(gl, mp->mp_bh[i]); 580 gfs2_metatype_set(mp->mp_bh[i], GFS2_METATYPE_IN, GFS2_FORMAT_IN); 581 gfs2_buffer_clear_tail(mp->mp_bh[i], sizeof(struct gfs2_meta_header)); 582 ptr += offset; 583 *ptr = cpu_to_be64(bn); 584 return ptr; 585 } 586 587 enum alloc_state { 588 ALLOC_DATA = 0, 589 ALLOC_GROW_DEPTH = 1, 590 ALLOC_GROW_HEIGHT = 2, 591 /* ALLOC_UNSTUFF = 3, TBD and rather complicated */ 592 }; 593 594 /** 595 * gfs2_iomap_alloc - Build a metadata tree of the requested height 596 * @inode: The GFS2 inode 597 * @iomap: The iomap structure 598 * @mp: The metapath, with proper height information calculated 599 * 600 * In this routine we may have to alloc: 601 * i) Indirect blocks to grow the metadata tree height 602 * ii) Indirect blocks to fill in lower part of the metadata tree 603 * iii) Data blocks 604 * 605 * This function is called after gfs2_iomap_get, which works out the 606 * total number of blocks which we need via gfs2_alloc_size. 607 * 608 * We then do the actual allocation asking for an extent at a time (if 609 * enough contiguous free blocks are available, there will only be one 610 * allocation request per call) and uses the state machine to initialise 611 * the blocks in order. 612 * 613 * Right now, this function will allocate at most one indirect block 614 * worth of data -- with a default block size of 4K, that's slightly 615 * less than 2M. If this limitation is ever removed to allow huge 616 * allocations, we would probably still want to limit the iomap size we 617 * return to avoid stalling other tasks during huge writes; the next 618 * iomap iteration would then find the blocks already allocated. 619 * 620 * Returns: errno on error 621 */ 622 623 static int gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap, 624 struct metapath *mp) 625 { 626 struct gfs2_inode *ip = GFS2_I(inode); 627 struct gfs2_sbd *sdp = GFS2_SB(inode); 628 struct buffer_head *dibh = mp->mp_bh[0]; 629 u64 bn; 630 unsigned n, i, blks, alloced = 0, iblks = 0, branch_start = 0; 631 size_t dblks = iomap->length >> inode->i_blkbits; 632 const unsigned end_of_metadata = mp->mp_fheight - 1; 633 int ret; 634 enum alloc_state state; 635 __be64 *ptr; 636 __be64 zero_bn = 0; 637 638 BUG_ON(mp->mp_aheight < 1); 639 BUG_ON(dibh == NULL); 640 BUG_ON(dblks < 1); 641 642 gfs2_trans_add_meta(ip->i_gl, dibh); 643 644 down_write(&ip->i_rw_mutex); 645 646 if (mp->mp_fheight == mp->mp_aheight) { 647 /* Bottom indirect block exists */ 648 state = ALLOC_DATA; 649 } else { 650 /* Need to allocate indirect blocks */ 651 if (mp->mp_fheight == ip->i_height) { 652 /* Writing into existing tree, extend tree down */ 653 iblks = mp->mp_fheight - mp->mp_aheight; 654 state = ALLOC_GROW_DEPTH; 655 } else { 656 /* Building up tree height */ 657 state = ALLOC_GROW_HEIGHT; 658 iblks = mp->mp_fheight - ip->i_height; 659 branch_start = metapath_branch_start(mp); 660 iblks += (mp->mp_fheight - branch_start); 661 } 662 } 663 664 /* start of the second part of the function (state machine) */ 665 666 blks = dblks + iblks; 667 i = mp->mp_aheight; 668 do { 669 n = blks - alloced; 670 ret = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL); 671 if (ret) 672 goto out; 673 alloced += n; 674 if (state != ALLOC_DATA || gfs2_is_jdata(ip)) 675 gfs2_trans_remove_revoke(sdp, bn, n); 676 switch (state) { 677 /* Growing height of tree */ 678 case ALLOC_GROW_HEIGHT: 679 if (i == 1) { 680 ptr = (__be64 *)(dibh->b_data + 681 sizeof(struct gfs2_dinode)); 682 zero_bn = *ptr; 683 } 684 for (; i - 1 < mp->mp_fheight - ip->i_height && n > 0; 685 i++, n--) 686 gfs2_indirect_init(mp, ip->i_gl, i, 0, bn++); 687 if (i - 1 == mp->mp_fheight - ip->i_height) { 688 i--; 689 gfs2_buffer_copy_tail(mp->mp_bh[i], 690 sizeof(struct gfs2_meta_header), 691 dibh, sizeof(struct gfs2_dinode)); 692 gfs2_buffer_clear_tail(dibh, 693 sizeof(struct gfs2_dinode) + 694 sizeof(__be64)); 695 ptr = (__be64 *)(mp->mp_bh[i]->b_data + 696 sizeof(struct gfs2_meta_header)); 697 *ptr = zero_bn; 698 state = ALLOC_GROW_DEPTH; 699 for(i = branch_start; i < mp->mp_fheight; i++) { 700 if (mp->mp_bh[i] == NULL) 701 break; 702 brelse(mp->mp_bh[i]); 703 mp->mp_bh[i] = NULL; 704 } 705 i = branch_start; 706 } 707 if (n == 0) 708 break; 709 /* fall through - To branching from existing tree */ 710 case ALLOC_GROW_DEPTH: 711 if (i > 1 && i < mp->mp_fheight) 712 gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[i-1]); 713 for (; i < mp->mp_fheight && n > 0; i++, n--) 714 gfs2_indirect_init(mp, ip->i_gl, i, 715 mp->mp_list[i-1], bn++); 716 if (i == mp->mp_fheight) 717 state = ALLOC_DATA; 718 if (n == 0) 719 break; 720 /* fall through - To tree complete, adding data blocks */ 721 case ALLOC_DATA: 722 BUG_ON(n > dblks); 723 BUG_ON(mp->mp_bh[end_of_metadata] == NULL); 724 gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[end_of_metadata]); 725 dblks = n; 726 ptr = metapointer(end_of_metadata, mp); 727 iomap->addr = bn << inode->i_blkbits; 728 iomap->flags |= IOMAP_F_MERGED | IOMAP_F_NEW; 729 while (n-- > 0) 730 *ptr++ = cpu_to_be64(bn++); 731 break; 732 } 733 } while (iomap->addr == IOMAP_NULL_ADDR); 734 735 iomap->type = IOMAP_MAPPED; 736 iomap->length = (u64)dblks << inode->i_blkbits; 737 ip->i_height = mp->mp_fheight; 738 gfs2_add_inode_blocks(&ip->i_inode, alloced); 739 gfs2_dinode_out(ip, dibh->b_data); 740 out: 741 up_write(&ip->i_rw_mutex); 742 return ret; 743 } 744 745 #define IOMAP_F_GFS2_BOUNDARY IOMAP_F_PRIVATE 746 747 /** 748 * gfs2_alloc_size - Compute the maximum allocation size 749 * @inode: The inode 750 * @mp: The metapath 751 * @size: Requested size in blocks 752 * 753 * Compute the maximum size of the next allocation at @mp. 754 * 755 * Returns: size in blocks 756 */ 757 static u64 gfs2_alloc_size(struct inode *inode, struct metapath *mp, u64 size) 758 { 759 struct gfs2_inode *ip = GFS2_I(inode); 760 struct gfs2_sbd *sdp = GFS2_SB(inode); 761 const __be64 *first, *ptr, *end; 762 763 /* 764 * For writes to stuffed files, this function is called twice via 765 * gfs2_iomap_get, before and after unstuffing. The size we return the 766 * first time needs to be large enough to get the reservation and 767 * allocation sizes right. The size we return the second time must 768 * be exact or else gfs2_iomap_alloc won't do the right thing. 769 */ 770 771 if (gfs2_is_stuffed(ip) || mp->mp_fheight != mp->mp_aheight) { 772 unsigned int maxsize = mp->mp_fheight > 1 ? 773 sdp->sd_inptrs : sdp->sd_diptrs; 774 maxsize -= mp->mp_list[mp->mp_fheight - 1]; 775 if (size > maxsize) 776 size = maxsize; 777 return size; 778 } 779 780 first = metapointer(ip->i_height - 1, mp); 781 end = metaend(ip->i_height - 1, mp); 782 if (end - first > size) 783 end = first + size; 784 for (ptr = first; ptr < end; ptr++) { 785 if (*ptr) 786 break; 787 } 788 return ptr - first; 789 } 790 791 /** 792 * gfs2_iomap_get - Map blocks from an inode to disk blocks 793 * @inode: The inode 794 * @pos: Starting position in bytes 795 * @length: Length to map, in bytes 796 * @flags: iomap flags 797 * @iomap: The iomap structure 798 * @mp: The metapath 799 * 800 * Returns: errno 801 */ 802 static int gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length, 803 unsigned flags, struct iomap *iomap, 804 struct metapath *mp) 805 { 806 struct gfs2_inode *ip = GFS2_I(inode); 807 struct gfs2_sbd *sdp = GFS2_SB(inode); 808 loff_t size = i_size_read(inode); 809 __be64 *ptr; 810 sector_t lblock; 811 sector_t lblock_stop; 812 int ret; 813 int eob; 814 u64 len; 815 struct buffer_head *dibh = NULL, *bh; 816 u8 height; 817 818 if (!length) 819 return -EINVAL; 820 821 down_read(&ip->i_rw_mutex); 822 823 ret = gfs2_meta_inode_buffer(ip, &dibh); 824 if (ret) 825 goto unlock; 826 mp->mp_bh[0] = dibh; 827 828 if (gfs2_is_stuffed(ip)) { 829 if (flags & IOMAP_WRITE) { 830 loff_t max_size = gfs2_max_stuffed_size(ip); 831 832 if (pos + length > max_size) 833 goto unstuff; 834 iomap->length = max_size; 835 } else { 836 if (pos >= size) { 837 if (flags & IOMAP_REPORT) { 838 ret = -ENOENT; 839 goto unlock; 840 } else { 841 /* report a hole */ 842 iomap->offset = pos; 843 iomap->length = length; 844 goto do_alloc; 845 } 846 } 847 iomap->length = size; 848 } 849 iomap->addr = (ip->i_no_addr << inode->i_blkbits) + 850 sizeof(struct gfs2_dinode); 851 iomap->type = IOMAP_INLINE; 852 iomap->inline_data = dibh->b_data + sizeof(struct gfs2_dinode); 853 goto out; 854 } 855 856 unstuff: 857 lblock = pos >> inode->i_blkbits; 858 iomap->offset = lblock << inode->i_blkbits; 859 lblock_stop = (pos + length - 1) >> inode->i_blkbits; 860 len = lblock_stop - lblock + 1; 861 iomap->length = len << inode->i_blkbits; 862 863 height = ip->i_height; 864 while ((lblock + 1) * sdp->sd_sb.sb_bsize > sdp->sd_heightsize[height]) 865 height++; 866 find_metapath(sdp, lblock, mp, height); 867 if (height > ip->i_height || gfs2_is_stuffed(ip)) 868 goto do_alloc; 869 870 ret = lookup_metapath(ip, mp); 871 if (ret) 872 goto unlock; 873 874 if (mp->mp_aheight != ip->i_height) 875 goto do_alloc; 876 877 ptr = metapointer(ip->i_height - 1, mp); 878 if (*ptr == 0) 879 goto do_alloc; 880 881 bh = mp->mp_bh[ip->i_height - 1]; 882 len = gfs2_extent_length(bh, ptr, len, &eob); 883 884 iomap->addr = be64_to_cpu(*ptr) << inode->i_blkbits; 885 iomap->length = len << inode->i_blkbits; 886 iomap->type = IOMAP_MAPPED; 887 iomap->flags |= IOMAP_F_MERGED; 888 if (eob) 889 iomap->flags |= IOMAP_F_GFS2_BOUNDARY; 890 891 out: 892 iomap->bdev = inode->i_sb->s_bdev; 893 unlock: 894 up_read(&ip->i_rw_mutex); 895 return ret; 896 897 do_alloc: 898 iomap->addr = IOMAP_NULL_ADDR; 899 iomap->type = IOMAP_HOLE; 900 if (flags & IOMAP_REPORT) { 901 if (pos >= size) 902 ret = -ENOENT; 903 else if (height == ip->i_height) 904 ret = gfs2_hole_size(inode, lblock, len, mp, iomap); 905 else 906 iomap->length = size - pos; 907 } else if (flags & IOMAP_WRITE) { 908 u64 alloc_size; 909 910 if (flags & IOMAP_DIRECT) 911 goto out; /* (see gfs2_file_direct_write) */ 912 913 len = gfs2_alloc_size(inode, mp, len); 914 alloc_size = len << inode->i_blkbits; 915 if (alloc_size < iomap->length) 916 iomap->length = alloc_size; 917 } else { 918 if (pos < size && height == ip->i_height) 919 ret = gfs2_hole_size(inode, lblock, len, mp, iomap); 920 } 921 goto out; 922 } 923 924 /** 925 * gfs2_lblk_to_dblk - convert logical block to disk block 926 * @inode: the inode of the file we're mapping 927 * @lblock: the block relative to the start of the file 928 * @dblock: the returned dblock, if no error 929 * 930 * This function maps a single block from a file logical block (relative to 931 * the start of the file) to a file system absolute block using iomap. 932 * 933 * Returns: the absolute file system block, or an error 934 */ 935 int gfs2_lblk_to_dblk(struct inode *inode, u32 lblock, u64 *dblock) 936 { 937 struct iomap iomap = { }; 938 struct metapath mp = { .mp_aheight = 1, }; 939 loff_t pos = (loff_t)lblock << inode->i_blkbits; 940 int ret; 941 942 ret = gfs2_iomap_get(inode, pos, i_blocksize(inode), 0, &iomap, &mp); 943 release_metapath(&mp); 944 if (ret == 0) 945 *dblock = iomap.addr >> inode->i_blkbits; 946 947 return ret; 948 } 949 950 static int gfs2_write_lock(struct inode *inode) 951 { 952 struct gfs2_inode *ip = GFS2_I(inode); 953 struct gfs2_sbd *sdp = GFS2_SB(inode); 954 int error; 955 956 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh); 957 error = gfs2_glock_nq(&ip->i_gh); 958 if (error) 959 goto out_uninit; 960 if (&ip->i_inode == sdp->sd_rindex) { 961 struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); 962 963 error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, 964 GL_NOCACHE, &m_ip->i_gh); 965 if (error) 966 goto out_unlock; 967 } 968 return 0; 969 970 out_unlock: 971 gfs2_glock_dq(&ip->i_gh); 972 out_uninit: 973 gfs2_holder_uninit(&ip->i_gh); 974 return error; 975 } 976 977 static void gfs2_write_unlock(struct inode *inode) 978 { 979 struct gfs2_inode *ip = GFS2_I(inode); 980 struct gfs2_sbd *sdp = GFS2_SB(inode); 981 982 if (&ip->i_inode == sdp->sd_rindex) { 983 struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); 984 985 gfs2_glock_dq_uninit(&m_ip->i_gh); 986 } 987 gfs2_glock_dq_uninit(&ip->i_gh); 988 } 989 990 static int gfs2_iomap_page_prepare(struct inode *inode, loff_t pos, 991 unsigned len, struct iomap *iomap) 992 { 993 unsigned int blockmask = i_blocksize(inode) - 1; 994 struct gfs2_sbd *sdp = GFS2_SB(inode); 995 unsigned int blocks; 996 997 blocks = ((pos & blockmask) + len + blockmask) >> inode->i_blkbits; 998 return gfs2_trans_begin(sdp, RES_DINODE + blocks, 0); 999 } 1000 1001 static void gfs2_iomap_page_done(struct inode *inode, loff_t pos, 1002 unsigned copied, struct page *page, 1003 struct iomap *iomap) 1004 { 1005 struct gfs2_trans *tr = current->journal_info; 1006 struct gfs2_inode *ip = GFS2_I(inode); 1007 struct gfs2_sbd *sdp = GFS2_SB(inode); 1008 1009 if (page && !gfs2_is_stuffed(ip)) 1010 gfs2_page_add_databufs(ip, page, offset_in_page(pos), copied); 1011 1012 if (tr->tr_num_buf_new) 1013 __mark_inode_dirty(inode, I_DIRTY_DATASYNC); 1014 1015 gfs2_trans_end(sdp); 1016 } 1017 1018 static const struct iomap_page_ops gfs2_iomap_page_ops = { 1019 .page_prepare = gfs2_iomap_page_prepare, 1020 .page_done = gfs2_iomap_page_done, 1021 }; 1022 1023 static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos, 1024 loff_t length, unsigned flags, 1025 struct iomap *iomap, 1026 struct metapath *mp) 1027 { 1028 struct gfs2_inode *ip = GFS2_I(inode); 1029 struct gfs2_sbd *sdp = GFS2_SB(inode); 1030 unsigned int data_blocks = 0, ind_blocks = 0, rblocks; 1031 bool unstuff, alloc_required; 1032 int ret; 1033 1034 ret = gfs2_write_lock(inode); 1035 if (ret) 1036 return ret; 1037 1038 unstuff = gfs2_is_stuffed(ip) && 1039 pos + length > gfs2_max_stuffed_size(ip); 1040 1041 ret = gfs2_iomap_get(inode, pos, length, flags, iomap, mp); 1042 if (ret) 1043 goto out_unlock; 1044 1045 alloc_required = unstuff || iomap->type == IOMAP_HOLE; 1046 1047 if (alloc_required || gfs2_is_jdata(ip)) 1048 gfs2_write_calc_reserv(ip, iomap->length, &data_blocks, 1049 &ind_blocks); 1050 1051 if (alloc_required) { 1052 struct gfs2_alloc_parms ap = { 1053 .target = data_blocks + ind_blocks 1054 }; 1055 1056 ret = gfs2_quota_lock_check(ip, &ap); 1057 if (ret) 1058 goto out_unlock; 1059 1060 ret = gfs2_inplace_reserve(ip, &ap); 1061 if (ret) 1062 goto out_qunlock; 1063 } 1064 1065 rblocks = RES_DINODE + ind_blocks; 1066 if (gfs2_is_jdata(ip)) 1067 rblocks += data_blocks; 1068 if (ind_blocks || data_blocks) 1069 rblocks += RES_STATFS + RES_QUOTA; 1070 if (inode == sdp->sd_rindex) 1071 rblocks += 2 * RES_STATFS; 1072 if (alloc_required) 1073 rblocks += gfs2_rg_blocks(ip, data_blocks + ind_blocks); 1074 1075 if (unstuff || iomap->type == IOMAP_HOLE) { 1076 struct gfs2_trans *tr; 1077 1078 ret = gfs2_trans_begin(sdp, rblocks, 1079 iomap->length >> inode->i_blkbits); 1080 if (ret) 1081 goto out_trans_fail; 1082 1083 if (unstuff) { 1084 ret = gfs2_unstuff_dinode(ip, NULL); 1085 if (ret) 1086 goto out_trans_end; 1087 release_metapath(mp); 1088 ret = gfs2_iomap_get(inode, iomap->offset, 1089 iomap->length, flags, iomap, mp); 1090 if (ret) 1091 goto out_trans_end; 1092 } 1093 1094 if (iomap->type == IOMAP_HOLE) { 1095 ret = gfs2_iomap_alloc(inode, iomap, mp); 1096 if (ret) { 1097 gfs2_trans_end(sdp); 1098 gfs2_inplace_release(ip); 1099 punch_hole(ip, iomap->offset, iomap->length); 1100 goto out_qunlock; 1101 } 1102 } 1103 1104 tr = current->journal_info; 1105 if (tr->tr_num_buf_new) 1106 __mark_inode_dirty(inode, I_DIRTY_DATASYNC); 1107 1108 gfs2_trans_end(sdp); 1109 } 1110 1111 if (gfs2_is_stuffed(ip) || gfs2_is_jdata(ip)) 1112 iomap->page_ops = &gfs2_iomap_page_ops; 1113 return 0; 1114 1115 out_trans_end: 1116 gfs2_trans_end(sdp); 1117 out_trans_fail: 1118 if (alloc_required) 1119 gfs2_inplace_release(ip); 1120 out_qunlock: 1121 if (alloc_required) 1122 gfs2_quota_unlock(ip); 1123 out_unlock: 1124 gfs2_write_unlock(inode); 1125 return ret; 1126 } 1127 1128 static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length, 1129 unsigned flags, struct iomap *iomap) 1130 { 1131 struct gfs2_inode *ip = GFS2_I(inode); 1132 struct metapath mp = { .mp_aheight = 1, }; 1133 int ret; 1134 1135 iomap->flags |= IOMAP_F_BUFFER_HEAD; 1136 1137 trace_gfs2_iomap_start(ip, pos, length, flags); 1138 if ((flags & IOMAP_WRITE) && !(flags & IOMAP_DIRECT)) { 1139 ret = gfs2_iomap_begin_write(inode, pos, length, flags, iomap, &mp); 1140 } else { 1141 ret = gfs2_iomap_get(inode, pos, length, flags, iomap, &mp); 1142 1143 /* 1144 * Silently fall back to buffered I/O for stuffed files or if 1145 * we've hot a hole (see gfs2_file_direct_write). 1146 */ 1147 if ((flags & IOMAP_WRITE) && (flags & IOMAP_DIRECT) && 1148 iomap->type != IOMAP_MAPPED) 1149 ret = -ENOTBLK; 1150 } 1151 release_metapath(&mp); 1152 trace_gfs2_iomap_end(ip, iomap, ret); 1153 return ret; 1154 } 1155 1156 static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length, 1157 ssize_t written, unsigned flags, struct iomap *iomap) 1158 { 1159 struct gfs2_inode *ip = GFS2_I(inode); 1160 struct gfs2_sbd *sdp = GFS2_SB(inode); 1161 1162 if ((flags & (IOMAP_WRITE | IOMAP_DIRECT)) != IOMAP_WRITE) 1163 goto out; 1164 1165 if (!gfs2_is_stuffed(ip)) 1166 gfs2_ordered_add_inode(ip); 1167 1168 if (inode == sdp->sd_rindex) 1169 adjust_fs_space(inode); 1170 1171 gfs2_inplace_release(ip); 1172 1173 if (length != written && (iomap->flags & IOMAP_F_NEW)) { 1174 /* Deallocate blocks that were just allocated. */ 1175 loff_t blockmask = i_blocksize(inode) - 1; 1176 loff_t end = (pos + length) & ~blockmask; 1177 1178 pos = (pos + written + blockmask) & ~blockmask; 1179 if (pos < end) { 1180 truncate_pagecache_range(inode, pos, end - 1); 1181 punch_hole(ip, pos, end - pos); 1182 } 1183 } 1184 1185 if (ip->i_qadata && ip->i_qadata->qa_qd_num) 1186 gfs2_quota_unlock(ip); 1187 1188 if (unlikely(!written)) 1189 goto out_unlock; 1190 1191 if (iomap->flags & IOMAP_F_SIZE_CHANGED) 1192 mark_inode_dirty(inode); 1193 set_bit(GLF_DIRTY, &ip->i_gl->gl_flags); 1194 1195 out_unlock: 1196 gfs2_write_unlock(inode); 1197 out: 1198 return 0; 1199 } 1200 1201 const struct iomap_ops gfs2_iomap_ops = { 1202 .iomap_begin = gfs2_iomap_begin, 1203 .iomap_end = gfs2_iomap_end, 1204 }; 1205 1206 /** 1207 * gfs2_block_map - Map one or more blocks of an inode to a disk block 1208 * @inode: The inode 1209 * @lblock: The logical block number 1210 * @bh_map: The bh to be mapped 1211 * @create: True if its ok to alloc blocks to satify the request 1212 * 1213 * The size of the requested mapping is defined in bh_map->b_size. 1214 * 1215 * Clears buffer_mapped(bh_map) and leaves bh_map->b_size unchanged 1216 * when @lblock is not mapped. Sets buffer_mapped(bh_map) and 1217 * bh_map->b_size to indicate the size of the mapping when @lblock and 1218 * successive blocks are mapped, up to the requested size. 1219 * 1220 * Sets buffer_boundary() if a read of metadata will be required 1221 * before the next block can be mapped. Sets buffer_new() if new 1222 * blocks were allocated. 1223 * 1224 * Returns: errno 1225 */ 1226 1227 int gfs2_block_map(struct inode *inode, sector_t lblock, 1228 struct buffer_head *bh_map, int create) 1229 { 1230 struct gfs2_inode *ip = GFS2_I(inode); 1231 loff_t pos = (loff_t)lblock << inode->i_blkbits; 1232 loff_t length = bh_map->b_size; 1233 struct metapath mp = { .mp_aheight = 1, }; 1234 struct iomap iomap = { }; 1235 int ret; 1236 1237 clear_buffer_mapped(bh_map); 1238 clear_buffer_new(bh_map); 1239 clear_buffer_boundary(bh_map); 1240 trace_gfs2_bmap(ip, bh_map, lblock, create, 1); 1241 1242 if (create) { 1243 ret = gfs2_iomap_get(inode, pos, length, IOMAP_WRITE, &iomap, &mp); 1244 if (!ret && iomap.type == IOMAP_HOLE) 1245 ret = gfs2_iomap_alloc(inode, &iomap, &mp); 1246 release_metapath(&mp); 1247 } else { 1248 ret = gfs2_iomap_get(inode, pos, length, 0, &iomap, &mp); 1249 release_metapath(&mp); 1250 } 1251 if (ret) 1252 goto out; 1253 1254 if (iomap.length > bh_map->b_size) { 1255 iomap.length = bh_map->b_size; 1256 iomap.flags &= ~IOMAP_F_GFS2_BOUNDARY; 1257 } 1258 if (iomap.addr != IOMAP_NULL_ADDR) 1259 map_bh(bh_map, inode->i_sb, iomap.addr >> inode->i_blkbits); 1260 bh_map->b_size = iomap.length; 1261 if (iomap.flags & IOMAP_F_GFS2_BOUNDARY) 1262 set_buffer_boundary(bh_map); 1263 if (iomap.flags & IOMAP_F_NEW) 1264 set_buffer_new(bh_map); 1265 1266 out: 1267 trace_gfs2_bmap(ip, bh_map, lblock, create, ret); 1268 return ret; 1269 } 1270 1271 /* 1272 * Deprecated: do not use in new code 1273 */ 1274 int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsigned *extlen) 1275 { 1276 struct buffer_head bh = { .b_state = 0, .b_blocknr = 0 }; 1277 int ret; 1278 int create = *new; 1279 1280 BUG_ON(!extlen); 1281 BUG_ON(!dblock); 1282 BUG_ON(!new); 1283 1284 bh.b_size = BIT(inode->i_blkbits + (create ? 0 : 5)); 1285 ret = gfs2_block_map(inode, lblock, &bh, create); 1286 *extlen = bh.b_size >> inode->i_blkbits; 1287 *dblock = bh.b_blocknr; 1288 if (buffer_new(&bh)) 1289 *new = 1; 1290 else 1291 *new = 0; 1292 return ret; 1293 } 1294 1295 /** 1296 * gfs2_block_zero_range - Deal with zeroing out data 1297 * 1298 * This is partly borrowed from ext3. 1299 */ 1300 static int gfs2_block_zero_range(struct inode *inode, loff_t from, 1301 unsigned int length) 1302 { 1303 struct address_space *mapping = inode->i_mapping; 1304 struct gfs2_inode *ip = GFS2_I(inode); 1305 unsigned long index = from >> PAGE_SHIFT; 1306 unsigned offset = from & (PAGE_SIZE-1); 1307 unsigned blocksize, iblock, pos; 1308 struct buffer_head *bh; 1309 struct page *page; 1310 int err; 1311 1312 page = find_or_create_page(mapping, index, GFP_NOFS); 1313 if (!page) 1314 return 0; 1315 1316 blocksize = inode->i_sb->s_blocksize; 1317 iblock = index << (PAGE_SHIFT - inode->i_sb->s_blocksize_bits); 1318 1319 if (!page_has_buffers(page)) 1320 create_empty_buffers(page, blocksize, 0); 1321 1322 /* Find the buffer that contains "offset" */ 1323 bh = page_buffers(page); 1324 pos = blocksize; 1325 while (offset >= pos) { 1326 bh = bh->b_this_page; 1327 iblock++; 1328 pos += blocksize; 1329 } 1330 1331 err = 0; 1332 1333 if (!buffer_mapped(bh)) { 1334 gfs2_block_map(inode, iblock, bh, 0); 1335 /* unmapped? It's a hole - nothing to do */ 1336 if (!buffer_mapped(bh)) 1337 goto unlock; 1338 } 1339 1340 /* Ok, it's mapped. Make sure it's up-to-date */ 1341 if (PageUptodate(page)) 1342 set_buffer_uptodate(bh); 1343 1344 if (!buffer_uptodate(bh)) { 1345 err = -EIO; 1346 ll_rw_block(REQ_OP_READ, 0, 1, &bh); 1347 wait_on_buffer(bh); 1348 /* Uhhuh. Read error. Complain and punt. */ 1349 if (!buffer_uptodate(bh)) 1350 goto unlock; 1351 err = 0; 1352 } 1353 1354 if (gfs2_is_jdata(ip)) 1355 gfs2_trans_add_data(ip->i_gl, bh); 1356 else 1357 gfs2_ordered_add_inode(ip); 1358 1359 zero_user(page, offset, length); 1360 mark_buffer_dirty(bh); 1361 unlock: 1362 unlock_page(page); 1363 put_page(page); 1364 return err; 1365 } 1366 1367 #define GFS2_JTRUNC_REVOKES 8192 1368 1369 /** 1370 * gfs2_journaled_truncate - Wrapper for truncate_pagecache for jdata files 1371 * @inode: The inode being truncated 1372 * @oldsize: The original (larger) size 1373 * @newsize: The new smaller size 1374 * 1375 * With jdata files, we have to journal a revoke for each block which is 1376 * truncated. As a result, we need to split this into separate transactions 1377 * if the number of pages being truncated gets too large. 1378 */ 1379 1380 static int gfs2_journaled_truncate(struct inode *inode, u64 oldsize, u64 newsize) 1381 { 1382 struct gfs2_sbd *sdp = GFS2_SB(inode); 1383 u64 max_chunk = GFS2_JTRUNC_REVOKES * sdp->sd_vfs->s_blocksize; 1384 u64 chunk; 1385 int error; 1386 1387 while (oldsize != newsize) { 1388 struct gfs2_trans *tr; 1389 unsigned int offs; 1390 1391 chunk = oldsize - newsize; 1392 if (chunk > max_chunk) 1393 chunk = max_chunk; 1394 1395 offs = oldsize & ~PAGE_MASK; 1396 if (offs && chunk > PAGE_SIZE) 1397 chunk = offs + ((chunk - offs) & PAGE_MASK); 1398 1399 truncate_pagecache(inode, oldsize - chunk); 1400 oldsize -= chunk; 1401 1402 tr = current->journal_info; 1403 if (!test_bit(TR_TOUCHED, &tr->tr_flags)) 1404 continue; 1405 1406 gfs2_trans_end(sdp); 1407 error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES); 1408 if (error) 1409 return error; 1410 } 1411 1412 return 0; 1413 } 1414 1415 static int trunc_start(struct inode *inode, u64 newsize) 1416 { 1417 struct gfs2_inode *ip = GFS2_I(inode); 1418 struct gfs2_sbd *sdp = GFS2_SB(inode); 1419 struct buffer_head *dibh = NULL; 1420 int journaled = gfs2_is_jdata(ip); 1421 u64 oldsize = inode->i_size; 1422 int error; 1423 1424 if (journaled) 1425 error = gfs2_trans_begin(sdp, RES_DINODE + RES_JDATA, GFS2_JTRUNC_REVOKES); 1426 else 1427 error = gfs2_trans_begin(sdp, RES_DINODE, 0); 1428 if (error) 1429 return error; 1430 1431 error = gfs2_meta_inode_buffer(ip, &dibh); 1432 if (error) 1433 goto out; 1434 1435 gfs2_trans_add_meta(ip->i_gl, dibh); 1436 1437 if (gfs2_is_stuffed(ip)) { 1438 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + newsize); 1439 } else { 1440 unsigned int blocksize = i_blocksize(inode); 1441 unsigned int offs = newsize & (blocksize - 1); 1442 if (offs) { 1443 error = gfs2_block_zero_range(inode, newsize, 1444 blocksize - offs); 1445 if (error) 1446 goto out; 1447 } 1448 ip->i_diskflags |= GFS2_DIF_TRUNC_IN_PROG; 1449 } 1450 1451 i_size_write(inode, newsize); 1452 ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode); 1453 gfs2_dinode_out(ip, dibh->b_data); 1454 1455 if (journaled) 1456 error = gfs2_journaled_truncate(inode, oldsize, newsize); 1457 else 1458 truncate_pagecache(inode, newsize); 1459 1460 out: 1461 brelse(dibh); 1462 if (current->journal_info) 1463 gfs2_trans_end(sdp); 1464 return error; 1465 } 1466 1467 int gfs2_iomap_get_alloc(struct inode *inode, loff_t pos, loff_t length, 1468 struct iomap *iomap) 1469 { 1470 struct metapath mp = { .mp_aheight = 1, }; 1471 int ret; 1472 1473 ret = gfs2_iomap_get(inode, pos, length, IOMAP_WRITE, iomap, &mp); 1474 if (!ret && iomap->type == IOMAP_HOLE) 1475 ret = gfs2_iomap_alloc(inode, iomap, &mp); 1476 release_metapath(&mp); 1477 return ret; 1478 } 1479 1480 /** 1481 * sweep_bh_for_rgrps - find an rgrp in a meta buffer and free blocks therein 1482 * @ip: inode 1483 * @rg_gh: holder of resource group glock 1484 * @bh: buffer head to sweep 1485 * @start: starting point in bh 1486 * @end: end point in bh 1487 * @meta: true if bh points to metadata (rather than data) 1488 * @btotal: place to keep count of total blocks freed 1489 * 1490 * We sweep a metadata buffer (provided by the metapath) for blocks we need to 1491 * free, and free them all. However, we do it one rgrp at a time. If this 1492 * block has references to multiple rgrps, we break it into individual 1493 * transactions. This allows other processes to use the rgrps while we're 1494 * focused on a single one, for better concurrency / performance. 1495 * At every transaction boundary, we rewrite the inode into the journal. 1496 * That way the bitmaps are kept consistent with the inode and we can recover 1497 * if we're interrupted by power-outages. 1498 * 1499 * Returns: 0, or return code if an error occurred. 1500 * *btotal has the total number of blocks freed 1501 */ 1502 static int sweep_bh_for_rgrps(struct gfs2_inode *ip, struct gfs2_holder *rd_gh, 1503 struct buffer_head *bh, __be64 *start, __be64 *end, 1504 bool meta, u32 *btotal) 1505 { 1506 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1507 struct gfs2_rgrpd *rgd; 1508 struct gfs2_trans *tr; 1509 __be64 *p; 1510 int blks_outside_rgrp; 1511 u64 bn, bstart, isize_blks; 1512 s64 blen; /* needs to be s64 or gfs2_add_inode_blocks breaks */ 1513 int ret = 0; 1514 bool buf_in_tr = false; /* buffer was added to transaction */ 1515 1516 more_rgrps: 1517 rgd = NULL; 1518 if (gfs2_holder_initialized(rd_gh)) { 1519 rgd = gfs2_glock2rgrp(rd_gh->gh_gl); 1520 gfs2_assert_withdraw(sdp, 1521 gfs2_glock_is_locked_by_me(rd_gh->gh_gl)); 1522 } 1523 blks_outside_rgrp = 0; 1524 bstart = 0; 1525 blen = 0; 1526 1527 for (p = start; p < end; p++) { 1528 if (!*p) 1529 continue; 1530 bn = be64_to_cpu(*p); 1531 1532 if (rgd) { 1533 if (!rgrp_contains_block(rgd, bn)) { 1534 blks_outside_rgrp++; 1535 continue; 1536 } 1537 } else { 1538 rgd = gfs2_blk2rgrpd(sdp, bn, true); 1539 if (unlikely(!rgd)) { 1540 ret = -EIO; 1541 goto out; 1542 } 1543 ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 1544 0, rd_gh); 1545 if (ret) 1546 goto out; 1547 1548 /* Must be done with the rgrp glock held: */ 1549 if (gfs2_rs_active(&ip->i_res) && 1550 rgd == ip->i_res.rs_rbm.rgd) 1551 gfs2_rs_deltree(&ip->i_res); 1552 } 1553 1554 /* The size of our transactions will be unknown until we 1555 actually process all the metadata blocks that relate to 1556 the rgrp. So we estimate. We know it can't be more than 1557 the dinode's i_blocks and we don't want to exceed the 1558 journal flush threshold, sd_log_thresh2. */ 1559 if (current->journal_info == NULL) { 1560 unsigned int jblocks_rqsted, revokes; 1561 1562 jblocks_rqsted = rgd->rd_length + RES_DINODE + 1563 RES_INDIRECT; 1564 isize_blks = gfs2_get_inode_blocks(&ip->i_inode); 1565 if (isize_blks > atomic_read(&sdp->sd_log_thresh2)) 1566 jblocks_rqsted += 1567 atomic_read(&sdp->sd_log_thresh2); 1568 else 1569 jblocks_rqsted += isize_blks; 1570 revokes = jblocks_rqsted; 1571 if (meta) 1572 revokes += end - start; 1573 else if (ip->i_depth) 1574 revokes += sdp->sd_inptrs; 1575 ret = gfs2_trans_begin(sdp, jblocks_rqsted, revokes); 1576 if (ret) 1577 goto out_unlock; 1578 down_write(&ip->i_rw_mutex); 1579 } 1580 /* check if we will exceed the transaction blocks requested */ 1581 tr = current->journal_info; 1582 if (tr->tr_num_buf_new + RES_STATFS + 1583 RES_QUOTA >= atomic_read(&sdp->sd_log_thresh2)) { 1584 /* We set blks_outside_rgrp to ensure the loop will 1585 be repeated for the same rgrp, but with a new 1586 transaction. */ 1587 blks_outside_rgrp++; 1588 /* This next part is tricky. If the buffer was added 1589 to the transaction, we've already set some block 1590 pointers to 0, so we better follow through and free 1591 them, or we will introduce corruption (so break). 1592 This may be impossible, or at least rare, but I 1593 decided to cover the case regardless. 1594 1595 If the buffer was not added to the transaction 1596 (this call), doing so would exceed our transaction 1597 size, so we need to end the transaction and start a 1598 new one (so goto). */ 1599 1600 if (buf_in_tr) 1601 break; 1602 goto out_unlock; 1603 } 1604 1605 gfs2_trans_add_meta(ip->i_gl, bh); 1606 buf_in_tr = true; 1607 *p = 0; 1608 if (bstart + blen == bn) { 1609 blen++; 1610 continue; 1611 } 1612 if (bstart) { 1613 __gfs2_free_blocks(ip, rgd, bstart, (u32)blen, meta); 1614 (*btotal) += blen; 1615 gfs2_add_inode_blocks(&ip->i_inode, -blen); 1616 } 1617 bstart = bn; 1618 blen = 1; 1619 } 1620 if (bstart) { 1621 __gfs2_free_blocks(ip, rgd, bstart, (u32)blen, meta); 1622 (*btotal) += blen; 1623 gfs2_add_inode_blocks(&ip->i_inode, -blen); 1624 } 1625 out_unlock: 1626 if (!ret && blks_outside_rgrp) { /* If buffer still has non-zero blocks 1627 outside the rgrp we just processed, 1628 do it all over again. */ 1629 if (current->journal_info) { 1630 struct buffer_head *dibh; 1631 1632 ret = gfs2_meta_inode_buffer(ip, &dibh); 1633 if (ret) 1634 goto out; 1635 1636 /* Every transaction boundary, we rewrite the dinode 1637 to keep its di_blocks current in case of failure. */ 1638 ip->i_inode.i_mtime = ip->i_inode.i_ctime = 1639 current_time(&ip->i_inode); 1640 gfs2_trans_add_meta(ip->i_gl, dibh); 1641 gfs2_dinode_out(ip, dibh->b_data); 1642 brelse(dibh); 1643 up_write(&ip->i_rw_mutex); 1644 gfs2_trans_end(sdp); 1645 } 1646 gfs2_glock_dq_uninit(rd_gh); 1647 cond_resched(); 1648 goto more_rgrps; 1649 } 1650 out: 1651 return ret; 1652 } 1653 1654 static bool mp_eq_to_hgt(struct metapath *mp, __u16 *list, unsigned int h) 1655 { 1656 if (memcmp(mp->mp_list, list, h * sizeof(mp->mp_list[0]))) 1657 return false; 1658 return true; 1659 } 1660 1661 /** 1662 * find_nonnull_ptr - find a non-null pointer given a metapath and height 1663 * @mp: starting metapath 1664 * @h: desired height to search 1665 * 1666 * Assumes the metapath is valid (with buffers) out to height h. 1667 * Returns: true if a non-null pointer was found in the metapath buffer 1668 * false if all remaining pointers are NULL in the buffer 1669 */ 1670 static bool find_nonnull_ptr(struct gfs2_sbd *sdp, struct metapath *mp, 1671 unsigned int h, 1672 __u16 *end_list, unsigned int end_aligned) 1673 { 1674 struct buffer_head *bh = mp->mp_bh[h]; 1675 __be64 *first, *ptr, *end; 1676 1677 first = metaptr1(h, mp); 1678 ptr = first + mp->mp_list[h]; 1679 end = (__be64 *)(bh->b_data + bh->b_size); 1680 if (end_list && mp_eq_to_hgt(mp, end_list, h)) { 1681 bool keep_end = h < end_aligned; 1682 end = first + end_list[h] + keep_end; 1683 } 1684 1685 while (ptr < end) { 1686 if (*ptr) { /* if we have a non-null pointer */ 1687 mp->mp_list[h] = ptr - first; 1688 h++; 1689 if (h < GFS2_MAX_META_HEIGHT) 1690 mp->mp_list[h] = 0; 1691 return true; 1692 } 1693 ptr++; 1694 } 1695 return false; 1696 } 1697 1698 enum dealloc_states { 1699 DEALLOC_MP_FULL = 0, /* Strip a metapath with all buffers read in */ 1700 DEALLOC_MP_LOWER = 1, /* lower the metapath strip height */ 1701 DEALLOC_FILL_MP = 2, /* Fill in the metapath to the given height. */ 1702 DEALLOC_DONE = 3, /* process complete */ 1703 }; 1704 1705 static inline void 1706 metapointer_range(struct metapath *mp, int height, 1707 __u16 *start_list, unsigned int start_aligned, 1708 __u16 *end_list, unsigned int end_aligned, 1709 __be64 **start, __be64 **end) 1710 { 1711 struct buffer_head *bh = mp->mp_bh[height]; 1712 __be64 *first; 1713 1714 first = metaptr1(height, mp); 1715 *start = first; 1716 if (mp_eq_to_hgt(mp, start_list, height)) { 1717 bool keep_start = height < start_aligned; 1718 *start = first + start_list[height] + keep_start; 1719 } 1720 *end = (__be64 *)(bh->b_data + bh->b_size); 1721 if (end_list && mp_eq_to_hgt(mp, end_list, height)) { 1722 bool keep_end = height < end_aligned; 1723 *end = first + end_list[height] + keep_end; 1724 } 1725 } 1726 1727 static inline bool walk_done(struct gfs2_sbd *sdp, 1728 struct metapath *mp, int height, 1729 __u16 *end_list, unsigned int end_aligned) 1730 { 1731 __u16 end; 1732 1733 if (end_list) { 1734 bool keep_end = height < end_aligned; 1735 if (!mp_eq_to_hgt(mp, end_list, height)) 1736 return false; 1737 end = end_list[height] + keep_end; 1738 } else 1739 end = (height > 0) ? sdp->sd_inptrs : sdp->sd_diptrs; 1740 return mp->mp_list[height] >= end; 1741 } 1742 1743 /** 1744 * punch_hole - deallocate blocks in a file 1745 * @ip: inode to truncate 1746 * @offset: the start of the hole 1747 * @length: the size of the hole (or 0 for truncate) 1748 * 1749 * Punch a hole into a file or truncate a file at a given position. This 1750 * function operates in whole blocks (@offset and @length are rounded 1751 * accordingly); partially filled blocks must be cleared otherwise. 1752 * 1753 * This function works from the bottom up, and from the right to the left. In 1754 * other words, it strips off the highest layer (data) before stripping any of 1755 * the metadata. Doing it this way is best in case the operation is interrupted 1756 * by power failure, etc. The dinode is rewritten in every transaction to 1757 * guarantee integrity. 1758 */ 1759 static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length) 1760 { 1761 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1762 u64 maxsize = sdp->sd_heightsize[ip->i_height]; 1763 struct metapath mp = {}; 1764 struct buffer_head *dibh, *bh; 1765 struct gfs2_holder rd_gh; 1766 unsigned int bsize_shift = sdp->sd_sb.sb_bsize_shift; 1767 u64 lblock = (offset + (1 << bsize_shift) - 1) >> bsize_shift; 1768 __u16 start_list[GFS2_MAX_META_HEIGHT]; 1769 __u16 __end_list[GFS2_MAX_META_HEIGHT], *end_list = NULL; 1770 unsigned int start_aligned, uninitialized_var(end_aligned); 1771 unsigned int strip_h = ip->i_height - 1; 1772 u32 btotal = 0; 1773 int ret, state; 1774 int mp_h; /* metapath buffers are read in to this height */ 1775 u64 prev_bnr = 0; 1776 __be64 *start, *end; 1777 1778 if (offset >= maxsize) { 1779 /* 1780 * The starting point lies beyond the allocated meta-data; 1781 * there are no blocks do deallocate. 1782 */ 1783 return 0; 1784 } 1785 1786 /* 1787 * The start position of the hole is defined by lblock, start_list, and 1788 * start_aligned. The end position of the hole is defined by lend, 1789 * end_list, and end_aligned. 1790 * 1791 * start_aligned and end_aligned define down to which height the start 1792 * and end positions are aligned to the metadata tree (i.e., the 1793 * position is a multiple of the metadata granularity at the height 1794 * above). This determines at which heights additional meta pointers 1795 * needs to be preserved for the remaining data. 1796 */ 1797 1798 if (length) { 1799 u64 end_offset = offset + length; 1800 u64 lend; 1801 1802 /* 1803 * Clip the end at the maximum file size for the given height: 1804 * that's how far the metadata goes; files bigger than that 1805 * will have additional layers of indirection. 1806 */ 1807 if (end_offset > maxsize) 1808 end_offset = maxsize; 1809 lend = end_offset >> bsize_shift; 1810 1811 if (lblock >= lend) 1812 return 0; 1813 1814 find_metapath(sdp, lend, &mp, ip->i_height); 1815 end_list = __end_list; 1816 memcpy(end_list, mp.mp_list, sizeof(mp.mp_list)); 1817 1818 for (mp_h = ip->i_height - 1; mp_h > 0; mp_h--) { 1819 if (end_list[mp_h]) 1820 break; 1821 } 1822 end_aligned = mp_h; 1823 } 1824 1825 find_metapath(sdp, lblock, &mp, ip->i_height); 1826 memcpy(start_list, mp.mp_list, sizeof(start_list)); 1827 1828 for (mp_h = ip->i_height - 1; mp_h > 0; mp_h--) { 1829 if (start_list[mp_h]) 1830 break; 1831 } 1832 start_aligned = mp_h; 1833 1834 ret = gfs2_meta_inode_buffer(ip, &dibh); 1835 if (ret) 1836 return ret; 1837 1838 mp.mp_bh[0] = dibh; 1839 ret = lookup_metapath(ip, &mp); 1840 if (ret) 1841 goto out_metapath; 1842 1843 /* issue read-ahead on metadata */ 1844 for (mp_h = 0; mp_h < mp.mp_aheight - 1; mp_h++) { 1845 metapointer_range(&mp, mp_h, start_list, start_aligned, 1846 end_list, end_aligned, &start, &end); 1847 gfs2_metapath_ra(ip->i_gl, start, end); 1848 } 1849 1850 if (mp.mp_aheight == ip->i_height) 1851 state = DEALLOC_MP_FULL; /* We have a complete metapath */ 1852 else 1853 state = DEALLOC_FILL_MP; /* deal with partial metapath */ 1854 1855 ret = gfs2_rindex_update(sdp); 1856 if (ret) 1857 goto out_metapath; 1858 1859 ret = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE); 1860 if (ret) 1861 goto out_metapath; 1862 gfs2_holder_mark_uninitialized(&rd_gh); 1863 1864 mp_h = strip_h; 1865 1866 while (state != DEALLOC_DONE) { 1867 switch (state) { 1868 /* Truncate a full metapath at the given strip height. 1869 * Note that strip_h == mp_h in order to be in this state. */ 1870 case DEALLOC_MP_FULL: 1871 bh = mp.mp_bh[mp_h]; 1872 gfs2_assert_withdraw(sdp, bh); 1873 if (gfs2_assert_withdraw(sdp, 1874 prev_bnr != bh->b_blocknr)) { 1875 fs_emerg(sdp, "inode %llu, block:%llu, i_h:%u," 1876 "s_h:%u, mp_h:%u\n", 1877 (unsigned long long)ip->i_no_addr, 1878 prev_bnr, ip->i_height, strip_h, mp_h); 1879 } 1880 prev_bnr = bh->b_blocknr; 1881 1882 if (gfs2_metatype_check(sdp, bh, 1883 (mp_h ? GFS2_METATYPE_IN : 1884 GFS2_METATYPE_DI))) { 1885 ret = -EIO; 1886 goto out; 1887 } 1888 1889 /* 1890 * Below, passing end_aligned as 0 gives us the 1891 * metapointer range excluding the end point: the end 1892 * point is the first metapath we must not deallocate! 1893 */ 1894 1895 metapointer_range(&mp, mp_h, start_list, start_aligned, 1896 end_list, 0 /* end_aligned */, 1897 &start, &end); 1898 ret = sweep_bh_for_rgrps(ip, &rd_gh, mp.mp_bh[mp_h], 1899 start, end, 1900 mp_h != ip->i_height - 1, 1901 &btotal); 1902 1903 /* If we hit an error or just swept dinode buffer, 1904 just exit. */ 1905 if (ret || !mp_h) { 1906 state = DEALLOC_DONE; 1907 break; 1908 } 1909 state = DEALLOC_MP_LOWER; 1910 break; 1911 1912 /* lower the metapath strip height */ 1913 case DEALLOC_MP_LOWER: 1914 /* We're done with the current buffer, so release it, 1915 unless it's the dinode buffer. Then back up to the 1916 previous pointer. */ 1917 if (mp_h) { 1918 brelse(mp.mp_bh[mp_h]); 1919 mp.mp_bh[mp_h] = NULL; 1920 } 1921 /* If we can't get any lower in height, we've stripped 1922 off all we can. Next step is to back up and start 1923 stripping the previous level of metadata. */ 1924 if (mp_h == 0) { 1925 strip_h--; 1926 memcpy(mp.mp_list, start_list, sizeof(start_list)); 1927 mp_h = strip_h; 1928 state = DEALLOC_FILL_MP; 1929 break; 1930 } 1931 mp.mp_list[mp_h] = 0; 1932 mp_h--; /* search one metadata height down */ 1933 mp.mp_list[mp_h]++; 1934 if (walk_done(sdp, &mp, mp_h, end_list, end_aligned)) 1935 break; 1936 /* Here we've found a part of the metapath that is not 1937 * allocated. We need to search at that height for the 1938 * next non-null pointer. */ 1939 if (find_nonnull_ptr(sdp, &mp, mp_h, end_list, end_aligned)) { 1940 state = DEALLOC_FILL_MP; 1941 mp_h++; 1942 } 1943 /* No more non-null pointers at this height. Back up 1944 to the previous height and try again. */ 1945 break; /* loop around in the same state */ 1946 1947 /* Fill the metapath with buffers to the given height. */ 1948 case DEALLOC_FILL_MP: 1949 /* Fill the buffers out to the current height. */ 1950 ret = fillup_metapath(ip, &mp, mp_h); 1951 if (ret < 0) 1952 goto out; 1953 1954 /* On the first pass, issue read-ahead on metadata. */ 1955 if (mp.mp_aheight > 1 && strip_h == ip->i_height - 1) { 1956 unsigned int height = mp.mp_aheight - 1; 1957 1958 /* No read-ahead for data blocks. */ 1959 if (mp.mp_aheight - 1 == strip_h) 1960 height--; 1961 1962 for (; height >= mp.mp_aheight - ret; height--) { 1963 metapointer_range(&mp, height, 1964 start_list, start_aligned, 1965 end_list, end_aligned, 1966 &start, &end); 1967 gfs2_metapath_ra(ip->i_gl, start, end); 1968 } 1969 } 1970 1971 /* If buffers found for the entire strip height */ 1972 if (mp.mp_aheight - 1 == strip_h) { 1973 state = DEALLOC_MP_FULL; 1974 break; 1975 } 1976 if (mp.mp_aheight < ip->i_height) /* We have a partial height */ 1977 mp_h = mp.mp_aheight - 1; 1978 1979 /* If we find a non-null block pointer, crawl a bit 1980 higher up in the metapath and try again, otherwise 1981 we need to look lower for a new starting point. */ 1982 if (find_nonnull_ptr(sdp, &mp, mp_h, end_list, end_aligned)) 1983 mp_h++; 1984 else 1985 state = DEALLOC_MP_LOWER; 1986 break; 1987 } 1988 } 1989 1990 if (btotal) { 1991 if (current->journal_info == NULL) { 1992 ret = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS + 1993 RES_QUOTA, 0); 1994 if (ret) 1995 goto out; 1996 down_write(&ip->i_rw_mutex); 1997 } 1998 gfs2_statfs_change(sdp, 0, +btotal, 0); 1999 gfs2_quota_change(ip, -(s64)btotal, ip->i_inode.i_uid, 2000 ip->i_inode.i_gid); 2001 ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode); 2002 gfs2_trans_add_meta(ip->i_gl, dibh); 2003 gfs2_dinode_out(ip, dibh->b_data); 2004 up_write(&ip->i_rw_mutex); 2005 gfs2_trans_end(sdp); 2006 } 2007 2008 out: 2009 if (gfs2_holder_initialized(&rd_gh)) 2010 gfs2_glock_dq_uninit(&rd_gh); 2011 if (current->journal_info) { 2012 up_write(&ip->i_rw_mutex); 2013 gfs2_trans_end(sdp); 2014 cond_resched(); 2015 } 2016 gfs2_quota_unhold(ip); 2017 out_metapath: 2018 release_metapath(&mp); 2019 return ret; 2020 } 2021 2022 static int trunc_end(struct gfs2_inode *ip) 2023 { 2024 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 2025 struct buffer_head *dibh; 2026 int error; 2027 2028 error = gfs2_trans_begin(sdp, RES_DINODE, 0); 2029 if (error) 2030 return error; 2031 2032 down_write(&ip->i_rw_mutex); 2033 2034 error = gfs2_meta_inode_buffer(ip, &dibh); 2035 if (error) 2036 goto out; 2037 2038 if (!i_size_read(&ip->i_inode)) { 2039 ip->i_height = 0; 2040 ip->i_goal = ip->i_no_addr; 2041 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); 2042 gfs2_ordered_del_inode(ip); 2043 } 2044 ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode); 2045 ip->i_diskflags &= ~GFS2_DIF_TRUNC_IN_PROG; 2046 2047 gfs2_trans_add_meta(ip->i_gl, dibh); 2048 gfs2_dinode_out(ip, dibh->b_data); 2049 brelse(dibh); 2050 2051 out: 2052 up_write(&ip->i_rw_mutex); 2053 gfs2_trans_end(sdp); 2054 return error; 2055 } 2056 2057 /** 2058 * do_shrink - make a file smaller 2059 * @inode: the inode 2060 * @newsize: the size to make the file 2061 * 2062 * Called with an exclusive lock on @inode. The @size must 2063 * be equal to or smaller than the current inode size. 2064 * 2065 * Returns: errno 2066 */ 2067 2068 static int do_shrink(struct inode *inode, u64 newsize) 2069 { 2070 struct gfs2_inode *ip = GFS2_I(inode); 2071 int error; 2072 2073 error = trunc_start(inode, newsize); 2074 if (error < 0) 2075 return error; 2076 if (gfs2_is_stuffed(ip)) 2077 return 0; 2078 2079 error = punch_hole(ip, newsize, 0); 2080 if (error == 0) 2081 error = trunc_end(ip); 2082 2083 return error; 2084 } 2085 2086 void gfs2_trim_blocks(struct inode *inode) 2087 { 2088 int ret; 2089 2090 ret = do_shrink(inode, inode->i_size); 2091 WARN_ON(ret != 0); 2092 } 2093 2094 /** 2095 * do_grow - Touch and update inode size 2096 * @inode: The inode 2097 * @size: The new size 2098 * 2099 * This function updates the timestamps on the inode and 2100 * may also increase the size of the inode. This function 2101 * must not be called with @size any smaller than the current 2102 * inode size. 2103 * 2104 * Although it is not strictly required to unstuff files here, 2105 * earlier versions of GFS2 have a bug in the stuffed file reading 2106 * code which will result in a buffer overrun if the size is larger 2107 * than the max stuffed file size. In order to prevent this from 2108 * occurring, such files are unstuffed, but in other cases we can 2109 * just update the inode size directly. 2110 * 2111 * Returns: 0 on success, or -ve on error 2112 */ 2113 2114 static int do_grow(struct inode *inode, u64 size) 2115 { 2116 struct gfs2_inode *ip = GFS2_I(inode); 2117 struct gfs2_sbd *sdp = GFS2_SB(inode); 2118 struct gfs2_alloc_parms ap = { .target = 1, }; 2119 struct buffer_head *dibh; 2120 int error; 2121 int unstuff = 0; 2122 2123 if (gfs2_is_stuffed(ip) && size > gfs2_max_stuffed_size(ip)) { 2124 error = gfs2_quota_lock_check(ip, &ap); 2125 if (error) 2126 return error; 2127 2128 error = gfs2_inplace_reserve(ip, &ap); 2129 if (error) 2130 goto do_grow_qunlock; 2131 unstuff = 1; 2132 } 2133 2134 error = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS + RES_RG_BIT + 2135 (unstuff && 2136 gfs2_is_jdata(ip) ? RES_JDATA : 0) + 2137 (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF ? 2138 0 : RES_QUOTA), 0); 2139 if (error) 2140 goto do_grow_release; 2141 2142 if (unstuff) { 2143 error = gfs2_unstuff_dinode(ip, NULL); 2144 if (error) 2145 goto do_end_trans; 2146 } 2147 2148 error = gfs2_meta_inode_buffer(ip, &dibh); 2149 if (error) 2150 goto do_end_trans; 2151 2152 i_size_write(inode, size); 2153 ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode); 2154 gfs2_trans_add_meta(ip->i_gl, dibh); 2155 gfs2_dinode_out(ip, dibh->b_data); 2156 brelse(dibh); 2157 2158 do_end_trans: 2159 gfs2_trans_end(sdp); 2160 do_grow_release: 2161 if (unstuff) { 2162 gfs2_inplace_release(ip); 2163 do_grow_qunlock: 2164 gfs2_quota_unlock(ip); 2165 } 2166 return error; 2167 } 2168 2169 /** 2170 * gfs2_setattr_size - make a file a given size 2171 * @inode: the inode 2172 * @newsize: the size to make the file 2173 * 2174 * The file size can grow, shrink, or stay the same size. This 2175 * is called holding i_rwsem and an exclusive glock on the inode 2176 * in question. 2177 * 2178 * Returns: errno 2179 */ 2180 2181 int gfs2_setattr_size(struct inode *inode, u64 newsize) 2182 { 2183 struct gfs2_inode *ip = GFS2_I(inode); 2184 int ret; 2185 2186 BUG_ON(!S_ISREG(inode->i_mode)); 2187 2188 ret = inode_newsize_ok(inode, newsize); 2189 if (ret) 2190 return ret; 2191 2192 inode_dio_wait(inode); 2193 2194 ret = gfs2_rsqa_alloc(ip); 2195 if (ret) 2196 goto out; 2197 2198 if (newsize >= inode->i_size) { 2199 ret = do_grow(inode, newsize); 2200 goto out; 2201 } 2202 2203 ret = do_shrink(inode, newsize); 2204 out: 2205 gfs2_rsqa_delete(ip, NULL); 2206 return ret; 2207 } 2208 2209 int gfs2_truncatei_resume(struct gfs2_inode *ip) 2210 { 2211 int error; 2212 error = punch_hole(ip, i_size_read(&ip->i_inode), 0); 2213 if (!error) 2214 error = trunc_end(ip); 2215 return error; 2216 } 2217 2218 int gfs2_file_dealloc(struct gfs2_inode *ip) 2219 { 2220 return punch_hole(ip, 0, 0); 2221 } 2222 2223 /** 2224 * gfs2_free_journal_extents - Free cached journal bmap info 2225 * @jd: The journal 2226 * 2227 */ 2228 2229 void gfs2_free_journal_extents(struct gfs2_jdesc *jd) 2230 { 2231 struct gfs2_journal_extent *jext; 2232 2233 while(!list_empty(&jd->extent_list)) { 2234 jext = list_entry(jd->extent_list.next, struct gfs2_journal_extent, list); 2235 list_del(&jext->list); 2236 kfree(jext); 2237 } 2238 } 2239 2240 /** 2241 * gfs2_add_jextent - Add or merge a new extent to extent cache 2242 * @jd: The journal descriptor 2243 * @lblock: The logical block at start of new extent 2244 * @dblock: The physical block at start of new extent 2245 * @blocks: Size of extent in fs blocks 2246 * 2247 * Returns: 0 on success or -ENOMEM 2248 */ 2249 2250 static int gfs2_add_jextent(struct gfs2_jdesc *jd, u64 lblock, u64 dblock, u64 blocks) 2251 { 2252 struct gfs2_journal_extent *jext; 2253 2254 if (!list_empty(&jd->extent_list)) { 2255 jext = list_entry(jd->extent_list.prev, struct gfs2_journal_extent, list); 2256 if ((jext->dblock + jext->blocks) == dblock) { 2257 jext->blocks += blocks; 2258 return 0; 2259 } 2260 } 2261 2262 jext = kzalloc(sizeof(struct gfs2_journal_extent), GFP_NOFS); 2263 if (jext == NULL) 2264 return -ENOMEM; 2265 jext->dblock = dblock; 2266 jext->lblock = lblock; 2267 jext->blocks = blocks; 2268 list_add_tail(&jext->list, &jd->extent_list); 2269 jd->nr_extents++; 2270 return 0; 2271 } 2272 2273 /** 2274 * gfs2_map_journal_extents - Cache journal bmap info 2275 * @sdp: The super block 2276 * @jd: The journal to map 2277 * 2278 * Create a reusable "extent" mapping from all logical 2279 * blocks to all physical blocks for the given journal. This will save 2280 * us time when writing journal blocks. Most journals will have only one 2281 * extent that maps all their logical blocks. That's because gfs2.mkfs 2282 * arranges the journal blocks sequentially to maximize performance. 2283 * So the extent would map the first block for the entire file length. 2284 * However, gfs2_jadd can happen while file activity is happening, so 2285 * those journals may not be sequential. Less likely is the case where 2286 * the users created their own journals by mounting the metafs and 2287 * laying it out. But it's still possible. These journals might have 2288 * several extents. 2289 * 2290 * Returns: 0 on success, or error on failure 2291 */ 2292 2293 int gfs2_map_journal_extents(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd) 2294 { 2295 u64 lblock = 0; 2296 u64 lblock_stop; 2297 struct gfs2_inode *ip = GFS2_I(jd->jd_inode); 2298 struct buffer_head bh; 2299 unsigned int shift = sdp->sd_sb.sb_bsize_shift; 2300 u64 size; 2301 int rc; 2302 ktime_t start, end; 2303 2304 start = ktime_get(); 2305 lblock_stop = i_size_read(jd->jd_inode) >> shift; 2306 size = (lblock_stop - lblock) << shift; 2307 jd->nr_extents = 0; 2308 WARN_ON(!list_empty(&jd->extent_list)); 2309 2310 do { 2311 bh.b_state = 0; 2312 bh.b_blocknr = 0; 2313 bh.b_size = size; 2314 rc = gfs2_block_map(jd->jd_inode, lblock, &bh, 0); 2315 if (rc || !buffer_mapped(&bh)) 2316 goto fail; 2317 rc = gfs2_add_jextent(jd, lblock, bh.b_blocknr, bh.b_size >> shift); 2318 if (rc) 2319 goto fail; 2320 size -= bh.b_size; 2321 lblock += (bh.b_size >> ip->i_inode.i_blkbits); 2322 } while(size > 0); 2323 2324 end = ktime_get(); 2325 fs_info(sdp, "journal %d mapped with %u extents in %lldms\n", jd->jd_jid, 2326 jd->nr_extents, ktime_ms_delta(end, start)); 2327 return 0; 2328 2329 fail: 2330 fs_warn(sdp, "error %d mapping journal %u at offset %llu (extent %u)\n", 2331 rc, jd->jd_jid, 2332 (unsigned long long)(i_size_read(jd->jd_inode) - size), 2333 jd->nr_extents); 2334 fs_warn(sdp, "bmap=%d lblock=%llu block=%llu, state=0x%08lx, size=%llu\n", 2335 rc, (unsigned long long)lblock, (unsigned long long)bh.b_blocknr, 2336 bh.b_state, (unsigned long long)bh.b_size); 2337 gfs2_free_journal_extents(jd); 2338 return rc; 2339 } 2340 2341 /** 2342 * gfs2_write_alloc_required - figure out if a write will require an allocation 2343 * @ip: the file being written to 2344 * @offset: the offset to write to 2345 * @len: the number of bytes being written 2346 * 2347 * Returns: 1 if an alloc is required, 0 otherwise 2348 */ 2349 2350 int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset, 2351 unsigned int len) 2352 { 2353 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 2354 struct buffer_head bh; 2355 unsigned int shift; 2356 u64 lblock, lblock_stop, size; 2357 u64 end_of_file; 2358 2359 if (!len) 2360 return 0; 2361 2362 if (gfs2_is_stuffed(ip)) { 2363 if (offset + len > gfs2_max_stuffed_size(ip)) 2364 return 1; 2365 return 0; 2366 } 2367 2368 shift = sdp->sd_sb.sb_bsize_shift; 2369 BUG_ON(gfs2_is_dir(ip)); 2370 end_of_file = (i_size_read(&ip->i_inode) + sdp->sd_sb.sb_bsize - 1) >> shift; 2371 lblock = offset >> shift; 2372 lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift; 2373 if (lblock_stop > end_of_file && ip != GFS2_I(sdp->sd_rindex)) 2374 return 1; 2375 2376 size = (lblock_stop - lblock) << shift; 2377 do { 2378 bh.b_state = 0; 2379 bh.b_size = size; 2380 gfs2_block_map(&ip->i_inode, lblock, &bh, 0); 2381 if (!buffer_mapped(&bh)) 2382 return 1; 2383 size -= bh.b_size; 2384 lblock += (bh.b_size >> ip->i_inode.i_blkbits); 2385 } while(size > 0); 2386 2387 return 0; 2388 } 2389 2390 static int stuffed_zero_range(struct inode *inode, loff_t offset, loff_t length) 2391 { 2392 struct gfs2_inode *ip = GFS2_I(inode); 2393 struct buffer_head *dibh; 2394 int error; 2395 2396 if (offset >= inode->i_size) 2397 return 0; 2398 if (offset + length > inode->i_size) 2399 length = inode->i_size - offset; 2400 2401 error = gfs2_meta_inode_buffer(ip, &dibh); 2402 if (error) 2403 return error; 2404 gfs2_trans_add_meta(ip->i_gl, dibh); 2405 memset(dibh->b_data + sizeof(struct gfs2_dinode) + offset, 0, 2406 length); 2407 brelse(dibh); 2408 return 0; 2409 } 2410 2411 static int gfs2_journaled_truncate_range(struct inode *inode, loff_t offset, 2412 loff_t length) 2413 { 2414 struct gfs2_sbd *sdp = GFS2_SB(inode); 2415 loff_t max_chunk = GFS2_JTRUNC_REVOKES * sdp->sd_vfs->s_blocksize; 2416 int error; 2417 2418 while (length) { 2419 struct gfs2_trans *tr; 2420 loff_t chunk; 2421 unsigned int offs; 2422 2423 chunk = length; 2424 if (chunk > max_chunk) 2425 chunk = max_chunk; 2426 2427 offs = offset & ~PAGE_MASK; 2428 if (offs && chunk > PAGE_SIZE) 2429 chunk = offs + ((chunk - offs) & PAGE_MASK); 2430 2431 truncate_pagecache_range(inode, offset, chunk); 2432 offset += chunk; 2433 length -= chunk; 2434 2435 tr = current->journal_info; 2436 if (!test_bit(TR_TOUCHED, &tr->tr_flags)) 2437 continue; 2438 2439 gfs2_trans_end(sdp); 2440 error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES); 2441 if (error) 2442 return error; 2443 } 2444 return 0; 2445 } 2446 2447 int __gfs2_punch_hole(struct file *file, loff_t offset, loff_t length) 2448 { 2449 struct inode *inode = file_inode(file); 2450 struct gfs2_inode *ip = GFS2_I(inode); 2451 struct gfs2_sbd *sdp = GFS2_SB(inode); 2452 int error; 2453 2454 if (gfs2_is_jdata(ip)) 2455 error = gfs2_trans_begin(sdp, RES_DINODE + 2 * RES_JDATA, 2456 GFS2_JTRUNC_REVOKES); 2457 else 2458 error = gfs2_trans_begin(sdp, RES_DINODE, 0); 2459 if (error) 2460 return error; 2461 2462 if (gfs2_is_stuffed(ip)) { 2463 error = stuffed_zero_range(inode, offset, length); 2464 if (error) 2465 goto out; 2466 } else { 2467 unsigned int start_off, end_len, blocksize; 2468 2469 blocksize = i_blocksize(inode); 2470 start_off = offset & (blocksize - 1); 2471 end_len = (offset + length) & (blocksize - 1); 2472 if (start_off) { 2473 unsigned int len = length; 2474 if (length > blocksize - start_off) 2475 len = blocksize - start_off; 2476 error = gfs2_block_zero_range(inode, offset, len); 2477 if (error) 2478 goto out; 2479 if (start_off + length < blocksize) 2480 end_len = 0; 2481 } 2482 if (end_len) { 2483 error = gfs2_block_zero_range(inode, 2484 offset + length - end_len, end_len); 2485 if (error) 2486 goto out; 2487 } 2488 } 2489 2490 if (gfs2_is_jdata(ip)) { 2491 BUG_ON(!current->journal_info); 2492 gfs2_journaled_truncate_range(inode, offset, length); 2493 } else 2494 truncate_pagecache_range(inode, offset, offset + length - 1); 2495 2496 file_update_time(file); 2497 mark_inode_dirty(inode); 2498 2499 if (current->journal_info) 2500 gfs2_trans_end(sdp); 2501 2502 if (!gfs2_is_stuffed(ip)) 2503 error = punch_hole(ip, offset, length); 2504 2505 out: 2506 if (current->journal_info) 2507 gfs2_trans_end(sdp); 2508 return error; 2509 } 2510