1 /* 2 * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com 3 * Written by Alex Tomas <alex@clusterfs.com> 4 * 5 * Architecture independence: 6 * Copyright (c) 2005, Bull S.A. 7 * Written by Pierre Peiffer <pierre.peiffer@bull.net> 8 * 9 * This program is free software; you can redistribute it and/or modify 10 * it under the terms of the GNU General Public License version 2 as 11 * published by the Free Software Foundation. 12 * 13 * This program is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 * GNU General Public License for more details. 17 * 18 * You should have received a copy of the GNU General Public License 19 * along with this program; if not, write to the Free Software 20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA 21 */ 22 23 /* 24 * Extents support for EXT4 25 * 26 * TODO: 27 * - ext4*_error() should be used in some situations 28 * - analyze all BUG()/BUG_ON(), use -EIO where appropriate 29 * - smart tree reduction 30 */ 31 32 #include <linux/module.h> 33 #include <linux/fs.h> 34 #include <linux/time.h> 35 #include <linux/jbd2.h> 36 #include <linux/highuid.h> 37 #include <linux/pagemap.h> 38 #include <linux/quotaops.h> 39 #include <linux/string.h> 40 #include <linux/slab.h> 41 #include <linux/falloc.h> 42 #include <asm/uaccess.h> 43 #include <linux/fiemap.h> 44 #include "ext4_jbd2.h" 45 #include "ext4_extents.h" 46 47 #include <trace/events/ext4.h> 48 49 static int ext4_ext_truncate_extend_restart(handle_t *handle, 50 struct inode *inode, 51 int needed) 52 { 53 int err; 54 55 if (!ext4_handle_valid(handle)) 56 return 0; 57 if (handle->h_buffer_credits > needed) 58 return 0; 59 err = ext4_journal_extend(handle, needed); 60 if (err <= 0) 61 return err; 62 err = ext4_truncate_restart_trans(handle, inode, needed); 63 if (err == 0) 64 err = -EAGAIN; 65 66 return err; 67 } 68 69 /* 70 * could return: 71 * - EROFS 72 * - ENOMEM 73 */ 74 static int ext4_ext_get_access(handle_t *handle, struct inode *inode, 75 struct ext4_ext_path *path) 76 { 77 if (path->p_bh) { 78 /* path points to block */ 79 return ext4_journal_get_write_access(handle, path->p_bh); 80 } 81 /* path points to leaf/index in inode body */ 82 /* we use in-core data, no need to protect them */ 83 return 0; 84 } 85 86 /* 87 * could return: 88 * - EROFS 89 * - ENOMEM 90 * - EIO 91 */ 92 static int ext4_ext_dirty(handle_t *handle, struct inode *inode, 93 struct ext4_ext_path *path) 94 { 95 int err; 96 if (path->p_bh) { 97 /* path points to block */ 98 err = ext4_handle_dirty_metadata(handle, inode, path->p_bh); 99 } else { 100 /* path points to leaf/index in inode body */ 101 err = ext4_mark_inode_dirty(handle, inode); 102 } 103 return err; 104 } 105 106 static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode, 107 struct ext4_ext_path *path, 108 ext4_lblk_t block) 109 { 110 struct ext4_inode_info *ei = EXT4_I(inode); 111 ext4_fsblk_t bg_start; 112 ext4_fsblk_t last_block; 113 ext4_grpblk_t colour; 114 ext4_group_t block_group; 115 int flex_size = ext4_flex_bg_size(EXT4_SB(inode->i_sb)); 116 int depth; 117 118 if (path) { 119 struct ext4_extent *ex; 120 depth = path->p_depth; 121 122 /* 123 * Try to predict block placement assuming that we are 124 * filling in a file which will eventually be 125 * non-sparse --- i.e., in the case of libbfd writing 126 * an ELF object's sections out-of-order but in a way 127 * that eventually results in a contiguous
object or 128 * executable file, or some database extending a table 129 * space file. However, this is actually somewhat 130 * non-ideal if we are writing a sparse file such as 131 * qemu or KVM writing a raw image file that is going 132 * to stay fairly sparse, since it will end up 133 * fragmenting the file system's free space. Maybe we 134 * should have some heuristics or some way to allow 135 * userspace to pass a hint to the file system, 136 * especially if the latter case turns out to be 137 * common. 138 */ 139 ex = path[depth].p_ext; 140 if (ex) { 141 ext4_fsblk_t ext_pblk = ext4_ext_pblock(ex); 142 ext4_lblk_t ext_block = le32_to_cpu(ex->ee_block); 143 144 if (block > ext_block) 145 return ext_pblk + (block - ext_block); 146 else 147 return ext_pblk - (ext_block - block); 148 } 149 150 /* it looks like index is empty; 151 * try to find starting block from index itself */ 152 if (path[depth].p_bh) 153 return path[depth].p_bh->b_blocknr; 154 } 155 156 /* OK. use inode's group */ 157 block_group = ei->i_block_group; 158 if (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) { 159 /* 160 * If there are at least EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME 161 * block groups per flexgroup, reserve the first block 162 * group for directories and special files. Regular 163 * files will start at the second block group. This 164 * tends to speed up directory access and improves 165 * fsck times. 166 */ 167 block_group &= ~(flex_size-1); 168 if (S_ISREG(inode->i_mode)) 169 block_group++; 170 } 171 bg_start = ext4_group_first_block_no(inode->i_sb, block_group); 172 last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1; 173 174 /* 175 * If we are doing delayed allocation, we don't need to take 176 * colour into account. 177 */ 178 if (test_opt(inode->i_sb, DELALLOC)) 179 return bg_start; 180 181 if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block) 182 colour = (current->pid % 16) * 183 (EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16); 184 else 185 colour = (current->pid % 16) * ((last_block - bg_start) / 16); 186 return bg_start + colour + block; 187 } 188 189 /* 190 * Allocation for a metadata block 191 */ 192 static ext4_fsblk_t 193 ext4_ext_new_meta_block(handle_t *handle, struct inode *inode, 194 struct ext4_ext_path *path, 195 struct ext4_extent *ex, int *err) 196 { 197 ext4_fsblk_t goal, newblock; 198 199 goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block)); 200 newblock = ext4_new_meta_blocks(handle, inode, goal, NULL, err); 201 return newblock; 202 } 203 204 static inline int ext4_ext_space_block(struct inode *inode, int check) 205 { 206 int size; 207 208 size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) 209 / sizeof(struct ext4_extent); 210 if (!check) { 211 #ifdef AGGRESSIVE_TEST 212 if (size > 6) 213 size = 6; 214 #endif 215 } 216 return size; 217 } 218 219 static inline int ext4_ext_space_block_idx(struct inode *inode, int check) 220 { 221 int size; 222 223 size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) 224 / sizeof(struct ext4_extent_idx); 225 if (!check) { 226 #ifdef AGGRESSIVE_TEST 227 if (size > 5) 228 size = 5; 229 #endif 230 } 231 return size; 232 } 233 234 static inline int ext4_ext_space_root(struct inode *inode, int check) 235 { 236 int size; 237 238 size = sizeof(EXT4_I(inode)->i_data); 239 size -= sizeof(struct ext4_extent_header); 240 size /= sizeof(struct ext4_extent); 241 if (!check) { 242 #ifdef AGGRESSIVE_TEST 243 if (size > 3) 244 size = 3; 245 #endif 246 } 247 return size; 248 } 249 250 static inline int
ext4_ext_space_root_idx(struct inode *inode, int check) 251 { 252 int size; 253 254 size = sizeof(EXT4_I(inode)->i_data); 255 size -= sizeof(struct ext4_extent_header); 256 size /= sizeof(struct ext4_extent_idx); 257 if (!check) { 258 #ifdef AGGRESSIVE_TEST 259 if (size > 4) 260 size = 4; 261 #endif 262 } 263 return size; 264 } 265 266 /* 267 * Calculate the number of metadata blocks needed 268 * to allocate @blocks 269 * Worst case is one block per extent 270 */ 271 int ext4_ext_calc_metadata_amount(struct inode *inode, ext4_lblk_t lblock) 272 { 273 struct ext4_inode_info *ei = EXT4_I(inode); 274 int idxs, num = 0; 275 276 idxs = ((inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) 277 / sizeof(struct ext4_extent_idx)); 278 279 /* 280 * If the new delayed allocation block is contiguous with the 281 * previous da block, it can share index blocks with the 282 * previous block, so we only need to allocate a new index 283 * block every idxs leaf blocks. At idxs**2 blocks, we need 284 * an additional index block, and at idxs**3 blocks, yet 285 * another index block. 286 */ 287 if (ei->i_da_metadata_calc_len && 288 ei->i_da_metadata_calc_last_lblock+1 == lblock) { 289 if ((ei->i_da_metadata_calc_len % idxs) == 0) 290 num++; 291 if ((ei->i_da_metadata_calc_len % (idxs*idxs)) == 0) 292 num++; 293 if ((ei->i_da_metadata_calc_len % (idxs*idxs*idxs)) == 0) { 294 num++; 295 ei->i_da_metadata_calc_len = 0; 296 } else 297 ei->i_da_metadata_calc_len++; 298 ei->i_da_metadata_calc_last_lblock++; 299 return num; 300 } 301 302 /* 303 * In the worst case we need a new set of index blocks at 304 * every level of the inode's extent tree. 305 */ 306 ei->i_da_metadata_calc_len = 1; 307 ei->i_da_metadata_calc_last_lblock = lblock; 308 return ext_depth(inode) + 1; 309 } 310 311 static int 312 ext4_ext_max_entries(struct inode *inode, int depth) 313 { 314 int max; 315 316 if (depth == ext_depth(inode)) { 317 if (depth == 0) 318 max = ext4_ext_space_root(inode, 1); 319 else 320 max = ext4_ext_space_root_idx(inode, 1); 321 } else { 322 if (depth == 0) 323 max = ext4_ext_space_block(inode, 1); 324 else 325 max = ext4_ext_space_block_idx(inode, 1); 326 } 327 328 return max; 329 } 330 331 static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext) 332 { 333 ext4_fsblk_t block = ext4_ext_pblock(ext); 334 int len = ext4_ext_get_actual_len(ext); 335 336 return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len); 337 } 338 339 static int ext4_valid_extent_idx(struct inode *inode, 340 struct ext4_extent_idx *ext_idx) 341 { 342 ext4_fsblk_t block = ext4_idx_pblock(ext_idx); 343 344 return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, 1); 345 } 346 347 static int ext4_valid_extent_entries(struct inode *inode, 348 struct ext4_extent_header *eh, 349 int depth) 350 { 351 struct ext4_extent *ext; 352 struct ext4_extent_idx *ext_idx; 353 unsigned short entries; 354 if (eh->eh_entries == 0) 355 return 1; 356 357 entries = le16_to_cpu(eh->eh_entries); 358 359 if (depth == 0) { 360 /* leaf entries */ 361 ext = EXT_FIRST_EXTENT(eh); 362 while (entries) { 363 if (!ext4_valid_extent(inode, ext)) 364 return 0; 365 ext++; 366 entries--; 367 } 368 } else { 369 ext_idx = EXT_FIRST_INDEX(eh); 370 while (entries) { 371 if (!ext4_valid_extent_idx(inode, ext_idx)) 372 return 0; 373 ext_idx++; 374 entries--; 375 } 376 } 377 return 1; 378 } 379 380 static int __ext4_ext_check(const char *function, unsigned int line, 381 struct inode *inode, struct ext4_extent_header *eh, 382 int depth) 383 { 384 const char
*error_msg; 385 int max = 0; 386 387 if (unlikely(eh->eh_magic != EXT4_EXT_MAGIC)) { 388 error_msg = "invalid magic"; 389 goto corrupted; 390 } 391 if (unlikely(le16_to_cpu(eh->eh_depth) != depth)) { 392 error_msg = "unexpected eh_depth"; 393 goto corrupted; 394 } 395 if (unlikely(eh->eh_max == 0)) { 396 error_msg = "invalid eh_max"; 397 goto corrupted; 398 } 399 max = ext4_ext_max_entries(inode, depth); 400 if (unlikely(le16_to_cpu(eh->eh_max) > max)) { 401 error_msg = "too large eh_max"; 402 goto corrupted; 403 } 404 if (unlikely(le16_to_cpu(eh->eh_entries) > le16_to_cpu(eh->eh_max))) { 405 error_msg = "invalid eh_entries"; 406 goto corrupted; 407 } 408 if (!ext4_valid_extent_entries(inode, eh, depth)) { 409 error_msg = "invalid extent entries"; 410 goto corrupted; 411 } 412 return 0; 413 414 corrupted: 415 ext4_error_inode(inode, function, line, 0, 416 "bad header/extent: %s - magic %x, " 417 "entries %u, max %u(%u), depth %u(%u)", 418 error_msg, le16_to_cpu(eh->eh_magic), 419 le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max), 420 max, le16_to_cpu(eh->eh_depth), depth); 421 422 return -EIO; 423 } 424 425 #define ext4_ext_check(inode, eh, depth) \ 426 __ext4_ext_check(__func__, __LINE__, inode, eh, depth) 427 428 int ext4_ext_check_inode(struct inode *inode) 429 { 430 return ext4_ext_check(inode, ext_inode_hdr(inode), ext_depth(inode)); 431 } 432 433 #ifdef EXT_DEBUG 434 static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path) 435 { 436 int k, l = path->p_depth; 437 438 ext_debug("path:"); 439 for (k = 0; k <= l; k++, path++) { 440 if (path->p_idx) { 441 ext_debug(" %d->%llu", le32_to_cpu(path->p_idx->ei_block), 442 ext4_idx_pblock(path->p_idx)); 443 } else if (path->p_ext) { 444 ext_debug(" %d:[%d]%d:%llu ", 445 le32_to_cpu(path->p_ext->ee_block), 446 ext4_ext_is_uninitialized(path->p_ext), 447 ext4_ext_get_actual_len(path->p_ext), 448 ext4_ext_pblock(path->p_ext)); 449 } else 450 ext_debug(" []"); 451 } 452 ext_debug("\n"); 453 } 454 455 static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path) 456 { 457 int depth = ext_depth(inode); 458 struct ext4_extent_header *eh; 459 struct ext4_extent *ex; 460 int i; 461 462 if (!path) 463 return; 464 465 eh = path[depth].p_hdr; 466 ex = EXT_FIRST_EXTENT(eh); 467 468 ext_debug("Displaying leaf extents for inode %lu\n", inode->i_ino); 469 470 for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) { 471 ext_debug("%d:[%d]%d:%llu ", le32_to_cpu(ex->ee_block), 472 ext4_ext_is_uninitialized(ex), 473 ext4_ext_get_actual_len(ex), ext4_ext_pblock(ex)); 474 } 475 ext_debug("\n"); 476 } 477 #else 478 #define ext4_ext_show_path(inode, path) 479 #define ext4_ext_show_leaf(inode, path) 480 #endif 481 482 void ext4_ext_drop_refs(struct ext4_ext_path *path) 483 { 484 int depth = path->p_depth; 485 int i; 486 487 for (i = 0; i <= depth; i++, path++) 488 if (path->p_bh) { 489 brelse(path->p_bh); 490 path->p_bh = NULL; 491 } 492 } 493 494 /* 495 * ext4_ext_binsearch_idx: 496 * binary search for the closest index of the given block 497 * the header must be checked before calling this 498 */ 499 static void 500 ext4_ext_binsearch_idx(struct inode *inode, 501 struct ext4_ext_path *path, ext4_lblk_t block) 502 { 503 struct ext4_extent_header *eh = path->p_hdr; 504 struct ext4_extent_idx *r, *l, *m; 505 506 507 ext_debug("binsearch for %u(idx): ", block); 508 509 l = EXT_FIRST_INDEX(eh) + 1; 510 r = EXT_LAST_INDEX(eh); 511 while (l <= r) { 512 m = l + (r - l) / 2; 513 if (block < le32_to_cpu(m->ei_block)) 514 r = m - 1; 
515 else 516 l = m + 1; 517 ext_debug("%p(%u):%p(%u):%p(%u) ", l, le32_to_cpu(l->ei_block), 518 m, le32_to_cpu(m->ei_block), 519 r, le32_to_cpu(r->ei_block)); 520 } 521 522 path->p_idx = l - 1; 523 ext_debug(" -> %d->%lld ", le32_to_cpu(path->p_idx->ei_block), 524 ext4_idx_pblock(path->p_idx)); 525 526 #ifdef CHECK_BINSEARCH 527 { 528 struct ext4_extent_idx *chix, *ix; 529 int k; 530 531 chix = ix = EXT_FIRST_INDEX(eh); 532 for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ix++) { 533 if (k != 0 && 534 le32_to_cpu(ix->ei_block) <= le32_to_cpu(ix[-1].ei_block)) { 535 printk(KERN_DEBUG "k=%d, ix=0x%p, " 536 "first=0x%p\n", k, 537 ix, EXT_FIRST_INDEX(eh)); 538 printk(KERN_DEBUG "%u <= %u\n", 539 le32_to_cpu(ix->ei_block), 540 le32_to_cpu(ix[-1].ei_block)); 541 } 542 BUG_ON(k && le32_to_cpu(ix->ei_block) 543 <= le32_to_cpu(ix[-1].ei_block)); 544 if (block < le32_to_cpu(ix->ei_block)) 545 break; 546 chix = ix; 547 } 548 BUG_ON(chix != path->p_idx); 549 } 550 #endif 551 552 } 553 554 /* 555 * ext4_ext_binsearch: 556 * binary search for closest extent of the given block 557 * the header must be checked before calling this 558 */ 559 static void 560 ext4_ext_binsearch(struct inode *inode, 561 struct ext4_ext_path *path, ext4_lblk_t block) 562 { 563 struct ext4_extent_header *eh = path->p_hdr; 564 struct ext4_extent *r, *l, *m; 565 566 if (eh->eh_entries == 0) { 567 /* 568 * this leaf is empty: 569 * we get such a leaf in split/add case 570 */ 571 return; 572 } 573 574 ext_debug("binsearch for %u: ", block); 575 576 l = EXT_FIRST_EXTENT(eh) + 1; 577 r = EXT_LAST_EXTENT(eh); 578 579 while (l <= r) { 580 m = l + (r - l) / 2; 581 if (block < le32_to_cpu(m->ee_block)) 582 r = m - 1; 583 else 584 l = m + 1; 585 ext_debug("%p(%u):%p(%u):%p(%u) ", l, le32_to_cpu(l->ee_block), 586 m, le32_to_cpu(m->ee_block), 587 r, le32_to_cpu(r->ee_block)); 588 } 589 590 path->p_ext = l - 1; 591 ext_debug(" -> %d:%llu:[%d]%d ", 592 le32_to_cpu(path->p_ext->ee_block), 593 ext4_ext_pblock(path->p_ext), 594 ext4_ext_is_uninitialized(path->p_ext), 595 ext4_ext_get_actual_len(path->p_ext)); 596 597 #ifdef CHECK_BINSEARCH 598 { 599 struct ext4_extent *chex, *ex; 600 int k; 601 602 chex = ex = EXT_FIRST_EXTENT(eh); 603 for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ex++) { 604 BUG_ON(k && le32_to_cpu(ex->ee_block) 605 <= le32_to_cpu(ex[-1].ee_block)); 606 if (block < le32_to_cpu(ex->ee_block)) 607 break; 608 chex = ex; 609 } 610 BUG_ON(chex != path->p_ext); 611 } 612 #endif 613 614 } 615 616 int ext4_ext_tree_init(handle_t *handle, struct inode *inode) 617 { 618 struct ext4_extent_header *eh; 619 620 eh = ext_inode_hdr(inode); 621 eh->eh_depth = 0; 622 eh->eh_entries = 0; 623 eh->eh_magic = EXT4_EXT_MAGIC; 624 eh->eh_max = cpu_to_le16(ext4_ext_space_root(inode, 0)); 625 ext4_mark_inode_dirty(handle, inode); 626 ext4_ext_invalidate_cache(inode); 627 return 0; 628 } 629 630 struct ext4_ext_path * 631 ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block, 632 struct ext4_ext_path *path) 633 { 634 struct ext4_extent_header *eh; 635 struct buffer_head *bh; 636 short int depth, i, ppos = 0, alloc = 0; 637 638 eh = ext_inode_hdr(inode); 639 depth = ext_depth(inode); 640 641 /* account possible depth increase */ 642 if (!path) { 643 path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 2), 644 GFP_NOFS); 645 if (!path) 646 return ERR_PTR(-ENOMEM); 647 alloc = 1; 648 } 649 path[0].p_hdr = eh; 650 path[0].p_bh = NULL; 651 652 i = depth; 653 /* walk through the tree */ 654 while (i) { 655 int need_to_validate = 0; 656 657 
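/*
 * Illustrative sketch of one pass of this loop (made-up numbers):
 * for a lookup of logical block 1000 in a depth-1 tree whose root
 * holds the index entries {0 -> blk 500, 768 -> blk 501, 2048 -> blk 502},
 * the ext4_ext_binsearch_idx() call below picks the last entry with
 * ei_block <= 1000, i.e. 768 -> blk 501, so this iteration reads
 * physical block 501 and descends; once i reaches 0 the leaf itself
 * is searched with ext4_ext_binsearch().
 */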
ext_debug("depth %d: num %d, max %d\n", 658 ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max)); 659 660 ext4_ext_binsearch_idx(inode, path + ppos, block); 661 path[ppos].p_block = ext4_idx_pblock(path[ppos].p_idx); 662 path[ppos].p_depth = i; 663 path[ppos].p_ext = NULL; 664 665 bh = sb_getblk(inode->i_sb, path[ppos].p_block); 666 if (unlikely(!bh)) 667 goto err; 668 if (!bh_uptodate_or_lock(bh)) { 669 trace_ext4_ext_load_extent(inode, block, 670 path[ppos].p_block); 671 if (bh_submit_read(bh) < 0) { 672 put_bh(bh); 673 goto err; 674 } 675 /* validate the extent entries */ 676 need_to_validate = 1; 677 } 678 eh = ext_block_hdr(bh); 679 ppos++; 680 if (unlikely(ppos > depth)) { 681 put_bh(bh); 682 EXT4_ERROR_INODE(inode, 683 "ppos %d > depth %d", ppos, depth); 684 goto err; 685 } 686 path[ppos].p_bh = bh; 687 path[ppos].p_hdr = eh; 688 i--; 689 690 if (need_to_validate && ext4_ext_check(inode, eh, i)) 691 goto err; 692 } 693 694 path[ppos].p_depth = i; 695 path[ppos].p_ext = NULL; 696 path[ppos].p_idx = NULL; 697 698 /* find extent */ 699 ext4_ext_binsearch(inode, path + ppos, block); 700 /* if not an empty leaf */ 701 if (path[ppos].p_ext) 702 path[ppos].p_block = ext4_ext_pblock(path[ppos].p_ext); 703 704 ext4_ext_show_path(inode, path); 705 706 return path; 707 708 err: 709 ext4_ext_drop_refs(path); 710 if (alloc) 711 kfree(path); 712 return ERR_PTR(-EIO); 713 } 714 715 /* 716 * ext4_ext_insert_index: 717 * insert new index [@logical;@ptr] into the block at @curp; 718 * check where to insert: before @curp or after @curp 719 */ 720 static int ext4_ext_insert_index(handle_t *handle, struct inode *inode, 721 struct ext4_ext_path *curp, 722 int logical, ext4_fsblk_t ptr) 723 { 724 struct ext4_extent_idx *ix; 725 int len, err; 726 727 err = ext4_ext_get_access(handle, inode, curp); 728 if (err) 729 return err; 730 731 if (unlikely(logical == le32_to_cpu(curp->p_idx->ei_block))) { 732 EXT4_ERROR_INODE(inode, 733 "logical %d == ei_block %d!", 734 logical, le32_to_cpu(curp->p_idx->ei_block)); 735 return -EIO; 736 } 737 len = EXT_MAX_INDEX(curp->p_hdr) - curp->p_idx; 738 if (logical > le32_to_cpu(curp->p_idx->ei_block)) { 739 /* insert after */ 740 if (curp->p_idx != EXT_LAST_INDEX(curp->p_hdr)) { 741 len = (len - 1) * sizeof(struct ext4_extent_idx); 742 len = len < 0 ? 0 : len; 743 ext_debug("insert new index %d after: %llu. " 744 "move %d from 0x%p to 0x%p\n", 745 logical, ptr, len, 746 (curp->p_idx + 1), (curp->p_idx + 2)); 747 memmove(curp->p_idx + 2, curp->p_idx + 1, len); 748 } 749 ix = curp->p_idx + 1; 750 } else { 751 /* insert before */ 752 len = len * sizeof(struct ext4_extent_idx); 753 len = len < 0 ? 0 : len; 754 ext_debug("insert new index %d before: %llu. 
" 755 "move %d from 0x%p to 0x%p\n", 756 logical, ptr, len, 757 curp->p_idx, (curp->p_idx + 1)); 758 memmove(curp->p_idx + 1, curp->p_idx, len); 759 ix = curp->p_idx; 760 } 761 762 ix->ei_block = cpu_to_le32(logical); 763 ext4_idx_store_pblock(ix, ptr); 764 le16_add_cpu(&curp->p_hdr->eh_entries, 1); 765 766 if (unlikely(le16_to_cpu(curp->p_hdr->eh_entries) 767 > le16_to_cpu(curp->p_hdr->eh_max))) { 768 EXT4_ERROR_INODE(inode, 769 "logical %d == ei_block %d!", 770 logical, le32_to_cpu(curp->p_idx->ei_block)); 771 return -EIO; 772 } 773 if (unlikely(ix > EXT_LAST_INDEX(curp->p_hdr))) { 774 EXT4_ERROR_INODE(inode, "ix > EXT_LAST_INDEX!"); 775 return -EIO; 776 } 777 778 err = ext4_ext_dirty(handle, inode, curp); 779 ext4_std_error(inode->i_sb, err); 780 781 return err; 782 } 783 784 /* 785 * ext4_ext_split: 786 * inserts new subtree into the path, using free index entry 787 * at depth @at: 788 * - allocates all needed blocks (new leaf and all intermediate index blocks) 789 * - makes decision where to split 790 * - moves remaining extents and index entries (right to the split point) 791 * into the newly allocated blocks 792 * - initializes subtree 793 */ 794 static int ext4_ext_split(handle_t *handle, struct inode *inode, 795 struct ext4_ext_path *path, 796 struct ext4_extent *newext, int at) 797 { 798 struct buffer_head *bh = NULL; 799 int depth = ext_depth(inode); 800 struct ext4_extent_header *neh; 801 struct ext4_extent_idx *fidx; 802 struct ext4_extent *ex; 803 int i = at, k, m, a; 804 ext4_fsblk_t newblock, oldblock; 805 __le32 border; 806 ext4_fsblk_t *ablocks = NULL; /* array of allocated blocks */ 807 int err = 0; 808 809 /* make decision: where to split? */ 810 /* FIXME: now decision is simplest: at current extent */ 811 812 /* if current leaf will be split, then we should use 813 * border from split point */ 814 if (unlikely(path[depth].p_ext > EXT_MAX_EXTENT(path[depth].p_hdr))) { 815 EXT4_ERROR_INODE(inode, "p_ext > EXT_MAX_EXTENT!"); 816 return -EIO; 817 } 818 if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) { 819 border = path[depth].p_ext[1].ee_block; 820 ext_debug("leaf will be split." 821 " next leaf starts at %d\n", 822 le32_to_cpu(border)); 823 } else { 824 border = newext->ee_block; 825 ext_debug("leaf will be added." 826 " next leaf starts at %d\n", 827 le32_to_cpu(border)); 828 } 829 830 /* 831 * If error occurs, then we break processing 832 * and mark filesystem read-only. index won't 833 * be inserted and tree will be in consistent 834 * state. Next mount will repair buffers too. 835 */ 836 837 /* 838 * Get array to track all allocated blocks. 839 * We need this to handle errors and free blocks 840 * upon them. 
841 */ 842 ablocks = kzalloc(sizeof(ext4_fsblk_t) * depth, GFP_NOFS); 843 if (!ablocks) 844 return -ENOMEM; 845 846 /* allocate all needed blocks */ 847 ext_debug("allocate %d blocks for indexes/leaf\n", depth - at); 848 for (a = 0; a < depth - at; a++) { 849 newblock = ext4_ext_new_meta_block(handle, inode, path, 850 newext, &err); 851 if (newblock == 0) 852 goto cleanup; 853 ablocks[a] = newblock; 854 } 855 856 /* initialize new leaf */ 857 newblock = ablocks[--a]; 858 if (unlikely(newblock == 0)) { 859 EXT4_ERROR_INODE(inode, "newblock == 0!"); 860 err = -EIO; 861 goto cleanup; 862 } 863 bh = sb_getblk(inode->i_sb, newblock); 864 if (!bh) { 865 err = -EIO; 866 goto cleanup; 867 } 868 lock_buffer(bh); 869 870 err = ext4_journal_get_create_access(handle, bh); 871 if (err) 872 goto cleanup; 873 874 neh = ext_block_hdr(bh); 875 neh->eh_entries = 0; 876 neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0)); 877 neh->eh_magic = EXT4_EXT_MAGIC; 878 neh->eh_depth = 0; 879 ex = EXT_FIRST_EXTENT(neh); 880 881 /* move remainder of path[depth] to the new leaf */ 882 if (unlikely(path[depth].p_hdr->eh_entries != 883 path[depth].p_hdr->eh_max)) { 884 EXT4_ERROR_INODE(inode, "eh_entries %d != eh_max %d!", 885 path[depth].p_hdr->eh_entries, 886 path[depth].p_hdr->eh_max); 887 err = -EIO; 888 goto cleanup; 889 } 890 /* start copy from next extent */ 891 /* TODO: we could do it by single memmove */ 892 m = 0; 893 path[depth].p_ext++; 894 while (path[depth].p_ext <= 895 EXT_MAX_EXTENT(path[depth].p_hdr)) { 896 ext_debug("move %d:%llu:[%d]%d in new leaf %llu\n", 897 le32_to_cpu(path[depth].p_ext->ee_block), 898 ext4_ext_pblock(path[depth].p_ext), 899 ext4_ext_is_uninitialized(path[depth].p_ext), 900 ext4_ext_get_actual_len(path[depth].p_ext), 901 newblock); 902 /*memmove(ex++, path[depth].p_ext++, 903 sizeof(struct ext4_extent)); 904 neh->eh_entries++;*/ 905 path[depth].p_ext++; 906 m++; 907 } 908 if (m) { 909 memmove(ex, path[depth].p_ext-m, sizeof(struct ext4_extent)*m); 910 le16_add_cpu(&neh->eh_entries, m); 911 } 912 913 set_buffer_uptodate(bh); 914 unlock_buffer(bh); 915 916 err = ext4_handle_dirty_metadata(handle, inode, bh); 917 if (err) 918 goto cleanup; 919 brelse(bh); 920 bh = NULL; 921 922 /* correct old leaf */ 923 if (m) { 924 err = ext4_ext_get_access(handle, inode, path + depth); 925 if (err) 926 goto cleanup; 927 le16_add_cpu(&path[depth].p_hdr->eh_entries, -m); 928 err = ext4_ext_dirty(handle, inode, path + depth); 929 if (err) 930 goto cleanup; 931 932 } 933 934 /* create intermediate indexes */ 935 k = depth - at - 1; 936 if (unlikely(k < 0)) { 937 EXT4_ERROR_INODE(inode, "k %d < 0!", k); 938 err = -EIO; 939 goto cleanup; 940 } 941 if (k) 942 ext_debug("create %d intermediate indices\n", k); 943 /* insert new index into current index block */ 944 /* current depth stored in i var */ 945 i = depth - 1; 946 while (k--) { 947 oldblock = newblock; 948 newblock = ablocks[--a]; 949 bh = sb_getblk(inode->i_sb, newblock); 950 if (!bh) { 951 err = -EIO; 952 goto cleanup; 953 } 954 lock_buffer(bh); 955 956 err = ext4_journal_get_create_access(handle, bh); 957 if (err) 958 goto cleanup; 959 960 neh = ext_block_hdr(bh); 961 neh->eh_entries = cpu_to_le16(1); 962 neh->eh_magic = EXT4_EXT_MAGIC; 963 neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode, 0)); 964 neh->eh_depth = cpu_to_le16(depth - i); 965 fidx = EXT_FIRST_INDEX(neh); 966 fidx->ei_block = border; 967 ext4_idx_store_pblock(fidx, oldblock); 968 969 ext_debug("int.index at %d (block %llu): %u -> %llu\n", 970 i, newblock, 
le32_to_cpu(border), oldblock); 971 /* copy indexes */ 972 m = 0; 973 path[i].p_idx++; 974 975 ext_debug("cur 0x%p, last 0x%p\n", path[i].p_idx, 976 EXT_MAX_INDEX(path[i].p_hdr)); 977 if (unlikely(EXT_MAX_INDEX(path[i].p_hdr) != 978 EXT_LAST_INDEX(path[i].p_hdr))) { 979 EXT4_ERROR_INODE(inode, 980 "EXT_MAX_INDEX != EXT_LAST_INDEX ee_block %d!", 981 le32_to_cpu(path[i].p_ext->ee_block)); 982 err = -EIO; 983 goto cleanup; 984 } 985 while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) { 986 ext_debug("%d: move %d:%llu in new index %llu\n", i, 987 le32_to_cpu(path[i].p_idx->ei_block), 988 ext4_idx_pblock(path[i].p_idx), 989 newblock); 990 /*memmove(++fidx, path[i].p_idx++, 991 sizeof(struct ext4_extent_idx)); 992 neh->eh_entries++; 993 BUG_ON(neh->eh_entries > neh->eh_max);*/ 994 path[i].p_idx++; 995 m++; 996 } 997 if (m) { 998 memmove(++fidx, path[i].p_idx - m, 999 sizeof(struct ext4_extent_idx) * m); 1000 le16_add_cpu(&neh->eh_entries, m); 1001 } 1002 set_buffer_uptodate(bh); 1003 unlock_buffer(bh); 1004 1005 err = ext4_handle_dirty_metadata(handle, inode, bh); 1006 if (err) 1007 goto cleanup; 1008 brelse(bh); 1009 bh = NULL; 1010 1011 /* correct old index */ 1012 if (m) { 1013 err = ext4_ext_get_access(handle, inode, path + i); 1014 if (err) 1015 goto cleanup; 1016 le16_add_cpu(&path[i].p_hdr->eh_entries, -m); 1017 err = ext4_ext_dirty(handle, inode, path + i); 1018 if (err) 1019 goto cleanup; 1020 } 1021 1022 i--; 1023 } 1024 1025 /* insert new index */ 1026 err = ext4_ext_insert_index(handle, inode, path + at, 1027 le32_to_cpu(border), newblock); 1028 1029 cleanup: 1030 if (bh) { 1031 if (buffer_locked(bh)) 1032 unlock_buffer(bh); 1033 brelse(bh); 1034 } 1035 1036 if (err) { 1037 /* free all allocated blocks in error case */ 1038 for (i = 0; i < depth; i++) { 1039 if (!ablocks[i]) 1040 continue; 1041 ext4_free_blocks(handle, inode, NULL, ablocks[i], 1, 1042 EXT4_FREE_BLOCKS_METADATA); 1043 } 1044 } 1045 kfree(ablocks); 1046 1047 return err; 1048 } 1049 1050 /* 1051 * ext4_ext_grow_indepth: 1052 * implements tree growing procedure: 1053 * - allocates new block 1054 * - moves top-level data (index block or leaf) into the new block 1055 * - initializes new top-level, creating index that points to the 1056 * just created block 1057 */ 1058 static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, 1059 struct ext4_ext_path *path, 1060 struct ext4_extent *newext) 1061 { 1062 struct ext4_ext_path *curp = path; 1063 struct ext4_extent_header *neh; 1064 struct buffer_head *bh; 1065 ext4_fsblk_t newblock; 1066 int err = 0; 1067 1068 newblock = ext4_ext_new_meta_block(handle, inode, path, newext, &err); 1069 if (newblock == 0) 1070 return err; 1071 1072 bh = sb_getblk(inode->i_sb, newblock); 1073 if (!bh) { 1074 err = -EIO; 1075 ext4_std_error(inode->i_sb, err); 1076 return err; 1077 } 1078 lock_buffer(bh); 1079 1080 err = ext4_journal_get_create_access(handle, bh); 1081 if (err) { 1082 unlock_buffer(bh); 1083 goto out; 1084 } 1085 1086 /* move top-level index/leaf into new block */ 1087 memmove(bh->b_data, curp->p_hdr, sizeof(EXT4_I(inode)->i_data)); 1088 1089 /* set size of new block */ 1090 neh = ext_block_hdr(bh); 1091 /* old root could have indexes or leaves 1092 * so calculate e_max right way */ 1093 if (ext_depth(inode)) 1094 neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode, 0)); 1095 else 1096 neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0)); 1097 neh->eh_magic = EXT4_EXT_MAGIC; 1098 set_buffer_uptodate(bh); 1099 unlock_buffer(bh); 1100 1101 err = 
ext4_handle_dirty_metadata(handle, inode, bh); 1102 if (err) 1103 goto out; 1104 1105 /* create index in new top-level index: num,max,pointer */ 1106 err = ext4_ext_get_access(handle, inode, curp); 1107 if (err) 1108 goto out; 1109 1110 curp->p_hdr->eh_magic = EXT4_EXT_MAGIC; 1111 curp->p_hdr->eh_max = cpu_to_le16(ext4_ext_space_root_idx(inode, 0)); 1112 curp->p_hdr->eh_entries = cpu_to_le16(1); 1113 curp->p_idx = EXT_FIRST_INDEX(curp->p_hdr); 1114 1115 if (path[0].p_hdr->eh_depth) 1116 curp->p_idx->ei_block = 1117 EXT_FIRST_INDEX(path[0].p_hdr)->ei_block; 1118 else 1119 curp->p_idx->ei_block = 1120 EXT_FIRST_EXTENT(path[0].p_hdr)->ee_block; 1121 ext4_idx_store_pblock(curp->p_idx, newblock); 1122 1123 neh = ext_inode_hdr(inode); 1124 ext_debug("new root: num %d(%d), lblock %d, ptr %llu\n", 1125 le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max), 1126 le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block), 1127 ext4_idx_pblock(EXT_FIRST_INDEX(neh))); 1128 1129 neh->eh_depth = cpu_to_le16(path->p_depth + 1); 1130 err = ext4_ext_dirty(handle, inode, curp); 1131 out: 1132 brelse(bh); 1133 1134 return err; 1135 } 1136 1137 /* 1138 * ext4_ext_create_new_leaf: 1139 * finds empty index and adds new leaf. 1140 * if no free index is found, then it requests in-depth growing. 1141 */ 1142 static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode, 1143 struct ext4_ext_path *path, 1144 struct ext4_extent *newext) 1145 { 1146 struct ext4_ext_path *curp; 1147 int depth, i, err = 0; 1148 1149 repeat: 1150 i = depth = ext_depth(inode); 1151 1152 /* walk up to the tree and look for free index entry */ 1153 curp = path + depth; 1154 while (i > 0 && !EXT_HAS_FREE_INDEX(curp)) { 1155 i--; 1156 curp--; 1157 } 1158 1159 /* we use already allocated block for index block, 1160 * so subsequent data blocks should be contiguous */ 1161 if (EXT_HAS_FREE_INDEX(curp)) { 1162 /* if we found index with free entry, then use that 1163 * entry: create all needed subtree and add new leaf */ 1164 err = ext4_ext_split(handle, inode, path, newext, i); 1165 if (err) 1166 goto out; 1167 1168 /* refill path */ 1169 ext4_ext_drop_refs(path); 1170 path = ext4_ext_find_extent(inode, 1171 (ext4_lblk_t)le32_to_cpu(newext->ee_block), 1172 path); 1173 if (IS_ERR(path)) 1174 err = PTR_ERR(path); 1175 } else { 1176 /* tree is full, time to grow in depth */ 1177 err = ext4_ext_grow_indepth(handle, inode, path, newext); 1178 if (err) 1179 goto out; 1180 1181 /* refill path */ 1182 ext4_ext_drop_refs(path); 1183 path = ext4_ext_find_extent(inode, 1184 (ext4_lblk_t)le32_to_cpu(newext->ee_block), 1185 path); 1186 if (IS_ERR(path)) { 1187 err = PTR_ERR(path); 1188 goto out; 1189 } 1190 1191 /* 1192 * only first (depth 0 -> 1) produces free space; 1193 * in all other cases we have to split the grown tree 1194 */ 1195 depth = ext_depth(inode); 1196 if (path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max) { 1197 /* now we need to split */ 1198 goto repeat; 1199 } 1200 } 1201 1202 out: 1203 return err; 1204 } 1205 1206 /* 1207 * search the closest allocated block to the left for *logical 1208 * and returns it at @logical + it's physical address at @phys 1209 * if *logical is the smallest allocated block, the function 1210 * returns 0 at @phys 1211 * return value contains 0 (success) or error code 1212 */ 1213 static int ext4_ext_search_left(struct inode *inode, 1214 struct ext4_ext_path *path, 1215 ext4_lblk_t *logical, ext4_fsblk_t *phys) 1216 { 1217 struct ext4_extent_idx *ix; 1218 struct ext4_extent *ex; 1219 int depth, ee_len; 1220 
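/*
 * Example with a hypothetical extent, for illustration only: if the
 * leaf holds 100:[init]8:pblk 5000 (logical 100..107 -> physical
 * 5000..5007) and *logical is 108, the closest allocated block to
 * the left is logical 107 at physical 5007, which is what the
 * "+ ee_len - 1" arithmetic at the end of this function returns.
 */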
1221 if (unlikely(path == NULL)) { 1222 EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical); 1223 return -EIO; 1224 } 1225 depth = path->p_depth; 1226 *phys = 0; 1227 1228 if (depth == 0 && path->p_ext == NULL) 1229 return 0; 1230 1231 /* usually extent in the path covers blocks smaller 1232 * then *logical, but it can be that extent is the 1233 * first one in the file */ 1234 1235 ex = path[depth].p_ext; 1236 ee_len = ext4_ext_get_actual_len(ex); 1237 if (*logical < le32_to_cpu(ex->ee_block)) { 1238 if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) { 1239 EXT4_ERROR_INODE(inode, 1240 "EXT_FIRST_EXTENT != ex *logical %d ee_block %d!", 1241 *logical, le32_to_cpu(ex->ee_block)); 1242 return -EIO; 1243 } 1244 while (--depth >= 0) { 1245 ix = path[depth].p_idx; 1246 if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) { 1247 EXT4_ERROR_INODE(inode, 1248 "ix (%d) != EXT_FIRST_INDEX (%d) (depth %d)!", 1249 ix != NULL ? ix->ei_block : 0, 1250 EXT_FIRST_INDEX(path[depth].p_hdr) != NULL ? 1251 EXT_FIRST_INDEX(path[depth].p_hdr)->ei_block : 0, 1252 depth); 1253 return -EIO; 1254 } 1255 } 1256 return 0; 1257 } 1258 1259 if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) { 1260 EXT4_ERROR_INODE(inode, 1261 "logical %d < ee_block %d + ee_len %d!", 1262 *logical, le32_to_cpu(ex->ee_block), ee_len); 1263 return -EIO; 1264 } 1265 1266 *logical = le32_to_cpu(ex->ee_block) + ee_len - 1; 1267 *phys = ext4_ext_pblock(ex) + ee_len - 1; 1268 return 0; 1269 } 1270 1271 /* 1272 * search the closest allocated block to the right for *logical 1273 * and returns it at @logical + it's physical address at @phys 1274 * if *logical is the smallest allocated block, the function 1275 * returns 0 at @phys 1276 * return value contains 0 (success) or error code 1277 */ 1278 static int ext4_ext_search_right(struct inode *inode, 1279 struct ext4_ext_path *path, 1280 ext4_lblk_t *logical, ext4_fsblk_t *phys) 1281 { 1282 struct buffer_head *bh = NULL; 1283 struct ext4_extent_header *eh; 1284 struct ext4_extent_idx *ix; 1285 struct ext4_extent *ex; 1286 ext4_fsblk_t block; 1287 int depth; /* Note, NOT eh_depth; depth from top of tree */ 1288 int ee_len; 1289 1290 if (unlikely(path == NULL)) { 1291 EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical); 1292 return -EIO; 1293 } 1294 depth = path->p_depth; 1295 *phys = 0; 1296 1297 if (depth == 0 && path->p_ext == NULL) 1298 return 0; 1299 1300 /* usually extent in the path covers blocks smaller 1301 * then *logical, but it can be that extent is the 1302 * first one in the file */ 1303 1304 ex = path[depth].p_ext; 1305 ee_len = ext4_ext_get_actual_len(ex); 1306 if (*logical < le32_to_cpu(ex->ee_block)) { 1307 if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) { 1308 EXT4_ERROR_INODE(inode, 1309 "first_extent(path[%d].p_hdr) != ex", 1310 depth); 1311 return -EIO; 1312 } 1313 while (--depth >= 0) { 1314 ix = path[depth].p_idx; 1315 if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) { 1316 EXT4_ERROR_INODE(inode, 1317 "ix != EXT_FIRST_INDEX *logical %d!", 1318 *logical); 1319 return -EIO; 1320 } 1321 } 1322 *logical = le32_to_cpu(ex->ee_block); 1323 *phys = ext4_ext_pblock(ex); 1324 return 0; 1325 } 1326 1327 if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) { 1328 EXT4_ERROR_INODE(inode, 1329 "logical %d < ee_block %d + ee_len %d!", 1330 *logical, le32_to_cpu(ex->ee_block), ee_len); 1331 return -EIO; 1332 } 1333 1334 if (ex != EXT_LAST_EXTENT(path[depth].p_hdr)) { 1335 /* next allocated block in this leaf */ 1336 ex++; 
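/* ex now points at the next extent in this leaf; its first block is
 * the closest allocated block to the right of *logical */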
1337 *logical = le32_to_cpu(ex->ee_block); 1338 *phys = ext4_ext_pblock(ex); 1339 return 0; 1340 } 1341 1342 /* go up and search for index to the right */ 1343 while (--depth >= 0) { 1344 ix = path[depth].p_idx; 1345 if (ix != EXT_LAST_INDEX(path[depth].p_hdr)) 1346 goto got_index; 1347 } 1348 1349 /* we've gone up to the root and found no index to the right */ 1350 return 0; 1351 1352 got_index: 1353 /* we've found index to the right, let's 1354 * follow it and find the closest allocated 1355 * block to the right */ 1356 ix++; 1357 block = ext4_idx_pblock(ix); 1358 while (++depth < path->p_depth) { 1359 bh = sb_bread(inode->i_sb, block); 1360 if (bh == NULL) 1361 return -EIO; 1362 eh = ext_block_hdr(bh); 1363 /* subtract from p_depth to get proper eh_depth */ 1364 if (ext4_ext_check(inode, eh, path->p_depth - depth)) { 1365 put_bh(bh); 1366 return -EIO; 1367 } 1368 ix = EXT_FIRST_INDEX(eh); 1369 block = ext4_idx_pblock(ix); 1370 put_bh(bh); 1371 } 1372 1373 bh = sb_bread(inode->i_sb, block); 1374 if (bh == NULL) 1375 return -EIO; 1376 eh = ext_block_hdr(bh); 1377 if (ext4_ext_check(inode, eh, path->p_depth - depth)) { 1378 put_bh(bh); 1379 return -EIO; 1380 } 1381 ex = EXT_FIRST_EXTENT(eh); 1382 *logical = le32_to_cpu(ex->ee_block); 1383 *phys = ext4_ext_pblock(ex); 1384 put_bh(bh); 1385 return 0; 1386 } 1387 1388 /* 1389 * ext4_ext_next_allocated_block: 1390 * returns allocated block in subsequent extent or EXT_MAX_BLOCK. 1391 * NOTE: it considers block number from index entry as 1392 * allocated block. Thus, index entries have to be consistent 1393 * with leaves. 1394 */ 1395 static ext4_lblk_t 1396 ext4_ext_next_allocated_block(struct ext4_ext_path *path) 1397 { 1398 int depth; 1399 1400 BUG_ON(path == NULL); 1401 depth = path->p_depth; 1402 1403 if (depth == 0 && path->p_ext == NULL) 1404 return EXT_MAX_BLOCK; 1405 1406 while (depth >= 0) { 1407 if (depth == path->p_depth) { 1408 /* leaf */ 1409 if (path[depth].p_ext != 1410 EXT_LAST_EXTENT(path[depth].p_hdr)) 1411 return le32_to_cpu(path[depth].p_ext[1].ee_block); 1412 } else { 1413 /* index */ 1414 if (path[depth].p_idx != 1415 EXT_LAST_INDEX(path[depth].p_hdr)) 1416 return le32_to_cpu(path[depth].p_idx[1].ei_block); 1417 } 1418 depth--; 1419 } 1420 1421 return EXT_MAX_BLOCK; 1422 } 1423 1424 /* 1425 * ext4_ext_next_leaf_block: 1426 * returns first allocated block from next leaf or EXT_MAX_BLOCK 1427 */ 1428 static ext4_lblk_t ext4_ext_next_leaf_block(struct inode *inode, 1429 struct ext4_ext_path *path) 1430 { 1431 int depth; 1432 1433 BUG_ON(path == NULL); 1434 depth = path->p_depth; 1435 1436 /* zero-tree has no leaf blocks at all */ 1437 if (depth == 0) 1438 return EXT_MAX_BLOCK; 1439 1440 /* go to index block */ 1441 depth--; 1442 1443 while (depth >= 0) { 1444 if (path[depth].p_idx != 1445 EXT_LAST_INDEX(path[depth].p_hdr)) 1446 return (ext4_lblk_t) 1447 le32_to_cpu(path[depth].p_idx[1].ei_block); 1448 depth--; 1449 } 1450 1451 return EXT_MAX_BLOCK; 1452 } 1453 1454 /* 1455 * ext4_ext_correct_indexes: 1456 * if leaf gets modified and modified extent is first in the leaf, 1457 * then we have to correct all indexes above. 1458 * TODO: do we need to correct tree in all cases? 
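 *
 * Illustration (made-up numbers): if a leaf that used to begin with
 * extent 200:... begins with extent 150:... after an insert, the
 * parent index entry for this leaf still says 200 and is rewritten
 * to 150; the loop below keeps walking up only while the corrected
 * entry is the first one in its own index block.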
1459 */ 1460 static int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode, 1461 struct ext4_ext_path *path) 1462 { 1463 struct ext4_extent_header *eh; 1464 int depth = ext_depth(inode); 1465 struct ext4_extent *ex; 1466 __le32 border; 1467 int k, err = 0; 1468 1469 eh = path[depth].p_hdr; 1470 ex = path[depth].p_ext; 1471 1472 if (unlikely(ex == NULL || eh == NULL)) { 1473 EXT4_ERROR_INODE(inode, 1474 "ex %p == NULL or eh %p == NULL", ex, eh); 1475 return -EIO; 1476 } 1477 1478 if (depth == 0) { 1479 /* there is no tree at all */ 1480 return 0; 1481 } 1482 1483 if (ex != EXT_FIRST_EXTENT(eh)) { 1484 /* we correct tree if first leaf got modified only */ 1485 return 0; 1486 } 1487 1488 /* 1489 * TODO: we need correction if border is smaller than current one 1490 */ 1491 k = depth - 1; 1492 border = path[depth].p_ext->ee_block; 1493 err = ext4_ext_get_access(handle, inode, path + k); 1494 if (err) 1495 return err; 1496 path[k].p_idx->ei_block = border; 1497 err = ext4_ext_dirty(handle, inode, path + k); 1498 if (err) 1499 return err; 1500 1501 while (k--) { 1502 /* change all left-side indexes */ 1503 if (path[k+1].p_idx != EXT_FIRST_INDEX(path[k+1].p_hdr)) 1504 break; 1505 err = ext4_ext_get_access(handle, inode, path + k); 1506 if (err) 1507 break; 1508 path[k].p_idx->ei_block = border; 1509 err = ext4_ext_dirty(handle, inode, path + k); 1510 if (err) 1511 break; 1512 } 1513 1514 return err; 1515 } 1516 1517 int 1518 ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, 1519 struct ext4_extent *ex2) 1520 { 1521 unsigned short ext1_ee_len, ext2_ee_len, max_len; 1522 1523 /* 1524 * Make sure that either both extents are uninitialized, or 1525 * both are _not_. 1526 */ 1527 if (ext4_ext_is_uninitialized(ex1) ^ ext4_ext_is_uninitialized(ex2)) 1528 return 0; 1529 1530 if (ext4_ext_is_uninitialized(ex1)) 1531 max_len = EXT_UNINIT_MAX_LEN; 1532 else 1533 max_len = EXT_INIT_MAX_LEN; 1534 1535 ext1_ee_len = ext4_ext_get_actual_len(ex1); 1536 ext2_ee_len = ext4_ext_get_actual_len(ex2); 1537 1538 if (le32_to_cpu(ex1->ee_block) + ext1_ee_len != 1539 le32_to_cpu(ex2->ee_block)) 1540 return 0; 1541 1542 /* 1543 * To allow future support for preallocated extents to be added 1544 * as an RO_COMPAT feature, refuse to merge to extents if 1545 * this can result in the top bit of ee_len being set. 1546 */ 1547 if (ext1_ee_len + ext2_ee_len > max_len) 1548 return 0; 1549 #ifdef AGGRESSIVE_TEST 1550 if (ext1_ee_len >= 4) 1551 return 0; 1552 #endif 1553 1554 if (ext4_ext_pblock(ex1) + ext1_ee_len == ext4_ext_pblock(ex2)) 1555 return 1; 1556 return 0; 1557 } 1558 1559 /* 1560 * This function tries to merge the "ex" extent to the next extent in the tree. 1561 * It always tries to merge towards right. If you want to merge towards 1562 * left, pass "ex - 1" as argument instead of "ex". 1563 * Returns 0 if the extents (ex and ex+1) were _not_ merged and returns 1564 * 1 if they got merged. 1565 */ 1566 static int ext4_ext_try_to_merge_right(struct inode *inode, 1567 struct ext4_ext_path *path, 1568 struct ext4_extent *ex) 1569 { 1570 struct ext4_extent_header *eh; 1571 unsigned int depth, len; 1572 int merge_done = 0; 1573 int uninitialized = 0; 1574 1575 depth = ext_depth(inode); 1576 BUG_ON(path[depth].p_hdr == NULL); 1577 eh = path[depth].p_hdr; 1578 1579 while (ex < EXT_LAST_EXTENT(eh)) { 1580 if (!ext4_can_extents_be_merged(inode, ex, ex + 1)) 1581 break; 1582 /* merge with next extent! 
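		   e.g. (made-up values) 10:[init]4:pblk 100 followed by
		   14:[init]6:pblk 104 passes ext4_can_extents_be_merged():
		   logically and physically contiguous, both initialized,
		   combined length 10 <= EXT_INIT_MAX_LEN, so ex absorbs
		   ex + 1 below and eh_entries drops by one.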
*/ 1583 if (ext4_ext_is_uninitialized(ex)) 1584 uninitialized = 1; 1585 ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex) 1586 + ext4_ext_get_actual_len(ex + 1)); 1587 if (uninitialized) 1588 ext4_ext_mark_uninitialized(ex); 1589 1590 if (ex + 1 < EXT_LAST_EXTENT(eh)) { 1591 len = (EXT_LAST_EXTENT(eh) - ex - 1) 1592 * sizeof(struct ext4_extent); 1593 memmove(ex + 1, ex + 2, len); 1594 } 1595 le16_add_cpu(&eh->eh_entries, -1); 1596 merge_done = 1; 1597 WARN_ON(eh->eh_entries == 0); 1598 if (!eh->eh_entries) 1599 EXT4_ERROR_INODE(inode, "eh->eh_entries = 0!"); 1600 } 1601 1602 return merge_done; 1603 } 1604 1605 /* 1606 * This function tries to merge the @ex extent to neighbours in the tree. 1607 * return 1 if merge left else 0. 1608 */ 1609 static int ext4_ext_try_to_merge(struct inode *inode, 1610 struct ext4_ext_path *path, 1611 struct ext4_extent *ex) { 1612 struct ext4_extent_header *eh; 1613 unsigned int depth; 1614 int merge_done = 0; 1615 int ret = 0; 1616 1617 depth = ext_depth(inode); 1618 BUG_ON(path[depth].p_hdr == NULL); 1619 eh = path[depth].p_hdr; 1620 1621 if (ex > EXT_FIRST_EXTENT(eh)) 1622 merge_done = ext4_ext_try_to_merge_right(inode, path, ex - 1); 1623 1624 if (!merge_done) 1625 ret = ext4_ext_try_to_merge_right(inode, path, ex); 1626 1627 return ret; 1628 } 1629 1630 /* 1631 * check if a portion of the "newext" extent overlaps with an 1632 * existing extent. 1633 * 1634 * If there is an overlap discovered, it updates the length of the newext 1635 * such that there will be no overlap, and then returns 1. 1636 * If there is no overlap found, it returns 0. 1637 */ 1638 static unsigned int ext4_ext_check_overlap(struct inode *inode, 1639 struct ext4_extent *newext, 1640 struct ext4_ext_path *path) 1641 { 1642 ext4_lblk_t b1, b2; 1643 unsigned int depth, len1; 1644 unsigned int ret = 0; 1645 1646 b1 = le32_to_cpu(newext->ee_block); 1647 len1 = ext4_ext_get_actual_len(newext); 1648 depth = ext_depth(inode); 1649 if (!path[depth].p_ext) 1650 goto out; 1651 b2 = le32_to_cpu(path[depth].p_ext->ee_block); 1652 1653 /* 1654 * get the next allocated block if the extent in the path 1655 * is before the requested block(s) 1656 */ 1657 if (b2 < b1) { 1658 b2 = ext4_ext_next_allocated_block(path); 1659 if (b2 == EXT_MAX_BLOCK) 1660 goto out; 1661 } 1662 1663 /* check for wrap through zero on extent logical start block*/ 1664 if (b1 + len1 < b1) { 1665 len1 = EXT_MAX_BLOCK - b1; 1666 newext->ee_len = cpu_to_le16(len1); 1667 ret = 1; 1668 } 1669 1670 /* check for overlap */ 1671 if (b1 + len1 > b2) { 1672 newext->ee_len = cpu_to_le16(b2 - b1); 1673 ret = 1; 1674 } 1675 out: 1676 return ret; 1677 } 1678 1679 /* 1680 * ext4_ext_insert_extent: 1681 * tries to merge requsted extent into the existing extent or 1682 * inserts requested extent as new one into the tree, 1683 * creating new leaf in the no-space case. 
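 *
 * In outline (a reader's summary of the code below):
 *   1) if the extent found in the path can absorb newext
 *      (ext4_can_extents_be_merged() and no EXT4_GET_BLOCKS_PRE_IO),
 *      just enlarge its ee_len in place;
 *   2) otherwise find a leaf with a free slot (this leaf or the next
 *      one) and memmove() the neighbours apart to open a gap;
 *   3) otherwise ext4_ext_create_new_leaf() splits or grows the tree
 *      before the insert.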
1684 */ 1685 int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, 1686 struct ext4_ext_path *path, 1687 struct ext4_extent *newext, int flag) 1688 { 1689 struct ext4_extent_header *eh; 1690 struct ext4_extent *ex, *fex; 1691 struct ext4_extent *nearex; /* nearest extent */ 1692 struct ext4_ext_path *npath = NULL; 1693 int depth, len, err; 1694 ext4_lblk_t next; 1695 unsigned uninitialized = 0; 1696 1697 if (unlikely(ext4_ext_get_actual_len(newext) == 0)) { 1698 EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0"); 1699 return -EIO; 1700 } 1701 depth = ext_depth(inode); 1702 ex = path[depth].p_ext; 1703 if (unlikely(path[depth].p_hdr == NULL)) { 1704 EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth); 1705 return -EIO; 1706 } 1707 1708 /* try to insert block into found extent and return */ 1709 if (ex && !(flag & EXT4_GET_BLOCKS_PRE_IO) 1710 && ext4_can_extents_be_merged(inode, ex, newext)) { 1711 ext_debug("append [%d]%d block to %d:[%d]%d (from %llu)\n", 1712 ext4_ext_is_uninitialized(newext), 1713 ext4_ext_get_actual_len(newext), 1714 le32_to_cpu(ex->ee_block), 1715 ext4_ext_is_uninitialized(ex), 1716 ext4_ext_get_actual_len(ex), 1717 ext4_ext_pblock(ex)); 1718 err = ext4_ext_get_access(handle, inode, path + depth); 1719 if (err) 1720 return err; 1721 1722 /* 1723 * ext4_can_extents_be_merged should have checked that either 1724 * both extents are uninitialized, or both aren't. Thus we 1725 * need to check only one of them here. 1726 */ 1727 if (ext4_ext_is_uninitialized(ex)) 1728 uninitialized = 1; 1729 ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex) 1730 + ext4_ext_get_actual_len(newext)); 1731 if (uninitialized) 1732 ext4_ext_mark_uninitialized(ex); 1733 eh = path[depth].p_hdr; 1734 nearex = ex; 1735 goto merge; 1736 } 1737 1738 repeat: 1739 depth = ext_depth(inode); 1740 eh = path[depth].p_hdr; 1741 if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max)) 1742 goto has_space; 1743 1744 /* probably next leaf has space for us? */ 1745 fex = EXT_LAST_EXTENT(eh); 1746 next = ext4_ext_next_leaf_block(inode, path); 1747 if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block) 1748 && next != EXT_MAX_BLOCK) { 1749 ext_debug("next leaf block - %d\n", next); 1750 BUG_ON(npath != NULL); 1751 npath = ext4_ext_find_extent(inode, next, NULL); 1752 if (IS_ERR(npath)) 1753 return PTR_ERR(npath); 1754 BUG_ON(npath->p_depth != path->p_depth); 1755 eh = npath[depth].p_hdr; 1756 if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max)) { 1757 ext_debug("next leaf isn't full(%d)\n", 1758 le16_to_cpu(eh->eh_entries)); 1759 path = npath; 1760 goto repeat; 1761 } 1762 ext_debug("next leaf has no free space(%d,%d)\n", 1763 le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max)); 1764 } 1765 1766 /* 1767 * There is no free space in the found leaf. 1768 * We're gonna add a new leaf in the tree. 
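 * (ext4_ext_create_new_leaf() walks up the path looking for an index
 * block with a free slot and splits there; if every level is full it
 * grows the tree one level deeper first and, if the leaf is still
 * full afterwards, repeats with a split.)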
1769 */ 1770 err = ext4_ext_create_new_leaf(handle, inode, path, newext); 1771 if (err) 1772 goto cleanup; 1773 depth = ext_depth(inode); 1774 eh = path[depth].p_hdr; 1775 1776 has_space: 1777 nearex = path[depth].p_ext; 1778 1779 err = ext4_ext_get_access(handle, inode, path + depth); 1780 if (err) 1781 goto cleanup; 1782 1783 if (!nearex) { 1784 /* there is no extent in this leaf, create first one */ 1785 ext_debug("first extent in the leaf: %d:%llu:[%d]%d\n", 1786 le32_to_cpu(newext->ee_block), 1787 ext4_ext_pblock(newext), 1788 ext4_ext_is_uninitialized(newext), 1789 ext4_ext_get_actual_len(newext)); 1790 path[depth].p_ext = EXT_FIRST_EXTENT(eh); 1791 } else if (le32_to_cpu(newext->ee_block) 1792 > le32_to_cpu(nearex->ee_block)) { 1793 /* BUG_ON(newext->ee_block == nearex->ee_block); */ 1794 if (nearex != EXT_LAST_EXTENT(eh)) { 1795 len = EXT_MAX_EXTENT(eh) - nearex; 1796 len = (len - 1) * sizeof(struct ext4_extent); 1797 len = len < 0 ? 0 : len; 1798 ext_debug("insert %d:%llu:[%d]%d after: nearest 0x%p, " 1799 "move %d from 0x%p to 0x%p\n", 1800 le32_to_cpu(newext->ee_block), 1801 ext4_ext_pblock(newext), 1802 ext4_ext_is_uninitialized(newext), 1803 ext4_ext_get_actual_len(newext), 1804 nearex, len, nearex + 1, nearex + 2); 1805 memmove(nearex + 2, nearex + 1, len); 1806 } 1807 path[depth].p_ext = nearex + 1; 1808 } else { 1809 BUG_ON(newext->ee_block == nearex->ee_block); 1810 len = (EXT_MAX_EXTENT(eh) - nearex) * sizeof(struct ext4_extent); 1811 len = len < 0 ? 0 : len; 1812 ext_debug("insert %d:%llu:[%d]%d before: nearest 0x%p, " 1813 "move %d from 0x%p to 0x%p\n", 1814 le32_to_cpu(newext->ee_block), 1815 ext4_ext_pblock(newext), 1816 ext4_ext_is_uninitialized(newext), 1817 ext4_ext_get_actual_len(newext), 1818 nearex, len, nearex + 1, nearex + 2); 1819 memmove(nearex + 1, nearex, len); 1820 path[depth].p_ext = nearex; 1821 } 1822 1823 le16_add_cpu(&eh->eh_entries, 1); 1824 nearex = path[depth].p_ext; 1825 nearex->ee_block = newext->ee_block; 1826 ext4_ext_store_pblock(nearex, ext4_ext_pblock(newext)); 1827 nearex->ee_len = newext->ee_len; 1828 1829 merge: 1830 /* try to merge extents to the right */ 1831 if (!(flag & EXT4_GET_BLOCKS_PRE_IO)) 1832 ext4_ext_try_to_merge(inode, path, nearex); 1833 1834 /* try to merge extents to the left */ 1835 1836 /* time to correct all indexes above */ 1837 err = ext4_ext_correct_indexes(handle, inode, path); 1838 if (err) 1839 goto cleanup; 1840 1841 err = ext4_ext_dirty(handle, inode, path + depth); 1842 1843 cleanup: 1844 if (npath) { 1845 ext4_ext_drop_refs(npath); 1846 kfree(npath); 1847 } 1848 ext4_ext_invalidate_cache(inode); 1849 return err; 1850 } 1851 1852 static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block, 1853 ext4_lblk_t num, ext_prepare_callback func, 1854 void *cbdata) 1855 { 1856 struct ext4_ext_path *path = NULL; 1857 struct ext4_ext_cache cbex; 1858 struct ext4_extent *ex; 1859 ext4_lblk_t next, start = 0, end = 0; 1860 ext4_lblk_t last = block + num; 1861 int depth, exists, err = 0; 1862 1863 BUG_ON(func == NULL); 1864 BUG_ON(inode == NULL); 1865 1866 while (block < last && block != EXT_MAX_BLOCK) { 1867 num = last - block; 1868 /* find extent for this block */ 1869 down_read(&EXT4_I(inode)->i_data_sem); 1870 path = ext4_ext_find_extent(inode, block, path); 1871 up_read(&EXT4_I(inode)->i_data_sem); 1872 if (IS_ERR(path)) { 1873 err = PTR_ERR(path); 1874 path = NULL; 1875 break; 1876 } 1877 1878 depth = ext_depth(inode); 1879 if (unlikely(path[depth].p_hdr == NULL)) { 1880 EXT4_ERROR_INODE(inode, 
"path[%d].p_hdr == NULL", depth); 1881 err = -EIO; 1882 break; 1883 } 1884 ex = path[depth].p_ext; 1885 next = ext4_ext_next_allocated_block(path); 1886 1887 exists = 0; 1888 if (!ex) { 1889 /* there is no extent yet, so try to allocate 1890 * all requested space */ 1891 start = block; 1892 end = block + num; 1893 } else if (le32_to_cpu(ex->ee_block) > block) { 1894 /* need to allocate space before found extent */ 1895 start = block; 1896 end = le32_to_cpu(ex->ee_block); 1897 if (block + num < end) 1898 end = block + num; 1899 } else if (block >= le32_to_cpu(ex->ee_block) 1900 + ext4_ext_get_actual_len(ex)) { 1901 /* need to allocate space after found extent */ 1902 start = block; 1903 end = block + num; 1904 if (end >= next) 1905 end = next; 1906 } else if (block >= le32_to_cpu(ex->ee_block)) { 1907 /* 1908 * some part of requested space is covered 1909 * by found extent 1910 */ 1911 start = block; 1912 end = le32_to_cpu(ex->ee_block) 1913 + ext4_ext_get_actual_len(ex); 1914 if (block + num < end) 1915 end = block + num; 1916 exists = 1; 1917 } else { 1918 BUG(); 1919 } 1920 BUG_ON(end <= start); 1921 1922 if (!exists) { 1923 cbex.ec_block = start; 1924 cbex.ec_len = end - start; 1925 cbex.ec_start = 0; 1926 } else { 1927 cbex.ec_block = le32_to_cpu(ex->ee_block); 1928 cbex.ec_len = ext4_ext_get_actual_len(ex); 1929 cbex.ec_start = ext4_ext_pblock(ex); 1930 } 1931 1932 if (unlikely(cbex.ec_len == 0)) { 1933 EXT4_ERROR_INODE(inode, "cbex.ec_len == 0"); 1934 err = -EIO; 1935 break; 1936 } 1937 err = func(inode, path, &cbex, ex, cbdata); 1938 ext4_ext_drop_refs(path); 1939 1940 if (err < 0) 1941 break; 1942 1943 if (err == EXT_REPEAT) 1944 continue; 1945 else if (err == EXT_BREAK) { 1946 err = 0; 1947 break; 1948 } 1949 1950 if (ext_depth(inode) != depth) { 1951 /* depth was changed. 
we have to realloc path */ 1952 kfree(path); 1953 path = NULL; 1954 } 1955 1956 block = cbex.ec_block + cbex.ec_len; 1957 } 1958 1959 if (path) { 1960 ext4_ext_drop_refs(path); 1961 kfree(path); 1962 } 1963 1964 return err; 1965 } 1966 1967 static void 1968 ext4_ext_put_in_cache(struct inode *inode, ext4_lblk_t block, 1969 __u32 len, ext4_fsblk_t start) 1970 { 1971 struct ext4_ext_cache *cex; 1972 BUG_ON(len == 0); 1973 spin_lock(&EXT4_I(inode)->i_block_reservation_lock); 1974 cex = &EXT4_I(inode)->i_cached_extent; 1975 cex->ec_block = block; 1976 cex->ec_len = len; 1977 cex->ec_start = start; 1978 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1979 } 1980 1981 /* 1982 * ext4_ext_put_gap_in_cache: 1983 * calculate boundaries of the gap that the requested block fits into 1984 * and cache this gap 1985 */ 1986 static void 1987 ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path, 1988 ext4_lblk_t block) 1989 { 1990 int depth = ext_depth(inode); 1991 unsigned long len; 1992 ext4_lblk_t lblock; 1993 struct ext4_extent *ex; 1994 1995 ex = path[depth].p_ext; 1996 if (ex == NULL) { 1997 /* there is no extent yet, so gap is [0;-] */ 1998 lblock = 0; 1999 len = EXT_MAX_BLOCK; 2000 ext_debug("cache gap(whole file):"); 2001 } else if (block < le32_to_cpu(ex->ee_block)) { 2002 lblock = block; 2003 len = le32_to_cpu(ex->ee_block) - block; 2004 ext_debug("cache gap(before): %u [%u:%u]", 2005 block, 2006 le32_to_cpu(ex->ee_block), 2007 ext4_ext_get_actual_len(ex)); 2008 } else if (block >= le32_to_cpu(ex->ee_block) 2009 + ext4_ext_get_actual_len(ex)) { 2010 ext4_lblk_t next; 2011 lblock = le32_to_cpu(ex->ee_block) 2012 + ext4_ext_get_actual_len(ex); 2013 2014 next = ext4_ext_next_allocated_block(path); 2015 ext_debug("cache gap(after): [%u:%u] %u", 2016 le32_to_cpu(ex->ee_block), 2017 ext4_ext_get_actual_len(ex), 2018 block); 2019 BUG_ON(next == lblock); 2020 len = next - lblock; 2021 } else { 2022 lblock = len = 0; 2023 BUG(); 2024 } 2025 2026 ext_debug(" -> %u:%lu\n", lblock, len); 2027 ext4_ext_put_in_cache(inode, lblock, len, 0); 2028 } 2029 2030 /* 2031 * Return 0 if cache is invalid; 1 if the cache is valid 2032 */ 2033 static int 2034 ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block, 2035 struct ext4_extent *ex) 2036 { 2037 struct ext4_ext_cache *cex; 2038 int ret = 0; 2039 2040 /* 2041 * We borrow i_block_reservation_lock to protect i_cached_extent 2042 */ 2043 spin_lock(&EXT4_I(inode)->i_block_reservation_lock); 2044 cex = &EXT4_I(inode)->i_cached_extent; 2045 2046 /* has cache valid data? */ 2047 if (cex->ec_len == 0) 2048 goto errout; 2049 2050 if (in_range(block, cex->ec_block, cex->ec_len)) { 2051 ex->ee_block = cpu_to_le32(cex->ec_block); 2052 ext4_ext_store_pblock(ex, cex->ec_start); 2053 ex->ee_len = cpu_to_le16(cex->ec_len); 2054 ext_debug("%u cached by %u:%u:%llu\n", 2055 block, 2056 cex->ec_block, cex->ec_len, cex->ec_start); 2057 ret = 1; 2058 } 2059 errout: 2060 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 2061 return ret; 2062 } 2063 2064 /* 2065 * ext4_ext_rm_idx: 2066 * removes index from the index block. 2067 * It's used in truncate case only, thus all requests are for 2068 * last index in the block only. 
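 *
 * Roughly: the caller hands in the path element for a block that has
 * become empty; the path-- below steps to its parent, whose p_idx
 * still points at the empty block, so that index entry is dropped
 * (eh_entries is decremented) and the block itself is released with
 * ext4_free_blocks().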
2069 */
2070 static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
2071 struct ext4_ext_path *path)
2072 {
2073 int err;
2074 ext4_fsblk_t leaf;
2075
2076 /* free index block */
2077 path--;
2078 leaf = ext4_idx_pblock(path->p_idx);
2079 if (unlikely(path->p_hdr->eh_entries == 0)) {
2080 EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0");
2081 return -EIO;
2082 }
2083 err = ext4_ext_get_access(handle, inode, path);
2084 if (err)
2085 return err;
2086 le16_add_cpu(&path->p_hdr->eh_entries, -1);
2087 err = ext4_ext_dirty(handle, inode, path);
2088 if (err)
2089 return err;
2090 ext_debug("index is empty, remove it, free block %llu\n", leaf);
2091 ext4_free_blocks(handle, inode, NULL, leaf, 1,
2092 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
2093 return err;
2094 }
2095
2096 /*
2097 * ext4_ext_calc_credits_for_single_extent:
2098 * This routine returns the maximum number of credits needed to insert
2099 * an extent into the extent tree.
2100 * When the actual path is passed, the caller should calculate the
2101 * credits under i_data_sem.
2102 */
2103 int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int nrblocks,
2104 struct ext4_ext_path *path)
2105 {
2106 if (path) {
2107 int depth = ext_depth(inode);
2108 int ret = 0;
2109
2110 /* probably there is space in the leaf? */
2111 if (le16_to_cpu(path[depth].p_hdr->eh_entries)
2112 < le16_to_cpu(path[depth].p_hdr->eh_max)) {
2113
2114 /*
2115 * There is some space in the leaf, so there is no
2116 * need to account for a leaf block credit.
2117 *
2118 * Bitmaps, block group descriptor blocks
2119 * and other metadata blocks still need to be
2120 * accounted for.
2121 */
2122 /* 1 bitmap, 1 block group descriptor */
2123 ret = 2 + EXT4_META_TRANS_BLOCKS(inode->i_sb);
2124 return ret;
2125 }
2126 }
2127
2128 return ext4_chunk_trans_blocks(inode, nrblocks);
2129 }
2130
2131 /*
2132 * How many index/leaf blocks need to be changed/allocated to modify nrblocks?
2133 *
2134 * If nrblocks fit in a single extent (chunk flag is 1), then in the
2135 * worst case each tree level's index/leaf needs to be changed;
2136 * if the tree splits due to inserting a new extent, then the old
2137 * index/leaf blocks need to be updated too.
2138 *
2139 * If the nrblocks are discontiguous, they could cause
2140 * the whole tree to split more than once, but this is really rare.
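 *
 * As a rough illustration of the accounting below (not an exact figure):
 * with a tree of depth 2, a modification that fits in a single extent
 * (chunk == 1) is charged depth * 2 == 4 index/leaf blocks, while a
 * discontiguous modification (chunk == 0) is charged depth * 3 == 6.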
2141 */ 2142 int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) 2143 { 2144 int index; 2145 int depth = ext_depth(inode); 2146 2147 if (chunk) 2148 index = depth * 2; 2149 else 2150 index = depth * 3; 2151 2152 return index; 2153 } 2154 2155 static int ext4_remove_blocks(handle_t *handle, struct inode *inode, 2156 struct ext4_extent *ex, 2157 ext4_lblk_t from, ext4_lblk_t to) 2158 { 2159 unsigned short ee_len = ext4_ext_get_actual_len(ex); 2160 int flags = EXT4_FREE_BLOCKS_FORGET; 2161 2162 if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) 2163 flags |= EXT4_FREE_BLOCKS_METADATA; 2164 #ifdef EXTENTS_STATS 2165 { 2166 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 2167 spin_lock(&sbi->s_ext_stats_lock); 2168 sbi->s_ext_blocks += ee_len; 2169 sbi->s_ext_extents++; 2170 if (ee_len < sbi->s_ext_min) 2171 sbi->s_ext_min = ee_len; 2172 if (ee_len > sbi->s_ext_max) 2173 sbi->s_ext_max = ee_len; 2174 if (ext_depth(inode) > sbi->s_depth_max) 2175 sbi->s_depth_max = ext_depth(inode); 2176 spin_unlock(&sbi->s_ext_stats_lock); 2177 } 2178 #endif 2179 if (from >= le32_to_cpu(ex->ee_block) 2180 && to == le32_to_cpu(ex->ee_block) + ee_len - 1) { 2181 /* tail removal */ 2182 ext4_lblk_t num; 2183 ext4_fsblk_t start; 2184 2185 num = le32_to_cpu(ex->ee_block) + ee_len - from; 2186 start = ext4_ext_pblock(ex) + ee_len - num; 2187 ext_debug("free last %u blocks starting %llu\n", num, start); 2188 ext4_free_blocks(handle, inode, NULL, start, num, flags); 2189 } else if (from == le32_to_cpu(ex->ee_block) 2190 && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) { 2191 printk(KERN_INFO "strange request: removal %u-%u from %u:%u\n", 2192 from, to, le32_to_cpu(ex->ee_block), ee_len); 2193 } else { 2194 printk(KERN_INFO "strange request: removal(2) " 2195 "%u-%u from %u:%u\n", 2196 from, to, le32_to_cpu(ex->ee_block), ee_len); 2197 } 2198 return 0; 2199 } 2200 2201 static int 2202 ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, 2203 struct ext4_ext_path *path, ext4_lblk_t start) 2204 { 2205 int err = 0, correct_index = 0; 2206 int depth = ext_depth(inode), credits; 2207 struct ext4_extent_header *eh; 2208 ext4_lblk_t a, b, block; 2209 unsigned num; 2210 ext4_lblk_t ex_ee_block; 2211 unsigned short ex_ee_len; 2212 unsigned uninitialized = 0; 2213 struct ext4_extent *ex; 2214 2215 /* the header must be checked already in ext4_ext_remove_space() */ 2216 ext_debug("truncate since %u in leaf\n", start); 2217 if (!path[depth].p_hdr) 2218 path[depth].p_hdr = ext_block_hdr(path[depth].p_bh); 2219 eh = path[depth].p_hdr; 2220 if (unlikely(path[depth].p_hdr == NULL)) { 2221 EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth); 2222 return -EIO; 2223 } 2224 /* find where to start removing */ 2225 ex = EXT_LAST_EXTENT(eh); 2226 2227 ex_ee_block = le32_to_cpu(ex->ee_block); 2228 ex_ee_len = ext4_ext_get_actual_len(ex); 2229 2230 while (ex >= EXT_FIRST_EXTENT(eh) && 2231 ex_ee_block + ex_ee_len > start) { 2232 2233 if (ext4_ext_is_uninitialized(ex)) 2234 uninitialized = 1; 2235 else 2236 uninitialized = 0; 2237 2238 ext_debug("remove ext %u:[%d]%d\n", ex_ee_block, 2239 uninitialized, ex_ee_len); 2240 path[depth].p_ext = ex; 2241 2242 a = ex_ee_block > start ? ex_ee_block : start; 2243 b = ex_ee_block + ex_ee_len - 1 < EXT_MAX_BLOCK ? 
2244 ex_ee_block + ex_ee_len - 1 : EXT_MAX_BLOCK; 2245 2246 ext_debug(" border %u:%u\n", a, b); 2247 2248 if (a != ex_ee_block && b != ex_ee_block + ex_ee_len - 1) { 2249 block = 0; 2250 num = 0; 2251 BUG(); 2252 } else if (a != ex_ee_block) { 2253 /* remove tail of the extent */ 2254 block = ex_ee_block; 2255 num = a - block; 2256 } else if (b != ex_ee_block + ex_ee_len - 1) { 2257 /* remove head of the extent */ 2258 block = a; 2259 num = b - a; 2260 /* there is no "make a hole" API yet */ 2261 BUG(); 2262 } else { 2263 /* remove whole extent: excellent! */ 2264 block = ex_ee_block; 2265 num = 0; 2266 BUG_ON(a != ex_ee_block); 2267 BUG_ON(b != ex_ee_block + ex_ee_len - 1); 2268 } 2269 2270 /* 2271 * 3 for leaf, sb, and inode plus 2 (bmap and group 2272 * descriptor) for each block group; assume two block 2273 * groups plus ex_ee_len/blocks_per_block_group for 2274 * the worst case 2275 */ 2276 credits = 7 + 2*(ex_ee_len/EXT4_BLOCKS_PER_GROUP(inode->i_sb)); 2277 if (ex == EXT_FIRST_EXTENT(eh)) { 2278 correct_index = 1; 2279 credits += (ext_depth(inode)) + 1; 2280 } 2281 credits += EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb); 2282 2283 err = ext4_ext_truncate_extend_restart(handle, inode, credits); 2284 if (err) 2285 goto out; 2286 2287 err = ext4_ext_get_access(handle, inode, path + depth); 2288 if (err) 2289 goto out; 2290 2291 err = ext4_remove_blocks(handle, inode, ex, a, b); 2292 if (err) 2293 goto out; 2294 2295 if (num == 0) { 2296 /* this extent is removed; mark slot entirely unused */ 2297 ext4_ext_store_pblock(ex, 0); 2298 le16_add_cpu(&eh->eh_entries, -1); 2299 } 2300 2301 ex->ee_block = cpu_to_le32(block); 2302 ex->ee_len = cpu_to_le16(num); 2303 /* 2304 * Do not mark uninitialized if all the blocks in the 2305 * extent have been removed. 2306 */ 2307 if (uninitialized && num) 2308 ext4_ext_mark_uninitialized(ex); 2309 2310 err = ext4_ext_dirty(handle, inode, path + depth); 2311 if (err) 2312 goto out; 2313 2314 ext_debug("new extent: %u:%u:%llu\n", block, num, 2315 ext4_ext_pblock(ex)); 2316 ex--; 2317 ex_ee_block = le32_to_cpu(ex->ee_block); 2318 ex_ee_len = ext4_ext_get_actual_len(ex); 2319 } 2320 2321 if (correct_index && eh->eh_entries) 2322 err = ext4_ext_correct_indexes(handle, inode, path); 2323 2324 /* if this leaf is free, then we should 2325 * remove it from index block above */ 2326 if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) 2327 err = ext4_ext_rm_idx(handle, inode, path + depth); 2328 2329 out: 2330 return err; 2331 } 2332 2333 /* 2334 * ext4_ext_more_to_rm: 2335 * returns 1 if current index has to be freed (even partial) 2336 */ 2337 static int 2338 ext4_ext_more_to_rm(struct ext4_ext_path *path) 2339 { 2340 BUG_ON(path->p_idx == NULL); 2341 2342 if (path->p_idx < EXT_FIRST_INDEX(path->p_hdr)) 2343 return 0; 2344 2345 /* 2346 * if truncate on deeper level happened, it wasn't partial, 2347 * so we have to consider current index for truncation 2348 */ 2349 if (le16_to_cpu(path->p_hdr->eh_entries) == path->p_block) 2350 return 0; 2351 return 1; 2352 } 2353 2354 static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start) 2355 { 2356 struct super_block *sb = inode->i_sb; 2357 int depth = ext_depth(inode); 2358 struct ext4_ext_path *path; 2359 handle_t *handle; 2360 int i, err; 2361 2362 ext_debug("truncate since %u\n", start); 2363 2364 /* probably first extent we're gonna free will be last in block */ 2365 handle = ext4_journal_start(inode, depth + 1); 2366 if (IS_ERR(handle)) 2367 return PTR_ERR(handle); 2368 2369 again: 2370 
ext4_ext_invalidate_cache(inode); 2371 2372 /* 2373 * We start scanning from right side, freeing all the blocks 2374 * after i_size and walking into the tree depth-wise. 2375 */ 2376 depth = ext_depth(inode); 2377 path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_NOFS); 2378 if (path == NULL) { 2379 ext4_journal_stop(handle); 2380 return -ENOMEM; 2381 } 2382 path[0].p_depth = depth; 2383 path[0].p_hdr = ext_inode_hdr(inode); 2384 if (ext4_ext_check(inode, path[0].p_hdr, depth)) { 2385 err = -EIO; 2386 goto out; 2387 } 2388 i = err = 0; 2389 2390 while (i >= 0 && err == 0) { 2391 if (i == depth) { 2392 /* this is leaf block */ 2393 err = ext4_ext_rm_leaf(handle, inode, path, start); 2394 /* root level has p_bh == NULL, brelse() eats this */ 2395 brelse(path[i].p_bh); 2396 path[i].p_bh = NULL; 2397 i--; 2398 continue; 2399 } 2400 2401 /* this is index block */ 2402 if (!path[i].p_hdr) { 2403 ext_debug("initialize header\n"); 2404 path[i].p_hdr = ext_block_hdr(path[i].p_bh); 2405 } 2406 2407 if (!path[i].p_idx) { 2408 /* this level hasn't been touched yet */ 2409 path[i].p_idx = EXT_LAST_INDEX(path[i].p_hdr); 2410 path[i].p_block = le16_to_cpu(path[i].p_hdr->eh_entries)+1; 2411 ext_debug("init index ptr: hdr 0x%p, num %d\n", 2412 path[i].p_hdr, 2413 le16_to_cpu(path[i].p_hdr->eh_entries)); 2414 } else { 2415 /* we were already here, see at next index */ 2416 path[i].p_idx--; 2417 } 2418 2419 ext_debug("level %d - index, first 0x%p, cur 0x%p\n", 2420 i, EXT_FIRST_INDEX(path[i].p_hdr), 2421 path[i].p_idx); 2422 if (ext4_ext_more_to_rm(path + i)) { 2423 struct buffer_head *bh; 2424 /* go to the next level */ 2425 ext_debug("move to level %d (block %llu)\n", 2426 i + 1, ext4_idx_pblock(path[i].p_idx)); 2427 memset(path + i + 1, 0, sizeof(*path)); 2428 bh = sb_bread(sb, ext4_idx_pblock(path[i].p_idx)); 2429 if (!bh) { 2430 /* should we reset i_size? 
*/ 2431 err = -EIO; 2432 break; 2433 } 2434 if (WARN_ON(i + 1 > depth)) { 2435 err = -EIO; 2436 break; 2437 } 2438 if (ext4_ext_check(inode, ext_block_hdr(bh), 2439 depth - i - 1)) { 2440 err = -EIO; 2441 break; 2442 } 2443 path[i + 1].p_bh = bh; 2444 2445 /* save actual number of indexes since this 2446 * number is changed at the next iteration */ 2447 path[i].p_block = le16_to_cpu(path[i].p_hdr->eh_entries); 2448 i++; 2449 } else { 2450 /* we finished processing this index, go up */ 2451 if (path[i].p_hdr->eh_entries == 0 && i > 0) { 2452 /* index is empty, remove it; 2453 * handle must be already prepared by the 2454 * truncatei_leaf() */ 2455 err = ext4_ext_rm_idx(handle, inode, path + i); 2456 } 2457 /* root level has p_bh == NULL, brelse() eats this */ 2458 brelse(path[i].p_bh); 2459 path[i].p_bh = NULL; 2460 i--; 2461 ext_debug("return to level %d\n", i); 2462 } 2463 } 2464 2465 /* TODO: flexible tree reduction should be here */ 2466 if (path->p_hdr->eh_entries == 0) { 2467 /* 2468 * truncate to zero freed all the tree, 2469 * so we need to correct eh_depth 2470 */ 2471 err = ext4_ext_get_access(handle, inode, path); 2472 if (err == 0) { 2473 ext_inode_hdr(inode)->eh_depth = 0; 2474 ext_inode_hdr(inode)->eh_max = 2475 cpu_to_le16(ext4_ext_space_root(inode, 0)); 2476 err = ext4_ext_dirty(handle, inode, path); 2477 } 2478 } 2479 out: 2480 ext4_ext_drop_refs(path); 2481 kfree(path); 2482 if (err == -EAGAIN) 2483 goto again; 2484 ext4_journal_stop(handle); 2485 2486 return err; 2487 } 2488 2489 /* 2490 * called at mount time 2491 */ 2492 void ext4_ext_init(struct super_block *sb) 2493 { 2494 /* 2495 * possible initialization would be here 2496 */ 2497 2498 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) { 2499 #if defined(AGGRESSIVE_TEST) || defined(CHECK_BINSEARCH) || defined(EXTENTS_STATS) 2500 printk(KERN_INFO "EXT4-fs: file extents enabled"); 2501 #ifdef AGGRESSIVE_TEST 2502 printk(", aggressive tests"); 2503 #endif 2504 #ifdef CHECK_BINSEARCH 2505 printk(", check binsearch"); 2506 #endif 2507 #ifdef EXTENTS_STATS 2508 printk(", stats"); 2509 #endif 2510 printk("\n"); 2511 #endif 2512 #ifdef EXTENTS_STATS 2513 spin_lock_init(&EXT4_SB(sb)->s_ext_stats_lock); 2514 EXT4_SB(sb)->s_ext_min = 1 << 30; 2515 EXT4_SB(sb)->s_ext_max = 0; 2516 #endif 2517 } 2518 } 2519 2520 /* 2521 * called at umount time 2522 */ 2523 void ext4_ext_release(struct super_block *sb) 2524 { 2525 if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) 2526 return; 2527 2528 #ifdef EXTENTS_STATS 2529 if (EXT4_SB(sb)->s_ext_blocks && EXT4_SB(sb)->s_ext_extents) { 2530 struct ext4_sb_info *sbi = EXT4_SB(sb); 2531 printk(KERN_ERR "EXT4-fs: %lu blocks in %lu extents (%lu ave)\n", 2532 sbi->s_ext_blocks, sbi->s_ext_extents, 2533 sbi->s_ext_blocks / sbi->s_ext_extents); 2534 printk(KERN_ERR "EXT4-fs: extents: %lu min, %lu max, max depth %lu\n", 2535 sbi->s_ext_min, sbi->s_ext_max, sbi->s_depth_max); 2536 } 2537 #endif 2538 } 2539 2540 /* FIXME!! we need to try to merge to left or right after zero-out */ 2541 static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) 2542 { 2543 ext4_fsblk_t ee_pblock; 2544 unsigned int ee_len; 2545 int ret; 2546 2547 ee_len = ext4_ext_get_actual_len(ex); 2548 ee_pblock = ext4_ext_pblock(ex); 2549 2550 ret = sb_issue_zeroout(inode->i_sb, ee_pblock, ee_len, GFP_NOFS); 2551 if (ret > 0) 2552 ret = 0; 2553 2554 return ret; 2555 } 2556 2557 /* 2558 * used by extent splitting. 
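 *
 * As an illustration of how the callers below combine these flags: when an
 * uninitialized extent is split ahead of direct IO, EXT4_EXT_MARK_UNINIT1
 * and EXT4_EXT_MARK_UNINIT2 are both passed so that the two halves stay
 * uninitialized, and EXT4_EXT_MAY_ZEROOUT is added when the extent is
 * fully inside i_size (or the new size), so a split that fails with
 * ENOSPC can fall back to zeroing the extent out instead.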
2559 */
2560 #define EXT4_EXT_MAY_ZEROOUT 0x1 /* safe to zeroout if split fails \
2561 due to ENOSPC */
2562 #define EXT4_EXT_MARK_UNINIT1 0x2 /* mark first half uninitialized */
2563 #define EXT4_EXT_MARK_UNINIT2 0x4 /* mark second half uninitialized */
2564
2565 /*
2566 * ext4_split_extent_at() splits an extent at a given block.
2567 *
2568 * @handle: the journal handle
2569 * @inode: the file inode
2570 * @path: the path to the extent
2571 * @split: the logical block where the extent is split.
2572 * @split_flag: indicates whether the extent may be zeroed out if the split
2573 * fails, and the states (init or uninit) of the new extents.
2574 * @flags: flags used to insert the new extent into the extent tree.
2575 *
2576 *
2577 * Splits extent [a, b] into two extents [a, @split) and [@split, b], the
2578 * states of which are determined by split_flag.
2579 *
2580 * There are two cases:
2581 * a> the extent is split into two extents.
2582 * b> no split is needed, and the extent is just marked.
2583 *
2584 * Returns 0 on success.
2585 */
2586 static int ext4_split_extent_at(handle_t *handle,
2587 struct inode *inode,
2588 struct ext4_ext_path *path,
2589 ext4_lblk_t split,
2590 int split_flag,
2591 int flags)
2592 {
2593 ext4_fsblk_t newblock;
2594 ext4_lblk_t ee_block;
2595 struct ext4_extent *ex, newex, orig_ex;
2596 struct ext4_extent *ex2 = NULL;
2597 unsigned int ee_len, depth;
2598 int err = 0;
2599
2600 ext_debug("ext4_split_extent_at: inode %lu, logical"
2601 "block %llu\n", inode->i_ino, (unsigned long long)split);
2602
2603 ext4_ext_show_leaf(inode, path);
2604
2605 depth = ext_depth(inode);
2606 ex = path[depth].p_ext;
2607 ee_block = le32_to_cpu(ex->ee_block);
2608 ee_len = ext4_ext_get_actual_len(ex);
2609 newblock = split - ee_block + ext4_ext_pblock(ex);
2610
2611 BUG_ON(split < ee_block || split >= (ee_block + ee_len));
2612
2613 err = ext4_ext_get_access(handle, inode, path + depth);
2614 if (err)
2615 goto out;
2616
2617 if (split == ee_block) {
2618 /*
2619 * case b: block @split is the block that the extent begins with,
2620 * so we just change the state of the extent, and splitting
2621 * is not needed.
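 * (As a concrete illustration: if an uninitialized extent starts at
 * @split and EXT4_EXT_MARK_UNINIT2 is not set, the whole extent is
 * simply marked initialized in place and no new extent is inserted.)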
2622 */ 2623 if (split_flag & EXT4_EXT_MARK_UNINIT2) 2624 ext4_ext_mark_uninitialized(ex); 2625 else 2626 ext4_ext_mark_initialized(ex); 2627 2628 if (!(flags & EXT4_GET_BLOCKS_PRE_IO)) 2629 ext4_ext_try_to_merge(inode, path, ex); 2630 2631 err = ext4_ext_dirty(handle, inode, path + depth); 2632 goto out; 2633 } 2634 2635 /* case a */ 2636 memcpy(&orig_ex, ex, sizeof(orig_ex)); 2637 ex->ee_len = cpu_to_le16(split - ee_block); 2638 if (split_flag & EXT4_EXT_MARK_UNINIT1) 2639 ext4_ext_mark_uninitialized(ex); 2640 2641 /* 2642 * path may lead to new leaf, not to original leaf any more 2643 * after ext4_ext_insert_extent() returns, 2644 */ 2645 err = ext4_ext_dirty(handle, inode, path + depth); 2646 if (err) 2647 goto fix_extent_len; 2648 2649 ex2 = &newex; 2650 ex2->ee_block = cpu_to_le32(split); 2651 ex2->ee_len = cpu_to_le16(ee_len - (split - ee_block)); 2652 ext4_ext_store_pblock(ex2, newblock); 2653 if (split_flag & EXT4_EXT_MARK_UNINIT2) 2654 ext4_ext_mark_uninitialized(ex2); 2655 2656 err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); 2657 if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) { 2658 err = ext4_ext_zeroout(inode, &orig_ex); 2659 if (err) 2660 goto fix_extent_len; 2661 /* update the extent length and mark as initialized */ 2662 ex->ee_len = cpu_to_le32(ee_len); 2663 ext4_ext_try_to_merge(inode, path, ex); 2664 err = ext4_ext_dirty(handle, inode, path + depth); 2665 goto out; 2666 } else if (err) 2667 goto fix_extent_len; 2668 2669 out: 2670 ext4_ext_show_leaf(inode, path); 2671 return err; 2672 2673 fix_extent_len: 2674 ex->ee_len = orig_ex.ee_len; 2675 ext4_ext_dirty(handle, inode, path + depth); 2676 return err; 2677 } 2678 2679 /* 2680 * ext4_split_extents() splits an extent and mark extent which is covered 2681 * by @map as split_flags indicates 2682 * 2683 * It may result in splitting the extent into multiple extents (upto three) 2684 * There are three possibilities: 2685 * a> There is no split required 2686 * b> Splits in two extents: Split is happening at either end of the extent 2687 * c> Splits in three extents: Somone is splitting in middle of the extent 2688 * 2689 */ 2690 static int ext4_split_extent(handle_t *handle, 2691 struct inode *inode, 2692 struct ext4_ext_path *path, 2693 struct ext4_map_blocks *map, 2694 int split_flag, 2695 int flags) 2696 { 2697 ext4_lblk_t ee_block; 2698 struct ext4_extent *ex; 2699 unsigned int ee_len, depth; 2700 int err = 0; 2701 int uninitialized; 2702 int split_flag1, flags1; 2703 2704 depth = ext_depth(inode); 2705 ex = path[depth].p_ext; 2706 ee_block = le32_to_cpu(ex->ee_block); 2707 ee_len = ext4_ext_get_actual_len(ex); 2708 uninitialized = ext4_ext_is_uninitialized(ex); 2709 2710 if (map->m_lblk + map->m_len < ee_block + ee_len) { 2711 split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT ? 2712 EXT4_EXT_MAY_ZEROOUT : 0; 2713 flags1 = flags | EXT4_GET_BLOCKS_PRE_IO; 2714 if (uninitialized) 2715 split_flag1 |= EXT4_EXT_MARK_UNINIT1 | 2716 EXT4_EXT_MARK_UNINIT2; 2717 err = ext4_split_extent_at(handle, inode, path, 2718 map->m_lblk + map->m_len, split_flag1, flags1); 2719 } 2720 2721 ext4_ext_drop_refs(path); 2722 path = ext4_ext_find_extent(inode, map->m_lblk, path); 2723 if (IS_ERR(path)) 2724 return PTR_ERR(path); 2725 2726 if (map->m_lblk >= ee_block) { 2727 split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT ? 
2728 EXT4_EXT_MAY_ZEROOUT : 0; 2729 if (uninitialized) 2730 split_flag1 |= EXT4_EXT_MARK_UNINIT1; 2731 if (split_flag & EXT4_EXT_MARK_UNINIT2) 2732 split_flag1 |= EXT4_EXT_MARK_UNINIT2; 2733 err = ext4_split_extent_at(handle, inode, path, 2734 map->m_lblk, split_flag1, flags); 2735 if (err) 2736 goto out; 2737 } 2738 2739 ext4_ext_show_leaf(inode, path); 2740 out: 2741 return err ? err : map->m_len; 2742 } 2743 2744 #define EXT4_EXT_ZERO_LEN 7 2745 /* 2746 * This function is called by ext4_ext_map_blocks() if someone tries to write 2747 * to an uninitialized extent. It may result in splitting the uninitialized 2748 * extent into multiple extents (up to three - one initialized and two 2749 * uninitialized). 2750 * There are three possibilities: 2751 * a> There is no split required: Entire extent should be initialized 2752 * b> Splits in two extents: Write is happening at either end of the extent 2753 * c> Splits in three extents: Somone is writing in middle of the extent 2754 */ 2755 static int ext4_ext_convert_to_initialized(handle_t *handle, 2756 struct inode *inode, 2757 struct ext4_map_blocks *map, 2758 struct ext4_ext_path *path) 2759 { 2760 struct ext4_map_blocks split_map; 2761 struct ext4_extent zero_ex; 2762 struct ext4_extent *ex; 2763 ext4_lblk_t ee_block, eof_block; 2764 unsigned int allocated, ee_len, depth; 2765 int err = 0; 2766 int split_flag = 0; 2767 2768 ext_debug("ext4_ext_convert_to_initialized: inode %lu, logical" 2769 "block %llu, max_blocks %u\n", inode->i_ino, 2770 (unsigned long long)map->m_lblk, map->m_len); 2771 2772 eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >> 2773 inode->i_sb->s_blocksize_bits; 2774 if (eof_block < map->m_lblk + map->m_len) 2775 eof_block = map->m_lblk + map->m_len; 2776 2777 depth = ext_depth(inode); 2778 ex = path[depth].p_ext; 2779 ee_block = le32_to_cpu(ex->ee_block); 2780 ee_len = ext4_ext_get_actual_len(ex); 2781 allocated = ee_len - (map->m_lblk - ee_block); 2782 2783 WARN_ON(map->m_lblk < ee_block); 2784 /* 2785 * It is safe to convert extent to initialized via explicit 2786 * zeroout only if extent is fully insde i_size or new_size. 2787 */ 2788 split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0; 2789 2790 /* If extent has less than 2*EXT4_EXT_ZERO_LEN zerout directly */ 2791 if (ee_len <= 2*EXT4_EXT_ZERO_LEN && 2792 (EXT4_EXT_MAY_ZEROOUT & split_flag)) { 2793 err = ext4_ext_zeroout(inode, ex); 2794 if (err) 2795 goto out; 2796 2797 err = ext4_ext_get_access(handle, inode, path + depth); 2798 if (err) 2799 goto out; 2800 ext4_ext_mark_initialized(ex); 2801 ext4_ext_try_to_merge(inode, path, ex); 2802 err = ext4_ext_dirty(handle, inode, path + depth); 2803 goto out; 2804 } 2805 2806 /* 2807 * four cases: 2808 * 1. split the extent into three extents. 2809 * 2. split the extent into two extents, zeroout the first half. 2810 * 3. split the extent into two extents, zeroout the second half. 2811 * 4. split the extent into two extents with out zeroout. 
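 *
 * A rough worked example of the case selection below, assuming the
 * EXT4_EXT_ZERO_LEN == 7 threshold defined above and that zeroout is
 * allowed: for a 30-block uninitialized extent, a write covering blocks
 * 1..4 of the extent ends fewer than 7 blocks in, so the short head is
 * zeroed out and the extent is split in two (case 2); a write covering
 * blocks 10..14 leaves both a long head and a long tail, so the extent
 * is split without any zeroout (case 1).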
2812 */
2813 split_map.m_lblk = map->m_lblk;
2814 split_map.m_len = map->m_len;
2815
2816 if (allocated > map->m_len) {
2817 if (allocated <= EXT4_EXT_ZERO_LEN &&
2818 (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
2819 /* case 3 */
2820 zero_ex.ee_block =
2821 cpu_to_le32(map->m_lblk + map->m_len);
2822 zero_ex.ee_len = cpu_to_le16(allocated - map->m_len);
2823 ext4_ext_store_pblock(&zero_ex,
2824 ext4_ext_pblock(ex) + map->m_lblk - ee_block);
2825 err = ext4_ext_zeroout(inode, &zero_ex);
2826 if (err)
2827 goto out;
2828 split_map.m_lblk = map->m_lblk;
2829 split_map.m_len = allocated;
2830 } else if ((map->m_lblk - ee_block + map->m_len <
2831 EXT4_EXT_ZERO_LEN) &&
2832 (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
2833 /* case 2 */
2834 if (map->m_lblk != ee_block) {
2835 zero_ex.ee_block = ex->ee_block;
2836 zero_ex.ee_len = cpu_to_le16(map->m_lblk -
2837 ee_block);
2838 ext4_ext_store_pblock(&zero_ex,
2839 ext4_ext_pblock(ex));
2840 err = ext4_ext_zeroout(inode, &zero_ex);
2841 if (err)
2842 goto out;
2843 }
2844
2845 allocated = map->m_lblk - ee_block + map->m_len;
2846
2847 split_map.m_lblk = ee_block;
2848 split_map.m_len = allocated;
2849 }
2850 }
2851
2852 allocated = ext4_split_extent(handle, inode, path,
2853 &split_map, split_flag, 0);
2854 if (allocated < 0)
2855 err = allocated;
2856
2857 out:
2858 return err ? err : allocated;
2859 }
2860
2861 /*
2862 * This function is called by ext4_ext_map_blocks() from
2863 * ext4_get_blocks_dio_write() when DIO is used to write
2864 * to an uninitialized extent.
2865 *
2866 * Writing to an uninitialized extent may result in splitting the uninitialized
2867 * extent into multiple initialized/uninitialized extents (up to three).
2868 * There are three possibilities:
2869 * a> There is no split required: Entire extent should be uninitialized
2870 * b> Splits in two extents: Write is happening at either end of the extent
2871 * c> Splits in three extents: Someone is writing in the middle of the extent
2872 *
2873 * One or more index blocks may be needed if the extent tree grows after
2874 * the uninitialized extent is split. To prevent ENOSPC from occurring at IO
2875 * completion, we need to split the uninitialized extent before DIO submits
2876 * the IO. The uninitialized extent passed in at this time will be split
2877 * into (at most) three uninitialized extents. After IO completes, the part
2878 * being filled will be converted to initialized by the end_io callback function
2879 * via ext4_convert_unwritten_extents().
2880 *
2881 * Returns the size of the uninitialized extent to be written on success.
2882 */
2883 static int ext4_split_unwritten_extents(handle_t *handle,
2884 struct inode *inode,
2885 struct ext4_map_blocks *map,
2886 struct ext4_ext_path *path,
2887 int flags)
2888 {
2889 ext4_lblk_t eof_block;
2890 ext4_lblk_t ee_block;
2891 struct ext4_extent *ex;
2892 unsigned int ee_len;
2893 int split_flag = 0, depth;
2894
2895 ext_debug("ext4_split_unwritten_extents: inode %lu, logical"
2896 "block %llu, max_blocks %u\n", inode->i_ino,
2897 (unsigned long long)map->m_lblk, map->m_len);
2898
2899 eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
2900 inode->i_sb->s_blocksize_bits;
2901 if (eof_block < map->m_lblk + map->m_len)
2902 eof_block = map->m_lblk + map->m_len;
2903 /*
2904 * It is safe to convert the extent to initialized via explicit
2905 * zeroout only if the extent is fully inside i_size or new_size.
2906 */ 2907 depth = ext_depth(inode); 2908 ex = path[depth].p_ext; 2909 ee_block = le32_to_cpu(ex->ee_block); 2910 ee_len = ext4_ext_get_actual_len(ex); 2911 2912 split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0; 2913 split_flag |= EXT4_EXT_MARK_UNINIT2; 2914 2915 flags |= EXT4_GET_BLOCKS_PRE_IO; 2916 return ext4_split_extent(handle, inode, path, map, split_flag, flags); 2917 } 2918 2919 static int ext4_convert_unwritten_extents_endio(handle_t *handle, 2920 struct inode *inode, 2921 struct ext4_ext_path *path) 2922 { 2923 struct ext4_extent *ex; 2924 struct ext4_extent_header *eh; 2925 int depth; 2926 int err = 0; 2927 2928 depth = ext_depth(inode); 2929 eh = path[depth].p_hdr; 2930 ex = path[depth].p_ext; 2931 2932 ext_debug("ext4_convert_unwritten_extents_endio: inode %lu, logical" 2933 "block %llu, max_blocks %u\n", inode->i_ino, 2934 (unsigned long long)le32_to_cpu(ex->ee_block), 2935 ext4_ext_get_actual_len(ex)); 2936 2937 err = ext4_ext_get_access(handle, inode, path + depth); 2938 if (err) 2939 goto out; 2940 /* first mark the extent as initialized */ 2941 ext4_ext_mark_initialized(ex); 2942 2943 /* note: ext4_ext_correct_indexes() isn't needed here because 2944 * borders are not changed 2945 */ 2946 ext4_ext_try_to_merge(inode, path, ex); 2947 2948 /* Mark modified extent as dirty */ 2949 err = ext4_ext_dirty(handle, inode, path + depth); 2950 out: 2951 ext4_ext_show_leaf(inode, path); 2952 return err; 2953 } 2954 2955 static void unmap_underlying_metadata_blocks(struct block_device *bdev, 2956 sector_t block, int count) 2957 { 2958 int i; 2959 for (i = 0; i < count; i++) 2960 unmap_underlying_metadata(bdev, block + i); 2961 } 2962 2963 /* 2964 * Handle EOFBLOCKS_FL flag, clearing it if necessary 2965 */ 2966 static int check_eofblocks_fl(handle_t *handle, struct inode *inode, 2967 ext4_lblk_t lblk, 2968 struct ext4_ext_path *path, 2969 unsigned int len) 2970 { 2971 int i, depth; 2972 struct ext4_extent_header *eh; 2973 struct ext4_extent *last_ex; 2974 2975 if (!ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS)) 2976 return 0; 2977 2978 depth = ext_depth(inode); 2979 eh = path[depth].p_hdr; 2980 2981 if (unlikely(!eh->eh_entries)) { 2982 EXT4_ERROR_INODE(inode, "eh->eh_entries == 0 and " 2983 "EOFBLOCKS_FL set"); 2984 return -EIO; 2985 } 2986 last_ex = EXT_LAST_EXTENT(eh); 2987 /* 2988 * We should clear the EOFBLOCKS_FL flag if we are writing the 2989 * last block in the last extent in the file. We test this by 2990 * first checking to see if the caller to 2991 * ext4_ext_get_blocks() was interested in the last block (or 2992 * a block beyond the last block) in the current extent. If 2993 * this turns out to be false, we can bail out from this 2994 * function immediately. 2995 */ 2996 if (lblk + len < le32_to_cpu(last_ex->ee_block) + 2997 ext4_ext_get_actual_len(last_ex)) 2998 return 0; 2999 /* 3000 * If the caller does appear to be planning to write at or 3001 * beyond the end of the current extent, we then test to see 3002 * if the current extent is the last extent in the file, by 3003 * checking to make sure it was reached via the rightmost node 3004 * at each level of the tree. 
3005 */ 3006 for (i = depth-1; i >= 0; i--) 3007 if (path[i].p_idx != EXT_LAST_INDEX(path[i].p_hdr)) 3008 return 0; 3009 ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS); 3010 return ext4_mark_inode_dirty(handle, inode); 3011 } 3012 3013 static int 3014 ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, 3015 struct ext4_map_blocks *map, 3016 struct ext4_ext_path *path, int flags, 3017 unsigned int allocated, ext4_fsblk_t newblock) 3018 { 3019 int ret = 0; 3020 int err = 0; 3021 ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; 3022 3023 ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical" 3024 "block %llu, max_blocks %u, flags %d, allocated %u", 3025 inode->i_ino, (unsigned long long)map->m_lblk, map->m_len, 3026 flags, allocated); 3027 ext4_ext_show_leaf(inode, path); 3028 3029 /* get_block() before submit the IO, split the extent */ 3030 if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { 3031 ret = ext4_split_unwritten_extents(handle, inode, map, 3032 path, flags); 3033 /* 3034 * Flag the inode(non aio case) or end_io struct (aio case) 3035 * that this IO needs to conversion to written when IO is 3036 * completed 3037 */ 3038 if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) { 3039 io->flag = EXT4_IO_END_UNWRITTEN; 3040 atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten); 3041 } else 3042 ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); 3043 if (ext4_should_dioread_nolock(inode)) 3044 map->m_flags |= EXT4_MAP_UNINIT; 3045 goto out; 3046 } 3047 /* IO end_io complete, convert the filled extent to written */ 3048 if ((flags & EXT4_GET_BLOCKS_CONVERT)) { 3049 ret = ext4_convert_unwritten_extents_endio(handle, inode, 3050 path); 3051 if (ret >= 0) { 3052 ext4_update_inode_fsync_trans(handle, inode, 1); 3053 err = check_eofblocks_fl(handle, inode, map->m_lblk, 3054 path, map->m_len); 3055 } else 3056 err = ret; 3057 goto out2; 3058 } 3059 /* buffered IO case */ 3060 /* 3061 * repeat fallocate creation request 3062 * we already have an unwritten extent 3063 */ 3064 if (flags & EXT4_GET_BLOCKS_UNINIT_EXT) 3065 goto map_out; 3066 3067 /* buffered READ or buffered write_begin() lookup */ 3068 if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) { 3069 /* 3070 * We have blocks reserved already. We 3071 * return allocated blocks so that delalloc 3072 * won't do block reservation for us. But 3073 * the buffer head will be unmapped so that 3074 * a read from the block returns 0s. 3075 */ 3076 map->m_flags |= EXT4_MAP_UNWRITTEN; 3077 goto out1; 3078 } 3079 3080 /* buffered write, writepage time, convert*/ 3081 ret = ext4_ext_convert_to_initialized(handle, inode, map, path); 3082 if (ret >= 0) { 3083 ext4_update_inode_fsync_trans(handle, inode, 1); 3084 err = check_eofblocks_fl(handle, inode, map->m_lblk, path, 3085 map->m_len); 3086 if (err < 0) 3087 goto out2; 3088 } 3089 3090 out: 3091 if (ret <= 0) { 3092 err = ret; 3093 goto out2; 3094 } else 3095 allocated = ret; 3096 map->m_flags |= EXT4_MAP_NEW; 3097 /* 3098 * if we allocated more blocks than requested 3099 * we need to make sure we unmap the extra block 3100 * allocated. The actual needed block will get 3101 * unmapped later when we find the buffer_head marked 3102 * new. 
3103 */ 3104 if (allocated > map->m_len) { 3105 unmap_underlying_metadata_blocks(inode->i_sb->s_bdev, 3106 newblock + map->m_len, 3107 allocated - map->m_len); 3108 allocated = map->m_len; 3109 } 3110 3111 /* 3112 * If we have done fallocate with the offset that is already 3113 * delayed allocated, we would have block reservation 3114 * and quota reservation done in the delayed write path. 3115 * But fallocate would have already updated quota and block 3116 * count for this offset. So cancel these reservation 3117 */ 3118 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) 3119 ext4_da_update_reserve_space(inode, allocated, 0); 3120 3121 map_out: 3122 map->m_flags |= EXT4_MAP_MAPPED; 3123 out1: 3124 if (allocated > map->m_len) 3125 allocated = map->m_len; 3126 ext4_ext_show_leaf(inode, path); 3127 map->m_pblk = newblock; 3128 map->m_len = allocated; 3129 out2: 3130 if (path) { 3131 ext4_ext_drop_refs(path); 3132 kfree(path); 3133 } 3134 return err ? err : allocated; 3135 } 3136 3137 /* 3138 * Block allocation/map/preallocation routine for extents based files 3139 * 3140 * 3141 * Need to be called with 3142 * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system block 3143 * (ie, create is zero). Otherwise down_write(&EXT4_I(inode)->i_data_sem) 3144 * 3145 * return > 0, number of of blocks already mapped/allocated 3146 * if create == 0 and these are pre-allocated blocks 3147 * buffer head is unmapped 3148 * otherwise blocks are mapped 3149 * 3150 * return = 0, if plain look up failed (blocks have not been allocated) 3151 * buffer head is unmapped 3152 * 3153 * return < 0, error case. 3154 */ 3155 int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, 3156 struct ext4_map_blocks *map, int flags) 3157 { 3158 struct ext4_ext_path *path = NULL; 3159 struct ext4_extent newex, *ex; 3160 ext4_fsblk_t newblock = 0; 3161 int err = 0, depth, ret; 3162 unsigned int allocated = 0; 3163 struct ext4_allocation_request ar; 3164 ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; 3165 3166 ext_debug("blocks %u/%u requested for inode %lu\n", 3167 map->m_lblk, map->m_len, inode->i_ino); 3168 trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags); 3169 3170 /* check in cache */ 3171 if (ext4_ext_in_cache(inode, map->m_lblk, &newex)) { 3172 if (!newex.ee_start_lo && !newex.ee_start_hi) { 3173 if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) { 3174 /* 3175 * block isn't allocated yet and 3176 * user doesn't want to allocate it 3177 */ 3178 goto out2; 3179 } 3180 /* we should allocate requested block */ 3181 } else { 3182 /* block is already allocated */ 3183 newblock = map->m_lblk 3184 - le32_to_cpu(newex.ee_block) 3185 + ext4_ext_pblock(&newex); 3186 /* number of remaining blocks in the extent */ 3187 allocated = ext4_ext_get_actual_len(&newex) - 3188 (map->m_lblk - le32_to_cpu(newex.ee_block)); 3189 goto out; 3190 } 3191 } 3192 3193 /* find extent for this block */ 3194 path = ext4_ext_find_extent(inode, map->m_lblk, NULL); 3195 if (IS_ERR(path)) { 3196 err = PTR_ERR(path); 3197 path = NULL; 3198 goto out2; 3199 } 3200 3201 depth = ext_depth(inode); 3202 3203 /* 3204 * consistent leaf must not be empty; 3205 * this situation is possible, though, _during_ tree modification; 3206 * this is why assert can't be put in ext4_ext_find_extent() 3207 */ 3208 if (unlikely(path[depth].p_ext == NULL && depth != 0)) { 3209 EXT4_ERROR_INODE(inode, "bad extent address " 3210 "lblock: %lu, depth: %d pblock %lld", 3211 (unsigned long) map->m_lblk, depth, 3212 path[depth].p_block); 3213 err = -EIO; 3214 
goto out2; 3215 } 3216 3217 ex = path[depth].p_ext; 3218 if (ex) { 3219 ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block); 3220 ext4_fsblk_t ee_start = ext4_ext_pblock(ex); 3221 unsigned short ee_len; 3222 3223 /* 3224 * Uninitialized extents are treated as holes, except that 3225 * we split out initialized portions during a write. 3226 */ 3227 ee_len = ext4_ext_get_actual_len(ex); 3228 /* if found extent covers block, simply return it */ 3229 if (in_range(map->m_lblk, ee_block, ee_len)) { 3230 newblock = map->m_lblk - ee_block + ee_start; 3231 /* number of remaining blocks in the extent */ 3232 allocated = ee_len - (map->m_lblk - ee_block); 3233 ext_debug("%u fit into %u:%d -> %llu\n", map->m_lblk, 3234 ee_block, ee_len, newblock); 3235 3236 /* Do not put uninitialized extent in the cache */ 3237 if (!ext4_ext_is_uninitialized(ex)) { 3238 ext4_ext_put_in_cache(inode, ee_block, 3239 ee_len, ee_start); 3240 goto out; 3241 } 3242 ret = ext4_ext_handle_uninitialized_extents(handle, 3243 inode, map, path, flags, allocated, 3244 newblock); 3245 return ret; 3246 } 3247 } 3248 3249 /* 3250 * requested block isn't allocated yet; 3251 * we couldn't try to create block if create flag is zero 3252 */ 3253 if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) { 3254 /* 3255 * put just found gap into cache to speed up 3256 * subsequent requests 3257 */ 3258 ext4_ext_put_gap_in_cache(inode, path, map->m_lblk); 3259 goto out2; 3260 } 3261 /* 3262 * Okay, we need to do block allocation. 3263 */ 3264 3265 /* find neighbour allocated blocks */ 3266 ar.lleft = map->m_lblk; 3267 err = ext4_ext_search_left(inode, path, &ar.lleft, &ar.pleft); 3268 if (err) 3269 goto out2; 3270 ar.lright = map->m_lblk; 3271 err = ext4_ext_search_right(inode, path, &ar.lright, &ar.pright); 3272 if (err) 3273 goto out2; 3274 3275 /* 3276 * See if request is beyond maximum number of blocks we can have in 3277 * a single extent. For an initialized extent this limit is 3278 * EXT_INIT_MAX_LEN and for an uninitialized extent this limit is 3279 * EXT_UNINIT_MAX_LEN. 3280 */ 3281 if (map->m_len > EXT_INIT_MAX_LEN && 3282 !(flags & EXT4_GET_BLOCKS_UNINIT_EXT)) 3283 map->m_len = EXT_INIT_MAX_LEN; 3284 else if (map->m_len > EXT_UNINIT_MAX_LEN && 3285 (flags & EXT4_GET_BLOCKS_UNINIT_EXT)) 3286 map->m_len = EXT_UNINIT_MAX_LEN; 3287 3288 /* Check if we can really insert (m_lblk)::(m_lblk + m_len) extent */ 3289 newex.ee_block = cpu_to_le32(map->m_lblk); 3290 newex.ee_len = cpu_to_le16(map->m_len); 3291 err = ext4_ext_check_overlap(inode, &newex, path); 3292 if (err) 3293 allocated = ext4_ext_get_actual_len(&newex); 3294 else 3295 allocated = map->m_len; 3296 3297 /* allocate new block */ 3298 ar.inode = inode; 3299 ar.goal = ext4_ext_find_goal(inode, path, map->m_lblk); 3300 ar.logical = map->m_lblk; 3301 ar.len = allocated; 3302 if (S_ISREG(inode->i_mode)) 3303 ar.flags = EXT4_MB_HINT_DATA; 3304 else 3305 /* disable in-core preallocation for non-regular files */ 3306 ar.flags = 0; 3307 newblock = ext4_mb_new_blocks(handle, &ar, &err); 3308 if (!newblock) 3309 goto out2; 3310 ext_debug("allocate new block: goal %llu, found %llu/%u\n", 3311 ar.goal, newblock, allocated); 3312 3313 /* try to insert new extent into found leaf and return */ 3314 ext4_ext_store_pblock(&newex, newblock); 3315 newex.ee_len = cpu_to_le16(ar.len); 3316 /* Mark uninitialized */ 3317 if (flags & EXT4_GET_BLOCKS_UNINIT_EXT){ 3318 ext4_ext_mark_uninitialized(&newex); 3319 /* 3320 * io_end structure was created for every IO write to an 3321 * uninitialized extent. 
To avoid unnecessary conversion, 3322 * here we flag the IO that really needs the conversion. 3323 * For non asycn direct IO case, flag the inode state 3324 * that we need to perform conversion when IO is done. 3325 */ 3326 if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { 3327 if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) { 3328 io->flag = EXT4_IO_END_UNWRITTEN; 3329 atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten); 3330 } else 3331 ext4_set_inode_state(inode, 3332 EXT4_STATE_DIO_UNWRITTEN); 3333 } 3334 if (ext4_should_dioread_nolock(inode)) 3335 map->m_flags |= EXT4_MAP_UNINIT; 3336 } 3337 3338 err = check_eofblocks_fl(handle, inode, map->m_lblk, path, ar.len); 3339 if (err) 3340 goto out2; 3341 3342 err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); 3343 if (err) { 3344 /* free data blocks we just allocated */ 3345 /* not a good idea to call discard here directly, 3346 * but otherwise we'd need to call it every free() */ 3347 ext4_discard_preallocations(inode); 3348 ext4_free_blocks(handle, inode, NULL, ext4_ext_pblock(&newex), 3349 ext4_ext_get_actual_len(&newex), 0); 3350 goto out2; 3351 } 3352 3353 /* previous routine could use block we allocated */ 3354 newblock = ext4_ext_pblock(&newex); 3355 allocated = ext4_ext_get_actual_len(&newex); 3356 if (allocated > map->m_len) 3357 allocated = map->m_len; 3358 map->m_flags |= EXT4_MAP_NEW; 3359 3360 /* 3361 * Update reserved blocks/metadata blocks after successful 3362 * block allocation which had been deferred till now. 3363 */ 3364 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) 3365 ext4_da_update_reserve_space(inode, allocated, 1); 3366 3367 /* 3368 * Cache the extent and update transaction to commit on fdatasync only 3369 * when it is _not_ an uninitialized extent. 3370 */ 3371 if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) { 3372 ext4_ext_put_in_cache(inode, map->m_lblk, allocated, newblock); 3373 ext4_update_inode_fsync_trans(handle, inode, 1); 3374 } else 3375 ext4_update_inode_fsync_trans(handle, inode, 0); 3376 out: 3377 if (allocated > map->m_len) 3378 allocated = map->m_len; 3379 ext4_ext_show_leaf(inode, path); 3380 map->m_flags |= EXT4_MAP_MAPPED; 3381 map->m_pblk = newblock; 3382 map->m_len = allocated; 3383 out2: 3384 if (path) { 3385 ext4_ext_drop_refs(path); 3386 kfree(path); 3387 } 3388 trace_ext4_ext_map_blocks_exit(inode, map->m_lblk, 3389 newblock, map->m_len, err ? err : allocated); 3390 return err ? err : allocated; 3391 } 3392 3393 void ext4_ext_truncate(struct inode *inode) 3394 { 3395 struct address_space *mapping = inode->i_mapping; 3396 struct super_block *sb = inode->i_sb; 3397 ext4_lblk_t last_block; 3398 handle_t *handle; 3399 int err = 0; 3400 3401 /* 3402 * finish any pending end_io work so we won't run the risk of 3403 * converting any truncated blocks to initialized later 3404 */ 3405 ext4_flush_completed_IO(inode); 3406 3407 /* 3408 * probably first extent we're gonna free will be last in block 3409 */ 3410 err = ext4_writepage_trans_blocks(inode); 3411 handle = ext4_journal_start(inode, err); 3412 if (IS_ERR(handle)) 3413 return; 3414 3415 if (inode->i_size & (sb->s_blocksize - 1)) 3416 ext4_block_truncate_page(handle, mapping, inode->i_size); 3417 3418 if (ext4_orphan_add(handle, inode)) 3419 goto out_stop; 3420 3421 down_write(&EXT4_I(inode)->i_data_sem); 3422 ext4_ext_invalidate_cache(inode); 3423 3424 ext4_discard_preallocations(inode); 3425 3426 /* 3427 * TODO: optimization is possible here. 3428 * Probably we need not scan at all, 3429 * because page truncation is enough. 
3430 */ 3431 3432 /* we have to know where to truncate from in crash case */ 3433 EXT4_I(inode)->i_disksize = inode->i_size; 3434 ext4_mark_inode_dirty(handle, inode); 3435 3436 last_block = (inode->i_size + sb->s_blocksize - 1) 3437 >> EXT4_BLOCK_SIZE_BITS(sb); 3438 err = ext4_ext_remove_space(inode, last_block); 3439 3440 /* In a multi-transaction truncate, we only make the final 3441 * transaction synchronous. 3442 */ 3443 if (IS_SYNC(inode)) 3444 ext4_handle_sync(handle); 3445 3446 out_stop: 3447 up_write(&EXT4_I(inode)->i_data_sem); 3448 /* 3449 * If this was a simple ftruncate() and the file will remain alive, 3450 * then we need to clear up the orphan record which we created above. 3451 * However, if this was a real unlink then we were called by 3452 * ext4_delete_inode(), and we allow that function to clean up the 3453 * orphan info for us. 3454 */ 3455 if (inode->i_nlink) 3456 ext4_orphan_del(handle, inode); 3457 3458 inode->i_mtime = inode->i_ctime = ext4_current_time(inode); 3459 ext4_mark_inode_dirty(handle, inode); 3460 ext4_journal_stop(handle); 3461 } 3462 3463 static void ext4_falloc_update_inode(struct inode *inode, 3464 int mode, loff_t new_size, int update_ctime) 3465 { 3466 struct timespec now; 3467 3468 if (update_ctime) { 3469 now = current_fs_time(inode->i_sb); 3470 if (!timespec_equal(&inode->i_ctime, &now)) 3471 inode->i_ctime = now; 3472 } 3473 /* 3474 * Update only when preallocation was requested beyond 3475 * the file size. 3476 */ 3477 if (!(mode & FALLOC_FL_KEEP_SIZE)) { 3478 if (new_size > i_size_read(inode)) 3479 i_size_write(inode, new_size); 3480 if (new_size > EXT4_I(inode)->i_disksize) 3481 ext4_update_i_disksize(inode, new_size); 3482 } else { 3483 /* 3484 * Mark that we allocate beyond EOF so the subsequent truncate 3485 * can proceed even if the new size is the same as i_size. 3486 */ 3487 if (new_size > i_size_read(inode)) 3488 ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS); 3489 } 3490 3491 } 3492 3493 /* 3494 * preallocate space for a file. This implements ext4's fallocate file 3495 * operation, which gets called from sys_fallocate system call. 3496 * For block-mapped files, posix_fallocate should fall back to the method 3497 * of writing zeroes to the required new blocks (the same behavior which is 3498 * expected for file systems which do not support fallocate() system call). 
3499 */ 3500 long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) 3501 { 3502 struct inode *inode = file->f_path.dentry->d_inode; 3503 handle_t *handle; 3504 loff_t new_size; 3505 unsigned int max_blocks; 3506 int ret = 0; 3507 int ret2 = 0; 3508 int retries = 0; 3509 struct ext4_map_blocks map; 3510 unsigned int credits, blkbits = inode->i_blkbits; 3511 3512 /* We only support the FALLOC_FL_KEEP_SIZE mode */ 3513 if (mode & ~FALLOC_FL_KEEP_SIZE) 3514 return -EOPNOTSUPP; 3515 3516 /* 3517 * currently supporting (pre)allocate mode for extent-based 3518 * files _only_ 3519 */ 3520 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) 3521 return -EOPNOTSUPP; 3522 3523 trace_ext4_fallocate_enter(inode, offset, len, mode); 3524 map.m_lblk = offset >> blkbits; 3525 /* 3526 * We can't just convert len to max_blocks because 3527 * If blocksize = 4096 offset = 3072 and len = 2048 3528 */ 3529 max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) 3530 - map.m_lblk; 3531 /* 3532 * credits to insert 1 extent into extent tree 3533 */ 3534 credits = ext4_chunk_trans_blocks(inode, max_blocks); 3535 mutex_lock(&inode->i_mutex); 3536 ret = inode_newsize_ok(inode, (len + offset)); 3537 if (ret) { 3538 mutex_unlock(&inode->i_mutex); 3539 trace_ext4_fallocate_exit(inode, offset, max_blocks, ret); 3540 return ret; 3541 } 3542 retry: 3543 while (ret >= 0 && ret < max_blocks) { 3544 map.m_lblk = map.m_lblk + ret; 3545 map.m_len = max_blocks = max_blocks - ret; 3546 handle = ext4_journal_start(inode, credits); 3547 if (IS_ERR(handle)) { 3548 ret = PTR_ERR(handle); 3549 break; 3550 } 3551 ret = ext4_map_blocks(handle, inode, &map, 3552 EXT4_GET_BLOCKS_CREATE_UNINIT_EXT); 3553 if (ret <= 0) { 3554 #ifdef EXT4FS_DEBUG 3555 WARN_ON(ret <= 0); 3556 printk(KERN_ERR "%s: ext4_ext_map_blocks " 3557 "returned error inode#%lu, block=%u, " 3558 "max_blocks=%u", __func__, 3559 inode->i_ino, map.m_lblk, max_blocks); 3560 #endif 3561 ext4_mark_inode_dirty(handle, inode); 3562 ret2 = ext4_journal_stop(handle); 3563 break; 3564 } 3565 if ((map.m_lblk + ret) >= (EXT4_BLOCK_ALIGN(offset + len, 3566 blkbits) >> blkbits)) 3567 new_size = offset + len; 3568 else 3569 new_size = (map.m_lblk + ret) << blkbits; 3570 3571 ext4_falloc_update_inode(inode, mode, new_size, 3572 (map.m_flags & EXT4_MAP_NEW)); 3573 ext4_mark_inode_dirty(handle, inode); 3574 ret2 = ext4_journal_stop(handle); 3575 if (ret2) 3576 break; 3577 } 3578 if (ret == -ENOSPC && 3579 ext4_should_retry_alloc(inode->i_sb, &retries)) { 3580 ret = 0; 3581 goto retry; 3582 } 3583 mutex_unlock(&inode->i_mutex); 3584 trace_ext4_fallocate_exit(inode, offset, max_blocks, 3585 ret > 0 ? ret2 : ret); 3586 return ret > 0 ? ret2 : ret; 3587 } 3588 3589 /* 3590 * This function convert a range of blocks to written extents 3591 * The caller of this function will pass the start offset and the size. 3592 * all unwritten extents within this range will be converted to 3593 * written extents. 3594 * 3595 * This function is called from the direct IO end io call back 3596 * function, to convert the fallocated extents after IO is completed. 3597 * Returns 0 on success. 
3598 */ 3599 int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, 3600 ssize_t len) 3601 { 3602 handle_t *handle; 3603 unsigned int max_blocks; 3604 int ret = 0; 3605 int ret2 = 0; 3606 struct ext4_map_blocks map; 3607 unsigned int credits, blkbits = inode->i_blkbits; 3608 3609 map.m_lblk = offset >> blkbits; 3610 /* 3611 * We can't just convert len to max_blocks because 3612 * If blocksize = 4096 offset = 3072 and len = 2048 3613 */ 3614 max_blocks = ((EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) - 3615 map.m_lblk); 3616 /* 3617 * credits to insert 1 extent into extent tree 3618 */ 3619 credits = ext4_chunk_trans_blocks(inode, max_blocks); 3620 while (ret >= 0 && ret < max_blocks) { 3621 map.m_lblk += ret; 3622 map.m_len = (max_blocks -= ret); 3623 handle = ext4_journal_start(inode, credits); 3624 if (IS_ERR(handle)) { 3625 ret = PTR_ERR(handle); 3626 break; 3627 } 3628 ret = ext4_map_blocks(handle, inode, &map, 3629 EXT4_GET_BLOCKS_IO_CONVERT_EXT); 3630 if (ret <= 0) { 3631 WARN_ON(ret <= 0); 3632 printk(KERN_ERR "%s: ext4_ext_map_blocks " 3633 "returned error inode#%lu, block=%u, " 3634 "max_blocks=%u", __func__, 3635 inode->i_ino, map.m_lblk, map.m_len); 3636 } 3637 ext4_mark_inode_dirty(handle, inode); 3638 ret2 = ext4_journal_stop(handle); 3639 if (ret <= 0 || ret2 ) 3640 break; 3641 } 3642 return ret > 0 ? ret2 : ret; 3643 } 3644 3645 /* 3646 * Callback function called for each extent to gather FIEMAP information. 3647 */ 3648 static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path, 3649 struct ext4_ext_cache *newex, struct ext4_extent *ex, 3650 void *data) 3651 { 3652 __u64 logical; 3653 __u64 physical; 3654 __u64 length; 3655 loff_t size; 3656 __u32 flags = 0; 3657 int ret = 0; 3658 struct fiemap_extent_info *fieinfo = data; 3659 unsigned char blksize_bits; 3660 3661 blksize_bits = inode->i_sb->s_blocksize_bits; 3662 logical = (__u64)newex->ec_block << blksize_bits; 3663 3664 if (newex->ec_start == 0) { 3665 /* 3666 * No extent in extent-tree contains block @newex->ec_start, 3667 * then the block may stay in 1)a hole or 2)delayed-extent. 3668 * 3669 * Holes or delayed-extents are processed as follows. 3670 * 1. lookup dirty pages with specified range in pagecache. 3671 * If no page is got, then there is no delayed-extent and 3672 * return with EXT_CONTINUE. 3673 * 2. find the 1st mapped buffer, 3674 * 3. check if the mapped buffer is both in the request range 3675 * and a delayed buffer. If not, there is no delayed-extent, 3676 * then return. 3677 * 4. a delayed-extent is found, the extent will be collected. 3678 */ 3679 ext4_lblk_t end = 0; 3680 pgoff_t last_offset; 3681 pgoff_t offset; 3682 pgoff_t index; 3683 struct page **pages = NULL; 3684 struct buffer_head *bh = NULL; 3685 struct buffer_head *head = NULL; 3686 unsigned int nr_pages = PAGE_SIZE / sizeof(struct page *); 3687 3688 pages = kmalloc(PAGE_SIZE, GFP_KERNEL); 3689 if (pages == NULL) 3690 return -ENOMEM; 3691 3692 offset = logical >> PAGE_SHIFT; 3693 repeat: 3694 last_offset = offset; 3695 head = NULL; 3696 ret = find_get_pages_tag(inode->i_mapping, &offset, 3697 PAGECACHE_TAG_DIRTY, nr_pages, pages); 3698 3699 if (!(flags & FIEMAP_EXTENT_DELALLOC)) { 3700 /* First time, try to find a mapped buffer. */ 3701 if (ret == 0) { 3702 out: 3703 for (index = 0; index < ret; index++) 3704 page_cache_release(pages[index]); 3705 /* just a hole. */ 3706 kfree(pages); 3707 return EXT_CONTINUE; 3708 } 3709 3710 /* Try to find the 1st mapped buffer. 
*/ 3711 end = ((__u64)pages[0]->index << PAGE_SHIFT) >> 3712 blksize_bits; 3713 if (!page_has_buffers(pages[0])) 3714 goto out; 3715 head = page_buffers(pages[0]); 3716 if (!head) 3717 goto out; 3718 3719 bh = head; 3720 do { 3721 if (buffer_mapped(bh)) { 3722 /* get the 1st mapped buffer. */ 3723 if (end > newex->ec_block + 3724 newex->ec_len) 3725 /* The buffer is out of 3726 * the request range. 3727 */ 3728 goto out; 3729 goto found_mapped_buffer; 3730 } 3731 bh = bh->b_this_page; 3732 end++; 3733 } while (bh != head); 3734 3735 /* No mapped buffer found. */ 3736 goto out; 3737 } else { 3738 /*Find contiguous delayed buffers. */ 3739 if (ret > 0 && pages[0]->index == last_offset) 3740 head = page_buffers(pages[0]); 3741 bh = head; 3742 } 3743 3744 found_mapped_buffer: 3745 if (bh != NULL && buffer_delay(bh)) { 3746 /* 1st or contiguous delayed buffer found. */ 3747 if (!(flags & FIEMAP_EXTENT_DELALLOC)) { 3748 /* 3749 * 1st delayed buffer found, record 3750 * the start of extent. 3751 */ 3752 flags |= FIEMAP_EXTENT_DELALLOC; 3753 newex->ec_block = end; 3754 logical = (__u64)end << blksize_bits; 3755 } 3756 /* Find contiguous delayed buffers. */ 3757 do { 3758 if (!buffer_delay(bh)) 3759 goto found_delayed_extent; 3760 bh = bh->b_this_page; 3761 end++; 3762 } while (bh != head); 3763 3764 for (index = 1; index < ret; index++) { 3765 if (!page_has_buffers(pages[index])) { 3766 bh = NULL; 3767 break; 3768 } 3769 head = page_buffers(pages[index]); 3770 if (!head) { 3771 bh = NULL; 3772 break; 3773 } 3774 if (pages[index]->index != 3775 pages[0]->index + index) { 3776 /* Blocks are not contiguous. */ 3777 bh = NULL; 3778 break; 3779 } 3780 bh = head; 3781 do { 3782 if (!buffer_delay(bh)) 3783 /* Delayed-extent ends. */ 3784 goto found_delayed_extent; 3785 bh = bh->b_this_page; 3786 end++; 3787 } while (bh != head); 3788 } 3789 } else if (!(flags & FIEMAP_EXTENT_DELALLOC)) 3790 /* a hole found. */ 3791 goto out; 3792 3793 found_delayed_extent: 3794 newex->ec_len = min(end - newex->ec_block, 3795 (ext4_lblk_t)EXT_INIT_MAX_LEN); 3796 if (ret == nr_pages && bh != NULL && 3797 newex->ec_len < EXT_INIT_MAX_LEN && 3798 buffer_delay(bh)) { 3799 /* Have not collected an extent and continue. */ 3800 for (index = 0; index < ret; index++) 3801 page_cache_release(pages[index]); 3802 goto repeat; 3803 } 3804 3805 for (index = 0; index < ret; index++) 3806 page_cache_release(pages[index]); 3807 kfree(pages); 3808 } 3809 3810 physical = (__u64)newex->ec_start << blksize_bits; 3811 length = (__u64)newex->ec_len << blksize_bits; 3812 3813 if (ex && ext4_ext_is_uninitialized(ex)) 3814 flags |= FIEMAP_EXTENT_UNWRITTEN; 3815 3816 size = i_size_read(inode); 3817 if (logical + length >= size) 3818 flags |= FIEMAP_EXTENT_LAST; 3819 3820 ret = fiemap_fill_next_extent(fieinfo, logical, physical, 3821 length, flags); 3822 if (ret < 0) 3823 return ret; 3824 if (ret == 1) 3825 return EXT_BREAK; 3826 return EXT_CONTINUE; 3827 } 3828 3829 /* fiemap flags we can handle specified here */ 3830 #define EXT4_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR) 3831 3832 static int ext4_xattr_fiemap(struct inode *inode, 3833 struct fiemap_extent_info *fieinfo) 3834 { 3835 __u64 physical = 0; 3836 __u64 length; 3837 __u32 flags = FIEMAP_EXTENT_LAST; 3838 int blockbits = inode->i_sb->s_blocksize_bits; 3839 int error = 0; 3840 3841 /* in-inode? 
*/ 3842 if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) { 3843 struct ext4_iloc iloc; 3844 int offset; /* offset of xattr in inode */ 3845 3846 error = ext4_get_inode_loc(inode, &iloc); 3847 if (error) 3848 return error; 3849 physical = iloc.bh->b_blocknr << blockbits; 3850 offset = EXT4_GOOD_OLD_INODE_SIZE + 3851 EXT4_I(inode)->i_extra_isize; 3852 physical += offset; 3853 length = EXT4_SB(inode->i_sb)->s_inode_size - offset; 3854 flags |= FIEMAP_EXTENT_DATA_INLINE; 3855 brelse(iloc.bh); 3856 } else { /* external block */ 3857 physical = EXT4_I(inode)->i_file_acl << blockbits; 3858 length = inode->i_sb->s_blocksize; 3859 } 3860 3861 if (physical) 3862 error = fiemap_fill_next_extent(fieinfo, 0, physical, 3863 length, flags); 3864 return (error < 0 ? error : 0); 3865 } 3866 3867 int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 3868 __u64 start, __u64 len) 3869 { 3870 ext4_lblk_t start_blk; 3871 int error = 0; 3872 3873 /* fallback to generic here if not in extents fmt */ 3874 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) 3875 return generic_block_fiemap(inode, fieinfo, start, len, 3876 ext4_get_block); 3877 3878 if (fiemap_check_flags(fieinfo, EXT4_FIEMAP_FLAGS)) 3879 return -EBADR; 3880 3881 if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) { 3882 error = ext4_xattr_fiemap(inode, fieinfo); 3883 } else { 3884 ext4_lblk_t len_blks; 3885 __u64 last_blk; 3886 3887 start_blk = start >> inode->i_sb->s_blocksize_bits; 3888 last_blk = (start + len - 1) >> inode->i_sb->s_blocksize_bits; 3889 if (last_blk >= EXT_MAX_BLOCK) 3890 last_blk = EXT_MAX_BLOCK-1; 3891 len_blks = ((ext4_lblk_t) last_blk) - start_blk + 1; 3892 3893 /* 3894 * Walk the extent tree gathering extent information. 3895 * ext4_ext_fiemap_cb will push extents back to user. 3896 */ 3897 error = ext4_ext_walk_space(inode, start_blk, len_blks, 3898 ext4_ext_fiemap_cb, fieinfo); 3899 } 3900 3901 return error; 3902 } 3903 3904
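/*
 * For reference, a minimal userspace sketch of driving the FIEMAP path
 * above (assumes <sys/ioctl.h>, <linux/fs.h> and <linux/fiemap.h>):
 *
 *	struct fiemap *fm = calloc(1, sizeof(*fm) +
 *				   32 * sizeof(struct fiemap_extent));
 *	fm->fm_start = 0;
 *	fm->fm_length = FIEMAP_MAX_OFFSET;
 *	fm->fm_extent_count = 32;
 *	if (ioctl(fd, FS_IOC_FIEMAP, fm) == 0)
 *		printf("%u extents mapped\n", fm->fm_mapped_extents);
 */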