1 /* 2 * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com 3 * Written by Alex Tomas <alex@clusterfs.com> 4 * 5 * Architecture independence: 6 * Copyright (c) 2005, Bull S.A. 7 * Written by Pierre Peiffer <pierre.peiffer@bull.net> 8 * 9 * This program is free software; you can redistribute it and/or modify 10 * it under the terms of the GNU General Public License version 2 as 11 * published by the Free Software Foundation. 12 * 13 * This program is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 * GNU General Public License for more details. 17 * 18 * You should have received a copy of the GNU General Public Licens 19 * along with this program; if not, write to the Free Software 20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- 21 */ 22 23 /* 24 * Extents support for EXT4 25 * 26 * TODO: 27 * - ext4*_error() should be used in some situations 28 * - analyze all BUG()/BUG_ON(), use -EIO where appropriate 29 * - smart tree reduction 30 */ 31 32 #include <linux/module.h> 33 #include <linux/fs.h> 34 #include <linux/time.h> 35 #include <linux/jbd2.h> 36 #include <linux/highuid.h> 37 #include <linux/pagemap.h> 38 #include <linux/quotaops.h> 39 #include <linux/string.h> 40 #include <linux/slab.h> 41 #include <linux/falloc.h> 42 #include <asm/uaccess.h> 43 #include <linux/fiemap.h> 44 #include "ext4_jbd2.h" 45 #include "ext4_extents.h" 46 47 static int ext4_ext_truncate_extend_restart(handle_t *handle, 48 struct inode *inode, 49 int needed) 50 { 51 int err; 52 53 if (!ext4_handle_valid(handle)) 54 return 0; 55 if (handle->h_buffer_credits > needed) 56 return 0; 57 err = ext4_journal_extend(handle, needed); 58 if (err <= 0) 59 return err; 60 err = ext4_truncate_restart_trans(handle, inode, needed); 61 if (err == 0) 62 err = -EAGAIN; 63 64 return err; 65 } 66 67 /* 68 * could return: 69 * - EROFS 70 * 
- ENOMEM 71 */ 72 static int ext4_ext_get_access(handle_t *handle, struct inode *inode, 73 struct ext4_ext_path *path) 74 { 75 if (path->p_bh) { 76 /* path points to block */ 77 return ext4_journal_get_write_access(handle, path->p_bh); 78 } 79 /* path points to leaf/index in inode body */ 80 /* we use in-core data, no need to protect them */ 81 return 0; 82 } 83 84 /* 85 * could return: 86 * - EROFS 87 * - ENOMEM 88 * - EIO 89 */ 90 static int ext4_ext_dirty(handle_t *handle, struct inode *inode, 91 struct ext4_ext_path *path) 92 { 93 int err; 94 if (path->p_bh) { 95 /* path points to block */ 96 err = ext4_handle_dirty_metadata(handle, inode, path->p_bh); 97 } else { 98 /* path points to leaf/index in inode body */ 99 err = ext4_mark_inode_dirty(handle, inode); 100 } 101 return err; 102 } 103 104 static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode, 105 struct ext4_ext_path *path, 106 ext4_lblk_t block) 107 { 108 struct ext4_inode_info *ei = EXT4_I(inode); 109 ext4_fsblk_t bg_start; 110 ext4_fsblk_t last_block; 111 ext4_grpblk_t colour; 112 ext4_group_t block_group; 113 int flex_size = ext4_flex_bg_size(EXT4_SB(inode->i_sb)); 114 int depth; 115 116 if (path) { 117 struct ext4_extent *ex; 118 depth = path->p_depth; 119 120 /* 121 * Try to predict block placement assuming that we are 122 * filling in a file which will eventually be 123 * non-sparse --- i.e., in the case of libbfd writing 124 * an ELF object sections out-of-order but in a way 125 * the eventually results in a contiguous object or 126 * executable file, or some database extending a table 127 * space file. However, this is actually somewhat 128 * non-ideal if we are writing a sparse file such as 129 * qemu or KVM writing a raw image file that is going 130 * to stay fairly sparse, since it will end up 131 * fragmenting the file system's free space. 
Maybe we 132 * should have some hueristics or some way to allow 133 * userspace to pass a hint to file system, 134 * especiially if the latter case turns out to be 135 * common. 136 */ 137 ex = path[depth].p_ext; 138 if (ex) { 139 ext4_fsblk_t ext_pblk = ext4_ext_pblock(ex); 140 ext4_lblk_t ext_block = le32_to_cpu(ex->ee_block); 141 142 if (block > ext_block) 143 return ext_pblk + (block - ext_block); 144 else 145 return ext_pblk - (ext_block - block); 146 } 147 148 /* it looks like index is empty; 149 * try to find starting block from index itself */ 150 if (path[depth].p_bh) 151 return path[depth].p_bh->b_blocknr; 152 } 153 154 /* OK. use inode's group */ 155 block_group = ei->i_block_group; 156 if (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) { 157 /* 158 * If there are at least EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME 159 * block groups per flexgroup, reserve the first block 160 * group for directories and special files. Regular 161 * files will start at the second block group. This 162 * tends to speed up directory access and improves 163 * fsck times. 164 */ 165 block_group &= ~(flex_size-1); 166 if (S_ISREG(inode->i_mode)) 167 block_group++; 168 } 169 bg_start = ext4_group_first_block_no(inode->i_sb, block_group); 170 last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1; 171 172 /* 173 * If we are doing delayed allocation, we don't need take 174 * colour into account. 
175 */ 176 if (test_opt(inode->i_sb, DELALLOC)) 177 return bg_start; 178 179 if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block) 180 colour = (current->pid % 16) * 181 (EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16); 182 else 183 colour = (current->pid % 16) * ((last_block - bg_start) / 16); 184 return bg_start + colour + block; 185 } 186 187 /* 188 * Allocation for a meta data block 189 */ 190 static ext4_fsblk_t 191 ext4_ext_new_meta_block(handle_t *handle, struct inode *inode, 192 struct ext4_ext_path *path, 193 struct ext4_extent *ex, int *err) 194 { 195 ext4_fsblk_t goal, newblock; 196 197 goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block)); 198 newblock = ext4_new_meta_blocks(handle, inode, goal, NULL, err); 199 return newblock; 200 } 201 202 static inline int ext4_ext_space_block(struct inode *inode, int check) 203 { 204 int size; 205 206 size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) 207 / sizeof(struct ext4_extent); 208 if (!check) { 209 #ifdef AGGRESSIVE_TEST 210 if (size > 6) 211 size = 6; 212 #endif 213 } 214 return size; 215 } 216 217 static inline int ext4_ext_space_block_idx(struct inode *inode, int check) 218 { 219 int size; 220 221 size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) 222 / sizeof(struct ext4_extent_idx); 223 if (!check) { 224 #ifdef AGGRESSIVE_TEST 225 if (size > 5) 226 size = 5; 227 #endif 228 } 229 return size; 230 } 231 232 static inline int ext4_ext_space_root(struct inode *inode, int check) 233 { 234 int size; 235 236 size = sizeof(EXT4_I(inode)->i_data); 237 size -= sizeof(struct ext4_extent_header); 238 size /= sizeof(struct ext4_extent); 239 if (!check) { 240 #ifdef AGGRESSIVE_TEST 241 if (size > 3) 242 size = 3; 243 #endif 244 } 245 return size; 246 } 247 248 static inline int ext4_ext_space_root_idx(struct inode *inode, int check) 249 { 250 int size; 251 252 size = sizeof(EXT4_I(inode)->i_data); 253 size -= sizeof(struct ext4_extent_header); 254 size /= 
sizeof(struct ext4_extent_idx); 255 if (!check) { 256 #ifdef AGGRESSIVE_TEST 257 if (size > 4) 258 size = 4; 259 #endif 260 } 261 return size; 262 } 263 264 /* 265 * Calculate the number of metadata blocks needed 266 * to allocate @blocks 267 * Worse case is one block per extent 268 */ 269 int ext4_ext_calc_metadata_amount(struct inode *inode, ext4_lblk_t lblock) 270 { 271 struct ext4_inode_info *ei = EXT4_I(inode); 272 int idxs, num = 0; 273 274 idxs = ((inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) 275 / sizeof(struct ext4_extent_idx)); 276 277 /* 278 * If the new delayed allocation block is contiguous with the 279 * previous da block, it can share index blocks with the 280 * previous block, so we only need to allocate a new index 281 * block every idxs leaf blocks. At ldxs**2 blocks, we need 282 * an additional index block, and at ldxs**3 blocks, yet 283 * another index blocks. 284 */ 285 if (ei->i_da_metadata_calc_len && 286 ei->i_da_metadata_calc_last_lblock+1 == lblock) { 287 if ((ei->i_da_metadata_calc_len % idxs) == 0) 288 num++; 289 if ((ei->i_da_metadata_calc_len % (idxs*idxs)) == 0) 290 num++; 291 if ((ei->i_da_metadata_calc_len % (idxs*idxs*idxs)) == 0) { 292 num++; 293 ei->i_da_metadata_calc_len = 0; 294 } else 295 ei->i_da_metadata_calc_len++; 296 ei->i_da_metadata_calc_last_lblock++; 297 return num; 298 } 299 300 /* 301 * In the worst case we need a new set of index blocks at 302 * every level of the inode's extent tree. 
303 */ 304 ei->i_da_metadata_calc_len = 1; 305 ei->i_da_metadata_calc_last_lblock = lblock; 306 return ext_depth(inode) + 1; 307 } 308 309 static int 310 ext4_ext_max_entries(struct inode *inode, int depth) 311 { 312 int max; 313 314 if (depth == ext_depth(inode)) { 315 if (depth == 0) 316 max = ext4_ext_space_root(inode, 1); 317 else 318 max = ext4_ext_space_root_idx(inode, 1); 319 } else { 320 if (depth == 0) 321 max = ext4_ext_space_block(inode, 1); 322 else 323 max = ext4_ext_space_block_idx(inode, 1); 324 } 325 326 return max; 327 } 328 329 static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext) 330 { 331 ext4_fsblk_t block = ext4_ext_pblock(ext); 332 int len = ext4_ext_get_actual_len(ext); 333 334 return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len); 335 } 336 337 static int ext4_valid_extent_idx(struct inode *inode, 338 struct ext4_extent_idx *ext_idx) 339 { 340 ext4_fsblk_t block = ext4_idx_pblock(ext_idx); 341 342 return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, 1); 343 } 344 345 static int ext4_valid_extent_entries(struct inode *inode, 346 struct ext4_extent_header *eh, 347 int depth) 348 { 349 struct ext4_extent *ext; 350 struct ext4_extent_idx *ext_idx; 351 unsigned short entries; 352 if (eh->eh_entries == 0) 353 return 1; 354 355 entries = le16_to_cpu(eh->eh_entries); 356 357 if (depth == 0) { 358 /* leaf entries */ 359 ext = EXT_FIRST_EXTENT(eh); 360 while (entries) { 361 if (!ext4_valid_extent(inode, ext)) 362 return 0; 363 ext++; 364 entries--; 365 } 366 } else { 367 ext_idx = EXT_FIRST_INDEX(eh); 368 while (entries) { 369 if (!ext4_valid_extent_idx(inode, ext_idx)) 370 return 0; 371 ext_idx++; 372 entries--; 373 } 374 } 375 return 1; 376 } 377 378 static int __ext4_ext_check(const char *function, unsigned int line, 379 struct inode *inode, struct ext4_extent_header *eh, 380 int depth) 381 { 382 const char *error_msg; 383 int max = 0; 384 385 if (unlikely(eh->eh_magic != EXT4_EXT_MAGIC)) { 386 error_msg = 
"invalid magic"; 387 goto corrupted; 388 } 389 if (unlikely(le16_to_cpu(eh->eh_depth) != depth)) { 390 error_msg = "unexpected eh_depth"; 391 goto corrupted; 392 } 393 if (unlikely(eh->eh_max == 0)) { 394 error_msg = "invalid eh_max"; 395 goto corrupted; 396 } 397 max = ext4_ext_max_entries(inode, depth); 398 if (unlikely(le16_to_cpu(eh->eh_max) > max)) { 399 error_msg = "too large eh_max"; 400 goto corrupted; 401 } 402 if (unlikely(le16_to_cpu(eh->eh_entries) > le16_to_cpu(eh->eh_max))) { 403 error_msg = "invalid eh_entries"; 404 goto corrupted; 405 } 406 if (!ext4_valid_extent_entries(inode, eh, depth)) { 407 error_msg = "invalid extent entries"; 408 goto corrupted; 409 } 410 return 0; 411 412 corrupted: 413 ext4_error_inode(inode, function, line, 0, 414 "bad header/extent: %s - magic %x, " 415 "entries %u, max %u(%u), depth %u(%u)", 416 error_msg, le16_to_cpu(eh->eh_magic), 417 le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max), 418 max, le16_to_cpu(eh->eh_depth), depth); 419 420 return -EIO; 421 } 422 423 #define ext4_ext_check(inode, eh, depth) \ 424 __ext4_ext_check(__func__, __LINE__, inode, eh, depth) 425 426 int ext4_ext_check_inode(struct inode *inode) 427 { 428 return ext4_ext_check(inode, ext_inode_hdr(inode), ext_depth(inode)); 429 } 430 431 #ifdef EXT_DEBUG 432 static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path) 433 { 434 int k, l = path->p_depth; 435 436 ext_debug("path:"); 437 for (k = 0; k <= l; k++, path++) { 438 if (path->p_idx) { 439 ext_debug(" %d->%llu", le32_to_cpu(path->p_idx->ei_block), 440 ext4_idx_pblock(path->p_idx)); 441 } else if (path->p_ext) { 442 ext_debug(" %d:[%d]%d:%llu ", 443 le32_to_cpu(path->p_ext->ee_block), 444 ext4_ext_is_uninitialized(path->p_ext), 445 ext4_ext_get_actual_len(path->p_ext), 446 ext4_ext_pblock(path->p_ext)); 447 } else 448 ext_debug(" []"); 449 } 450 ext_debug("\n"); 451 } 452 453 static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path) 454 { 455 int 
depth = ext_depth(inode); 456 struct ext4_extent_header *eh; 457 struct ext4_extent *ex; 458 int i; 459 460 if (!path) 461 return; 462 463 eh = path[depth].p_hdr; 464 ex = EXT_FIRST_EXTENT(eh); 465 466 ext_debug("Displaying leaf extents for inode %lu\n", inode->i_ino); 467 468 for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) { 469 ext_debug("%d:[%d]%d:%llu ", le32_to_cpu(ex->ee_block), 470 ext4_ext_is_uninitialized(ex), 471 ext4_ext_get_actual_len(ex), ext4_ext_pblock(ex)); 472 } 473 ext_debug("\n"); 474 } 475 #else 476 #define ext4_ext_show_path(inode, path) 477 #define ext4_ext_show_leaf(inode, path) 478 #endif 479 480 void ext4_ext_drop_refs(struct ext4_ext_path *path) 481 { 482 int depth = path->p_depth; 483 int i; 484 485 for (i = 0; i <= depth; i++, path++) 486 if (path->p_bh) { 487 brelse(path->p_bh); 488 path->p_bh = NULL; 489 } 490 } 491 492 /* 493 * ext4_ext_binsearch_idx: 494 * binary search for the closest index of the given block 495 * the header must be checked before calling this 496 */ 497 static void 498 ext4_ext_binsearch_idx(struct inode *inode, 499 struct ext4_ext_path *path, ext4_lblk_t block) 500 { 501 struct ext4_extent_header *eh = path->p_hdr; 502 struct ext4_extent_idx *r, *l, *m; 503 504 505 ext_debug("binsearch for %u(idx): ", block); 506 507 l = EXT_FIRST_INDEX(eh) + 1; 508 r = EXT_LAST_INDEX(eh); 509 while (l <= r) { 510 m = l + (r - l) / 2; 511 if (block < le32_to_cpu(m->ei_block)) 512 r = m - 1; 513 else 514 l = m + 1; 515 ext_debug("%p(%u):%p(%u):%p(%u) ", l, le32_to_cpu(l->ei_block), 516 m, le32_to_cpu(m->ei_block), 517 r, le32_to_cpu(r->ei_block)); 518 } 519 520 path->p_idx = l - 1; 521 ext_debug(" -> %d->%lld ", le32_to_cpu(path->p_idx->ei_block), 522 ext4_idx_pblock(path->p_idx)); 523 524 #ifdef CHECK_BINSEARCH 525 { 526 struct ext4_extent_idx *chix, *ix; 527 int k; 528 529 chix = ix = EXT_FIRST_INDEX(eh); 530 for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ix++) { 531 if (k != 0 && 532 le32_to_cpu(ix->ei_block) <= 
le32_to_cpu(ix[-1].ei_block)) { 533 printk(KERN_DEBUG "k=%d, ix=0x%p, " 534 "first=0x%p\n", k, 535 ix, EXT_FIRST_INDEX(eh)); 536 printk(KERN_DEBUG "%u <= %u\n", 537 le32_to_cpu(ix->ei_block), 538 le32_to_cpu(ix[-1].ei_block)); 539 } 540 BUG_ON(k && le32_to_cpu(ix->ei_block) 541 <= le32_to_cpu(ix[-1].ei_block)); 542 if (block < le32_to_cpu(ix->ei_block)) 543 break; 544 chix = ix; 545 } 546 BUG_ON(chix != path->p_idx); 547 } 548 #endif 549 550 } 551 552 /* 553 * ext4_ext_binsearch: 554 * binary search for closest extent of the given block 555 * the header must be checked before calling this 556 */ 557 static void 558 ext4_ext_binsearch(struct inode *inode, 559 struct ext4_ext_path *path, ext4_lblk_t block) 560 { 561 struct ext4_extent_header *eh = path->p_hdr; 562 struct ext4_extent *r, *l, *m; 563 564 if (eh->eh_entries == 0) { 565 /* 566 * this leaf is empty: 567 * we get such a leaf in split/add case 568 */ 569 return; 570 } 571 572 ext_debug("binsearch for %u: ", block); 573 574 l = EXT_FIRST_EXTENT(eh) + 1; 575 r = EXT_LAST_EXTENT(eh); 576 577 while (l <= r) { 578 m = l + (r - l) / 2; 579 if (block < le32_to_cpu(m->ee_block)) 580 r = m - 1; 581 else 582 l = m + 1; 583 ext_debug("%p(%u):%p(%u):%p(%u) ", l, le32_to_cpu(l->ee_block), 584 m, le32_to_cpu(m->ee_block), 585 r, le32_to_cpu(r->ee_block)); 586 } 587 588 path->p_ext = l - 1; 589 ext_debug(" -> %d:%llu:[%d]%d ", 590 le32_to_cpu(path->p_ext->ee_block), 591 ext4_ext_pblock(path->p_ext), 592 ext4_ext_is_uninitialized(path->p_ext), 593 ext4_ext_get_actual_len(path->p_ext)); 594 595 #ifdef CHECK_BINSEARCH 596 { 597 struct ext4_extent *chex, *ex; 598 int k; 599 600 chex = ex = EXT_FIRST_EXTENT(eh); 601 for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ex++) { 602 BUG_ON(k && le32_to_cpu(ex->ee_block) 603 <= le32_to_cpu(ex[-1].ee_block)); 604 if (block < le32_to_cpu(ex->ee_block)) 605 break; 606 chex = ex; 607 } 608 BUG_ON(chex != path->p_ext); 609 } 610 #endif 611 612 } 613 614 int ext4_ext_tree_init(handle_t 
*handle, struct inode *inode) 615 { 616 struct ext4_extent_header *eh; 617 618 eh = ext_inode_hdr(inode); 619 eh->eh_depth = 0; 620 eh->eh_entries = 0; 621 eh->eh_magic = EXT4_EXT_MAGIC; 622 eh->eh_max = cpu_to_le16(ext4_ext_space_root(inode, 0)); 623 ext4_mark_inode_dirty(handle, inode); 624 ext4_ext_invalidate_cache(inode); 625 return 0; 626 } 627 628 struct ext4_ext_path * 629 ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block, 630 struct ext4_ext_path *path) 631 { 632 struct ext4_extent_header *eh; 633 struct buffer_head *bh; 634 short int depth, i, ppos = 0, alloc = 0; 635 636 eh = ext_inode_hdr(inode); 637 depth = ext_depth(inode); 638 639 /* account possible depth increase */ 640 if (!path) { 641 path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 2), 642 GFP_NOFS); 643 if (!path) 644 return ERR_PTR(-ENOMEM); 645 alloc = 1; 646 } 647 path[0].p_hdr = eh; 648 path[0].p_bh = NULL; 649 650 i = depth; 651 /* walk through the tree */ 652 while (i) { 653 int need_to_validate = 0; 654 655 ext_debug("depth %d: num %d, max %d\n", 656 ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max)); 657 658 ext4_ext_binsearch_idx(inode, path + ppos, block); 659 path[ppos].p_block = ext4_idx_pblock(path[ppos].p_idx); 660 path[ppos].p_depth = i; 661 path[ppos].p_ext = NULL; 662 663 bh = sb_getblk(inode->i_sb, path[ppos].p_block); 664 if (unlikely(!bh)) 665 goto err; 666 if (!bh_uptodate_or_lock(bh)) { 667 if (bh_submit_read(bh) < 0) { 668 put_bh(bh); 669 goto err; 670 } 671 /* validate the extent entries */ 672 need_to_validate = 1; 673 } 674 eh = ext_block_hdr(bh); 675 ppos++; 676 if (unlikely(ppos > depth)) { 677 put_bh(bh); 678 EXT4_ERROR_INODE(inode, 679 "ppos %d > depth %d", ppos, depth); 680 goto err; 681 } 682 path[ppos].p_bh = bh; 683 path[ppos].p_hdr = eh; 684 i--; 685 686 if (need_to_validate && ext4_ext_check(inode, eh, i)) 687 goto err; 688 } 689 690 path[ppos].p_depth = i; 691 path[ppos].p_ext = NULL; 692 path[ppos].p_idx = NULL; 693 694 /* find 
extent */ 695 ext4_ext_binsearch(inode, path + ppos, block); 696 /* if not an empty leaf */ 697 if (path[ppos].p_ext) 698 path[ppos].p_block = ext4_ext_pblock(path[ppos].p_ext); 699 700 ext4_ext_show_path(inode, path); 701 702 return path; 703 704 err: 705 ext4_ext_drop_refs(path); 706 if (alloc) 707 kfree(path); 708 return ERR_PTR(-EIO); 709 } 710 711 /* 712 * ext4_ext_insert_index: 713 * insert new index [@logical;@ptr] into the block at @curp; 714 * check where to insert: before @curp or after @curp 715 */ 716 static int ext4_ext_insert_index(handle_t *handle, struct inode *inode, 717 struct ext4_ext_path *curp, 718 int logical, ext4_fsblk_t ptr) 719 { 720 struct ext4_extent_idx *ix; 721 int len, err; 722 723 err = ext4_ext_get_access(handle, inode, curp); 724 if (err) 725 return err; 726 727 if (unlikely(logical == le32_to_cpu(curp->p_idx->ei_block))) { 728 EXT4_ERROR_INODE(inode, 729 "logical %d == ei_block %d!", 730 logical, le32_to_cpu(curp->p_idx->ei_block)); 731 return -EIO; 732 } 733 len = EXT_MAX_INDEX(curp->p_hdr) - curp->p_idx; 734 if (logical > le32_to_cpu(curp->p_idx->ei_block)) { 735 /* insert after */ 736 if (curp->p_idx != EXT_LAST_INDEX(curp->p_hdr)) { 737 len = (len - 1) * sizeof(struct ext4_extent_idx); 738 len = len < 0 ? 0 : len; 739 ext_debug("insert new index %d after: %llu. " 740 "move %d from 0x%p to 0x%p\n", 741 logical, ptr, len, 742 (curp->p_idx + 1), (curp->p_idx + 2)); 743 memmove(curp->p_idx + 2, curp->p_idx + 1, len); 744 } 745 ix = curp->p_idx + 1; 746 } else { 747 /* insert before */ 748 len = len * sizeof(struct ext4_extent_idx); 749 len = len < 0 ? 0 : len; 750 ext_debug("insert new index %d before: %llu. 
" 751 "move %d from 0x%p to 0x%p\n", 752 logical, ptr, len, 753 curp->p_idx, (curp->p_idx + 1)); 754 memmove(curp->p_idx + 1, curp->p_idx, len); 755 ix = curp->p_idx; 756 } 757 758 ix->ei_block = cpu_to_le32(logical); 759 ext4_idx_store_pblock(ix, ptr); 760 le16_add_cpu(&curp->p_hdr->eh_entries, 1); 761 762 if (unlikely(le16_to_cpu(curp->p_hdr->eh_entries) 763 > le16_to_cpu(curp->p_hdr->eh_max))) { 764 EXT4_ERROR_INODE(inode, 765 "logical %d == ei_block %d!", 766 logical, le32_to_cpu(curp->p_idx->ei_block)); 767 return -EIO; 768 } 769 if (unlikely(ix > EXT_LAST_INDEX(curp->p_hdr))) { 770 EXT4_ERROR_INODE(inode, "ix > EXT_LAST_INDEX!"); 771 return -EIO; 772 } 773 774 err = ext4_ext_dirty(handle, inode, curp); 775 ext4_std_error(inode->i_sb, err); 776 777 return err; 778 } 779 780 /* 781 * ext4_ext_split: 782 * inserts new subtree into the path, using free index entry 783 * at depth @at: 784 * - allocates all needed blocks (new leaf and all intermediate index blocks) 785 * - makes decision where to split 786 * - moves remaining extents and index entries (right to the split point) 787 * into the newly allocated blocks 788 * - initializes subtree 789 */ 790 static int ext4_ext_split(handle_t *handle, struct inode *inode, 791 struct ext4_ext_path *path, 792 struct ext4_extent *newext, int at) 793 { 794 struct buffer_head *bh = NULL; 795 int depth = ext_depth(inode); 796 struct ext4_extent_header *neh; 797 struct ext4_extent_idx *fidx; 798 struct ext4_extent *ex; 799 int i = at, k, m, a; 800 ext4_fsblk_t newblock, oldblock; 801 __le32 border; 802 ext4_fsblk_t *ablocks = NULL; /* array of allocated blocks */ 803 int err = 0; 804 805 /* make decision: where to split? 
*/ 806 /* FIXME: now decision is simplest: at current extent */ 807 808 /* if current leaf will be split, then we should use 809 * border from split point */ 810 if (unlikely(path[depth].p_ext > EXT_MAX_EXTENT(path[depth].p_hdr))) { 811 EXT4_ERROR_INODE(inode, "p_ext > EXT_MAX_EXTENT!"); 812 return -EIO; 813 } 814 if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) { 815 border = path[depth].p_ext[1].ee_block; 816 ext_debug("leaf will be split." 817 " next leaf starts at %d\n", 818 le32_to_cpu(border)); 819 } else { 820 border = newext->ee_block; 821 ext_debug("leaf will be added." 822 " next leaf starts at %d\n", 823 le32_to_cpu(border)); 824 } 825 826 /* 827 * If error occurs, then we break processing 828 * and mark filesystem read-only. index won't 829 * be inserted and tree will be in consistent 830 * state. Next mount will repair buffers too. 831 */ 832 833 /* 834 * Get array to track all allocated blocks. 835 * We need this to handle errors and free blocks 836 * upon them. 
837 */ 838 ablocks = kzalloc(sizeof(ext4_fsblk_t) * depth, GFP_NOFS); 839 if (!ablocks) 840 return -ENOMEM; 841 842 /* allocate all needed blocks */ 843 ext_debug("allocate %d blocks for indexes/leaf\n", depth - at); 844 for (a = 0; a < depth - at; a++) { 845 newblock = ext4_ext_new_meta_block(handle, inode, path, 846 newext, &err); 847 if (newblock == 0) 848 goto cleanup; 849 ablocks[a] = newblock; 850 } 851 852 /* initialize new leaf */ 853 newblock = ablocks[--a]; 854 if (unlikely(newblock == 0)) { 855 EXT4_ERROR_INODE(inode, "newblock == 0!"); 856 err = -EIO; 857 goto cleanup; 858 } 859 bh = sb_getblk(inode->i_sb, newblock); 860 if (!bh) { 861 err = -EIO; 862 goto cleanup; 863 } 864 lock_buffer(bh); 865 866 err = ext4_journal_get_create_access(handle, bh); 867 if (err) 868 goto cleanup; 869 870 neh = ext_block_hdr(bh); 871 neh->eh_entries = 0; 872 neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0)); 873 neh->eh_magic = EXT4_EXT_MAGIC; 874 neh->eh_depth = 0; 875 ex = EXT_FIRST_EXTENT(neh); 876 877 /* move remainder of path[depth] to the new leaf */ 878 if (unlikely(path[depth].p_hdr->eh_entries != 879 path[depth].p_hdr->eh_max)) { 880 EXT4_ERROR_INODE(inode, "eh_entries %d != eh_max %d!", 881 path[depth].p_hdr->eh_entries, 882 path[depth].p_hdr->eh_max); 883 err = -EIO; 884 goto cleanup; 885 } 886 /* start copy from next extent */ 887 /* TODO: we could do it by single memmove */ 888 m = 0; 889 path[depth].p_ext++; 890 while (path[depth].p_ext <= 891 EXT_MAX_EXTENT(path[depth].p_hdr)) { 892 ext_debug("move %d:%llu:[%d]%d in new leaf %llu\n", 893 le32_to_cpu(path[depth].p_ext->ee_block), 894 ext4_ext_pblock(path[depth].p_ext), 895 ext4_ext_is_uninitialized(path[depth].p_ext), 896 ext4_ext_get_actual_len(path[depth].p_ext), 897 newblock); 898 /*memmove(ex++, path[depth].p_ext++, 899 sizeof(struct ext4_extent)); 900 neh->eh_entries++;*/ 901 path[depth].p_ext++; 902 m++; 903 } 904 if (m) { 905 memmove(ex, path[depth].p_ext-m, sizeof(struct ext4_extent)*m); 906 
le16_add_cpu(&neh->eh_entries, m); 907 } 908 909 set_buffer_uptodate(bh); 910 unlock_buffer(bh); 911 912 err = ext4_handle_dirty_metadata(handle, inode, bh); 913 if (err) 914 goto cleanup; 915 brelse(bh); 916 bh = NULL; 917 918 /* correct old leaf */ 919 if (m) { 920 err = ext4_ext_get_access(handle, inode, path + depth); 921 if (err) 922 goto cleanup; 923 le16_add_cpu(&path[depth].p_hdr->eh_entries, -m); 924 err = ext4_ext_dirty(handle, inode, path + depth); 925 if (err) 926 goto cleanup; 927 928 } 929 930 /* create intermediate indexes */ 931 k = depth - at - 1; 932 if (unlikely(k < 0)) { 933 EXT4_ERROR_INODE(inode, "k %d < 0!", k); 934 err = -EIO; 935 goto cleanup; 936 } 937 if (k) 938 ext_debug("create %d intermediate indices\n", k); 939 /* insert new index into current index block */ 940 /* current depth stored in i var */ 941 i = depth - 1; 942 while (k--) { 943 oldblock = newblock; 944 newblock = ablocks[--a]; 945 bh = sb_getblk(inode->i_sb, newblock); 946 if (!bh) { 947 err = -EIO; 948 goto cleanup; 949 } 950 lock_buffer(bh); 951 952 err = ext4_journal_get_create_access(handle, bh); 953 if (err) 954 goto cleanup; 955 956 neh = ext_block_hdr(bh); 957 neh->eh_entries = cpu_to_le16(1); 958 neh->eh_magic = EXT4_EXT_MAGIC; 959 neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode, 0)); 960 neh->eh_depth = cpu_to_le16(depth - i); 961 fidx = EXT_FIRST_INDEX(neh); 962 fidx->ei_block = border; 963 ext4_idx_store_pblock(fidx, oldblock); 964 965 ext_debug("int.index at %d (block %llu): %u -> %llu\n", 966 i, newblock, le32_to_cpu(border), oldblock); 967 /* copy indexes */ 968 m = 0; 969 path[i].p_idx++; 970 971 ext_debug("cur 0x%p, last 0x%p\n", path[i].p_idx, 972 EXT_MAX_INDEX(path[i].p_hdr)); 973 if (unlikely(EXT_MAX_INDEX(path[i].p_hdr) != 974 EXT_LAST_INDEX(path[i].p_hdr))) { 975 EXT4_ERROR_INODE(inode, 976 "EXT_MAX_INDEX != EXT_LAST_INDEX ee_block %d!", 977 le32_to_cpu(path[i].p_ext->ee_block)); 978 err = -EIO; 979 goto cleanup; 980 } 981 while (path[i].p_idx 
<= EXT_MAX_INDEX(path[i].p_hdr)) { 982 ext_debug("%d: move %d:%llu in new index %llu\n", i, 983 le32_to_cpu(path[i].p_idx->ei_block), 984 ext4_idx_pblock(path[i].p_idx), 985 newblock); 986 /*memmove(++fidx, path[i].p_idx++, 987 sizeof(struct ext4_extent_idx)); 988 neh->eh_entries++; 989 BUG_ON(neh->eh_entries > neh->eh_max);*/ 990 path[i].p_idx++; 991 m++; 992 } 993 if (m) { 994 memmove(++fidx, path[i].p_idx - m, 995 sizeof(struct ext4_extent_idx) * m); 996 le16_add_cpu(&neh->eh_entries, m); 997 } 998 set_buffer_uptodate(bh); 999 unlock_buffer(bh); 1000 1001 err = ext4_handle_dirty_metadata(handle, inode, bh); 1002 if (err) 1003 goto cleanup; 1004 brelse(bh); 1005 bh = NULL; 1006 1007 /* correct old index */ 1008 if (m) { 1009 err = ext4_ext_get_access(handle, inode, path + i); 1010 if (err) 1011 goto cleanup; 1012 le16_add_cpu(&path[i].p_hdr->eh_entries, -m); 1013 err = ext4_ext_dirty(handle, inode, path + i); 1014 if (err) 1015 goto cleanup; 1016 } 1017 1018 i--; 1019 } 1020 1021 /* insert new index */ 1022 err = ext4_ext_insert_index(handle, inode, path + at, 1023 le32_to_cpu(border), newblock); 1024 1025 cleanup: 1026 if (bh) { 1027 if (buffer_locked(bh)) 1028 unlock_buffer(bh); 1029 brelse(bh); 1030 } 1031 1032 if (err) { 1033 /* free all allocated blocks in error case */ 1034 for (i = 0; i < depth; i++) { 1035 if (!ablocks[i]) 1036 continue; 1037 ext4_free_blocks(handle, inode, 0, ablocks[i], 1, 1038 EXT4_FREE_BLOCKS_METADATA); 1039 } 1040 } 1041 kfree(ablocks); 1042 1043 return err; 1044 } 1045 1046 /* 1047 * ext4_ext_grow_indepth: 1048 * implements tree growing procedure: 1049 * - allocates new block 1050 * - moves top-level data (index block or leaf) into the new block 1051 * - initializes new top-level, creating index that points to the 1052 * just created block 1053 */ 1054 static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, 1055 struct ext4_ext_path *path, 1056 struct ext4_extent *newext) 1057 { 1058 struct ext4_ext_path *curp = 
path; 1059 struct ext4_extent_header *neh; 1060 struct buffer_head *bh; 1061 ext4_fsblk_t newblock; 1062 int err = 0; 1063 1064 newblock = ext4_ext_new_meta_block(handle, inode, path, newext, &err); 1065 if (newblock == 0) 1066 return err; 1067 1068 bh = sb_getblk(inode->i_sb, newblock); 1069 if (!bh) { 1070 err = -EIO; 1071 ext4_std_error(inode->i_sb, err); 1072 return err; 1073 } 1074 lock_buffer(bh); 1075 1076 err = ext4_journal_get_create_access(handle, bh); 1077 if (err) { 1078 unlock_buffer(bh); 1079 goto out; 1080 } 1081 1082 /* move top-level index/leaf into new block */ 1083 memmove(bh->b_data, curp->p_hdr, sizeof(EXT4_I(inode)->i_data)); 1084 1085 /* set size of new block */ 1086 neh = ext_block_hdr(bh); 1087 /* old root could have indexes or leaves 1088 * so calculate e_max right way */ 1089 if (ext_depth(inode)) 1090 neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode, 0)); 1091 else 1092 neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0)); 1093 neh->eh_magic = EXT4_EXT_MAGIC; 1094 set_buffer_uptodate(bh); 1095 unlock_buffer(bh); 1096 1097 err = ext4_handle_dirty_metadata(handle, inode, bh); 1098 if (err) 1099 goto out; 1100 1101 /* create index in new top-level index: num,max,pointer */ 1102 err = ext4_ext_get_access(handle, inode, curp); 1103 if (err) 1104 goto out; 1105 1106 curp->p_hdr->eh_magic = EXT4_EXT_MAGIC; 1107 curp->p_hdr->eh_max = cpu_to_le16(ext4_ext_space_root_idx(inode, 0)); 1108 curp->p_hdr->eh_entries = cpu_to_le16(1); 1109 curp->p_idx = EXT_FIRST_INDEX(curp->p_hdr); 1110 1111 if (path[0].p_hdr->eh_depth) 1112 curp->p_idx->ei_block = 1113 EXT_FIRST_INDEX(path[0].p_hdr)->ei_block; 1114 else 1115 curp->p_idx->ei_block = 1116 EXT_FIRST_EXTENT(path[0].p_hdr)->ee_block; 1117 ext4_idx_store_pblock(curp->p_idx, newblock); 1118 1119 neh = ext_inode_hdr(inode); 1120 ext_debug("new root: num %d(%d), lblock %d, ptr %llu\n", 1121 le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max), 1122 le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block), 
1123 ext4_idx_pblock(EXT_FIRST_INDEX(neh))); 1124 1125 neh->eh_depth = cpu_to_le16(path->p_depth + 1); 1126 err = ext4_ext_dirty(handle, inode, curp); 1127 out: 1128 brelse(bh); 1129 1130 return err; 1131 } 1132 1133 /* 1134 * ext4_ext_create_new_leaf: 1135 * finds empty index and adds new leaf. 1136 * if no free index is found, then it requests in-depth growing. 1137 */ 1138 static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode, 1139 struct ext4_ext_path *path, 1140 struct ext4_extent *newext) 1141 { 1142 struct ext4_ext_path *curp; 1143 int depth, i, err = 0; 1144 1145 repeat: 1146 i = depth = ext_depth(inode); 1147 1148 /* walk up to the tree and look for free index entry */ 1149 curp = path + depth; 1150 while (i > 0 && !EXT_HAS_FREE_INDEX(curp)) { 1151 i--; 1152 curp--; 1153 } 1154 1155 /* we use already allocated block for index block, 1156 * so subsequent data blocks should be contiguous */ 1157 if (EXT_HAS_FREE_INDEX(curp)) { 1158 /* if we found index with free entry, then use that 1159 * entry: create all needed subtree and add new leaf */ 1160 err = ext4_ext_split(handle, inode, path, newext, i); 1161 if (err) 1162 goto out; 1163 1164 /* refill path */ 1165 ext4_ext_drop_refs(path); 1166 path = ext4_ext_find_extent(inode, 1167 (ext4_lblk_t)le32_to_cpu(newext->ee_block), 1168 path); 1169 if (IS_ERR(path)) 1170 err = PTR_ERR(path); 1171 } else { 1172 /* tree is full, time to grow in depth */ 1173 err = ext4_ext_grow_indepth(handle, inode, path, newext); 1174 if (err) 1175 goto out; 1176 1177 /* refill path */ 1178 ext4_ext_drop_refs(path); 1179 path = ext4_ext_find_extent(inode, 1180 (ext4_lblk_t)le32_to_cpu(newext->ee_block), 1181 path); 1182 if (IS_ERR(path)) { 1183 err = PTR_ERR(path); 1184 goto out; 1185 } 1186 1187 /* 1188 * only first (depth 0 -> 1) produces free space; 1189 * in all other cases we have to split the grown tree 1190 */ 1191 depth = ext_depth(inode); 1192 if (path[depth].p_hdr->eh_entries == 
path[depth].p_hdr->eh_max) { 1193 /* now we need to split */ 1194 goto repeat; 1195 } 1196 } 1197 1198 out: 1199 return err; 1200 } 1201 1202 /* 1203 * search the closest allocated block to the left for *logical 1204 * and returns it at @logical + it's physical address at @phys 1205 * if *logical is the smallest allocated block, the function 1206 * returns 0 at @phys 1207 * return value contains 0 (success) or error code 1208 */ 1209 static int ext4_ext_search_left(struct inode *inode, 1210 struct ext4_ext_path *path, 1211 ext4_lblk_t *logical, ext4_fsblk_t *phys) 1212 { 1213 struct ext4_extent_idx *ix; 1214 struct ext4_extent *ex; 1215 int depth, ee_len; 1216 1217 if (unlikely(path == NULL)) { 1218 EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical); 1219 return -EIO; 1220 } 1221 depth = path->p_depth; 1222 *phys = 0; 1223 1224 if (depth == 0 && path->p_ext == NULL) 1225 return 0; 1226 1227 /* usually extent in the path covers blocks smaller 1228 * then *logical, but it can be that extent is the 1229 * first one in the file */ 1230 1231 ex = path[depth].p_ext; 1232 ee_len = ext4_ext_get_actual_len(ex); 1233 if (*logical < le32_to_cpu(ex->ee_block)) { 1234 if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) { 1235 EXT4_ERROR_INODE(inode, 1236 "EXT_FIRST_EXTENT != ex *logical %d ee_block %d!", 1237 *logical, le32_to_cpu(ex->ee_block)); 1238 return -EIO; 1239 } 1240 while (--depth >= 0) { 1241 ix = path[depth].p_idx; 1242 if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) { 1243 EXT4_ERROR_INODE(inode, 1244 "ix (%d) != EXT_FIRST_INDEX (%d) (depth %d)!", 1245 ix != NULL ? ix->ei_block : 0, 1246 EXT_FIRST_INDEX(path[depth].p_hdr) != NULL ? 
1247 EXT_FIRST_INDEX(path[depth].p_hdr)->ei_block : 0, 1248 depth); 1249 return -EIO; 1250 } 1251 } 1252 return 0; 1253 } 1254 1255 if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) { 1256 EXT4_ERROR_INODE(inode, 1257 "logical %d < ee_block %d + ee_len %d!", 1258 *logical, le32_to_cpu(ex->ee_block), ee_len); 1259 return -EIO; 1260 } 1261 1262 *logical = le32_to_cpu(ex->ee_block) + ee_len - 1; 1263 *phys = ext4_ext_pblock(ex) + ee_len - 1; 1264 return 0; 1265 } 1266 1267 /* 1268 * search the closest allocated block to the right for *logical 1269 * and returns it at @logical + it's physical address at @phys 1270 * if *logical is the smallest allocated block, the function 1271 * returns 0 at @phys 1272 * return value contains 0 (success) or error code 1273 */ 1274 static int ext4_ext_search_right(struct inode *inode, 1275 struct ext4_ext_path *path, 1276 ext4_lblk_t *logical, ext4_fsblk_t *phys) 1277 { 1278 struct buffer_head *bh = NULL; 1279 struct ext4_extent_header *eh; 1280 struct ext4_extent_idx *ix; 1281 struct ext4_extent *ex; 1282 ext4_fsblk_t block; 1283 int depth; /* Note, NOT eh_depth; depth from top of tree */ 1284 int ee_len; 1285 1286 if (unlikely(path == NULL)) { 1287 EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical); 1288 return -EIO; 1289 } 1290 depth = path->p_depth; 1291 *phys = 0; 1292 1293 if (depth == 0 && path->p_ext == NULL) 1294 return 0; 1295 1296 /* usually extent in the path covers blocks smaller 1297 * then *logical, but it can be that extent is the 1298 * first one in the file */ 1299 1300 ex = path[depth].p_ext; 1301 ee_len = ext4_ext_get_actual_len(ex); 1302 if (*logical < le32_to_cpu(ex->ee_block)) { 1303 if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) { 1304 EXT4_ERROR_INODE(inode, 1305 "first_extent(path[%d].p_hdr) != ex", 1306 depth); 1307 return -EIO; 1308 } 1309 while (--depth >= 0) { 1310 ix = path[depth].p_idx; 1311 if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) { 1312 
EXT4_ERROR_INODE(inode, 1313 "ix != EXT_FIRST_INDEX *logical %d!", 1314 *logical); 1315 return -EIO; 1316 } 1317 } 1318 *logical = le32_to_cpu(ex->ee_block); 1319 *phys = ext4_ext_pblock(ex); 1320 return 0; 1321 } 1322 1323 if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) { 1324 EXT4_ERROR_INODE(inode, 1325 "logical %d < ee_block %d + ee_len %d!", 1326 *logical, le32_to_cpu(ex->ee_block), ee_len); 1327 return -EIO; 1328 } 1329 1330 if (ex != EXT_LAST_EXTENT(path[depth].p_hdr)) { 1331 /* next allocated block in this leaf */ 1332 ex++; 1333 *logical = le32_to_cpu(ex->ee_block); 1334 *phys = ext4_ext_pblock(ex); 1335 return 0; 1336 } 1337 1338 /* go up and search for index to the right */ 1339 while (--depth >= 0) { 1340 ix = path[depth].p_idx; 1341 if (ix != EXT_LAST_INDEX(path[depth].p_hdr)) 1342 goto got_index; 1343 } 1344 1345 /* we've gone up to the root and found no index to the right */ 1346 return 0; 1347 1348 got_index: 1349 /* we've found index to the right, let's 1350 * follow it and find the closest allocated 1351 * block to the right */ 1352 ix++; 1353 block = ext4_idx_pblock(ix); 1354 while (++depth < path->p_depth) { 1355 bh = sb_bread(inode->i_sb, block); 1356 if (bh == NULL) 1357 return -EIO; 1358 eh = ext_block_hdr(bh); 1359 /* subtract from p_depth to get proper eh_depth */ 1360 if (ext4_ext_check(inode, eh, path->p_depth - depth)) { 1361 put_bh(bh); 1362 return -EIO; 1363 } 1364 ix = EXT_FIRST_INDEX(eh); 1365 block = ext4_idx_pblock(ix); 1366 put_bh(bh); 1367 } 1368 1369 bh = sb_bread(inode->i_sb, block); 1370 if (bh == NULL) 1371 return -EIO; 1372 eh = ext_block_hdr(bh); 1373 if (ext4_ext_check(inode, eh, path->p_depth - depth)) { 1374 put_bh(bh); 1375 return -EIO; 1376 } 1377 ex = EXT_FIRST_EXTENT(eh); 1378 *logical = le32_to_cpu(ex->ee_block); 1379 *phys = ext4_ext_pblock(ex); 1380 put_bh(bh); 1381 return 0; 1382 } 1383 1384 /* 1385 * ext4_ext_next_allocated_block: 1386 * returns allocated block in subsequent extent or 
EXT_MAX_BLOCK. 1387 * NOTE: it considers block number from index entry as 1388 * allocated block. Thus, index entries have to be consistent 1389 * with leaves. 1390 */ 1391 static ext4_lblk_t 1392 ext4_ext_next_allocated_block(struct ext4_ext_path *path) 1393 { 1394 int depth; 1395 1396 BUG_ON(path == NULL); 1397 depth = path->p_depth; 1398 1399 if (depth == 0 && path->p_ext == NULL) 1400 return EXT_MAX_BLOCK; 1401 1402 while (depth >= 0) { 1403 if (depth == path->p_depth) { 1404 /* leaf */ 1405 if (path[depth].p_ext != 1406 EXT_LAST_EXTENT(path[depth].p_hdr)) 1407 return le32_to_cpu(path[depth].p_ext[1].ee_block); 1408 } else { 1409 /* index */ 1410 if (path[depth].p_idx != 1411 EXT_LAST_INDEX(path[depth].p_hdr)) 1412 return le32_to_cpu(path[depth].p_idx[1].ei_block); 1413 } 1414 depth--; 1415 } 1416 1417 return EXT_MAX_BLOCK; 1418 } 1419 1420 /* 1421 * ext4_ext_next_leaf_block: 1422 * returns first allocated block from next leaf or EXT_MAX_BLOCK 1423 */ 1424 static ext4_lblk_t ext4_ext_next_leaf_block(struct inode *inode, 1425 struct ext4_ext_path *path) 1426 { 1427 int depth; 1428 1429 BUG_ON(path == NULL); 1430 depth = path->p_depth; 1431 1432 /* zero-tree has no leaf blocks at all */ 1433 if (depth == 0) 1434 return EXT_MAX_BLOCK; 1435 1436 /* go to index block */ 1437 depth--; 1438 1439 while (depth >= 0) { 1440 if (path[depth].p_idx != 1441 EXT_LAST_INDEX(path[depth].p_hdr)) 1442 return (ext4_lblk_t) 1443 le32_to_cpu(path[depth].p_idx[1].ei_block); 1444 depth--; 1445 } 1446 1447 return EXT_MAX_BLOCK; 1448 } 1449 1450 /* 1451 * ext4_ext_correct_indexes: 1452 * if leaf gets modified and modified extent is first in the leaf, 1453 * then we have to correct all indexes above. 1454 * TODO: do we need to correct tree in all cases? 
1455 */ 1456 static int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode, 1457 struct ext4_ext_path *path) 1458 { 1459 struct ext4_extent_header *eh; 1460 int depth = ext_depth(inode); 1461 struct ext4_extent *ex; 1462 __le32 border; 1463 int k, err = 0; 1464 1465 eh = path[depth].p_hdr; 1466 ex = path[depth].p_ext; 1467 1468 if (unlikely(ex == NULL || eh == NULL)) { 1469 EXT4_ERROR_INODE(inode, 1470 "ex %p == NULL or eh %p == NULL", ex, eh); 1471 return -EIO; 1472 } 1473 1474 if (depth == 0) { 1475 /* there is no tree at all */ 1476 return 0; 1477 } 1478 1479 if (ex != EXT_FIRST_EXTENT(eh)) { 1480 /* we correct tree if first leaf got modified only */ 1481 return 0; 1482 } 1483 1484 /* 1485 * TODO: we need correction if border is smaller than current one 1486 */ 1487 k = depth - 1; 1488 border = path[depth].p_ext->ee_block; 1489 err = ext4_ext_get_access(handle, inode, path + k); 1490 if (err) 1491 return err; 1492 path[k].p_idx->ei_block = border; 1493 err = ext4_ext_dirty(handle, inode, path + k); 1494 if (err) 1495 return err; 1496 1497 while (k--) { 1498 /* change all left-side indexes */ 1499 if (path[k+1].p_idx != EXT_FIRST_INDEX(path[k+1].p_hdr)) 1500 break; 1501 err = ext4_ext_get_access(handle, inode, path + k); 1502 if (err) 1503 break; 1504 path[k].p_idx->ei_block = border; 1505 err = ext4_ext_dirty(handle, inode, path + k); 1506 if (err) 1507 break; 1508 } 1509 1510 return err; 1511 } 1512 1513 int 1514 ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, 1515 struct ext4_extent *ex2) 1516 { 1517 unsigned short ext1_ee_len, ext2_ee_len, max_len; 1518 1519 /* 1520 * Make sure that either both extents are uninitialized, or 1521 * both are _not_. 
1522 */ 1523 if (ext4_ext_is_uninitialized(ex1) ^ ext4_ext_is_uninitialized(ex2)) 1524 return 0; 1525 1526 if (ext4_ext_is_uninitialized(ex1)) 1527 max_len = EXT_UNINIT_MAX_LEN; 1528 else 1529 max_len = EXT_INIT_MAX_LEN; 1530 1531 ext1_ee_len = ext4_ext_get_actual_len(ex1); 1532 ext2_ee_len = ext4_ext_get_actual_len(ex2); 1533 1534 if (le32_to_cpu(ex1->ee_block) + ext1_ee_len != 1535 le32_to_cpu(ex2->ee_block)) 1536 return 0; 1537 1538 /* 1539 * To allow future support for preallocated extents to be added 1540 * as an RO_COMPAT feature, refuse to merge to extents if 1541 * this can result in the top bit of ee_len being set. 1542 */ 1543 if (ext1_ee_len + ext2_ee_len > max_len) 1544 return 0; 1545 #ifdef AGGRESSIVE_TEST 1546 if (ext1_ee_len >= 4) 1547 return 0; 1548 #endif 1549 1550 if (ext4_ext_pblock(ex1) + ext1_ee_len == ext4_ext_pblock(ex2)) 1551 return 1; 1552 return 0; 1553 } 1554 1555 /* 1556 * This function tries to merge the "ex" extent to the next extent in the tree. 1557 * It always tries to merge towards right. If you want to merge towards 1558 * left, pass "ex - 1" as argument instead of "ex". 1559 * Returns 0 if the extents (ex and ex+1) were _not_ merged and returns 1560 * 1 if they got merged. 1561 */ 1562 static int ext4_ext_try_to_merge(struct inode *inode, 1563 struct ext4_ext_path *path, 1564 struct ext4_extent *ex) 1565 { 1566 struct ext4_extent_header *eh; 1567 unsigned int depth, len; 1568 int merge_done = 0; 1569 int uninitialized = 0; 1570 1571 depth = ext_depth(inode); 1572 BUG_ON(path[depth].p_hdr == NULL); 1573 eh = path[depth].p_hdr; 1574 1575 while (ex < EXT_LAST_EXTENT(eh)) { 1576 if (!ext4_can_extents_be_merged(inode, ex, ex + 1)) 1577 break; 1578 /* merge with next extent! 
*/ 1579 if (ext4_ext_is_uninitialized(ex)) 1580 uninitialized = 1; 1581 ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex) 1582 + ext4_ext_get_actual_len(ex + 1)); 1583 if (uninitialized) 1584 ext4_ext_mark_uninitialized(ex); 1585 1586 if (ex + 1 < EXT_LAST_EXTENT(eh)) { 1587 len = (EXT_LAST_EXTENT(eh) - ex - 1) 1588 * sizeof(struct ext4_extent); 1589 memmove(ex + 1, ex + 2, len); 1590 } 1591 le16_add_cpu(&eh->eh_entries, -1); 1592 merge_done = 1; 1593 WARN_ON(eh->eh_entries == 0); 1594 if (!eh->eh_entries) 1595 EXT4_ERROR_INODE(inode, "eh->eh_entries = 0!"); 1596 } 1597 1598 return merge_done; 1599 } 1600 1601 /* 1602 * check if a portion of the "newext" extent overlaps with an 1603 * existing extent. 1604 * 1605 * If there is an overlap discovered, it updates the length of the newext 1606 * such that there will be no overlap, and then returns 1. 1607 * If there is no overlap found, it returns 0. 1608 */ 1609 static unsigned int ext4_ext_check_overlap(struct inode *inode, 1610 struct ext4_extent *newext, 1611 struct ext4_ext_path *path) 1612 { 1613 ext4_lblk_t b1, b2; 1614 unsigned int depth, len1; 1615 unsigned int ret = 0; 1616 1617 b1 = le32_to_cpu(newext->ee_block); 1618 len1 = ext4_ext_get_actual_len(newext); 1619 depth = ext_depth(inode); 1620 if (!path[depth].p_ext) 1621 goto out; 1622 b2 = le32_to_cpu(path[depth].p_ext->ee_block); 1623 1624 /* 1625 * get the next allocated block if the extent in the path 1626 * is before the requested block(s) 1627 */ 1628 if (b2 < b1) { 1629 b2 = ext4_ext_next_allocated_block(path); 1630 if (b2 == EXT_MAX_BLOCK) 1631 goto out; 1632 } 1633 1634 /* check for wrap through zero on extent logical start block*/ 1635 if (b1 + len1 < b1) { 1636 len1 = EXT_MAX_BLOCK - b1; 1637 newext->ee_len = cpu_to_le16(len1); 1638 ret = 1; 1639 } 1640 1641 /* check for overlap */ 1642 if (b1 + len1 > b2) { 1643 newext->ee_len = cpu_to_le16(b2 - b1); 1644 ret = 1; 1645 } 1646 out: 1647 return ret; 1648 } 1649 1650 /* 1651 * 
ext4_ext_insert_extent:
 * tries to merge requested extent into the existing extent or
 * inserts requested extent as new one into the tree,
 * creating new leaf in the no-space case.
 *
 * No merging is attempted when @flag contains EXT4_GET_BLOCKS_PRE_IO.
 * Returns 0 on success or a negative error code.  Every exit that goes
 * through the cleanup label releases the temporary next-leaf path (if
 * one was looked up) and invalidates the inode's extent cache.
 */
int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
				struct ext4_ext_path *path,
				struct ext4_extent *newext, int flag)
{
	struct ext4_extent_header *eh;
	struct ext4_extent *ex, *fex;
	struct ext4_extent *nearex; /* nearest extent */
	struct ext4_ext_path *npath = NULL;
	int depth, len, err;
	ext4_lblk_t next;
	unsigned uninitialized = 0;

	if (unlikely(ext4_ext_get_actual_len(newext) == 0)) {
		EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0");
		return -EIO;
	}
	depth = ext_depth(inode);
	ex = path[depth].p_ext;
	if (unlikely(path[depth].p_hdr == NULL)) {
		EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth);
		return -EIO;
	}

	/* try to insert block into found extent and return */
	if (ex && !(flag & EXT4_GET_BLOCKS_PRE_IO)
		&& ext4_can_extents_be_merged(inode, ex, newext)) {
		ext_debug("append [%d]%d block to %d:[%d]%d (from %llu)\n",
			  ext4_ext_is_uninitialized(newext),
			  ext4_ext_get_actual_len(newext),
			  le32_to_cpu(ex->ee_block),
			  ext4_ext_is_uninitialized(ex),
			  ext4_ext_get_actual_len(ex),
			  ext4_ext_pblock(ex));
		err = ext4_ext_get_access(handle, inode, path + depth);
		if (err)
			return err;

		/*
		 * ext4_can_extents_be_merged should have checked that either
		 * both extents are uninitialized, or both aren't. Thus we
		 * need to check only one of them here.
		 */
		if (ext4_ext_is_uninitialized(ex))
			uninitialized = 1;
		ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
					+ ext4_ext_get_actual_len(newext));
		/* the plain length store above dropped the flag; restore it */
		if (uninitialized)
			ext4_ext_mark_uninitialized(ex);
		eh = path[depth].p_hdr;
		nearex = ex;
		goto merge;
	}

repeat:
	depth = ext_depth(inode);
	eh = path[depth].p_hdr;
	if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max))
		goto has_space;

	/* probably next leaf has space for us? */
	fex = EXT_LAST_EXTENT(eh);
	next = ext4_ext_next_leaf_block(inode, path);
	if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block)
	    && next != EXT_MAX_BLOCK) {
		ext_debug("next leaf block - %d\n", next);
		BUG_ON(npath != NULL);
		/* separate path, allocated here and freed at cleanup */
		npath = ext4_ext_find_extent(inode, next, NULL);
		if (IS_ERR(npath))
			return PTR_ERR(npath);
		BUG_ON(npath->p_depth != path->p_depth);
		eh = npath[depth].p_hdr;
		if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max)) {
			ext_debug("next leaf isnt full(%d)\n",
				  le16_to_cpu(eh->eh_entries));
			/* insert into the next leaf instead */
			path = npath;
			goto repeat;
		}
		ext_debug("next leaf has no free space(%d,%d)\n",
			  le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));
	}

	/*
	 * There is no free space in the found leaf.
	 * We're gonna add a new leaf in the tree.
	 */
	err = ext4_ext_create_new_leaf(handle, inode, path, newext);
	if (err)
		goto cleanup;
	depth = ext_depth(inode);
	eh = path[depth].p_hdr;

has_space:
	nearex = path[depth].p_ext;

	err = ext4_ext_get_access(handle, inode, path + depth);
	if (err)
		goto cleanup;

	if (!nearex) {
		/* there is no extent in this leaf, create first one */
		ext_debug("first extent in the leaf: %d:%llu:[%d]%d\n",
				le32_to_cpu(newext->ee_block),
				ext4_ext_pblock(newext),
				ext4_ext_is_uninitialized(newext),
				ext4_ext_get_actual_len(newext));
		path[depth].p_ext = EXT_FIRST_EXTENT(eh);
	} else if (le32_to_cpu(newext->ee_block)
			   > le32_to_cpu(nearex->ee_block)) {
/*		BUG_ON(newext->ee_block == nearex->ee_block); */
		/* new extent goes right after nearex */
		if (nearex != EXT_LAST_EXTENT(eh)) {
			len = EXT_MAX_EXTENT(eh) - nearex;
			len = (len - 1) * sizeof(struct ext4_extent);
			len = len < 0 ? 0 : len;
			ext_debug("insert %d:%llu:[%d]%d after: nearest 0x%p, "
					"move %d from 0x%p to 0x%p\n",
					le32_to_cpu(newext->ee_block),
					ext4_ext_pblock(newext),
					ext4_ext_is_uninitialized(newext),
					ext4_ext_get_actual_len(newext),
					nearex, len, nearex + 1, nearex + 2);
			/* shift the entries after nearex one slot to the right */
			memmove(nearex + 2, nearex + 1, len);
		}
		path[depth].p_ext = nearex + 1;
	} else {
		/* new extent goes in front of nearex */
		BUG_ON(newext->ee_block == nearex->ee_block);
		len = (EXT_MAX_EXTENT(eh) - nearex) * sizeof(struct ext4_extent);
		len = len < 0 ? 0 : len;
		/*
		 * NOTE(review): the pointers printed below (nearex + 1,
		 * nearex + 2) do not match the memmove that follows, which
		 * moves from nearex to nearex + 1 -- debug output only.
		 */
		ext_debug("insert %d:%llu:[%d]%d before: nearest 0x%p, "
				"move %d from 0x%p to 0x%p\n",
				le32_to_cpu(newext->ee_block),
				ext4_ext_pblock(newext),
				ext4_ext_is_uninitialized(newext),
				ext4_ext_get_actual_len(newext),
				nearex, len, nearex + 1, nearex + 2);
		memmove(nearex + 1, nearex, len);
		path[depth].p_ext = nearex;
	}

	/* fill the slot selected above with the new extent */
	le16_add_cpu(&eh->eh_entries, 1);
	nearex = path[depth].p_ext;
	nearex->ee_block = newext->ee_block;
	ext4_ext_store_pblock(nearex, ext4_ext_pblock(newext));
	nearex->ee_len = newext->ee_len;

merge:
	/* try to merge extents to the right */
	if (!(flag & EXT4_GET_BLOCKS_PRE_IO))
		ext4_ext_try_to_merge(inode, path, nearex);

	/* try to merge extents to the left */

	/* time to correct all indexes above */
	err = ext4_ext_correct_indexes(handle, inode, path);
	if (err)
		goto cleanup;

	err = ext4_ext_dirty(handle, inode, path + depth);

cleanup:
	if (npath) {
		ext4_ext_drop_refs(npath);
		kfree(npath);
	}
	ext4_ext_invalidate_cache(inode);
	return err;
}

/*
 * ext4_ext_walk_space:
 * walks the logical range [@block, @block + @num) and calls @func once
 * per step with a struct ext4_ext_cache describing either the extent
 * found at the current block (ec_start is its physical start) or the
 * hole the current block sits in (ec_start == 0).
 *
 * The return value of @func drives the walk:
 *   < 0         - stop, the value is returned to the caller
 *   EXT_REPEAT  - look up the same block again
 *   EXT_BREAK   - stop, 0 is returned
 *   otherwise   - continue right after the reported range
 *
 * i_data_sem is held for reading only around the tree lookup, not
 * while @func runs.
 */
static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
			ext4_lblk_t num, ext_prepare_callback func,
			void *cbdata)
{
	struct ext4_ext_path *path = NULL;
	struct ext4_ext_cache cbex;
	struct ext4_extent *ex;
	ext4_lblk_t next, start = 0, end = 0;
	ext4_lblk_t last = block + num;
	int depth, exists, err = 0;

	BUG_ON(func == NULL);
	BUG_ON(inode == NULL);

	while (block < last && block != EXT_MAX_BLOCK) {
		num = last - block;
		/* find extent for this block */
		down_read(&EXT4_I(inode)->i_data_sem);
		path = ext4_ext_find_extent(inode, block, path);
		up_read(&EXT4_I(inode)->i_data_sem);
		if (IS_ERR(path)) {
			err = PTR_ERR(path);
			/* do not hand the error pointer to the cleanup below */
			path = NULL;
			break;
		}

		depth = ext_depth(inode);
		if (unlikely(path[depth].p_hdr == NULL)) {
			EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth);
			err = -EIO;
			break;
		}
		ex = path[depth].p_ext;
		next = ext4_ext_next_allocated_block(path);

		exists = 0;
		if (!ex) {
			/* there is no extent yet, so try to allocate
			 * all requested space */
			start = block;
			end = block + num;
		} else if (le32_to_cpu(ex->ee_block) > block) {
			/* need to allocate space before found extent */
			start = block;
			end = le32_to_cpu(ex->ee_block);
			if (block + num < end)
				end = block + num;
		} else if (block >= le32_to_cpu(ex->ee_block)
					+ ext4_ext_get_actual_len(ex)) {
			/* need to allocate space after found extent */
			start = block;
			end = block + num;
			if (end >= next)
				end = next;
		} else if (block >= le32_to_cpu(ex->ee_block)) {
			/*
			 * some part of requested space is covered
			 * by found extent
			 */
			start = block;
			end = le32_to_cpu(ex->ee_block)
				+ ext4_ext_get_actual_len(ex);
			if (block + num < end)
				end = block + num;
			exists = 1;
		} else {
			BUG();
		}
		BUG_ON(end <= start);

		if (!exists) {
			/* hole: report only the part inside the request */
			cbex.ec_block = start;
			cbex.ec_len = end - start;
			cbex.ec_start = 0;
		} else {
			/* extent: report the whole extent, not just the overlap */
			cbex.ec_block = le32_to_cpu(ex->ee_block);
			cbex.ec_len = ext4_ext_get_actual_len(ex);
			cbex.ec_start = ext4_ext_pblock(ex);
		}

		if (unlikely(cbex.ec_len == 0)) {
			EXT4_ERROR_INODE(inode, "cbex.ec_len == 0");
			err = -EIO;
			break;
		}
		err = func(inode, path, &cbex, ex, cbdata);
		ext4_ext_drop_refs(path);

		if (err < 0)
			break;

		if (err == EXT_REPEAT)
			continue;
		else if (err == EXT_BREAK) {
			err = 0;
			break;
		}

		if (ext_depth(inode) != depth) {
			/* depth was changed. we have to realloc path */
			kfree(path);
			path = NULL;
		}

		block = cbex.ec_block + cbex.ec_len;
	}

	if (path) {
		ext4_ext_drop_refs(path);
		kfree(path);
	}

	return err;
}

/*
 * ext4_ext_put_in_cache:
 * stores the range (@block, @len, @start) in the inode's single cached
 * extent slot.  @len must not be 0.  The slot is written under
 * i_block_reservation_lock.
 */
static void
ext4_ext_put_in_cache(struct inode *inode, ext4_lblk_t block,
			__u32 len, ext4_fsblk_t start)
{
	struct ext4_ext_cache *cex;
	BUG_ON(len == 0);
	spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
	cex = &EXT4_I(inode)->i_cached_extent;
	cex->ec_block = block;
	cex->ec_len = len;
	cex->ec_start = start;
	spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
}

/*
 * ext4_ext_put_gap_in_cache:
 * calculate boundaries of the gap that the requested block fits into
 * and cache this gap
 *
 * The gap is cached with a physical start of 0.  @block must not lie
 * inside the extent referenced by @path (BUG() otherwise).
 */
static void
ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
				ext4_lblk_t block)
{
	int depth = ext_depth(inode);
	unsigned long len;
	ext4_lblk_t lblock;
	struct ext4_extent *ex;

	ex = path[depth].p_ext;
	if (ex == NULL) {
		/* there is no extent yet, so gap is [0;-] */
		lblock = 0;
		len = EXT_MAX_BLOCK;
		ext_debug("cache gap(whole file):");
	} else if (block < le32_to_cpu(ex->ee_block)) {
		/* gap runs from @block up to the start of the found extent */
		lblock = block;
		len = le32_to_cpu(ex->ee_block) - block;
		ext_debug("cache gap(before): %u [%u:%u]",
				block,
				le32_to_cpu(ex->ee_block),
				 ext4_ext_get_actual_len(ex));
	} else if (block >= le32_to_cpu(ex->ee_block)
			+ ext4_ext_get_actual_len(ex)) {
		/* gap runs from the end of the found extent to the next one */
		ext4_lblk_t next;
		lblock = le32_to_cpu(ex->ee_block)
			+ ext4_ext_get_actual_len(ex);

		next = ext4_ext_next_allocated_block(path);
		ext_debug("cache gap(after): [%u:%u] %u",
				le32_to_cpu(ex->ee_block),
				ext4_ext_get_actual_len(ex),
				block);
		BUG_ON(next == lblock);
		len = next - lblock;
	} else {
		lblock = len = 0;
		BUG();
	}

	ext_debug(" -> %u:%lu\n", lblock, len);
	ext4_ext_put_in_cache(inode, lblock, len, 0);
}

/*
 * ext4_ext_in_cache:
 * looks @block up in the inode's cached extent and, on a hit, copies the
 * cached range into @ex.
 *
 * Return 0 if cache is invalid; 1 if the cache is valid
 */
static int
ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
			struct ext4_extent *ex)
{
	struct ext4_ext_cache *cex;
	int ret = 0;

	/*
	 * We borrow i_block_reservation_lock to protect i_cached_extent
	 */
	spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
	cex = &EXT4_I(inode)->i_cached_extent;

	/* has cache valid data? */
	if (cex->ec_len == 0)
		goto errout;

	if (in_range(block, cex->ec_block, cex->ec_len)) {
		ex->ee_block = cpu_to_le32(cex->ec_block);
		ext4_ext_store_pblock(ex, cex->ec_start);
		ex->ee_len = cpu_to_le16(cex->ec_len);
		ext_debug("%u cached by %u:%u:%llu\n",
				block,
				cex->ec_block, cex->ec_len, cex->ec_start);
		ret = 1;
	}
errout:
	spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
	return ret;
}

/*
 * ext4_ext_rm_idx:
 * removes index from the index block.
 * It's used in truncate case only, thus all requests are for
 * last index in the block only.
2040 */ 2041 static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, 2042 struct ext4_ext_path *path) 2043 { 2044 int err; 2045 ext4_fsblk_t leaf; 2046 2047 /* free index block */ 2048 path--; 2049 leaf = ext4_idx_pblock(path->p_idx); 2050 if (unlikely(path->p_hdr->eh_entries == 0)) { 2051 EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0"); 2052 return -EIO; 2053 } 2054 err = ext4_ext_get_access(handle, inode, path); 2055 if (err) 2056 return err; 2057 le16_add_cpu(&path->p_hdr->eh_entries, -1); 2058 err = ext4_ext_dirty(handle, inode, path); 2059 if (err) 2060 return err; 2061 ext_debug("index is empty, remove it, free block %llu\n", leaf); 2062 ext4_free_blocks(handle, inode, 0, leaf, 1, 2063 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); 2064 return err; 2065 } 2066 2067 /* 2068 * ext4_ext_calc_credits_for_single_extent: 2069 * This routine returns max. credits that needed to insert an extent 2070 * to the extent tree. 2071 * When pass the actual path, the caller should calculate credits 2072 * under i_data_sem. 2073 */ 2074 int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int nrblocks, 2075 struct ext4_ext_path *path) 2076 { 2077 if (path) { 2078 int depth = ext_depth(inode); 2079 int ret = 0; 2080 2081 /* probably there is space in leaf? */ 2082 if (le16_to_cpu(path[depth].p_hdr->eh_entries) 2083 < le16_to_cpu(path[depth].p_hdr->eh_max)) { 2084 2085 /* 2086 * There are some space in the leaf tree, no 2087 * need to account for leaf block credit 2088 * 2089 * bitmaps and block group descriptor blocks 2090 * and other metadat blocks still need to be 2091 * accounted. 2092 */ 2093 /* 1 bitmap, 1 block group descriptor */ 2094 ret = 2 + EXT4_META_TRANS_BLOCKS(inode->i_sb); 2095 return ret; 2096 } 2097 } 2098 2099 return ext4_chunk_trans_blocks(inode, nrblocks); 2100 } 2101 2102 /* 2103 * How many index/leaf blocks need to change/allocate to modify nrblocks? 
2104 * 2105 * if nrblocks are fit in a single extent (chunk flag is 1), then 2106 * in the worse case, each tree level index/leaf need to be changed 2107 * if the tree split due to insert a new extent, then the old tree 2108 * index/leaf need to be updated too 2109 * 2110 * If the nrblocks are discontiguous, they could cause 2111 * the whole tree split more than once, but this is really rare. 2112 */ 2113 int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) 2114 { 2115 int index; 2116 int depth = ext_depth(inode); 2117 2118 if (chunk) 2119 index = depth * 2; 2120 else 2121 index = depth * 3; 2122 2123 return index; 2124 } 2125 2126 static int ext4_remove_blocks(handle_t *handle, struct inode *inode, 2127 struct ext4_extent *ex, 2128 ext4_lblk_t from, ext4_lblk_t to) 2129 { 2130 unsigned short ee_len = ext4_ext_get_actual_len(ex); 2131 int flags = EXT4_FREE_BLOCKS_FORGET; 2132 2133 if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) 2134 flags |= EXT4_FREE_BLOCKS_METADATA; 2135 #ifdef EXTENTS_STATS 2136 { 2137 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 2138 spin_lock(&sbi->s_ext_stats_lock); 2139 sbi->s_ext_blocks += ee_len; 2140 sbi->s_ext_extents++; 2141 if (ee_len < sbi->s_ext_min) 2142 sbi->s_ext_min = ee_len; 2143 if (ee_len > sbi->s_ext_max) 2144 sbi->s_ext_max = ee_len; 2145 if (ext_depth(inode) > sbi->s_depth_max) 2146 sbi->s_depth_max = ext_depth(inode); 2147 spin_unlock(&sbi->s_ext_stats_lock); 2148 } 2149 #endif 2150 if (from >= le32_to_cpu(ex->ee_block) 2151 && to == le32_to_cpu(ex->ee_block) + ee_len - 1) { 2152 /* tail removal */ 2153 ext4_lblk_t num; 2154 ext4_fsblk_t start; 2155 2156 num = le32_to_cpu(ex->ee_block) + ee_len - from; 2157 start = ext4_ext_pblock(ex) + ee_len - num; 2158 ext_debug("free last %u blocks starting %llu\n", num, start); 2159 ext4_free_blocks(handle, inode, 0, start, num, flags); 2160 } else if (from == le32_to_cpu(ex->ee_block) 2161 && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) { 2162 
printk(KERN_INFO "strange request: removal %u-%u from %u:%u\n", 2163 from, to, le32_to_cpu(ex->ee_block), ee_len); 2164 } else { 2165 printk(KERN_INFO "strange request: removal(2) " 2166 "%u-%u from %u:%u\n", 2167 from, to, le32_to_cpu(ex->ee_block), ee_len); 2168 } 2169 return 0; 2170 } 2171 2172 static int 2173 ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, 2174 struct ext4_ext_path *path, ext4_lblk_t start) 2175 { 2176 int err = 0, correct_index = 0; 2177 int depth = ext_depth(inode), credits; 2178 struct ext4_extent_header *eh; 2179 ext4_lblk_t a, b, block; 2180 unsigned num; 2181 ext4_lblk_t ex_ee_block; 2182 unsigned short ex_ee_len; 2183 unsigned uninitialized = 0; 2184 struct ext4_extent *ex; 2185 2186 /* the header must be checked already in ext4_ext_remove_space() */ 2187 ext_debug("truncate since %u in leaf\n", start); 2188 if (!path[depth].p_hdr) 2189 path[depth].p_hdr = ext_block_hdr(path[depth].p_bh); 2190 eh = path[depth].p_hdr; 2191 if (unlikely(path[depth].p_hdr == NULL)) { 2192 EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth); 2193 return -EIO; 2194 } 2195 /* find where to start removing */ 2196 ex = EXT_LAST_EXTENT(eh); 2197 2198 ex_ee_block = le32_to_cpu(ex->ee_block); 2199 ex_ee_len = ext4_ext_get_actual_len(ex); 2200 2201 while (ex >= EXT_FIRST_EXTENT(eh) && 2202 ex_ee_block + ex_ee_len > start) { 2203 2204 if (ext4_ext_is_uninitialized(ex)) 2205 uninitialized = 1; 2206 else 2207 uninitialized = 0; 2208 2209 ext_debug("remove ext %u:[%d]%d\n", ex_ee_block, 2210 uninitialized, ex_ee_len); 2211 path[depth].p_ext = ex; 2212 2213 a = ex_ee_block > start ? ex_ee_block : start; 2214 b = ex_ee_block + ex_ee_len - 1 < EXT_MAX_BLOCK ? 
2215 ex_ee_block + ex_ee_len - 1 : EXT_MAX_BLOCK; 2216 2217 ext_debug(" border %u:%u\n", a, b); 2218 2219 if (a != ex_ee_block && b != ex_ee_block + ex_ee_len - 1) { 2220 block = 0; 2221 num = 0; 2222 BUG(); 2223 } else if (a != ex_ee_block) { 2224 /* remove tail of the extent */ 2225 block = ex_ee_block; 2226 num = a - block; 2227 } else if (b != ex_ee_block + ex_ee_len - 1) { 2228 /* remove head of the extent */ 2229 block = a; 2230 num = b - a; 2231 /* there is no "make a hole" API yet */ 2232 BUG(); 2233 } else { 2234 /* remove whole extent: excellent! */ 2235 block = ex_ee_block; 2236 num = 0; 2237 BUG_ON(a != ex_ee_block); 2238 BUG_ON(b != ex_ee_block + ex_ee_len - 1); 2239 } 2240 2241 /* 2242 * 3 for leaf, sb, and inode plus 2 (bmap and group 2243 * descriptor) for each block group; assume two block 2244 * groups plus ex_ee_len/blocks_per_block_group for 2245 * the worst case 2246 */ 2247 credits = 7 + 2*(ex_ee_len/EXT4_BLOCKS_PER_GROUP(inode->i_sb)); 2248 if (ex == EXT_FIRST_EXTENT(eh)) { 2249 correct_index = 1; 2250 credits += (ext_depth(inode)) + 1; 2251 } 2252 credits += EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb); 2253 2254 err = ext4_ext_truncate_extend_restart(handle, inode, credits); 2255 if (err) 2256 goto out; 2257 2258 err = ext4_ext_get_access(handle, inode, path + depth); 2259 if (err) 2260 goto out; 2261 2262 err = ext4_remove_blocks(handle, inode, ex, a, b); 2263 if (err) 2264 goto out; 2265 2266 if (num == 0) { 2267 /* this extent is removed; mark slot entirely unused */ 2268 ext4_ext_store_pblock(ex, 0); 2269 le16_add_cpu(&eh->eh_entries, -1); 2270 } 2271 2272 ex->ee_block = cpu_to_le32(block); 2273 ex->ee_len = cpu_to_le16(num); 2274 /* 2275 * Do not mark uninitialized if all the blocks in the 2276 * extent have been removed. 
2277 */ 2278 if (uninitialized && num) 2279 ext4_ext_mark_uninitialized(ex); 2280 2281 err = ext4_ext_dirty(handle, inode, path + depth); 2282 if (err) 2283 goto out; 2284 2285 ext_debug("new extent: %u:%u:%llu\n", block, num, 2286 ext4_ext_pblock(ex)); 2287 ex--; 2288 ex_ee_block = le32_to_cpu(ex->ee_block); 2289 ex_ee_len = ext4_ext_get_actual_len(ex); 2290 } 2291 2292 if (correct_index && eh->eh_entries) 2293 err = ext4_ext_correct_indexes(handle, inode, path); 2294 2295 /* if this leaf is free, then we should 2296 * remove it from index block above */ 2297 if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) 2298 err = ext4_ext_rm_idx(handle, inode, path + depth); 2299 2300 out: 2301 return err; 2302 } 2303 2304 /* 2305 * ext4_ext_more_to_rm: 2306 * returns 1 if current index has to be freed (even partial) 2307 */ 2308 static int 2309 ext4_ext_more_to_rm(struct ext4_ext_path *path) 2310 { 2311 BUG_ON(path->p_idx == NULL); 2312 2313 if (path->p_idx < EXT_FIRST_INDEX(path->p_hdr)) 2314 return 0; 2315 2316 /* 2317 * if truncate on deeper level happened, it wasn't partial, 2318 * so we have to consider current index for truncation 2319 */ 2320 if (le16_to_cpu(path->p_hdr->eh_entries) == path->p_block) 2321 return 0; 2322 return 1; 2323 } 2324 2325 static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start) 2326 { 2327 struct super_block *sb = inode->i_sb; 2328 int depth = ext_depth(inode); 2329 struct ext4_ext_path *path; 2330 handle_t *handle; 2331 int i, err; 2332 2333 ext_debug("truncate since %u\n", start); 2334 2335 /* probably first extent we're gonna free will be last in block */ 2336 handle = ext4_journal_start(inode, depth + 1); 2337 if (IS_ERR(handle)) 2338 return PTR_ERR(handle); 2339 2340 again: 2341 ext4_ext_invalidate_cache(inode); 2342 2343 /* 2344 * We start scanning from right side, freeing all the blocks 2345 * after i_size and walking into the tree depth-wise. 
2346 */ 2347 depth = ext_depth(inode); 2348 path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_NOFS); 2349 if (path == NULL) { 2350 ext4_journal_stop(handle); 2351 return -ENOMEM; 2352 } 2353 path[0].p_depth = depth; 2354 path[0].p_hdr = ext_inode_hdr(inode); 2355 if (ext4_ext_check(inode, path[0].p_hdr, depth)) { 2356 err = -EIO; 2357 goto out; 2358 } 2359 i = err = 0; 2360 2361 while (i >= 0 && err == 0) { 2362 if (i == depth) { 2363 /* this is leaf block */ 2364 err = ext4_ext_rm_leaf(handle, inode, path, start); 2365 /* root level has p_bh == NULL, brelse() eats this */ 2366 brelse(path[i].p_bh); 2367 path[i].p_bh = NULL; 2368 i--; 2369 continue; 2370 } 2371 2372 /* this is index block */ 2373 if (!path[i].p_hdr) { 2374 ext_debug("initialize header\n"); 2375 path[i].p_hdr = ext_block_hdr(path[i].p_bh); 2376 } 2377 2378 if (!path[i].p_idx) { 2379 /* this level hasn't been touched yet */ 2380 path[i].p_idx = EXT_LAST_INDEX(path[i].p_hdr); 2381 path[i].p_block = le16_to_cpu(path[i].p_hdr->eh_entries)+1; 2382 ext_debug("init index ptr: hdr 0x%p, num %d\n", 2383 path[i].p_hdr, 2384 le16_to_cpu(path[i].p_hdr->eh_entries)); 2385 } else { 2386 /* we were already here, see at next index */ 2387 path[i].p_idx--; 2388 } 2389 2390 ext_debug("level %d - index, first 0x%p, cur 0x%p\n", 2391 i, EXT_FIRST_INDEX(path[i].p_hdr), 2392 path[i].p_idx); 2393 if (ext4_ext_more_to_rm(path + i)) { 2394 struct buffer_head *bh; 2395 /* go to the next level */ 2396 ext_debug("move to level %d (block %llu)\n", 2397 i + 1, ext4_idx_pblock(path[i].p_idx)); 2398 memset(path + i + 1, 0, sizeof(*path)); 2399 bh = sb_bread(sb, ext4_idx_pblock(path[i].p_idx)); 2400 if (!bh) { 2401 /* should we reset i_size? 
*/ 2402 err = -EIO; 2403 break; 2404 } 2405 if (WARN_ON(i + 1 > depth)) { 2406 err = -EIO; 2407 break; 2408 } 2409 if (ext4_ext_check(inode, ext_block_hdr(bh), 2410 depth - i - 1)) { 2411 err = -EIO; 2412 break; 2413 } 2414 path[i + 1].p_bh = bh; 2415 2416 /* save actual number of indexes since this 2417 * number is changed at the next iteration */ 2418 path[i].p_block = le16_to_cpu(path[i].p_hdr->eh_entries); 2419 i++; 2420 } else { 2421 /* we finished processing this index, go up */ 2422 if (path[i].p_hdr->eh_entries == 0 && i > 0) { 2423 /* index is empty, remove it; 2424 * handle must be already prepared by the 2425 * truncatei_leaf() */ 2426 err = ext4_ext_rm_idx(handle, inode, path + i); 2427 } 2428 /* root level has p_bh == NULL, brelse() eats this */ 2429 brelse(path[i].p_bh); 2430 path[i].p_bh = NULL; 2431 i--; 2432 ext_debug("return to level %d\n", i); 2433 } 2434 } 2435 2436 /* TODO: flexible tree reduction should be here */ 2437 if (path->p_hdr->eh_entries == 0) { 2438 /* 2439 * truncate to zero freed all the tree, 2440 * so we need to correct eh_depth 2441 */ 2442 err = ext4_ext_get_access(handle, inode, path); 2443 if (err == 0) { 2444 ext_inode_hdr(inode)->eh_depth = 0; 2445 ext_inode_hdr(inode)->eh_max = 2446 cpu_to_le16(ext4_ext_space_root(inode, 0)); 2447 err = ext4_ext_dirty(handle, inode, path); 2448 } 2449 } 2450 out: 2451 ext4_ext_drop_refs(path); 2452 kfree(path); 2453 if (err == -EAGAIN) 2454 goto again; 2455 ext4_journal_stop(handle); 2456 2457 return err; 2458 } 2459 2460 /* 2461 * called at mount time 2462 */ 2463 void ext4_ext_init(struct super_block *sb) 2464 { 2465 /* 2466 * possible initialization would be here 2467 */ 2468 2469 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) { 2470 #if defined(AGGRESSIVE_TEST) || defined(CHECK_BINSEARCH) || defined(EXTENTS_STATS) 2471 printk(KERN_INFO "EXT4-fs: file extents enabled"); 2472 #ifdef AGGRESSIVE_TEST 2473 printk(", aggressive tests"); 2474 #endif 2475 #ifdef 
CHECK_BINSEARCH 2476 printk(", check binsearch"); 2477 #endif 2478 #ifdef EXTENTS_STATS 2479 printk(", stats"); 2480 #endif 2481 printk("\n"); 2482 #endif 2483 #ifdef EXTENTS_STATS 2484 spin_lock_init(&EXT4_SB(sb)->s_ext_stats_lock); 2485 EXT4_SB(sb)->s_ext_min = 1 << 30; 2486 EXT4_SB(sb)->s_ext_max = 0; 2487 #endif 2488 } 2489 } 2490 2491 /* 2492 * called at umount time 2493 */ 2494 void ext4_ext_release(struct super_block *sb) 2495 { 2496 if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) 2497 return; 2498 2499 #ifdef EXTENTS_STATS 2500 if (EXT4_SB(sb)->s_ext_blocks && EXT4_SB(sb)->s_ext_extents) { 2501 struct ext4_sb_info *sbi = EXT4_SB(sb); 2502 printk(KERN_ERR "EXT4-fs: %lu blocks in %lu extents (%lu ave)\n", 2503 sbi->s_ext_blocks, sbi->s_ext_extents, 2504 sbi->s_ext_blocks / sbi->s_ext_extents); 2505 printk(KERN_ERR "EXT4-fs: extents: %lu min, %lu max, max depth %lu\n", 2506 sbi->s_ext_min, sbi->s_ext_max, sbi->s_depth_max); 2507 } 2508 #endif 2509 } 2510 2511 /* FIXME!! we need to try to merge to left or right after zero-out */ 2512 static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) 2513 { 2514 ext4_fsblk_t ee_pblock; 2515 unsigned int ee_len; 2516 int ret; 2517 2518 ee_len = ext4_ext_get_actual_len(ex); 2519 ee_pblock = ext4_ext_pblock(ex); 2520 2521 ret = sb_issue_zeroout(inode->i_sb, ee_pblock, ee_len, GFP_NOFS); 2522 if (ret > 0) 2523 ret = 0; 2524 2525 return ret; 2526 } 2527 2528 #define EXT4_EXT_ZERO_LEN 7 2529 /* 2530 * This function is called by ext4_ext_map_blocks() if someone tries to write 2531 * to an uninitialized extent. It may result in splitting the uninitialized 2532 * extent into multiple extents (upto three - one initialized and two 2533 * uninitialized). 
2534 * There are three possibilities: 2535 * a> There is no split required: Entire extent should be initialized 2536 * b> Splits in two extents: Write is happening at either end of the extent 2537 * c> Splits in three extents: Somone is writing in middle of the extent 2538 */ 2539 static int ext4_ext_convert_to_initialized(handle_t *handle, 2540 struct inode *inode, 2541 struct ext4_map_blocks *map, 2542 struct ext4_ext_path *path) 2543 { 2544 struct ext4_extent *ex, newex, orig_ex; 2545 struct ext4_extent *ex1 = NULL; 2546 struct ext4_extent *ex2 = NULL; 2547 struct ext4_extent *ex3 = NULL; 2548 struct ext4_extent_header *eh; 2549 ext4_lblk_t ee_block, eof_block; 2550 unsigned int allocated, ee_len, depth; 2551 ext4_fsblk_t newblock; 2552 int err = 0; 2553 int ret = 0; 2554 int may_zeroout; 2555 2556 ext_debug("ext4_ext_convert_to_initialized: inode %lu, logical" 2557 "block %llu, max_blocks %u\n", inode->i_ino, 2558 (unsigned long long)map->m_lblk, map->m_len); 2559 2560 eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >> 2561 inode->i_sb->s_blocksize_bits; 2562 if (eof_block < map->m_lblk + map->m_len) 2563 eof_block = map->m_lblk + map->m_len; 2564 2565 depth = ext_depth(inode); 2566 eh = path[depth].p_hdr; 2567 ex = path[depth].p_ext; 2568 ee_block = le32_to_cpu(ex->ee_block); 2569 ee_len = ext4_ext_get_actual_len(ex); 2570 allocated = ee_len - (map->m_lblk - ee_block); 2571 newblock = map->m_lblk - ee_block + ext4_ext_pblock(ex); 2572 2573 ex2 = ex; 2574 orig_ex.ee_block = ex->ee_block; 2575 orig_ex.ee_len = cpu_to_le16(ee_len); 2576 ext4_ext_store_pblock(&orig_ex, ext4_ext_pblock(ex)); 2577 2578 /* 2579 * It is safe to convert extent to initialized via explicit 2580 * zeroout only if extent is fully insde i_size or new_size. 
2581 */ 2582 may_zeroout = ee_block + ee_len <= eof_block; 2583 2584 err = ext4_ext_get_access(handle, inode, path + depth); 2585 if (err) 2586 goto out; 2587 /* If extent has less than 2*EXT4_EXT_ZERO_LEN zerout directly */ 2588 if (ee_len <= 2*EXT4_EXT_ZERO_LEN && may_zeroout) { 2589 err = ext4_ext_zeroout(inode, &orig_ex); 2590 if (err) 2591 goto fix_extent_len; 2592 /* update the extent length and mark as initialized */ 2593 ex->ee_block = orig_ex.ee_block; 2594 ex->ee_len = orig_ex.ee_len; 2595 ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); 2596 ext4_ext_dirty(handle, inode, path + depth); 2597 /* zeroed the full extent */ 2598 return allocated; 2599 } 2600 2601 /* ex1: ee_block to map->m_lblk - 1 : uninitialized */ 2602 if (map->m_lblk > ee_block) { 2603 ex1 = ex; 2604 ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block); 2605 ext4_ext_mark_uninitialized(ex1); 2606 ex2 = &newex; 2607 } 2608 /* 2609 * for sanity, update the length of the ex2 extent before 2610 * we insert ex3, if ex1 is NULL. This is to avoid temporary 2611 * overlap of blocks. 2612 */ 2613 if (!ex1 && allocated > map->m_len) 2614 ex2->ee_len = cpu_to_le16(map->m_len); 2615 /* ex3: to ee_block + ee_len : uninitialised */ 2616 if (allocated > map->m_len) { 2617 unsigned int newdepth; 2618 /* If extent has less than EXT4_EXT_ZERO_LEN zerout directly */ 2619 if (allocated <= EXT4_EXT_ZERO_LEN && may_zeroout) { 2620 /* 2621 * map->m_lblk == ee_block is handled by the zerouout 2622 * at the beginning. 2623 * Mark first half uninitialized. 
2624 * Mark second half initialized and zero out the 2625 * initialized extent 2626 */ 2627 ex->ee_block = orig_ex.ee_block; 2628 ex->ee_len = cpu_to_le16(ee_len - allocated); 2629 ext4_ext_mark_uninitialized(ex); 2630 ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); 2631 ext4_ext_dirty(handle, inode, path + depth); 2632 2633 ex3 = &newex; 2634 ex3->ee_block = cpu_to_le32(map->m_lblk); 2635 ext4_ext_store_pblock(ex3, newblock); 2636 ex3->ee_len = cpu_to_le16(allocated); 2637 err = ext4_ext_insert_extent(handle, inode, path, 2638 ex3, 0); 2639 if (err == -ENOSPC) { 2640 err = ext4_ext_zeroout(inode, &orig_ex); 2641 if (err) 2642 goto fix_extent_len; 2643 ex->ee_block = orig_ex.ee_block; 2644 ex->ee_len = orig_ex.ee_len; 2645 ext4_ext_store_pblock(ex, 2646 ext4_ext_pblock(&orig_ex)); 2647 ext4_ext_dirty(handle, inode, path + depth); 2648 /* blocks available from map->m_lblk */ 2649 return allocated; 2650 2651 } else if (err) 2652 goto fix_extent_len; 2653 2654 /* 2655 * We need to zero out the second half because 2656 * an fallocate request can update file size and 2657 * converting the second half to initialized extent 2658 * implies that we can leak some junk data to user 2659 * space. 
2660 */ 2661 err = ext4_ext_zeroout(inode, ex3); 2662 if (err) { 2663 /* 2664 * We should actually mark the 2665 * second half as uninit and return error 2666 * Insert would have changed the extent 2667 */ 2668 depth = ext_depth(inode); 2669 ext4_ext_drop_refs(path); 2670 path = ext4_ext_find_extent(inode, map->m_lblk, 2671 path); 2672 if (IS_ERR(path)) { 2673 err = PTR_ERR(path); 2674 return err; 2675 } 2676 /* get the second half extent details */ 2677 ex = path[depth].p_ext; 2678 err = ext4_ext_get_access(handle, inode, 2679 path + depth); 2680 if (err) 2681 return err; 2682 ext4_ext_mark_uninitialized(ex); 2683 ext4_ext_dirty(handle, inode, path + depth); 2684 return err; 2685 } 2686 2687 /* zeroed the second half */ 2688 return allocated; 2689 } 2690 ex3 = &newex; 2691 ex3->ee_block = cpu_to_le32(map->m_lblk + map->m_len); 2692 ext4_ext_store_pblock(ex3, newblock + map->m_len); 2693 ex3->ee_len = cpu_to_le16(allocated - map->m_len); 2694 ext4_ext_mark_uninitialized(ex3); 2695 err = ext4_ext_insert_extent(handle, inode, path, ex3, 0); 2696 if (err == -ENOSPC && may_zeroout) { 2697 err = ext4_ext_zeroout(inode, &orig_ex); 2698 if (err) 2699 goto fix_extent_len; 2700 /* update the extent length and mark as initialized */ 2701 ex->ee_block = orig_ex.ee_block; 2702 ex->ee_len = orig_ex.ee_len; 2703 ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); 2704 ext4_ext_dirty(handle, inode, path + depth); 2705 /* zeroed the full extent */ 2706 /* blocks available from map->m_lblk */ 2707 return allocated; 2708 2709 } else if (err) 2710 goto fix_extent_len; 2711 /* 2712 * The depth, and hence eh & ex might change 2713 * as part of the insert above. 
2714 */ 2715 newdepth = ext_depth(inode); 2716 /* 2717 * update the extent length after successful insert of the 2718 * split extent 2719 */ 2720 ee_len -= ext4_ext_get_actual_len(ex3); 2721 orig_ex.ee_len = cpu_to_le16(ee_len); 2722 may_zeroout = ee_block + ee_len <= eof_block; 2723 2724 depth = newdepth; 2725 ext4_ext_drop_refs(path); 2726 path = ext4_ext_find_extent(inode, map->m_lblk, path); 2727 if (IS_ERR(path)) { 2728 err = PTR_ERR(path); 2729 goto out; 2730 } 2731 eh = path[depth].p_hdr; 2732 ex = path[depth].p_ext; 2733 if (ex2 != &newex) 2734 ex2 = ex; 2735 2736 err = ext4_ext_get_access(handle, inode, path + depth); 2737 if (err) 2738 goto out; 2739 2740 allocated = map->m_len; 2741 2742 /* If extent has less than EXT4_EXT_ZERO_LEN and we are trying 2743 * to insert a extent in the middle zerout directly 2744 * otherwise give the extent a chance to merge to left 2745 */ 2746 if (le16_to_cpu(orig_ex.ee_len) <= EXT4_EXT_ZERO_LEN && 2747 map->m_lblk != ee_block && may_zeroout) { 2748 err = ext4_ext_zeroout(inode, &orig_ex); 2749 if (err) 2750 goto fix_extent_len; 2751 /* update the extent length and mark as initialized */ 2752 ex->ee_block = orig_ex.ee_block; 2753 ex->ee_len = orig_ex.ee_len; 2754 ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); 2755 ext4_ext_dirty(handle, inode, path + depth); 2756 /* zero out the first half */ 2757 /* blocks available from map->m_lblk */ 2758 return allocated; 2759 } 2760 } 2761 /* 2762 * If there was a change of depth as part of the 2763 * insertion of ex3 above, we need to update the length 2764 * of the ex1 extent again here 2765 */ 2766 if (ex1 && ex1 != ex) { 2767 ex1 = ex; 2768 ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block); 2769 ext4_ext_mark_uninitialized(ex1); 2770 ex2 = &newex; 2771 } 2772 /* ex2: map->m_lblk to map->m_lblk + maxblocks-1 : initialised */ 2773 ex2->ee_block = cpu_to_le32(map->m_lblk); 2774 ext4_ext_store_pblock(ex2, newblock); 2775 ex2->ee_len = cpu_to_le16(allocated); 2776 if (ex2 != ex) 
2777 goto insert; 2778 /* 2779 * New (initialized) extent starts from the first block 2780 * in the current extent. i.e., ex2 == ex 2781 * We have to see if it can be merged with the extent 2782 * on the left. 2783 */ 2784 if (ex2 > EXT_FIRST_EXTENT(eh)) { 2785 /* 2786 * To merge left, pass "ex2 - 1" to try_to_merge(), 2787 * since it merges towards right _only_. 2788 */ 2789 ret = ext4_ext_try_to_merge(inode, path, ex2 - 1); 2790 if (ret) { 2791 err = ext4_ext_correct_indexes(handle, inode, path); 2792 if (err) 2793 goto out; 2794 depth = ext_depth(inode); 2795 ex2--; 2796 } 2797 } 2798 /* 2799 * Try to Merge towards right. This might be required 2800 * only when the whole extent is being written to. 2801 * i.e. ex2 == ex and ex3 == NULL. 2802 */ 2803 if (!ex3) { 2804 ret = ext4_ext_try_to_merge(inode, path, ex2); 2805 if (ret) { 2806 err = ext4_ext_correct_indexes(handle, inode, path); 2807 if (err) 2808 goto out; 2809 } 2810 } 2811 /* Mark modified extent as dirty */ 2812 err = ext4_ext_dirty(handle, inode, path + depth); 2813 goto out; 2814 insert: 2815 err = ext4_ext_insert_extent(handle, inode, path, &newex, 0); 2816 if (err == -ENOSPC && may_zeroout) { 2817 err = ext4_ext_zeroout(inode, &orig_ex); 2818 if (err) 2819 goto fix_extent_len; 2820 /* update the extent length and mark as initialized */ 2821 ex->ee_block = orig_ex.ee_block; 2822 ex->ee_len = orig_ex.ee_len; 2823 ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); 2824 ext4_ext_dirty(handle, inode, path + depth); 2825 /* zero out the first half */ 2826 return allocated; 2827 } else if (err) 2828 goto fix_extent_len; 2829 out: 2830 ext4_ext_show_leaf(inode, path); 2831 return err ? 
err : allocated; 2832 2833 fix_extent_len: 2834 ex->ee_block = orig_ex.ee_block; 2835 ex->ee_len = orig_ex.ee_len; 2836 ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); 2837 ext4_ext_mark_uninitialized(ex); 2838 ext4_ext_dirty(handle, inode, path + depth); 2839 return err; 2840 } 2841 2842 /* 2843 * This function is called by ext4_ext_map_blocks() from 2844 * ext4_get_blocks_dio_write() when DIO to write 2845 * to an uninitialized extent. 2846 * 2847 * Writing to an uninitized extent may result in splitting the uninitialized 2848 * extent into multiple /initialized uninitialized extents (up to three) 2849 * There are three possibilities: 2850 * a> There is no split required: Entire extent should be uninitialized 2851 * b> Splits in two extents: Write is happening at either end of the extent 2852 * c> Splits in three extents: Somone is writing in middle of the extent 2853 * 2854 * One of more index blocks maybe needed if the extent tree grow after 2855 * the uninitialized extent split. To prevent ENOSPC occur at the IO 2856 * complete, we need to split the uninitialized extent before DIO submit 2857 * the IO. The uninitialized extent called at this time will be split 2858 * into three uninitialized extent(at most). After IO complete, the part 2859 * being filled will be convert to initialized by the end_io callback function 2860 * via ext4_convert_unwritten_extents(). 2861 * 2862 * Returns the size of uninitialized extent to be written on success. 
2863 */ 2864 static int ext4_split_unwritten_extents(handle_t *handle, 2865 struct inode *inode, 2866 struct ext4_map_blocks *map, 2867 struct ext4_ext_path *path, 2868 int flags) 2869 { 2870 struct ext4_extent *ex, newex, orig_ex; 2871 struct ext4_extent *ex1 = NULL; 2872 struct ext4_extent *ex2 = NULL; 2873 struct ext4_extent *ex3 = NULL; 2874 ext4_lblk_t ee_block, eof_block; 2875 unsigned int allocated, ee_len, depth; 2876 ext4_fsblk_t newblock; 2877 int err = 0; 2878 int may_zeroout; 2879 2880 ext_debug("ext4_split_unwritten_extents: inode %lu, logical" 2881 "block %llu, max_blocks %u\n", inode->i_ino, 2882 (unsigned long long)map->m_lblk, map->m_len); 2883 2884 eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >> 2885 inode->i_sb->s_blocksize_bits; 2886 if (eof_block < map->m_lblk + map->m_len) 2887 eof_block = map->m_lblk + map->m_len; 2888 2889 depth = ext_depth(inode); 2890 ex = path[depth].p_ext; 2891 ee_block = le32_to_cpu(ex->ee_block); 2892 ee_len = ext4_ext_get_actual_len(ex); 2893 allocated = ee_len - (map->m_lblk - ee_block); 2894 newblock = map->m_lblk - ee_block + ext4_ext_pblock(ex); 2895 2896 ex2 = ex; 2897 orig_ex.ee_block = ex->ee_block; 2898 orig_ex.ee_len = cpu_to_le16(ee_len); 2899 ext4_ext_store_pblock(&orig_ex, ext4_ext_pblock(ex)); 2900 2901 /* 2902 * It is safe to convert extent to initialized via explicit 2903 * zeroout only if extent is fully insde i_size or new_size. 2904 */ 2905 may_zeroout = ee_block + ee_len <= eof_block; 2906 2907 /* 2908 * If the uninitialized extent begins at the same logical 2909 * block where the write begins, and the write completely 2910 * covers the extent, then we don't need to split it. 
2911 */ 2912 if ((map->m_lblk == ee_block) && (allocated <= map->m_len)) 2913 return allocated; 2914 2915 err = ext4_ext_get_access(handle, inode, path + depth); 2916 if (err) 2917 goto out; 2918 /* ex1: ee_block to map->m_lblk - 1 : uninitialized */ 2919 if (map->m_lblk > ee_block) { 2920 ex1 = ex; 2921 ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block); 2922 ext4_ext_mark_uninitialized(ex1); 2923 ex2 = &newex; 2924 } 2925 /* 2926 * for sanity, update the length of the ex2 extent before 2927 * we insert ex3, if ex1 is NULL. This is to avoid temporary 2928 * overlap of blocks. 2929 */ 2930 if (!ex1 && allocated > map->m_len) 2931 ex2->ee_len = cpu_to_le16(map->m_len); 2932 /* ex3: to ee_block + ee_len : uninitialised */ 2933 if (allocated > map->m_len) { 2934 unsigned int newdepth; 2935 ex3 = &newex; 2936 ex3->ee_block = cpu_to_le32(map->m_lblk + map->m_len); 2937 ext4_ext_store_pblock(ex3, newblock + map->m_len); 2938 ex3->ee_len = cpu_to_le16(allocated - map->m_len); 2939 ext4_ext_mark_uninitialized(ex3); 2940 err = ext4_ext_insert_extent(handle, inode, path, ex3, flags); 2941 if (err == -ENOSPC && may_zeroout) { 2942 err = ext4_ext_zeroout(inode, &orig_ex); 2943 if (err) 2944 goto fix_extent_len; 2945 /* update the extent length and mark as initialized */ 2946 ex->ee_block = orig_ex.ee_block; 2947 ex->ee_len = orig_ex.ee_len; 2948 ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); 2949 ext4_ext_dirty(handle, inode, path + depth); 2950 /* zeroed the full extent */ 2951 /* blocks available from map->m_lblk */ 2952 return allocated; 2953 2954 } else if (err) 2955 goto fix_extent_len; 2956 /* 2957 * The depth, and hence eh & ex might change 2958 * as part of the insert above. 
2959 */ 2960 newdepth = ext_depth(inode); 2961 /* 2962 * update the extent length after successful insert of the 2963 * split extent 2964 */ 2965 ee_len -= ext4_ext_get_actual_len(ex3); 2966 orig_ex.ee_len = cpu_to_le16(ee_len); 2967 may_zeroout = ee_block + ee_len <= eof_block; 2968 2969 depth = newdepth; 2970 ext4_ext_drop_refs(path); 2971 path = ext4_ext_find_extent(inode, map->m_lblk, path); 2972 if (IS_ERR(path)) { 2973 err = PTR_ERR(path); 2974 goto out; 2975 } 2976 ex = path[depth].p_ext; 2977 if (ex2 != &newex) 2978 ex2 = ex; 2979 2980 err = ext4_ext_get_access(handle, inode, path + depth); 2981 if (err) 2982 goto out; 2983 2984 allocated = map->m_len; 2985 } 2986 /* 2987 * If there was a change of depth as part of the 2988 * insertion of ex3 above, we need to update the length 2989 * of the ex1 extent again here 2990 */ 2991 if (ex1 && ex1 != ex) { 2992 ex1 = ex; 2993 ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block); 2994 ext4_ext_mark_uninitialized(ex1); 2995 ex2 = &newex; 2996 } 2997 /* 2998 * ex2: map->m_lblk to map->m_lblk + map->m_len-1 : to be written 2999 * using direct I/O, uninitialised still. 
3000 */ 3001 ex2->ee_block = cpu_to_le32(map->m_lblk); 3002 ext4_ext_store_pblock(ex2, newblock); 3003 ex2->ee_len = cpu_to_le16(allocated); 3004 ext4_ext_mark_uninitialized(ex2); 3005 if (ex2 != ex) 3006 goto insert; 3007 /* Mark modified extent as dirty */ 3008 err = ext4_ext_dirty(handle, inode, path + depth); 3009 ext_debug("out here\n"); 3010 goto out; 3011 insert: 3012 err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); 3013 if (err == -ENOSPC && may_zeroout) { 3014 err = ext4_ext_zeroout(inode, &orig_ex); 3015 if (err) 3016 goto fix_extent_len; 3017 /* update the extent length and mark as initialized */ 3018 ex->ee_block = orig_ex.ee_block; 3019 ex->ee_len = orig_ex.ee_len; 3020 ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); 3021 ext4_ext_dirty(handle, inode, path + depth); 3022 /* zero out the first half */ 3023 return allocated; 3024 } else if (err) 3025 goto fix_extent_len; 3026 out: 3027 ext4_ext_show_leaf(inode, path); 3028 return err ? err : allocated; 3029 3030 fix_extent_len: 3031 ex->ee_block = orig_ex.ee_block; 3032 ex->ee_len = orig_ex.ee_len; 3033 ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); 3034 ext4_ext_mark_uninitialized(ex); 3035 ext4_ext_dirty(handle, inode, path + depth); 3036 return err; 3037 } 3038 static int ext4_convert_unwritten_extents_endio(handle_t *handle, 3039 struct inode *inode, 3040 struct ext4_ext_path *path) 3041 { 3042 struct ext4_extent *ex; 3043 struct ext4_extent_header *eh; 3044 int depth; 3045 int err = 0; 3046 int ret = 0; 3047 3048 depth = ext_depth(inode); 3049 eh = path[depth].p_hdr; 3050 ex = path[depth].p_ext; 3051 3052 err = ext4_ext_get_access(handle, inode, path + depth); 3053 if (err) 3054 goto out; 3055 /* first mark the extent as initialized */ 3056 ext4_ext_mark_initialized(ex); 3057 3058 /* 3059 * We have to see if it can be merged with the extent 3060 * on the left. 
3061 */ 3062 if (ex > EXT_FIRST_EXTENT(eh)) { 3063 /* 3064 * To merge left, pass "ex - 1" to try_to_merge(), 3065 * since it merges towards right _only_. 3066 */ 3067 ret = ext4_ext_try_to_merge(inode, path, ex - 1); 3068 if (ret) { 3069 err = ext4_ext_correct_indexes(handle, inode, path); 3070 if (err) 3071 goto out; 3072 depth = ext_depth(inode); 3073 ex--; 3074 } 3075 } 3076 /* 3077 * Try to Merge towards right. 3078 */ 3079 ret = ext4_ext_try_to_merge(inode, path, ex); 3080 if (ret) { 3081 err = ext4_ext_correct_indexes(handle, inode, path); 3082 if (err) 3083 goto out; 3084 depth = ext_depth(inode); 3085 } 3086 /* Mark modified extent as dirty */ 3087 err = ext4_ext_dirty(handle, inode, path + depth); 3088 out: 3089 ext4_ext_show_leaf(inode, path); 3090 return err; 3091 } 3092 3093 static void unmap_underlying_metadata_blocks(struct block_device *bdev, 3094 sector_t block, int count) 3095 { 3096 int i; 3097 for (i = 0; i < count; i++) 3098 unmap_underlying_metadata(bdev, block + i); 3099 } 3100 3101 /* 3102 * Handle EOFBLOCKS_FL flag, clearing it if necessary 3103 */ 3104 static int check_eofblocks_fl(handle_t *handle, struct inode *inode, 3105 ext4_lblk_t lblk, 3106 struct ext4_ext_path *path, 3107 unsigned int len) 3108 { 3109 int i, depth; 3110 struct ext4_extent_header *eh; 3111 struct ext4_extent *ex, *last_ex; 3112 3113 if (!ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS)) 3114 return 0; 3115 3116 depth = ext_depth(inode); 3117 eh = path[depth].p_hdr; 3118 ex = path[depth].p_ext; 3119 3120 if (unlikely(!eh->eh_entries)) { 3121 EXT4_ERROR_INODE(inode, "eh->eh_entries == 0 and " 3122 "EOFBLOCKS_FL set"); 3123 return -EIO; 3124 } 3125 last_ex = EXT_LAST_EXTENT(eh); 3126 /* 3127 * We should clear the EOFBLOCKS_FL flag if we are writing the 3128 * last block in the last extent in the file. 
We test this by 3129 * first checking to see if the caller to 3130 * ext4_ext_get_blocks() was interested in the last block (or 3131 * a block beyond the last block) in the current extent. If 3132 * this turns out to be false, we can bail out from this 3133 * function immediately. 3134 */ 3135 if (lblk + len < le32_to_cpu(last_ex->ee_block) + 3136 ext4_ext_get_actual_len(last_ex)) 3137 return 0; 3138 /* 3139 * If the caller does appear to be planning to write at or 3140 * beyond the end of the current extent, we then test to see 3141 * if the current extent is the last extent in the file, by 3142 * checking to make sure it was reached via the rightmost node 3143 * at each level of the tree. 3144 */ 3145 for (i = depth-1; i >= 0; i--) 3146 if (path[i].p_idx != EXT_LAST_INDEX(path[i].p_hdr)) 3147 return 0; 3148 ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS); 3149 return ext4_mark_inode_dirty(handle, inode); 3150 } 3151 3152 static int 3153 ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, 3154 struct ext4_map_blocks *map, 3155 struct ext4_ext_path *path, int flags, 3156 unsigned int allocated, ext4_fsblk_t newblock) 3157 { 3158 int ret = 0; 3159 int err = 0; 3160 ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; 3161 3162 ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical" 3163 "block %llu, max_blocks %u, flags %d, allocated %u", 3164 inode->i_ino, (unsigned long long)map->m_lblk, map->m_len, 3165 flags, allocated); 3166 ext4_ext_show_leaf(inode, path); 3167 3168 /* get_block() before submit the IO, split the extent */ 3169 if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { 3170 ret = ext4_split_unwritten_extents(handle, inode, map, 3171 path, flags); 3172 /* 3173 * Flag the inode(non aio case) or end_io struct (aio case) 3174 * that this IO needs to convertion to written when IO is 3175 * completed 3176 */ 3177 if (io) 3178 io->flag = EXT4_IO_END_UNWRITTEN; 3179 else 3180 ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); 3181 
if (ext4_should_dioread_nolock(inode)) 3182 map->m_flags |= EXT4_MAP_UNINIT; 3183 goto out; 3184 } 3185 /* IO end_io complete, convert the filled extent to written */ 3186 if ((flags & EXT4_GET_BLOCKS_CONVERT)) { 3187 ret = ext4_convert_unwritten_extents_endio(handle, inode, 3188 path); 3189 if (ret >= 0) { 3190 ext4_update_inode_fsync_trans(handle, inode, 1); 3191 err = check_eofblocks_fl(handle, inode, map->m_lblk, 3192 path, map->m_len); 3193 } else 3194 err = ret; 3195 goto out2; 3196 } 3197 /* buffered IO case */ 3198 /* 3199 * repeat fallocate creation request 3200 * we already have an unwritten extent 3201 */ 3202 if (flags & EXT4_GET_BLOCKS_UNINIT_EXT) 3203 goto map_out; 3204 3205 /* buffered READ or buffered write_begin() lookup */ 3206 if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) { 3207 /* 3208 * We have blocks reserved already. We 3209 * return allocated blocks so that delalloc 3210 * won't do block reservation for us. But 3211 * the buffer head will be unmapped so that 3212 * a read from the block returns 0s. 3213 */ 3214 map->m_flags |= EXT4_MAP_UNWRITTEN; 3215 goto out1; 3216 } 3217 3218 /* buffered write, writepage time, convert*/ 3219 ret = ext4_ext_convert_to_initialized(handle, inode, map, path); 3220 if (ret >= 0) { 3221 ext4_update_inode_fsync_trans(handle, inode, 1); 3222 err = check_eofblocks_fl(handle, inode, map->m_lblk, path, 3223 map->m_len); 3224 if (err < 0) 3225 goto out2; 3226 } 3227 3228 out: 3229 if (ret <= 0) { 3230 err = ret; 3231 goto out2; 3232 } else 3233 allocated = ret; 3234 map->m_flags |= EXT4_MAP_NEW; 3235 /* 3236 * if we allocated more blocks than requested 3237 * we need to make sure we unmap the extra block 3238 * allocated. The actual needed block will get 3239 * unmapped later when we find the buffer_head marked 3240 * new. 
3241 */ 3242 if (allocated > map->m_len) { 3243 unmap_underlying_metadata_blocks(inode->i_sb->s_bdev, 3244 newblock + map->m_len, 3245 allocated - map->m_len); 3246 allocated = map->m_len; 3247 } 3248 3249 /* 3250 * If we have done fallocate with the offset that is already 3251 * delayed allocated, we would have block reservation 3252 * and quota reservation done in the delayed write path. 3253 * But fallocate would have already updated quota and block 3254 * count for this offset. So cancel these reservation 3255 */ 3256 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) 3257 ext4_da_update_reserve_space(inode, allocated, 0); 3258 3259 map_out: 3260 map->m_flags |= EXT4_MAP_MAPPED; 3261 out1: 3262 if (allocated > map->m_len) 3263 allocated = map->m_len; 3264 ext4_ext_show_leaf(inode, path); 3265 map->m_pblk = newblock; 3266 map->m_len = allocated; 3267 out2: 3268 if (path) { 3269 ext4_ext_drop_refs(path); 3270 kfree(path); 3271 } 3272 return err ? err : allocated; 3273 } 3274 3275 /* 3276 * Block allocation/map/preallocation routine for extents based files 3277 * 3278 * 3279 * Need to be called with 3280 * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system block 3281 * (ie, create is zero). Otherwise down_write(&EXT4_I(inode)->i_data_sem) 3282 * 3283 * return > 0, number of of blocks already mapped/allocated 3284 * if create == 0 and these are pre-allocated blocks 3285 * buffer head is unmapped 3286 * otherwise blocks are mapped 3287 * 3288 * return = 0, if plain look up failed (blocks have not been allocated) 3289 * buffer head is unmapped 3290 * 3291 * return < 0, error case. 
3292 */ 3293 int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, 3294 struct ext4_map_blocks *map, int flags) 3295 { 3296 struct ext4_ext_path *path = NULL; 3297 struct ext4_extent_header *eh; 3298 struct ext4_extent newex, *ex; 3299 ext4_fsblk_t newblock; 3300 int err = 0, depth, ret; 3301 unsigned int allocated = 0; 3302 struct ext4_allocation_request ar; 3303 ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; 3304 3305 ext_debug("blocks %u/%u requested for inode %lu\n", 3306 map->m_lblk, map->m_len, inode->i_ino); 3307 3308 /* check in cache */ 3309 if (ext4_ext_in_cache(inode, map->m_lblk, &newex)) { 3310 if (!newex.ee_start_lo && !newex.ee_start_hi) { 3311 if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) { 3312 /* 3313 * block isn't allocated yet and 3314 * user doesn't want to allocate it 3315 */ 3316 goto out2; 3317 } 3318 /* we should allocate requested block */ 3319 } else { 3320 /* block is already allocated */ 3321 newblock = map->m_lblk 3322 - le32_to_cpu(newex.ee_block) 3323 + ext4_ext_pblock(&newex); 3324 /* number of remaining blocks in the extent */ 3325 allocated = ext4_ext_get_actual_len(&newex) - 3326 (map->m_lblk - le32_to_cpu(newex.ee_block)); 3327 goto out; 3328 } 3329 } 3330 3331 /* find extent for this block */ 3332 path = ext4_ext_find_extent(inode, map->m_lblk, NULL); 3333 if (IS_ERR(path)) { 3334 err = PTR_ERR(path); 3335 path = NULL; 3336 goto out2; 3337 } 3338 3339 depth = ext_depth(inode); 3340 3341 /* 3342 * consistent leaf must not be empty; 3343 * this situation is possible, though, _during_ tree modification; 3344 * this is why assert can't be put in ext4_ext_find_extent() 3345 */ 3346 if (unlikely(path[depth].p_ext == NULL && depth != 0)) { 3347 EXT4_ERROR_INODE(inode, "bad extent address " 3348 "lblock: %lu, depth: %d pblock %lld", 3349 (unsigned long) map->m_lblk, depth, 3350 path[depth].p_block); 3351 err = -EIO; 3352 goto out2; 3353 } 3354 eh = path[depth].p_hdr; 3355 3356 ex = path[depth].p_ext; 3357 if (ex) { 3358 
ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block); 3359 ext4_fsblk_t ee_start = ext4_ext_pblock(ex); 3360 unsigned short ee_len; 3361 3362 /* 3363 * Uninitialized extents are treated as holes, except that 3364 * we split out initialized portions during a write. 3365 */ 3366 ee_len = ext4_ext_get_actual_len(ex); 3367 /* if found extent covers block, simply return it */ 3368 if (in_range(map->m_lblk, ee_block, ee_len)) { 3369 newblock = map->m_lblk - ee_block + ee_start; 3370 /* number of remaining blocks in the extent */ 3371 allocated = ee_len - (map->m_lblk - ee_block); 3372 ext_debug("%u fit into %u:%d -> %llu\n", map->m_lblk, 3373 ee_block, ee_len, newblock); 3374 3375 /* Do not put uninitialized extent in the cache */ 3376 if (!ext4_ext_is_uninitialized(ex)) { 3377 ext4_ext_put_in_cache(inode, ee_block, 3378 ee_len, ee_start); 3379 goto out; 3380 } 3381 ret = ext4_ext_handle_uninitialized_extents(handle, 3382 inode, map, path, flags, allocated, 3383 newblock); 3384 return ret; 3385 } 3386 } 3387 3388 /* 3389 * requested block isn't allocated yet; 3390 * we couldn't try to create block if create flag is zero 3391 */ 3392 if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) { 3393 /* 3394 * put just found gap into cache to speed up 3395 * subsequent requests 3396 */ 3397 ext4_ext_put_gap_in_cache(inode, path, map->m_lblk); 3398 goto out2; 3399 } 3400 /* 3401 * Okay, we need to do block allocation. 3402 */ 3403 3404 /* find neighbour allocated blocks */ 3405 ar.lleft = map->m_lblk; 3406 err = ext4_ext_search_left(inode, path, &ar.lleft, &ar.pleft); 3407 if (err) 3408 goto out2; 3409 ar.lright = map->m_lblk; 3410 err = ext4_ext_search_right(inode, path, &ar.lright, &ar.pright); 3411 if (err) 3412 goto out2; 3413 3414 /* 3415 * See if request is beyond maximum number of blocks we can have in 3416 * a single extent. For an initialized extent this limit is 3417 * EXT_INIT_MAX_LEN and for an uninitialized extent this limit is 3418 * EXT_UNINIT_MAX_LEN. 
3419 */ 3420 if (map->m_len > EXT_INIT_MAX_LEN && 3421 !(flags & EXT4_GET_BLOCKS_UNINIT_EXT)) 3422 map->m_len = EXT_INIT_MAX_LEN; 3423 else if (map->m_len > EXT_UNINIT_MAX_LEN && 3424 (flags & EXT4_GET_BLOCKS_UNINIT_EXT)) 3425 map->m_len = EXT_UNINIT_MAX_LEN; 3426 3427 /* Check if we can really insert (m_lblk)::(m_lblk + m_len) extent */ 3428 newex.ee_block = cpu_to_le32(map->m_lblk); 3429 newex.ee_len = cpu_to_le16(map->m_len); 3430 err = ext4_ext_check_overlap(inode, &newex, path); 3431 if (err) 3432 allocated = ext4_ext_get_actual_len(&newex); 3433 else 3434 allocated = map->m_len; 3435 3436 /* allocate new block */ 3437 ar.inode = inode; 3438 ar.goal = ext4_ext_find_goal(inode, path, map->m_lblk); 3439 ar.logical = map->m_lblk; 3440 ar.len = allocated; 3441 if (S_ISREG(inode->i_mode)) 3442 ar.flags = EXT4_MB_HINT_DATA; 3443 else 3444 /* disable in-core preallocation for non-regular files */ 3445 ar.flags = 0; 3446 newblock = ext4_mb_new_blocks(handle, &ar, &err); 3447 if (!newblock) 3448 goto out2; 3449 ext_debug("allocate new block: goal %llu, found %llu/%u\n", 3450 ar.goal, newblock, allocated); 3451 3452 /* try to insert new extent into found leaf and return */ 3453 ext4_ext_store_pblock(&newex, newblock); 3454 newex.ee_len = cpu_to_le16(ar.len); 3455 /* Mark uninitialized */ 3456 if (flags & EXT4_GET_BLOCKS_UNINIT_EXT){ 3457 ext4_ext_mark_uninitialized(&newex); 3458 /* 3459 * io_end structure was created for every IO write to an 3460 * uninitialized extent. To avoid unecessary conversion, 3461 * here we flag the IO that really needs the conversion. 3462 * For non asycn direct IO case, flag the inode state 3463 * that we need to perform convertion when IO is done. 
3464 */ 3465 if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { 3466 if (io) 3467 io->flag = EXT4_IO_END_UNWRITTEN; 3468 else 3469 ext4_set_inode_state(inode, 3470 EXT4_STATE_DIO_UNWRITTEN); 3471 } 3472 if (ext4_should_dioread_nolock(inode)) 3473 map->m_flags |= EXT4_MAP_UNINIT; 3474 } 3475 3476 err = check_eofblocks_fl(handle, inode, map->m_lblk, path, ar.len); 3477 if (err) 3478 goto out2; 3479 3480 err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); 3481 if (err) { 3482 /* free data blocks we just allocated */ 3483 /* not a good idea to call discard here directly, 3484 * but otherwise we'd need to call it every free() */ 3485 ext4_discard_preallocations(inode); 3486 ext4_free_blocks(handle, inode, 0, ext4_ext_pblock(&newex), 3487 ext4_ext_get_actual_len(&newex), 0); 3488 goto out2; 3489 } 3490 3491 /* previous routine could use block we allocated */ 3492 newblock = ext4_ext_pblock(&newex); 3493 allocated = ext4_ext_get_actual_len(&newex); 3494 if (allocated > map->m_len) 3495 allocated = map->m_len; 3496 map->m_flags |= EXT4_MAP_NEW; 3497 3498 /* 3499 * Update reserved blocks/metadata blocks after successful 3500 * block allocation which had been deferred till now. 3501 */ 3502 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) 3503 ext4_da_update_reserve_space(inode, allocated, 1); 3504 3505 /* 3506 * Cache the extent and update transaction to commit on fdatasync only 3507 * when it is _not_ an uninitialized extent. 
3508 */ 3509 if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) { 3510 ext4_ext_put_in_cache(inode, map->m_lblk, allocated, newblock); 3511 ext4_update_inode_fsync_trans(handle, inode, 1); 3512 } else 3513 ext4_update_inode_fsync_trans(handle, inode, 0); 3514 out: 3515 if (allocated > map->m_len) 3516 allocated = map->m_len; 3517 ext4_ext_show_leaf(inode, path); 3518 map->m_flags |= EXT4_MAP_MAPPED; 3519 map->m_pblk = newblock; 3520 map->m_len = allocated; 3521 out2: 3522 if (path) { 3523 ext4_ext_drop_refs(path); 3524 kfree(path); 3525 } 3526 return err ? err : allocated; 3527 } 3528 3529 void ext4_ext_truncate(struct inode *inode) 3530 { 3531 struct address_space *mapping = inode->i_mapping; 3532 struct super_block *sb = inode->i_sb; 3533 ext4_lblk_t last_block; 3534 handle_t *handle; 3535 int err = 0; 3536 3537 /* 3538 * finish any pending end_io work so we won't run the risk of 3539 * converting any truncated blocks to initialized later 3540 */ 3541 ext4_flush_completed_IO(inode); 3542 3543 /* 3544 * probably first extent we're gonna free will be last in block 3545 */ 3546 err = ext4_writepage_trans_blocks(inode); 3547 handle = ext4_journal_start(inode, err); 3548 if (IS_ERR(handle)) 3549 return; 3550 3551 if (inode->i_size & (sb->s_blocksize - 1)) 3552 ext4_block_truncate_page(handle, mapping, inode->i_size); 3553 3554 if (ext4_orphan_add(handle, inode)) 3555 goto out_stop; 3556 3557 down_write(&EXT4_I(inode)->i_data_sem); 3558 ext4_ext_invalidate_cache(inode); 3559 3560 ext4_discard_preallocations(inode); 3561 3562 /* 3563 * TODO: optimization is possible here. 3564 * Probably we need not scan at all, 3565 * because page truncation is enough. 
3566 */ 3567 3568 /* we have to know where to truncate from in crash case */ 3569 EXT4_I(inode)->i_disksize = inode->i_size; 3570 ext4_mark_inode_dirty(handle, inode); 3571 3572 last_block = (inode->i_size + sb->s_blocksize - 1) 3573 >> EXT4_BLOCK_SIZE_BITS(sb); 3574 err = ext4_ext_remove_space(inode, last_block); 3575 3576 /* In a multi-transaction truncate, we only make the final 3577 * transaction synchronous. 3578 */ 3579 if (IS_SYNC(inode)) 3580 ext4_handle_sync(handle); 3581 3582 out_stop: 3583 up_write(&EXT4_I(inode)->i_data_sem); 3584 /* 3585 * If this was a simple ftruncate() and the file will remain alive, 3586 * then we need to clear up the orphan record which we created above. 3587 * However, if this was a real unlink then we were called by 3588 * ext4_delete_inode(), and we allow that function to clean up the 3589 * orphan info for us. 3590 */ 3591 if (inode->i_nlink) 3592 ext4_orphan_del(handle, inode); 3593 3594 inode->i_mtime = inode->i_ctime = ext4_current_time(inode); 3595 ext4_mark_inode_dirty(handle, inode); 3596 ext4_journal_stop(handle); 3597 } 3598 3599 static void ext4_falloc_update_inode(struct inode *inode, 3600 int mode, loff_t new_size, int update_ctime) 3601 { 3602 struct timespec now; 3603 3604 if (update_ctime) { 3605 now = current_fs_time(inode->i_sb); 3606 if (!timespec_equal(&inode->i_ctime, &now)) 3607 inode->i_ctime = now; 3608 } 3609 /* 3610 * Update only when preallocation was requested beyond 3611 * the file size. 3612 */ 3613 if (!(mode & FALLOC_FL_KEEP_SIZE)) { 3614 if (new_size > i_size_read(inode)) 3615 i_size_write(inode, new_size); 3616 if (new_size > EXT4_I(inode)->i_disksize) 3617 ext4_update_i_disksize(inode, new_size); 3618 } else { 3619 /* 3620 * Mark that we allocate beyond EOF so the subsequent truncate 3621 * can proceed even if the new size is the same as i_size. 
3622 */ 3623 if (new_size > i_size_read(inode)) 3624 ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS); 3625 } 3626 3627 } 3628 3629 /* 3630 * preallocate space for a file. This implements ext4's fallocate file 3631 * operation, which gets called from sys_fallocate system call. 3632 * For block-mapped files, posix_fallocate should fall back to the method 3633 * of writing zeroes to the required new blocks (the same behavior which is 3634 * expected for file systems which do not support fallocate() system call). 3635 */ 3636 long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) 3637 { 3638 struct inode *inode = file->f_path.dentry->d_inode; 3639 handle_t *handle; 3640 loff_t new_size; 3641 unsigned int max_blocks; 3642 int ret = 0; 3643 int ret2 = 0; 3644 int retries = 0; 3645 struct ext4_map_blocks map; 3646 unsigned int credits, blkbits = inode->i_blkbits; 3647 3648 /* We only support the FALLOC_FL_KEEP_SIZE mode */ 3649 if (mode & ~FALLOC_FL_KEEP_SIZE) 3650 return -EOPNOTSUPP; 3651 3652 /* 3653 * currently supporting (pre)allocate mode for extent-based 3654 * files _only_ 3655 */ 3656 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) 3657 return -EOPNOTSUPP; 3658 3659 map.m_lblk = offset >> blkbits; 3660 /* 3661 * We can't just convert len to max_blocks because 3662 * If blocksize = 4096 offset = 3072 and len = 2048 3663 */ 3664 max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) 3665 - map.m_lblk; 3666 /* 3667 * credits to insert 1 extent into extent tree 3668 */ 3669 credits = ext4_chunk_trans_blocks(inode, max_blocks); 3670 mutex_lock(&inode->i_mutex); 3671 ret = inode_newsize_ok(inode, (len + offset)); 3672 if (ret) { 3673 mutex_unlock(&inode->i_mutex); 3674 return ret; 3675 } 3676 retry: 3677 while (ret >= 0 && ret < max_blocks) { 3678 map.m_lblk = map.m_lblk + ret; 3679 map.m_len = max_blocks = max_blocks - ret; 3680 handle = ext4_journal_start(inode, credits); 3681 if (IS_ERR(handle)) { 3682 ret = PTR_ERR(handle); 
3683 break; 3684 } 3685 ret = ext4_map_blocks(handle, inode, &map, 3686 EXT4_GET_BLOCKS_CREATE_UNINIT_EXT); 3687 if (ret <= 0) { 3688 #ifdef EXT4FS_DEBUG 3689 WARN_ON(ret <= 0); 3690 printk(KERN_ERR "%s: ext4_ext_map_blocks " 3691 "returned error inode#%lu, block=%u, " 3692 "max_blocks=%u", __func__, 3693 inode->i_ino, map.m_lblk, max_blocks); 3694 #endif 3695 ext4_mark_inode_dirty(handle, inode); 3696 ret2 = ext4_journal_stop(handle); 3697 break; 3698 } 3699 if ((map.m_lblk + ret) >= (EXT4_BLOCK_ALIGN(offset + len, 3700 blkbits) >> blkbits)) 3701 new_size = offset + len; 3702 else 3703 new_size = (map.m_lblk + ret) << blkbits; 3704 3705 ext4_falloc_update_inode(inode, mode, new_size, 3706 (map.m_flags & EXT4_MAP_NEW)); 3707 ext4_mark_inode_dirty(handle, inode); 3708 ret2 = ext4_journal_stop(handle); 3709 if (ret2) 3710 break; 3711 } 3712 if (ret == -ENOSPC && 3713 ext4_should_retry_alloc(inode->i_sb, &retries)) { 3714 ret = 0; 3715 goto retry; 3716 } 3717 mutex_unlock(&inode->i_mutex); 3718 return ret > 0 ? ret2 : ret; 3719 } 3720 3721 /* 3722 * This function convert a range of blocks to written extents 3723 * The caller of this function will pass the start offset and the size. 3724 * all unwritten extents within this range will be converted to 3725 * written extents. 3726 * 3727 * This function is called from the direct IO end io call back 3728 * function, to convert the fallocated extents after IO is completed. 3729 * Returns 0 on success. 
3730 */ 3731 int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, 3732 ssize_t len) 3733 { 3734 handle_t *handle; 3735 unsigned int max_blocks; 3736 int ret = 0; 3737 int ret2 = 0; 3738 struct ext4_map_blocks map; 3739 unsigned int credits, blkbits = inode->i_blkbits; 3740 3741 map.m_lblk = offset >> blkbits; 3742 /* 3743 * We can't just convert len to max_blocks because 3744 * If blocksize = 4096 offset = 3072 and len = 2048 3745 */ 3746 max_blocks = ((EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) - 3747 map.m_lblk); 3748 /* 3749 * credits to insert 1 extent into extent tree 3750 */ 3751 credits = ext4_chunk_trans_blocks(inode, max_blocks); 3752 while (ret >= 0 && ret < max_blocks) { 3753 map.m_lblk += ret; 3754 map.m_len = (max_blocks -= ret); 3755 handle = ext4_journal_start(inode, credits); 3756 if (IS_ERR(handle)) { 3757 ret = PTR_ERR(handle); 3758 break; 3759 } 3760 ret = ext4_map_blocks(handle, inode, &map, 3761 EXT4_GET_BLOCKS_IO_CONVERT_EXT); 3762 if (ret <= 0) { 3763 WARN_ON(ret <= 0); 3764 printk(KERN_ERR "%s: ext4_ext_map_blocks " 3765 "returned error inode#%lu, block=%u, " 3766 "max_blocks=%u", __func__, 3767 inode->i_ino, map.m_lblk, map.m_len); 3768 } 3769 ext4_mark_inode_dirty(handle, inode); 3770 ret2 = ext4_journal_stop(handle); 3771 if (ret <= 0 || ret2 ) 3772 break; 3773 } 3774 return ret > 0 ? ret2 : ret; 3775 } 3776 /* 3777 * Callback function called for each extent to gather FIEMAP information. 
3778 */ 3779 static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path, 3780 struct ext4_ext_cache *newex, struct ext4_extent *ex, 3781 void *data) 3782 { 3783 struct fiemap_extent_info *fieinfo = data; 3784 unsigned char blksize_bits = inode->i_sb->s_blocksize_bits; 3785 __u64 logical; 3786 __u64 physical; 3787 __u64 length; 3788 __u32 flags = 0; 3789 int error; 3790 3791 logical = (__u64)newex->ec_block << blksize_bits; 3792 3793 if (newex->ec_start == 0) { 3794 pgoff_t offset; 3795 struct page *page; 3796 struct buffer_head *bh = NULL; 3797 3798 offset = logical >> PAGE_SHIFT; 3799 page = find_get_page(inode->i_mapping, offset); 3800 if (!page || !page_has_buffers(page)) 3801 return EXT_CONTINUE; 3802 3803 bh = page_buffers(page); 3804 3805 if (!bh) 3806 return EXT_CONTINUE; 3807 3808 if (buffer_delay(bh)) { 3809 flags |= FIEMAP_EXTENT_DELALLOC; 3810 page_cache_release(page); 3811 } else { 3812 page_cache_release(page); 3813 return EXT_CONTINUE; 3814 } 3815 } 3816 3817 physical = (__u64)newex->ec_start << blksize_bits; 3818 length = (__u64)newex->ec_len << blksize_bits; 3819 3820 if (ex && ext4_ext_is_uninitialized(ex)) 3821 flags |= FIEMAP_EXTENT_UNWRITTEN; 3822 3823 /* 3824 * If this extent reaches EXT_MAX_BLOCK, it must be last. 3825 * 3826 * Or if ext4_ext_next_allocated_block is EXT_MAX_BLOCK, 3827 * this also indicates no more allocated blocks. 
3828 * 3829 * XXX this might miss a single-block extent at EXT_MAX_BLOCK 3830 */ 3831 if (ext4_ext_next_allocated_block(path) == EXT_MAX_BLOCK || 3832 newex->ec_block + newex->ec_len - 1 == EXT_MAX_BLOCK) { 3833 loff_t size = i_size_read(inode); 3834 loff_t bs = EXT4_BLOCK_SIZE(inode->i_sb); 3835 3836 flags |= FIEMAP_EXTENT_LAST; 3837 if ((flags & FIEMAP_EXTENT_DELALLOC) && 3838 logical+length > size) 3839 length = (size - logical + bs - 1) & ~(bs-1); 3840 } 3841 3842 error = fiemap_fill_next_extent(fieinfo, logical, physical, 3843 length, flags); 3844 if (error < 0) 3845 return error; 3846 if (error == 1) 3847 return EXT_BREAK; 3848 3849 return EXT_CONTINUE; 3850 } 3851 3852 /* fiemap flags we can handle specified here */ 3853 #define EXT4_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR) 3854 3855 static int ext4_xattr_fiemap(struct inode *inode, 3856 struct fiemap_extent_info *fieinfo) 3857 { 3858 __u64 physical = 0; 3859 __u64 length; 3860 __u32 flags = FIEMAP_EXTENT_LAST; 3861 int blockbits = inode->i_sb->s_blocksize_bits; 3862 int error = 0; 3863 3864 /* in-inode? */ 3865 if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) { 3866 struct ext4_iloc iloc; 3867 int offset; /* offset of xattr in inode */ 3868 3869 error = ext4_get_inode_loc(inode, &iloc); 3870 if (error) 3871 return error; 3872 physical = iloc.bh->b_blocknr << blockbits; 3873 offset = EXT4_GOOD_OLD_INODE_SIZE + 3874 EXT4_I(inode)->i_extra_isize; 3875 physical += offset; 3876 length = EXT4_SB(inode->i_sb)->s_inode_size - offset; 3877 flags |= FIEMAP_EXTENT_DATA_INLINE; 3878 brelse(iloc.bh); 3879 } else { /* external block */ 3880 physical = EXT4_I(inode)->i_file_acl << blockbits; 3881 length = inode->i_sb->s_blocksize; 3882 } 3883 3884 if (physical) 3885 error = fiemap_fill_next_extent(fieinfo, 0, physical, 3886 length, flags); 3887 return (error < 0 ? 
error : 0); 3888 } 3889 3890 int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 3891 __u64 start, __u64 len) 3892 { 3893 ext4_lblk_t start_blk; 3894 int error = 0; 3895 3896 /* fallback to generic here if not in extents fmt */ 3897 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) 3898 return generic_block_fiemap(inode, fieinfo, start, len, 3899 ext4_get_block); 3900 3901 if (fiemap_check_flags(fieinfo, EXT4_FIEMAP_FLAGS)) 3902 return -EBADR; 3903 3904 if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) { 3905 error = ext4_xattr_fiemap(inode, fieinfo); 3906 } else { 3907 ext4_lblk_t len_blks; 3908 __u64 last_blk; 3909 3910 start_blk = start >> inode->i_sb->s_blocksize_bits; 3911 last_blk = (start + len - 1) >> inode->i_sb->s_blocksize_bits; 3912 if (last_blk >= EXT_MAX_BLOCK) 3913 last_blk = EXT_MAX_BLOCK-1; 3914 len_blks = ((ext4_lblk_t) last_blk) - start_blk + 1; 3915 3916 /* 3917 * Walk the extent tree gathering extent information. 3918 * ext4_ext_fiemap_cb will push extents back to user. 3919 */ 3920 error = ext4_ext_walk_space(inode, start_blk, len_blks, 3921 ext4_ext_fiemap_cb, fieinfo); 3922 } 3923 3924 return error; 3925 } 3926 3927