// SPDX-License-Identifier: GPL-2.0-only
/* -*- mode: c; c-basic-offset: 8; -*-
 * vim: noexpandtab sw=8 ts=8 sts=0:
 *
 * extent_map.c
 *
 * Block/Cluster mapping functions
 *
 * Copyright (C) 2004 Oracle.  All rights reserved.
 */

#include <linux/fs.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/fiemap.h>

#include <cluster/masklog.h>

#include "ocfs2.h"

#include "alloc.h"
#include "dlmglue.h"
#include "extent_map.h"
#include "inode.h"
#include "super.h"
#include "symlink.h"
#include "aops.h"
#include "ocfs2_trace.h"

#include "buffer_head_io.h"

/*
 * The extent caching implementation is intentionally trivial.
 *
 * We only cache a small number of extents stored directly on the
 * inode, so linear order operations are acceptable. If we ever want
 * to increase the size of the extent map, then these algorithms must
 * get smarter.
 */

void ocfs2_extent_map_init(struct inode *inode)
{
	struct ocfs2_inode_info *oi = OCFS2_I(inode);

	oi->ip_extent_map.em_num_items = 0;
	INIT_LIST_HEAD(&oi->ip_extent_map.em_list);
}

static void __ocfs2_extent_map_lookup(struct ocfs2_extent_map *em,
				      unsigned int cpos,
				      struct ocfs2_extent_map_item **ret_emi)
{
	unsigned int range;
	struct ocfs2_extent_map_item *emi;

	*ret_emi = NULL;

	list_for_each_entry(emi, &em->em_list, ei_list) {
		range = emi->ei_cpos + emi->ei_clusters;

		if (cpos >= emi->ei_cpos && cpos < range) {
			list_move(&emi->ei_list, &em->em_list);

			*ret_emi = emi;
			break;
		}
	}
}

static int ocfs2_extent_map_lookup(struct inode *inode, unsigned int cpos,
				   unsigned int *phys, unsigned int *len,
				   unsigned int *flags)
{
	unsigned int coff;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_extent_map_item *emi;

	spin_lock(&oi->ip_lock);

	__ocfs2_extent_map_lookup(&oi->ip_extent_map, cpos, &emi);
	if (emi) {
		coff = cpos - emi->ei_cpos;
		*phys = emi->ei_phys + coff;
		if (len)
			*len = emi->ei_clusters - coff;
		if (flags)
			*flags = emi->ei_flags;
	}

	spin_unlock(&oi->ip_lock);

	if (emi == NULL)
		return -ENOENT;

	return 0;
}

/*
 * Forget about all clusters equal to or greater than cpos.
 */
void ocfs2_extent_map_trunc(struct inode *inode, unsigned int cpos)
{
	struct ocfs2_extent_map_item *emi, *n;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_extent_map *em = &oi->ip_extent_map;
	LIST_HEAD(tmp_list);
	unsigned int range;

	spin_lock(&oi->ip_lock);
	list_for_each_entry_safe(emi, n, &em->em_list, ei_list) {
		if (emi->ei_cpos >= cpos) {
			/* Full truncate of this record. */
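			/*
			 * Move to tmp_list so the kfree() can happen
			 * after ip_lock has been dropped.
			 */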
			list_move(&emi->ei_list, &tmp_list);
			BUG_ON(em->em_num_items == 0);
			em->em_num_items--;
			continue;
		}

		range = emi->ei_cpos + emi->ei_clusters;
		if (range > cpos) {
			/* Partial truncate */
			emi->ei_clusters = cpos - emi->ei_cpos;
		}
	}
	spin_unlock(&oi->ip_lock);

	list_for_each_entry_safe(emi, n, &tmp_list, ei_list) {
		list_del(&emi->ei_list);
		kfree(emi);
	}
}

/*
 * Is any part of emi2 contained within emi1
 */
static int ocfs2_ei_is_contained(struct ocfs2_extent_map_item *emi1,
				 struct ocfs2_extent_map_item *emi2)
{
	unsigned int range1, range2;

	/*
	 * Check if logical start of emi2 is inside emi1
	 */
	range1 = emi1->ei_cpos + emi1->ei_clusters;
	if (emi2->ei_cpos >= emi1->ei_cpos && emi2->ei_cpos < range1)
		return 1;

	/*
	 * Check if logical end of emi2 is inside emi1
	 */
	range2 = emi2->ei_cpos + emi2->ei_clusters;
	if (range2 > emi1->ei_cpos && range2 <= range1)
		return 1;

	return 0;
}

static void ocfs2_copy_emi_fields(struct ocfs2_extent_map_item *dest,
				  struct ocfs2_extent_map_item *src)
{
	dest->ei_cpos = src->ei_cpos;
	dest->ei_phys = src->ei_phys;
	dest->ei_clusters = src->ei_clusters;
	dest->ei_flags = src->ei_flags;
}

/*
 * Try to merge emi with ins. Returns 1 if merge succeeds, zero
 * otherwise.
 */
static int ocfs2_try_to_merge_extent_map(struct ocfs2_extent_map_item *emi,
					 struct ocfs2_extent_map_item *ins)
{
	/*
	 * Handle contiguousness
	 */
	if (ins->ei_phys == (emi->ei_phys + emi->ei_clusters) &&
	    ins->ei_cpos == (emi->ei_cpos + emi->ei_clusters) &&
	    ins->ei_flags == emi->ei_flags) {
		emi->ei_clusters += ins->ei_clusters;
		return 1;
	} else if ((ins->ei_phys + ins->ei_clusters) == emi->ei_phys &&
		   (ins->ei_cpos + ins->ei_clusters) == emi->ei_cpos &&
		   ins->ei_flags == emi->ei_flags) {
		emi->ei_phys = ins->ei_phys;
		emi->ei_cpos = ins->ei_cpos;
		emi->ei_clusters += ins->ei_clusters;
		return 1;
	}

	/*
	 * Overlapping extents - this shouldn't happen unless we've
	 * split an extent to change its flags. That is exceedingly
	 * rare, so there's no sense in trying to optimize it yet.
	 */
	if (ocfs2_ei_is_contained(emi, ins) ||
	    ocfs2_ei_is_contained(ins, emi)) {
		ocfs2_copy_emi_fields(emi, ins);
		return 1;
	}

	/* No merge was possible. */
	return 0;
}

/*
 * In order to reduce complexity on the caller, this insert function
 * is intentionally liberal in what it will accept.
 *
 * The only rule is that the truncate call *must* be used whenever
 * records have been deleted. This avoids inserting overlapping
 * records with different physical mappings.
 */
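/*
 * Illustrative only (the caller below is hypothetical, not code from this
 * file): when the on-disk allocation has been cut back at cluster 'cpos',
 * the cache must be truncated before new records are cached again:
 *
 *	ocfs2_extent_map_trunc(inode, cpos);
 *	...
 *	ocfs2_extent_map_insert_rec(inode, &rec);
 *
 * Skipping the cache truncate could leave a stale item that overlaps the
 * new record with a different physical mapping.
 */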
void ocfs2_extent_map_insert_rec(struct inode *inode,
				 struct ocfs2_extent_rec *rec)
{
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_extent_map *em = &oi->ip_extent_map;
	struct ocfs2_extent_map_item *emi, *new_emi = NULL;
	struct ocfs2_extent_map_item ins;

	ins.ei_cpos = le32_to_cpu(rec->e_cpos);
	ins.ei_phys = ocfs2_blocks_to_clusters(inode->i_sb,
					       le64_to_cpu(rec->e_blkno));
	ins.ei_clusters = le16_to_cpu(rec->e_leaf_clusters);
	ins.ei_flags = rec->e_flags;

search:
	spin_lock(&oi->ip_lock);

	list_for_each_entry(emi, &em->em_list, ei_list) {
		if (ocfs2_try_to_merge_extent_map(emi, &ins)) {
			list_move(&emi->ei_list, &em->em_list);
			spin_unlock(&oi->ip_lock);
			goto out;
		}
	}

	/*
	 * No item could be merged.
	 *
	 * Either allocate and add a new item, or overwrite the least
	 * recently inserted one.
	 */

	if (em->em_num_items < OCFS2_MAX_EXTENT_MAP_ITEMS) {
		if (new_emi == NULL) {
			spin_unlock(&oi->ip_lock);

			new_emi = kmalloc(sizeof(*new_emi), GFP_NOFS);
			if (new_emi == NULL)
				goto out;

			goto search;
		}

		ocfs2_copy_emi_fields(new_emi, &ins);
		list_add(&new_emi->ei_list, &em->em_list);
		em->em_num_items++;
		new_emi = NULL;
	} else {
		BUG_ON(list_empty(&em->em_list) || em->em_num_items == 0);
		emi = list_entry(em->em_list.prev,
				 struct ocfs2_extent_map_item, ei_list);
		list_move(&emi->ei_list, &em->em_list);
		ocfs2_copy_emi_fields(emi, &ins);
	}

	spin_unlock(&oi->ip_lock);

out:
	kfree(new_emi);
}

static int ocfs2_last_eb_is_empty(struct inode *inode,
				  struct ocfs2_dinode *di)
{
	int ret, next_free;
	u64 last_eb_blk = le64_to_cpu(di->i_last_eb_blk);
	struct buffer_head *eb_bh = NULL;
	struct ocfs2_extent_block *eb;
	struct ocfs2_extent_list *el;

	ret = ocfs2_read_extent_block(INODE_CACHE(inode), last_eb_blk, &eb_bh);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	eb = (struct ocfs2_extent_block *) eb_bh->b_data;
	el = &eb->h_list;

	if (el->l_tree_depth) {
		ocfs2_error(inode->i_sb,
			    "Inode %lu has non zero tree depth in leaf block %llu\n",
			    inode->i_ino,
			    (unsigned long long)eb_bh->b_blocknr);
		ret = -EROFS;
		goto out;
	}

	next_free = le16_to_cpu(el->l_next_free_rec);

	if (next_free == 0 ||
	    (next_free == 1 && ocfs2_is_empty_extent(&el->l_recs[0])))
		ret = 1;

out:
	brelse(eb_bh);
	return ret;
}

/*
 * Return the 1st index within el which contains an extent start
 * larger than v_cluster.
 */
static int ocfs2_search_for_hole_index(struct ocfs2_extent_list *el,
				       u32 v_cluster)
{
	int i;
	struct ocfs2_extent_rec *rec;

	for (i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
		rec = &el->l_recs[i];

		if (v_cluster < le32_to_cpu(rec->e_cpos))
			break;
	}

	return i;
}

/*
 * Figure out the size of a hole which starts at v_cluster within the given
 * extent list.
 *
 * If there is no more allocation past v_cluster, we return the maximum
 * cluster size minus v_cluster.
 *
 * If we have in-inode extents, then el points to the dinode list and
 * eb_bh is NULL. Otherwise, eb_bh should point to the extent block
 * containing el.
 */
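/*
 * Worked example (illustrative numbers only): if el holds extents covering
 * clusters 0-9 and 20-29, a query at v_cluster = 12 finds the record at
 * cpos 20 and reports a hole of 20 - 12 = 8 clusters. If nothing is
 * allocated past v_cluster (and there is no next leaf), the hole is
 * reported as UINT_MAX - v_cluster.
 */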
int ocfs2_figure_hole_clusters(struct ocfs2_caching_info *ci,
			       struct ocfs2_extent_list *el,
			       struct buffer_head *eb_bh,
			       u32 v_cluster,
			       u32 *num_clusters)
{
	int ret, i;
	struct buffer_head *next_eb_bh = NULL;
	struct ocfs2_extent_block *eb, *next_eb;

	i = ocfs2_search_for_hole_index(el, v_cluster);

	if (i == le16_to_cpu(el->l_next_free_rec) && eb_bh) {
		eb = (struct ocfs2_extent_block *)eb_bh->b_data;

		/*
		 * Check the next leaf for any extents.
		 */

		if (le64_to_cpu(eb->h_next_leaf_blk) == 0ULL)
			goto no_more_extents;

		ret = ocfs2_read_extent_block(ci,
					      le64_to_cpu(eb->h_next_leaf_blk),
					      &next_eb_bh);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		next_eb = (struct ocfs2_extent_block *)next_eb_bh->b_data;
		el = &next_eb->h_list;
		i = ocfs2_search_for_hole_index(el, v_cluster);
	}

no_more_extents:
	if (i == le16_to_cpu(el->l_next_free_rec)) {
		/*
		 * We're at the end of our existing allocation. Just
		 * return the maximum number of clusters we could
		 * possibly allocate.
		 */
		*num_clusters = UINT_MAX - v_cluster;
	} else {
		*num_clusters = le32_to_cpu(el->l_recs[i].e_cpos) - v_cluster;
	}

	ret = 0;
out:
	brelse(next_eb_bh);
	return ret;
}

static int ocfs2_get_clusters_nocache(struct inode *inode,
				      struct buffer_head *di_bh,
				      u32 v_cluster, unsigned int *hole_len,
				      struct ocfs2_extent_rec *ret_rec,
				      unsigned int *is_last)
{
	int i, ret, tree_height, len;
	struct ocfs2_dinode *di;
	struct ocfs2_extent_block *eb;
	struct ocfs2_extent_list *el;
	struct ocfs2_extent_rec *rec;
	struct buffer_head *eb_bh = NULL;

	memset(ret_rec, 0, sizeof(*ret_rec));
	if (is_last)
		*is_last = 0;

	di = (struct ocfs2_dinode *) di_bh->b_data;
	el = &di->id2.i_list;
	tree_height = le16_to_cpu(el->l_tree_depth);

	if (tree_height > 0) {
		ret = ocfs2_find_leaf(INODE_CACHE(inode), el, v_cluster,
				      &eb_bh);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		eb = (struct ocfs2_extent_block *) eb_bh->b_data;
		el = &eb->h_list;

		if (el->l_tree_depth) {
			ocfs2_error(inode->i_sb,
				    "Inode %lu has non zero tree depth in leaf block %llu\n",
				    inode->i_ino,
				    (unsigned long long)eb_bh->b_blocknr);
			ret = -EROFS;
			goto out;
		}
	}

	i = ocfs2_search_extent_list(el, v_cluster);
	if (i == -1) {
		/*
		 * Holes can be larger than the maximum size of an
		 * extent, so we return their lengths in a separate
		 * field.
		 */
		if (hole_len) {
			ret = ocfs2_figure_hole_clusters(INODE_CACHE(inode),
							 el, eb_bh,
							 v_cluster, &len);
			if (ret) {
				mlog_errno(ret);
				goto out;
			}

			*hole_len = len;
		}
		goto out_hole;
	}

	rec = &el->l_recs[i];

	BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos));

	if (!rec->e_blkno) {
		ocfs2_error(inode->i_sb,
			    "Inode %lu has bad extent record (%u, %u, 0)\n",
			    inode->i_ino,
			    le32_to_cpu(rec->e_cpos),
			    ocfs2_rec_clusters(el, rec));
		ret = -EROFS;
		goto out;
	}

	*ret_rec = *rec;

	/*
	 * Checking for last extent is potentially expensive - we
	 * might have to look at the next leaf over to see if it's
	 * empty.
	 *
	 * The first two checks are to see whether the caller even
	 * cares for this information, and if the extent is at least
	 * the last in its list.
485 * 486 * If those hold true, then the extent is last if any of the 487 * additional conditions hold true: 488 * - Extent list is in-inode 489 * - Extent list is right-most 490 * - Extent list is 2nd to rightmost, with empty right-most 491 */ 492 if (is_last) { 493 if (i == (le16_to_cpu(el->l_next_free_rec) - 1)) { 494 if (tree_height == 0) 495 *is_last = 1; 496 else if (eb->h_blkno == di->i_last_eb_blk) 497 *is_last = 1; 498 else if (eb->h_next_leaf_blk == di->i_last_eb_blk) { 499 ret = ocfs2_last_eb_is_empty(inode, di); 500 if (ret < 0) { 501 mlog_errno(ret); 502 goto out; 503 } 504 if (ret == 1) 505 *is_last = 1; 506 } 507 } 508 } 509 510 out_hole: 511 ret = 0; 512 out: 513 brelse(eb_bh); 514 return ret; 515 } 516 517 static void ocfs2_relative_extent_offsets(struct super_block *sb, 518 u32 v_cluster, 519 struct ocfs2_extent_rec *rec, 520 u32 *p_cluster, u32 *num_clusters) 521 522 { 523 u32 coff = v_cluster - le32_to_cpu(rec->e_cpos); 524 525 *p_cluster = ocfs2_blocks_to_clusters(sb, le64_to_cpu(rec->e_blkno)); 526 *p_cluster = *p_cluster + coff; 527 528 if (num_clusters) 529 *num_clusters = le16_to_cpu(rec->e_leaf_clusters) - coff; 530 } 531 532 int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster, 533 u32 *p_cluster, u32 *num_clusters, 534 struct ocfs2_extent_list *el, 535 unsigned int *extent_flags) 536 { 537 int ret = 0, i; 538 struct buffer_head *eb_bh = NULL; 539 struct ocfs2_extent_block *eb; 540 struct ocfs2_extent_rec *rec; 541 u32 coff; 542 543 if (el->l_tree_depth) { 544 ret = ocfs2_find_leaf(INODE_CACHE(inode), el, v_cluster, 545 &eb_bh); 546 if (ret) { 547 mlog_errno(ret); 548 goto out; 549 } 550 551 eb = (struct ocfs2_extent_block *) eb_bh->b_data; 552 el = &eb->h_list; 553 554 if (el->l_tree_depth) { 555 ocfs2_error(inode->i_sb, 556 "Inode %lu has non zero tree depth in xattr leaf block %llu\n", 557 inode->i_ino, 558 (unsigned long long)eb_bh->b_blocknr); 559 ret = -EROFS; 560 goto out; 561 } 562 } 563 564 i = ocfs2_search_extent_list(el, v_cluster); 565 if (i == -1) { 566 ret = -EROFS; 567 mlog_errno(ret); 568 goto out; 569 } else { 570 rec = &el->l_recs[i]; 571 BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos)); 572 573 if (!rec->e_blkno) { 574 ocfs2_error(inode->i_sb, 575 "Inode %lu has bad extent record (%u, %u, 0) in xattr\n", 576 inode->i_ino, 577 le32_to_cpu(rec->e_cpos), 578 ocfs2_rec_clusters(el, rec)); 579 ret = -EROFS; 580 goto out; 581 } 582 coff = v_cluster - le32_to_cpu(rec->e_cpos); 583 *p_cluster = ocfs2_blocks_to_clusters(inode->i_sb, 584 le64_to_cpu(rec->e_blkno)); 585 *p_cluster = *p_cluster + coff; 586 if (num_clusters) 587 *num_clusters = ocfs2_rec_clusters(el, rec) - coff; 588 589 if (extent_flags) 590 *extent_flags = rec->e_flags; 591 } 592 out: 593 brelse(eb_bh); 594 return ret; 595 } 596 597 int ocfs2_get_clusters(struct inode *inode, u32 v_cluster, 598 u32 *p_cluster, u32 *num_clusters, 599 unsigned int *extent_flags) 600 { 601 int ret; 602 unsigned int hole_len, flags = 0; 603 struct buffer_head *di_bh = NULL; 604 struct ocfs2_extent_rec rec; 605 606 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { 607 ret = -ERANGE; 608 mlog_errno(ret); 609 goto out; 610 } 611 612 ret = ocfs2_extent_map_lookup(inode, v_cluster, p_cluster, 613 num_clusters, extent_flags); 614 if (ret == 0) 615 goto out; 616 617 ret = ocfs2_read_inode_block(inode, &di_bh); 618 if (ret) { 619 mlog_errno(ret); 620 goto out; 621 } 622 623 ret = ocfs2_get_clusters_nocache(inode, di_bh, v_cluster, &hole_len, 624 &rec, NULL); 625 if (ret) { 626 mlog_errno(ret); 627 
		goto out;
	}

	if (rec.e_blkno == 0ULL) {
		/*
		 * A hole was found. Return some canned values that
		 * callers can key on. If asked for, num_clusters will
		 * be populated with the size of the hole.
		 */
		*p_cluster = 0;
		if (num_clusters) {
			*num_clusters = hole_len;
		}
	} else {
		ocfs2_relative_extent_offsets(inode->i_sb, v_cluster, &rec,
					      p_cluster, num_clusters);
		flags = rec.e_flags;

		ocfs2_extent_map_insert_rec(inode, &rec);
	}

	if (extent_flags)
		*extent_flags = flags;

out:
	brelse(di_bh);
	return ret;
}

/*
 * This expects alloc_sem to be held. The allocation cannot change at
 * all while the map is in the process of being updated.
 */
int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno,
				u64 *ret_count, unsigned int *extent_flags)
{
	int ret;
	int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
	u32 cpos, num_clusters, p_cluster;
	u64 boff = 0;

	cpos = ocfs2_blocks_to_clusters(inode->i_sb, v_blkno);

	ret = ocfs2_get_clusters(inode, cpos, &p_cluster, &num_clusters,
				 extent_flags);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/*
	 * p_cluster == 0 indicates a hole.
	 */
	if (p_cluster) {
		boff = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
		boff += (v_blkno & (u64)(bpc - 1));
	}

	*p_blkno = boff;

	if (ret_count) {
		*ret_count = ocfs2_clusters_to_blocks(inode->i_sb, num_clusters);
		*ret_count -= v_blkno & (u64)(bpc - 1);
	}

out:
	return ret;
}

/*
 * The name ocfs2_fiemap_inline() is slightly misleading: it handles
 * fiemap not only for inline-data files but also for fast symlinks,
 * because the two are identical as far as extent mapping is concerned.
 */
static int ocfs2_fiemap_inline(struct inode *inode, struct buffer_head *di_bh,
			       struct fiemap_extent_info *fieinfo,
			       u64 map_start)
{
	int ret;
	unsigned int id_count;
	struct ocfs2_dinode *di;
	u64 phys;
	u32 flags = FIEMAP_EXTENT_DATA_INLINE|FIEMAP_EXTENT_LAST;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);

	di = (struct ocfs2_dinode *)di_bh->b_data;
	if (ocfs2_inode_is_fast_symlink(inode))
		id_count = ocfs2_fast_symlink_chars(inode->i_sb);
	else
		id_count = le16_to_cpu(di->id2.i_data.id_count);

	if (map_start < id_count) {
		phys = oi->ip_blkno << inode->i_sb->s_blocksize_bits;
		if (ocfs2_inode_is_fast_symlink(inode))
			phys += offsetof(struct ocfs2_dinode, id2.i_symlink);
		else
			phys += offsetof(struct ocfs2_dinode,
					 id2.i_data.id_data);

		ret = fiemap_fill_next_extent(fieinfo, 0, phys, id_count,
					      flags);
		if (ret < 0)
			return ret;
	}

	return 0;
}

int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
		 u64 map_start, u64 map_len)
{
	int ret, is_last;
	u32 mapping_end, cpos;
	unsigned int hole_size;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	u64 len_bytes, phys_bytes, virt_bytes;
	struct buffer_head *di_bh = NULL;
	struct ocfs2_extent_rec rec;

	ret = fiemap_prep(inode, fieinfo, map_start, &map_len, 0);
	if (ret)
		return ret;

	ret = ocfs2_inode_lock(inode, &di_bh, 0);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	down_read(&OCFS2_I(inode)->ip_alloc_sem);

	/*
	 * Handle inline-data and fast symlink separately.
	 */
	if ((OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) ||
	    ocfs2_inode_is_fast_symlink(inode)) {
		ret = ocfs2_fiemap_inline(inode, di_bh, fieinfo, map_start);
		goto out_unlock;
	}

	cpos = map_start >> osb->s_clustersize_bits;
	mapping_end = ocfs2_clusters_for_bytes(inode->i_sb,
					       map_start + map_len);
	is_last = 0;
	while (cpos < mapping_end && !is_last) {
		u32 fe_flags;

		ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos,
						 &hole_size, &rec, &is_last);
		if (ret) {
			mlog_errno(ret);
			goto out_unlock;
		}

		if (rec.e_blkno == 0ULL) {
			cpos += hole_size;
			continue;
		}

		fe_flags = 0;
		if (rec.e_flags & OCFS2_EXT_UNWRITTEN)
			fe_flags |= FIEMAP_EXTENT_UNWRITTEN;
		if (rec.e_flags & OCFS2_EXT_REFCOUNTED)
			fe_flags |= FIEMAP_EXTENT_SHARED;
		if (is_last)
			fe_flags |= FIEMAP_EXTENT_LAST;
		len_bytes = (u64)le16_to_cpu(rec.e_leaf_clusters) << osb->s_clustersize_bits;
		phys_bytes = le64_to_cpu(rec.e_blkno) << osb->sb->s_blocksize_bits;
		virt_bytes = (u64)le32_to_cpu(rec.e_cpos) << osb->s_clustersize_bits;

		ret = fiemap_fill_next_extent(fieinfo, virt_bytes, phys_bytes,
					      len_bytes, fe_flags);
		if (ret)
			break;

		cpos = le32_to_cpu(rec.e_cpos) + le16_to_cpu(rec.e_leaf_clusters);
	}

	if (ret > 0)
		ret = 0;

out_unlock:
	brelse(di_bh);

	up_read(&OCFS2_I(inode)->ip_alloc_sem);

	ocfs2_inode_unlock(inode, 0);
out:

	return ret;
}

/* Is IO overwriting allocated blocks? */
int ocfs2_overwrite_io(struct inode *inode, struct buffer_head *di_bh,
		       u64 map_start, u64 map_len)
{
	int ret = 0, is_last;
	u32 mapping_end, cpos;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct ocfs2_extent_rec rec;

	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
		if (ocfs2_size_fits_inline_data(di_bh, map_start + map_len))
			return ret;
		else
			return -EAGAIN;
	}

	cpos = map_start >> osb->s_clustersize_bits;
	mapping_end = ocfs2_clusters_for_bytes(inode->i_sb,
					       map_start + map_len);
	is_last = 0;
	while (cpos < mapping_end && !is_last) {
		ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos,
						 NULL, &rec, &is_last);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		if (rec.e_blkno == 0ULL)
			break;

		if (rec.e_flags & OCFS2_EXT_REFCOUNTED)
			break;

		cpos = le32_to_cpu(rec.e_cpos) +
			le16_to_cpu(rec.e_leaf_clusters);
	}

	if (cpos < mapping_end)
		ret = -EAGAIN;
out:
	return ret;
}

int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int whence)
{
	struct inode *inode = file->f_mapping->host;
	int ret;
	unsigned int is_last = 0, is_data = 0;
	u16 cs_bits = OCFS2_SB(inode->i_sb)->s_clustersize_bits;
	u32 cpos, cend, clen, hole_size;
	u64 extoff, extlen;
	struct buffer_head *di_bh = NULL;
	struct ocfs2_extent_rec rec;

	BUG_ON(whence != SEEK_DATA && whence != SEEK_HOLE);

	ret = ocfs2_inode_lock(inode, &di_bh, 0);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	down_read(&OCFS2_I(inode)->ip_alloc_sem);

	if (*offset >= i_size_read(inode)) {
		ret = -ENXIO;
		goto out_unlock;
	}

	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
		if (whence == SEEK_HOLE)
			*offset = i_size_read(inode);
		goto out_unlock;
	}

	clen = 0;
	cpos = *offset >> cs_bits;
	cend = ocfs2_clusters_for_bytes(inode->i_sb, i_size_read(inode));

	while (cpos < cend && !is_last) {
		ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos, &hole_size,
						 &rec, &is_last);
		if (ret) {
			mlog_errno(ret);
			goto out_unlock;
		}

		extoff = cpos;
		extoff <<= cs_bits;

		if (rec.e_blkno == 0ULL) {
			clen = hole_size;
			is_data = 0;
		} else {
			clen = le16_to_cpu(rec.e_leaf_clusters) -
				(cpos - le32_to_cpu(rec.e_cpos));
			is_data = (rec.e_flags & OCFS2_EXT_UNWRITTEN) ? 0 : 1;
		}

		if ((!is_data && whence == SEEK_HOLE) ||
		    (is_data && whence == SEEK_DATA)) {
			if (extoff > *offset)
				*offset = extoff;
			goto out_unlock;
		}

		if (!is_last)
			cpos += clen;
	}

	if (whence == SEEK_HOLE) {
		extoff = cpos;
		extoff <<= cs_bits;
		extlen = clen;
		extlen <<= cs_bits;

		if ((extoff + extlen) > i_size_read(inode))
			extlen = i_size_read(inode) - extoff;
		extoff += extlen;
		if (extoff > *offset)
			*offset = extoff;
		goto out_unlock;
	}

	ret = -ENXIO;

out_unlock:

	brelse(di_bh);

	up_read(&OCFS2_I(inode)->ip_alloc_sem);

	ocfs2_inode_unlock(inode, 0);
out:
	return ret;
}

int ocfs2_read_virt_blocks(struct inode *inode, u64 v_block, int nr,
			   struct buffer_head *bhs[], int flags,
			   int (*validate)(struct super_block *sb,
					   struct buffer_head *bh))
{
	int rc = 0;
	u64 p_block, p_count;
	int i, count, done = 0;

	trace_ocfs2_read_virt_blocks(
			inode, (unsigned long long)v_block, nr, bhs, flags,
			validate);

	if (((v_block + nr - 1) << inode->i_sb->s_blocksize_bits) >=
	    i_size_read(inode)) {
		BUG_ON(!(flags & OCFS2_BH_READAHEAD));
		goto out;
	}

	while (done < nr) {
		down_read(&OCFS2_I(inode)->ip_alloc_sem);
		rc = ocfs2_extent_map_get_blocks(inode, v_block + done,
						 &p_block, &p_count, NULL);
		up_read(&OCFS2_I(inode)->ip_alloc_sem);
		if (rc) {
			mlog_errno(rc);
			break;
		}

		if (!p_block) {
			rc = -EIO;
			mlog(ML_ERROR,
			     "Inode #%llu contains a hole at offset %llu\n",
			     (unsigned long long)OCFS2_I(inode)->ip_blkno,
			     (unsigned long long)(v_block + done) <<
			     inode->i_sb->s_blocksize_bits);
			break;
		}

		count = nr - done;
		if (p_count < count)
			count = p_count;

		/*
		 * If the caller passed us bhs, they should have come
		 * from a previous readahead call to this function. Thus,
		 * they should have the right b_blocknr.
		 */
		for (i = 0; i < count; i++) {
			if (!bhs[done + i])
				continue;
			BUG_ON(bhs[done + i]->b_blocknr != (p_block + i));
		}

		rc = ocfs2_read_blocks(INODE_CACHE(inode), p_block, count,
				       bhs + done, flags, validate);
		if (rc) {
			mlog_errno(rc);
			break;
		}
		done += count;
	}

out:
	return rc;
}