/* -*- mode: c; c-basic-offset: 8; -*-
 * vim: noexpandtab sw=8 ts=8 sts=0:
 *
 * extent_map.c
 *
 * Block/Cluster mapping functions
 *
 * Copyright (C) 2004 Oracle. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License, version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include <linux/fs.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/fiemap.h>

#include <cluster/masklog.h>

#include "ocfs2.h"

#include "alloc.h"
#include "dlmglue.h"
#include "extent_map.h"
#include "inode.h"
#include "super.h"
#include "symlink.h"
#include "aops.h"
#include "ocfs2_trace.h"

#include "buffer_head_io.h"

/*
 * The extent caching implementation is intentionally trivial.
 *
 * We only cache a small number of extents stored directly on the
 * inode, so linear order operations are acceptable. If we ever want
 * to increase the size of the extent map, then these algorithms must
 * get smarter.
 */

void ocfs2_extent_map_init(struct inode *inode)
{
	struct ocfs2_inode_info *oi = OCFS2_I(inode);

	oi->ip_extent_map.em_num_items = 0;
	INIT_LIST_HEAD(&oi->ip_extent_map.em_list);
}

static void __ocfs2_extent_map_lookup(struct ocfs2_extent_map *em,
				      unsigned int cpos,
				      struct ocfs2_extent_map_item **ret_emi)
{
	unsigned int range;
	struct ocfs2_extent_map_item *emi;

	*ret_emi = NULL;

	list_for_each_entry(emi, &em->em_list, ei_list) {
		range = emi->ei_cpos + emi->ei_clusters;

		if (cpos >= emi->ei_cpos && cpos < range) {
			list_move(&emi->ei_list, &em->em_list);

			*ret_emi = emi;
			break;
		}
	}
}

static int ocfs2_extent_map_lookup(struct inode *inode, unsigned int cpos,
				   unsigned int *phys, unsigned int *len,
				   unsigned int *flags)
{
	unsigned int coff;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_extent_map_item *emi;

	spin_lock(&oi->ip_lock);

	__ocfs2_extent_map_lookup(&oi->ip_extent_map, cpos, &emi);
	if (emi) {
		coff = cpos - emi->ei_cpos;
		*phys = emi->ei_phys + coff;
		if (len)
			*len = emi->ei_clusters - coff;
		if (flags)
			*flags = emi->ei_flags;
	}

	spin_unlock(&oi->ip_lock);

	if (emi == NULL)
		return -ENOENT;

	return 0;
}

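/*
 * Illustrative sketch (not part of this file's API): the cache above
 * behaves as a small MRU list.  A hit moves the item to the list
 * head so hot extents stay near the front of the linear scan; a miss
 * returns -ENOENT and the caller repopulates the cache, roughly:
 *
 *	unsigned int phys, len, flags;
 *
 *	if (ocfs2_extent_map_lookup(inode, cpos, &phys, &len, &flags)) {
 *		(read the on-disk extent tree, then cache the result
 *		 with ocfs2_extent_map_insert_rec(), as
 *		 ocfs2_get_clusters() below does)
 *	}
 */
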
/*
 * Forget about all clusters equal to or greater than cpos.
 */
void ocfs2_extent_map_trunc(struct inode *inode, unsigned int cpos)
{
	struct ocfs2_extent_map_item *emi, *n;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_extent_map *em = &oi->ip_extent_map;
	LIST_HEAD(tmp_list);
	unsigned int range;

	spin_lock(&oi->ip_lock);
	list_for_each_entry_safe(emi, n, &em->em_list, ei_list) {
		if (emi->ei_cpos >= cpos) {
			/* Full truncate of this record. */
			list_move(&emi->ei_list, &tmp_list);
			BUG_ON(em->em_num_items == 0);
			em->em_num_items--;
			continue;
		}

		range = emi->ei_cpos + emi->ei_clusters;
		if (range > cpos) {
			/* Partial truncate */
			emi->ei_clusters = cpos - emi->ei_cpos;
		}
	}
	spin_unlock(&oi->ip_lock);

	list_for_each_entry_safe(emi, n, &tmp_list, ei_list) {
		list_del(&emi->ei_list);
		kfree(emi);
	}
}

/*
 * Is any part of emi2 contained within emi1?
 */
static int ocfs2_ei_is_contained(struct ocfs2_extent_map_item *emi1,
				 struct ocfs2_extent_map_item *emi2)
{
	unsigned int range1, range2;

	/*
	 * Check if logical start of emi2 is inside emi1
	 */
	range1 = emi1->ei_cpos + emi1->ei_clusters;
	if (emi2->ei_cpos >= emi1->ei_cpos && emi2->ei_cpos < range1)
		return 1;

	/*
	 * Check if logical end of emi2 is inside emi1
	 */
	range2 = emi2->ei_cpos + emi2->ei_clusters;
	if (range2 > emi1->ei_cpos && range2 <= range1)
		return 1;

	return 0;
}

static void ocfs2_copy_emi_fields(struct ocfs2_extent_map_item *dest,
				  struct ocfs2_extent_map_item *src)
{
	dest->ei_cpos = src->ei_cpos;
	dest->ei_phys = src->ei_phys;
	dest->ei_clusters = src->ei_clusters;
	dest->ei_flags = src->ei_flags;
}

/*
 * Try to merge emi with ins. Returns 1 if merge succeeds, zero
 * otherwise.
 */
static int ocfs2_try_to_merge_extent_map(struct ocfs2_extent_map_item *emi,
					 struct ocfs2_extent_map_item *ins)
{
	/*
	 * Handle contiguousness
	 */
	if (ins->ei_phys == (emi->ei_phys + emi->ei_clusters) &&
	    ins->ei_cpos == (emi->ei_cpos + emi->ei_clusters) &&
	    ins->ei_flags == emi->ei_flags) {
		emi->ei_clusters += ins->ei_clusters;
		return 1;
	} else if ((ins->ei_phys + ins->ei_clusters) == emi->ei_phys &&
		   (ins->ei_cpos + ins->ei_clusters) == emi->ei_cpos &&
		   ins->ei_flags == emi->ei_flags) {
		emi->ei_phys = ins->ei_phys;
		emi->ei_cpos = ins->ei_cpos;
		emi->ei_clusters += ins->ei_clusters;
		return 1;
	}

	/*
	 * Overlapping extents - this shouldn't happen unless we've
	 * split an extent to change its flags. That is exceedingly
	 * rare, so there's no sense in trying to optimize it yet.
	 */
	if (ocfs2_ei_is_contained(emi, ins) ||
	    ocfs2_ei_is_contained(ins, emi)) {
		ocfs2_copy_emi_fields(emi, ins);
		return 1;
	}

	/* No merge was possible. */
	return 0;
}

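/*
 * Worked example of the contiguousness check above (values are
 * arbitrary): emi = (ei_cpos 10, ei_phys 100, ei_clusters 5) and
 * ins = (ei_cpos 15, ei_phys 105, ei_clusters 3) are logically and
 * physically adjacent with equal flags, so emi grows in place to
 * (ei_cpos 10, ei_phys 100, ei_clusters 8).  In the mirror case,
 * where ins ends exactly where emi begins, emi's start is pulled
 * back to ins's start instead.
 */
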
/*
 * In order to reduce complexity on the caller, this insert function
 * is intentionally liberal in what it will accept.
 *
 * The only rule is that the truncate call *must* be used whenever
 * records have been deleted. This avoids inserting overlapping
 * records with different physical mappings.
 */
void ocfs2_extent_map_insert_rec(struct inode *inode,
				 struct ocfs2_extent_rec *rec)
{
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_extent_map *em = &oi->ip_extent_map;
	struct ocfs2_extent_map_item *emi, *new_emi = NULL;
	struct ocfs2_extent_map_item ins;

	ins.ei_cpos = le32_to_cpu(rec->e_cpos);
	ins.ei_phys = ocfs2_blocks_to_clusters(inode->i_sb,
					       le64_to_cpu(rec->e_blkno));
	ins.ei_clusters = le16_to_cpu(rec->e_leaf_clusters);
	ins.ei_flags = rec->e_flags;

search:
	spin_lock(&oi->ip_lock);

	list_for_each_entry(emi, &em->em_list, ei_list) {
		if (ocfs2_try_to_merge_extent_map(emi, &ins)) {
			list_move(&emi->ei_list, &em->em_list);
			spin_unlock(&oi->ip_lock);
			goto out;
		}
	}

	/*
	 * No item could be merged.
	 *
	 * Either allocate and add a new item, or overwrite the least
	 * recently inserted.
	 */

	if (em->em_num_items < OCFS2_MAX_EXTENT_MAP_ITEMS) {
		if (new_emi == NULL) {
			spin_unlock(&oi->ip_lock);

			new_emi = kmalloc(sizeof(*new_emi), GFP_NOFS);
			if (new_emi == NULL)
				goto out;

			goto search;
		}

		ocfs2_copy_emi_fields(new_emi, &ins);
		list_add(&new_emi->ei_list, &em->em_list);
		em->em_num_items++;
		new_emi = NULL;
	} else {
		BUG_ON(list_empty(&em->em_list) || em->em_num_items == 0);
		emi = list_entry(em->em_list.prev,
				 struct ocfs2_extent_map_item, ei_list);
		list_move(&emi->ei_list, &em->em_list);
		ocfs2_copy_emi_fields(emi, &ins);
	}

	spin_unlock(&oi->ip_lock);

out:
	kfree(new_emi);
}

static int ocfs2_last_eb_is_empty(struct inode *inode,
				  struct ocfs2_dinode *di)
{
	int ret, next_free;
	u64 last_eb_blk = le64_to_cpu(di->i_last_eb_blk);
	struct buffer_head *eb_bh = NULL;
	struct ocfs2_extent_block *eb;
	struct ocfs2_extent_list *el;

	ret = ocfs2_read_extent_block(INODE_CACHE(inode), last_eb_blk, &eb_bh);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	eb = (struct ocfs2_extent_block *) eb_bh->b_data;
	el = &eb->h_list;

	if (el->l_tree_depth) {
		ocfs2_error(inode->i_sb,
			    "Inode %lu has non zero tree depth in leaf block %llu\n",
			    inode->i_ino,
			    (unsigned long long)eb_bh->b_blocknr);
		ret = -EROFS;
		goto out;
	}

	next_free = le16_to_cpu(el->l_next_free_rec);

	if (next_free == 0 ||
	    (next_free == 1 && ocfs2_is_empty_extent(&el->l_recs[0])))
		ret = 1;

out:
	brelse(eb_bh);
	return ret;
}

/*
 * Return the first index within el which contains an extent start
 * larger than v_cluster.
 */
static int ocfs2_search_for_hole_index(struct ocfs2_extent_list *el,
				       u32 v_cluster)
{
	int i;
	struct ocfs2_extent_rec *rec;

	for (i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
		rec = &el->l_recs[i];

		if (v_cluster < le32_to_cpu(rec->e_cpos))
			break;
	}

	return i;
}

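/*
 * Example (hypothetical values): with records covering clusters
 * [0, 4) and [10, 16), ocfs2_search_for_hole_index(el, 5) returns 1,
 * since record 1 is the first whose e_cpos (10) is greater than 5.
 * If v_cluster lies past every record, l_next_free_rec is returned.
 */
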
/*
 * Figure out the size of a hole which starts at v_cluster within the given
 * extent list.
 *
 * If there is no more allocation past v_cluster, we return the maximum
 * possible cluster offset (UINT_MAX) minus v_cluster.
 *
 * If we have in-inode extents, then el points to the dinode list and
 * eb_bh is NULL. Otherwise, eb_bh should point to the extent block
 * containing el.
 */
int ocfs2_figure_hole_clusters(struct ocfs2_caching_info *ci,
			       struct ocfs2_extent_list *el,
			       struct buffer_head *eb_bh,
			       u32 v_cluster,
			       u32 *num_clusters)
{
	int ret, i;
	struct buffer_head *next_eb_bh = NULL;
	struct ocfs2_extent_block *eb, *next_eb;

	i = ocfs2_search_for_hole_index(el, v_cluster);

	if (i == le16_to_cpu(el->l_next_free_rec) && eb_bh) {
		eb = (struct ocfs2_extent_block *)eb_bh->b_data;

		/*
		 * Check the next leaf for any extents.
		 */

		if (le64_to_cpu(eb->h_next_leaf_blk) == 0ULL)
			goto no_more_extents;

		ret = ocfs2_read_extent_block(ci,
					      le64_to_cpu(eb->h_next_leaf_blk),
					      &next_eb_bh);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		next_eb = (struct ocfs2_extent_block *)next_eb_bh->b_data;
		el = &next_eb->h_list;
		i = ocfs2_search_for_hole_index(el, v_cluster);
	}

no_more_extents:
	if (i == le16_to_cpu(el->l_next_free_rec)) {
		/*
		 * We're at the end of our existing allocation. Just
		 * return the maximum number of clusters we could
		 * possibly allocate.
		 */
		*num_clusters = UINT_MAX - v_cluster;
	} else {
		*num_clusters = le32_to_cpu(el->l_recs[i].e_cpos) - v_cluster;
	}

	ret = 0;
out:
	brelse(next_eb_bh);
	return ret;
}

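/*
 * Worked example (hypothetical values): with leaf records covering
 * clusters [0, 4) and [10, 16), a query at v_cluster 5 finds hole
 * index 1 above, so *num_clusters = 10 - 5 = 5 hole clusters.  Had
 * there been no record past the hole (and no next leaf), the result
 * would instead be UINT_MAX - 5.
 */
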
static int ocfs2_get_clusters_nocache(struct inode *inode,
				      struct buffer_head *di_bh,
				      u32 v_cluster, unsigned int *hole_len,
				      struct ocfs2_extent_rec *ret_rec,
				      unsigned int *is_last)
{
	int i, ret, tree_height, len;
	struct ocfs2_dinode *di;
	struct ocfs2_extent_block *uninitialized_var(eb);
	struct ocfs2_extent_list *el;
	struct ocfs2_extent_rec *rec;
	struct buffer_head *eb_bh = NULL;

	memset(ret_rec, 0, sizeof(*ret_rec));
	if (is_last)
		*is_last = 0;

	di = (struct ocfs2_dinode *) di_bh->b_data;
	el = &di->id2.i_list;
	tree_height = le16_to_cpu(el->l_tree_depth);

	if (tree_height > 0) {
		ret = ocfs2_find_leaf(INODE_CACHE(inode), el, v_cluster,
				      &eb_bh);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		eb = (struct ocfs2_extent_block *) eb_bh->b_data;
		el = &eb->h_list;

		if (el->l_tree_depth) {
			ocfs2_error(inode->i_sb,
				    "Inode %lu has non zero tree depth in leaf block %llu\n",
				    inode->i_ino,
				    (unsigned long long)eb_bh->b_blocknr);
			ret = -EROFS;
			goto out;
		}
	}

	i = ocfs2_search_extent_list(el, v_cluster);
	if (i == -1) {
		/*
		 * Holes can be larger than the maximum size of an
		 * extent, so we return their lengths in a separate
		 * field.
		 */
		if (hole_len) {
			ret = ocfs2_figure_hole_clusters(INODE_CACHE(inode),
							 el, eb_bh,
							 v_cluster, &len);
			if (ret) {
				mlog_errno(ret);
				goto out;
			}

			*hole_len = len;
		}
		goto out_hole;
	}

	rec = &el->l_recs[i];

	BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos));

	if (!rec->e_blkno) {
		ocfs2_error(inode->i_sb,
			    "Inode %lu has bad extent record (%u, %u, 0)\n",
			    inode->i_ino,
			    le32_to_cpu(rec->e_cpos),
			    ocfs2_rec_clusters(el, rec));
		ret = -EROFS;
		goto out;
	}

	*ret_rec = *rec;

	/*
	 * Checking for last extent is potentially expensive - we
	 * might have to look at the next leaf over to see if it's
	 * empty.
	 *
	 * The first two checks are to see whether the caller even
	 * cares for this information, and if the extent is at least
	 * the last in its list.
	 *
	 * If those hold true, then the extent is last if any of the
	 * additional conditions hold true:
	 *  - Extent list is in-inode
	 *  - Extent list is right-most
	 *  - Extent list is 2nd to rightmost, with empty right-most
	 */
	if (is_last) {
		if (i == (le16_to_cpu(el->l_next_free_rec) - 1)) {
			if (tree_height == 0)
				*is_last = 1;
			else if (eb->h_blkno == di->i_last_eb_blk)
				*is_last = 1;
			else if (eb->h_next_leaf_blk == di->i_last_eb_blk) {
				ret = ocfs2_last_eb_is_empty(inode, di);
				if (ret < 0) {
					mlog_errno(ret);
					goto out;
				}
				if (ret == 1)
					*is_last = 1;
			}
		}
	}

out_hole:
	ret = 0;
out:
	brelse(eb_bh);
	return ret;
}

static void ocfs2_relative_extent_offsets(struct super_block *sb,
					  u32 v_cluster,
					  struct ocfs2_extent_rec *rec,
					  u32 *p_cluster, u32 *num_clusters)
{
	u32 coff = v_cluster - le32_to_cpu(rec->e_cpos);

	*p_cluster = ocfs2_blocks_to_clusters(sb, le64_to_cpu(rec->e_blkno));
	*p_cluster = *p_cluster + coff;

	if (num_clusters)
		*num_clusters = le16_to_cpu(rec->e_leaf_clusters) - coff;
}

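/*
 * Example of the arithmetic above (hypothetical values): a record
 * with e_cpos 20, e_leaf_clusters 8, and an e_blkno that converts to
 * physical cluster 200, queried at v_cluster 23, yields coff = 3,
 * *p_cluster = 203 and *num_clusters = 5 (the remainder of the
 * extent from the queried cluster onward).
 */
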
int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster,
			     u32 *p_cluster, u32 *num_clusters,
			     struct ocfs2_extent_list *el,
			     unsigned int *extent_flags)
{
	int ret = 0, i;
	struct buffer_head *eb_bh = NULL;
	struct ocfs2_extent_block *eb;
	struct ocfs2_extent_rec *rec;
	u32 coff;

	if (el->l_tree_depth) {
		ret = ocfs2_find_leaf(INODE_CACHE(inode), el, v_cluster,
				      &eb_bh);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		eb = (struct ocfs2_extent_block *) eb_bh->b_data;
		el = &eb->h_list;

		if (el->l_tree_depth) {
			ocfs2_error(inode->i_sb,
				    "Inode %lu has non zero tree depth in xattr leaf block %llu\n",
				    inode->i_ino,
				    (unsigned long long)eb_bh->b_blocknr);
			ret = -EROFS;
			goto out;
		}
	}

	i = ocfs2_search_extent_list(el, v_cluster);
	if (i == -1) {
		ret = -EROFS;
		mlog_errno(ret);
		goto out;
	} else {
		rec = &el->l_recs[i];
		BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos));

		if (!rec->e_blkno) {
			ocfs2_error(inode->i_sb,
				    "Inode %lu has bad extent record (%u, %u, 0) in xattr\n",
				    inode->i_ino,
				    le32_to_cpu(rec->e_cpos),
				    ocfs2_rec_clusters(el, rec));
			ret = -EROFS;
			goto out;
		}
		coff = v_cluster - le32_to_cpu(rec->e_cpos);
		*p_cluster = ocfs2_blocks_to_clusters(inode->i_sb,
						      le64_to_cpu(rec->e_blkno));
		*p_cluster = *p_cluster + coff;
		if (num_clusters)
			*num_clusters = ocfs2_rec_clusters(el, rec) - coff;

		if (extent_flags)
			*extent_flags = rec->e_flags;
	}
out:
	brelse(eb_bh);
	return ret;
}

int ocfs2_get_clusters(struct inode *inode, u32 v_cluster,
		       u32 *p_cluster, u32 *num_clusters,
		       unsigned int *extent_flags)
{
	int ret;
	unsigned int uninitialized_var(hole_len), flags = 0;
	struct buffer_head *di_bh = NULL;
	struct ocfs2_extent_rec rec;

	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
		ret = -ERANGE;
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_extent_map_lookup(inode, v_cluster, p_cluster,
				      num_clusters, extent_flags);
	if (ret == 0)
		goto out;

	ret = ocfs2_read_inode_block(inode, &di_bh);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_get_clusters_nocache(inode, di_bh, v_cluster, &hole_len,
					 &rec, NULL);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	if (rec.e_blkno == 0ULL) {
		/*
		 * A hole was found. Return some canned values that
		 * callers can key on. If asked for, num_clusters will
		 * be populated with the size of the hole.
		 */
		*p_cluster = 0;
		if (num_clusters) {
			*num_clusters = hole_len;
		}
	} else {
		ocfs2_relative_extent_offsets(inode->i_sb, v_cluster, &rec,
					      p_cluster, num_clusters);
		flags = rec.e_flags;

		ocfs2_extent_map_insert_rec(inode, &rec);
	}

	if (extent_flags)
		*extent_flags = flags;

out:
	brelse(di_bh);
	return ret;
}

/*
 * This expects alloc_sem to be held. The allocation cannot change at
 * all while the map is in the process of being updated.
 */
int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno,
				u64 *ret_count, unsigned int *extent_flags)
{
	int ret;
	int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
	u32 cpos, num_clusters, p_cluster;
	u64 boff = 0;

	cpos = ocfs2_blocks_to_clusters(inode->i_sb, v_blkno);

	ret = ocfs2_get_clusters(inode, cpos, &p_cluster, &num_clusters,
				 extent_flags);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/*
	 * p_cluster == 0 indicates a hole.
	 */
	if (p_cluster) {
		boff = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
		boff += (v_blkno & (u64)(bpc - 1));
	}

	*p_blkno = boff;

	if (ret_count) {
		*ret_count = ocfs2_clusters_to_blocks(inode->i_sb, num_clusters);
		*ret_count -= v_blkno & (u64)(bpc - 1);
	}

out:
	return ret;
}

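/*
 * Block/cluster math example for the function above (hypothetical
 * geometry): with 4K blocks and 32K clusters, bpc = 8.  For v_blkno
 * 21, cpos = 21 / 8 = 2; if that cluster maps to p_cluster 100, then
 * boff = 100 * 8 + (21 & 7) = 805.  A one-cluster mapping would
 * report *ret_count = 8 - 5 = 3 blocks remaining in the extent from
 * v_blkno onward.
 */
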
/*
 * The name ocfs2_fiemap_inline() is slightly misleading: it handles
 * fiemap not only for inline-data files but also for fast symlinks,
 * since the two are identical as far as extent mapping is concerned.
 */
static int ocfs2_fiemap_inline(struct inode *inode, struct buffer_head *di_bh,
			       struct fiemap_extent_info *fieinfo,
			       u64 map_start)
{
	int ret;
	unsigned int id_count;
	struct ocfs2_dinode *di;
	u64 phys;
	u32 flags = FIEMAP_EXTENT_DATA_INLINE|FIEMAP_EXTENT_LAST;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);

	di = (struct ocfs2_dinode *)di_bh->b_data;
	if (ocfs2_inode_is_fast_symlink(inode))
		id_count = ocfs2_fast_symlink_chars(inode->i_sb);
	else
		id_count = le16_to_cpu(di->id2.i_data.id_count);

	if (map_start < id_count) {
		phys = oi->ip_blkno << inode->i_sb->s_blocksize_bits;
		if (ocfs2_inode_is_fast_symlink(inode))
			phys += offsetof(struct ocfs2_dinode, id2.i_symlink);
		else
			phys += offsetof(struct ocfs2_dinode,
					 id2.i_data.id_data);

		ret = fiemap_fill_next_extent(fieinfo, 0, phys, id_count,
					      flags);
		if (ret < 0)
			return ret;
	}

	return 0;
}

#define OCFS2_FIEMAP_FLAGS	(FIEMAP_FLAG_SYNC)

int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
		 u64 map_start, u64 map_len)
{
	int ret, is_last;
	u32 mapping_end, cpos;
	unsigned int hole_size;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	u64 len_bytes, phys_bytes, virt_bytes;
	struct buffer_head *di_bh = NULL;
	struct ocfs2_extent_rec rec;

	ret = fiemap_check_flags(fieinfo, OCFS2_FIEMAP_FLAGS);
	if (ret)
		return ret;

	ret = ocfs2_inode_lock(inode, &di_bh, 0);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	down_read(&OCFS2_I(inode)->ip_alloc_sem);

	/*
	 * Handle inline-data and fast symlink separately.
	 */
	if ((OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) ||
	    ocfs2_inode_is_fast_symlink(inode)) {
		ret = ocfs2_fiemap_inline(inode, di_bh, fieinfo, map_start);
		goto out_unlock;
	}

	cpos = map_start >> osb->s_clustersize_bits;
	mapping_end = ocfs2_clusters_for_bytes(inode->i_sb,
					       map_start + map_len);
	is_last = 0;
	while (cpos < mapping_end && !is_last) {
		u32 fe_flags;

		ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos,
						 &hole_size, &rec, &is_last);
		if (ret) {
			mlog_errno(ret);
			goto out_unlock;
		}

		if (rec.e_blkno == 0ULL) {
			cpos += hole_size;
			continue;
		}

		fe_flags = 0;
		if (rec.e_flags & OCFS2_EXT_UNWRITTEN)
			fe_flags |= FIEMAP_EXTENT_UNWRITTEN;
		if (rec.e_flags & OCFS2_EXT_REFCOUNTED)
			fe_flags |= FIEMAP_EXTENT_SHARED;
		if (is_last)
			fe_flags |= FIEMAP_EXTENT_LAST;
		len_bytes = (u64)le16_to_cpu(rec.e_leaf_clusters) << osb->s_clustersize_bits;
		phys_bytes = le64_to_cpu(rec.e_blkno) << osb->sb->s_blocksize_bits;
		virt_bytes = (u64)le32_to_cpu(rec.e_cpos) << osb->s_clustersize_bits;

		ret = fiemap_fill_next_extent(fieinfo, virt_bytes, phys_bytes,
					      len_bytes, fe_flags);
		if (ret)
			break;

		cpos = le32_to_cpu(rec.e_cpos) + le16_to_cpu(rec.e_leaf_clusters);
	}

	if (ret > 0)
		ret = 0;

out_unlock:
	brelse(di_bh);

	up_read(&OCFS2_I(inode)->ip_alloc_sem);

	ocfs2_inode_unlock(inode, 0);
out:
	return ret;
}

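/*
 * Byte-conversion example for the fiemap loop above (hypothetical
 * geometry): with a 32K cluster size (s_clustersize_bits = 15), a
 * record of e_cpos 4 and e_leaf_clusters 2 reports virt_bytes =
 * 4 << 15 = 131072 and len_bytes = 2 << 15 = 65536.  phys_bytes is
 * shifted by the *block* size instead, since e_blkno counts disk
 * blocks rather than clusters.
 */
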
/* Is IO overwriting allocated blocks? */
int ocfs2_overwrite_io(struct inode *inode, struct buffer_head *di_bh,
		       u64 map_start, u64 map_len)
{
	int ret = 0, is_last;
	u32 mapping_end, cpos;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct ocfs2_extent_rec rec;

	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
		if (ocfs2_size_fits_inline_data(di_bh, map_start + map_len))
			return ret;
		else
			return -EAGAIN;
	}

	cpos = map_start >> osb->s_clustersize_bits;
	mapping_end = ocfs2_clusters_for_bytes(inode->i_sb,
					       map_start + map_len);
	is_last = 0;
	while (cpos < mapping_end && !is_last) {
		ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos,
						 NULL, &rec, &is_last);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		if (rec.e_blkno == 0ULL)
			break;

		if (rec.e_flags & OCFS2_EXT_REFCOUNTED)
			break;

		cpos = le32_to_cpu(rec.e_cpos) +
			le16_to_cpu(rec.e_leaf_clusters);
	}

	if (cpos < mapping_end)
		ret = -EAGAIN;
out:
	return ret;
}

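/*
 * Usage sketch for the contract above (illustrative only; the caller
 * shown is hypothetical): a non-blocking write path checks whether
 * the byte range is fully and plainly allocated, and treats -EAGAIN
 * as "allocation would change, fall back to the blocking path":
 *
 *	ret = ocfs2_overwrite_io(inode, di_bh, pos, count);
 *	if (ret == -EAGAIN)
 *		(retry with blocking locks / allocation)
 */
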
int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int whence)
{
	struct inode *inode = file->f_mapping->host;
	int ret;
	unsigned int is_last = 0, is_data = 0;
	u16 cs_bits = OCFS2_SB(inode->i_sb)->s_clustersize_bits;
	u32 cpos, cend, clen, hole_size;
	u64 extoff, extlen;
	struct buffer_head *di_bh = NULL;
	struct ocfs2_extent_rec rec;

	BUG_ON(whence != SEEK_DATA && whence != SEEK_HOLE);

	ret = ocfs2_inode_lock(inode, &di_bh, 0);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	down_read(&OCFS2_I(inode)->ip_alloc_sem);

	if (*offset >= i_size_read(inode)) {
		ret = -ENXIO;
		goto out_unlock;
	}

	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
		if (whence == SEEK_HOLE)
			*offset = i_size_read(inode);
		goto out_unlock;
	}

	clen = 0;
	cpos = *offset >> cs_bits;
	cend = ocfs2_clusters_for_bytes(inode->i_sb, i_size_read(inode));

	while (cpos < cend && !is_last) {
		ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos, &hole_size,
						 &rec, &is_last);
		if (ret) {
			mlog_errno(ret);
			goto out_unlock;
		}

		extoff = cpos;
		extoff <<= cs_bits;

		if (rec.e_blkno == 0ULL) {
			clen = hole_size;
			is_data = 0;
		} else {
			clen = le16_to_cpu(rec.e_leaf_clusters) -
				(cpos - le32_to_cpu(rec.e_cpos));
			is_data = (rec.e_flags & OCFS2_EXT_UNWRITTEN) ? 0 : 1;
		}

		if ((!is_data && whence == SEEK_HOLE) ||
		    (is_data && whence == SEEK_DATA)) {
			if (extoff > *offset)
				*offset = extoff;
			goto out_unlock;
		}

		if (!is_last)
			cpos += clen;
	}

	if (whence == SEEK_HOLE) {
		extoff = cpos;
		extoff <<= cs_bits;
		extlen = clen;
		extlen <<= cs_bits;

		if ((extoff + extlen) > i_size_read(inode))
			extlen = i_size_read(inode) - extoff;
		extoff += extlen;
		if (extoff > *offset)
			*offset = extoff;
		goto out_unlock;
	}

	ret = -ENXIO;

out_unlock:
	brelse(di_bh);

	up_read(&OCFS2_I(inode)->ip_alloc_sem);

	ocfs2_inode_unlock(inode, 0);
out:
	return ret;
}

int ocfs2_read_virt_blocks(struct inode *inode, u64 v_block, int nr,
			   struct buffer_head *bhs[], int flags,
			   int (*validate)(struct super_block *sb,
					   struct buffer_head *bh))
{
	int rc = 0;
	u64 p_block, p_count;
	int i, count, done = 0;

	trace_ocfs2_read_virt_blocks(
		inode, (unsigned long long)v_block, nr, bhs, flags,
		validate);

	if (((v_block + nr - 1) << inode->i_sb->s_blocksize_bits) >=
	    i_size_read(inode)) {
		BUG_ON(!(flags & OCFS2_BH_READAHEAD));
		goto out;
	}

	while (done < nr) {
		down_read(&OCFS2_I(inode)->ip_alloc_sem);
		rc = ocfs2_extent_map_get_blocks(inode, v_block + done,
						 &p_block, &p_count, NULL);
		up_read(&OCFS2_I(inode)->ip_alloc_sem);
		if (rc) {
			mlog_errno(rc);
			break;
		}

		if (!p_block) {
			rc = -EIO;
			mlog(ML_ERROR,
			     "Inode #%llu contains a hole at offset %llu\n",
			     (unsigned long long)OCFS2_I(inode)->ip_blkno,
			     (unsigned long long)(v_block + done) <<
			     inode->i_sb->s_blocksize_bits);
			break;
		}

		count = nr - done;
		if (p_count < count)
			count = p_count;

		/*
		 * If the caller passed us bhs, they should have come
		 * from a previous readahead call to this function. Thus,
		 * they should have the right b_blocknr.
		 */
		for (i = 0; i < count; i++) {
			if (!bhs[done + i])
				continue;
			BUG_ON(bhs[done + i]->b_blocknr != (p_block + i));
		}

		rc = ocfs2_read_blocks(INODE_CACHE(inode), p_block, count,
				       bhs + done, flags, validate);
		if (rc) {
			mlog_errno(rc);
			break;
		}
		done += count;
	}

out:
	return rc;
}
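
/*
 * Caller sketch for ocfs2_read_virt_blocks() (illustrative only;
 * validate_fn is a placeholder for a real validation callback such
 * as the one the directory code supplies for its blocks):
 *
 *	struct buffer_head *bh = NULL;
 *
 *	rc = ocfs2_read_virt_blocks(inode, v_block, 1, &bh, 0,
 *				    validate_fn);
 *	if (rc == 0) {
 *		(use bh->b_data)
 *		brelse(bh);
 *	}
 */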