1 /* -*- mode: c; c-basic-offset: 8; -*- 2 * vim: noexpandtab sw=8 ts=8 sts=0: 3 * 4 * extent_map.c 5 * 6 * In-memory extent map for OCFS2. Man, this code was prettier in 7 * the library. 8 * 9 * Copyright (C) 2004 Oracle. All rights reserved. 10 * 11 * This program is free software; you can redistribute it and/or 12 * modify it under the terms of the GNU General Public 13 * License, version 2, as published by the Free Software Foundation. 14 * 15 * This program is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 * General Public License for more details. 19 * 20 * You should have received a copy of the GNU General Public 21 * License along with this program; if not, write to the 22 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 23 * Boston, MA 021110-1307, USA. 24 */ 25 26 #include <linux/fs.h> 27 #include <linux/init.h> 28 #include <linux/types.h> 29 #include <linux/slab.h> 30 #include <linux/rbtree.h> 31 32 #define MLOG_MASK_PREFIX ML_EXTENT_MAP 33 #include <cluster/masklog.h> 34 35 #include "ocfs2.h" 36 37 #include "extent_map.h" 38 #include "inode.h" 39 #include "super.h" 40 41 #include "buffer_head_io.h" 42 43 44 /* 45 * SUCK SUCK SUCK 46 * Our headers are so bad that struct ocfs2_extent_map is in ocfs.h 47 */ 48 49 struct ocfs2_extent_map_entry { 50 struct rb_node e_node; 51 int e_tree_depth; 52 struct ocfs2_extent_rec e_rec; 53 }; 54 55 struct ocfs2_em_insert_context { 56 int need_left; 57 int need_right; 58 struct ocfs2_extent_map_entry *new_ent; 59 struct ocfs2_extent_map_entry *old_ent; 60 struct ocfs2_extent_map_entry *left_ent; 61 struct ocfs2_extent_map_entry *right_ent; 62 }; 63 64 static kmem_cache_t *ocfs2_em_ent_cachep = NULL; 65 66 67 static struct ocfs2_extent_map_entry * 68 ocfs2_extent_map_lookup(struct ocfs2_extent_map *em, 69 u32 cpos, u32 clusters, 70 struct rb_node ***ret_p, 71 struct rb_node **ret_parent); 72 static int ocfs2_extent_map_insert(struct inode *inode, 73 struct ocfs2_extent_rec *rec, 74 int tree_depth); 75 static int ocfs2_extent_map_insert_entry(struct ocfs2_extent_map *em, 76 struct ocfs2_extent_map_entry *ent); 77 static int ocfs2_extent_map_find_leaf(struct inode *inode, 78 u32 cpos, u32 clusters, 79 struct ocfs2_extent_list *el); 80 static int ocfs2_extent_map_lookup_read(struct inode *inode, 81 u32 cpos, u32 clusters, 82 struct ocfs2_extent_map_entry **ret_ent); 83 static int ocfs2_extent_map_try_insert(struct inode *inode, 84 struct ocfs2_extent_rec *rec, 85 int tree_depth, 86 struct ocfs2_em_insert_context *ctxt); 87 88 /* returns 1 only if the rec contains all the given clusters -- that is that 89 * rec's cpos is <= the cluster cpos and that the rec endpoint (cpos + 90 * clusters) is >= the argument's endpoint */ 91 static int ocfs2_extent_rec_contains_clusters(struct ocfs2_extent_rec *rec, 92 u32 cpos, u32 clusters) 93 { 94 if (le32_to_cpu(rec->e_cpos) > cpos) 95 return 0; 96 if (cpos + clusters > le32_to_cpu(rec->e_cpos) + 97 le32_to_cpu(rec->e_clusters)) 98 return 0; 99 return 1; 100 } 101 102 103 /* 104 * Find an entry in the tree that intersects the region passed in. 105 * Note that this will find straddled intervals, it is up to the 106 * callers to enforce any boundary conditions. 107 * 108 * Callers must hold ip_lock. This lookup is not guaranteed to return 109 * a tree_depth 0 match, and as such can race inserts if the lock 110 * were not held. 111 * 112 * The rb_node garbage lets insertion share the search. Trivial 113 * callers pass NULL. 114 */ 115 static struct ocfs2_extent_map_entry * 116 ocfs2_extent_map_lookup(struct ocfs2_extent_map *em, 117 u32 cpos, u32 clusters, 118 struct rb_node ***ret_p, 119 struct rb_node **ret_parent) 120 { 121 struct rb_node **p = &em->em_extents.rb_node; 122 struct rb_node *parent = NULL; 123 struct ocfs2_extent_map_entry *ent = NULL; 124 125 while (*p) 126 { 127 parent = *p; 128 ent = rb_entry(parent, struct ocfs2_extent_map_entry, 129 e_node); 130 if ((cpos + clusters) <= le32_to_cpu(ent->e_rec.e_cpos)) { 131 p = &(*p)->rb_left; 132 ent = NULL; 133 } else if (cpos >= (le32_to_cpu(ent->e_rec.e_cpos) + 134 le32_to_cpu(ent->e_rec.e_clusters))) { 135 p = &(*p)->rb_right; 136 ent = NULL; 137 } else 138 break; 139 } 140 141 if (ret_p != NULL) 142 *ret_p = p; 143 if (ret_parent != NULL) 144 *ret_parent = parent; 145 return ent; 146 } 147 148 /* 149 * Find the leaf containing the interval we want. While we're on our 150 * way down the tree, fill in every record we see at any depth, because 151 * we might want it later. 152 * 153 * Note that this code is run without ip_lock. That's because it 154 * sleeps while reading. If someone is also filling the extent list at 155 * the same time we are, we might have to restart. 156 */ 157 static int ocfs2_extent_map_find_leaf(struct inode *inode, 158 u32 cpos, u32 clusters, 159 struct ocfs2_extent_list *el) 160 { 161 int i, ret; 162 struct buffer_head *eb_bh = NULL; 163 u64 blkno; 164 u32 rec_end; 165 struct ocfs2_extent_block *eb; 166 struct ocfs2_extent_rec *rec; 167 168 /* 169 * The bh data containing the el cannot change here, because 170 * we hold alloc_sem. So we can do this without other 171 * locks. 172 */ 173 while (el->l_tree_depth) 174 { 175 blkno = 0; 176 for (i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) { 177 rec = &el->l_recs[i]; 178 rec_end = (le32_to_cpu(rec->e_cpos) + 179 le32_to_cpu(rec->e_clusters)); 180 181 ret = -EBADR; 182 if (rec_end > OCFS2_I(inode)->ip_clusters) { 183 mlog_errno(ret); 184 goto out_free; 185 } 186 187 if (rec_end <= cpos) { 188 ret = ocfs2_extent_map_insert(inode, rec, 189 le16_to_cpu(el->l_tree_depth)); 190 if (ret && (ret != -EEXIST)) { 191 mlog_errno(ret); 192 goto out_free; 193 } 194 continue; 195 } 196 if ((cpos + clusters) <= le32_to_cpu(rec->e_cpos)) { 197 ret = ocfs2_extent_map_insert(inode, rec, 198 le16_to_cpu(el->l_tree_depth)); 199 if (ret && (ret != -EEXIST)) { 200 mlog_errno(ret); 201 goto out_free; 202 } 203 continue; 204 } 205 206 /* 207 * We've found a record that matches our 208 * interval. We don't insert it because we're 209 * about to traverse it. 210 */ 211 212 /* Check to see if we're stradling */ 213 ret = -ESRCH; 214 if (!ocfs2_extent_rec_contains_clusters(rec, 215 cpos, 216 clusters)) { 217 mlog_errno(ret); 218 goto out_free; 219 } 220 221 /* 222 * If we've already found a record, the el has 223 * two records covering the same interval. 224 * EEEK! 225 */ 226 ret = -EBADR; 227 if (blkno) { 228 mlog_errno(ret); 229 goto out_free; 230 } 231 232 blkno = le64_to_cpu(rec->e_blkno); 233 } 234 235 /* 236 * We don't support holes, and we're still up 237 * in the branches, so we'd better have found someone 238 */ 239 ret = -EBADR; 240 if (!blkno) { 241 mlog_errno(ret); 242 goto out_free; 243 } 244 245 if (eb_bh) { 246 brelse(eb_bh); 247 eb_bh = NULL; 248 } 249 ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), 250 blkno, &eb_bh, OCFS2_BH_CACHED, 251 inode); 252 if (ret) { 253 mlog_errno(ret); 254 goto out_free; 255 } 256 eb = (struct ocfs2_extent_block *)eb_bh->b_data; 257 if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { 258 OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); 259 ret = -EIO; 260 goto out_free; 261 } 262 el = &eb->h_list; 263 } 264 265 if (el->l_tree_depth) 266 BUG(); 267 268 for (i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) { 269 rec = &el->l_recs[i]; 270 ret = ocfs2_extent_map_insert(inode, rec, 271 le16_to_cpu(el->l_tree_depth)); 272 if (ret) { 273 mlog_errno(ret); 274 goto out_free; 275 } 276 } 277 278 ret = 0; 279 280 out_free: 281 if (eb_bh) 282 brelse(eb_bh); 283 284 return ret; 285 } 286 287 /* 288 * This lookup actually will read from disk. It has one invariant: 289 * It will never re-traverse blocks. This means that all inserts should 290 * be new regions or more granular regions (both allowed by insert). 291 */ 292 static int ocfs2_extent_map_lookup_read(struct inode *inode, 293 u32 cpos, 294 u32 clusters, 295 struct ocfs2_extent_map_entry **ret_ent) 296 { 297 int ret; 298 u64 blkno; 299 struct ocfs2_extent_map *em = &OCFS2_I(inode)->ip_map; 300 struct ocfs2_extent_map_entry *ent; 301 struct buffer_head *bh = NULL; 302 struct ocfs2_extent_block *eb; 303 struct ocfs2_dinode *di; 304 struct ocfs2_extent_list *el; 305 306 spin_lock(&OCFS2_I(inode)->ip_lock); 307 ent = ocfs2_extent_map_lookup(em, cpos, clusters, NULL, NULL); 308 if (ent) { 309 if (!ent->e_tree_depth) { 310 spin_unlock(&OCFS2_I(inode)->ip_lock); 311 *ret_ent = ent; 312 return 0; 313 } 314 blkno = le64_to_cpu(ent->e_rec.e_blkno); 315 spin_unlock(&OCFS2_I(inode)->ip_lock); 316 317 ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), blkno, &bh, 318 OCFS2_BH_CACHED, inode); 319 if (ret) { 320 mlog_errno(ret); 321 if (bh) 322 brelse(bh); 323 return ret; 324 } 325 eb = (struct ocfs2_extent_block *)bh->b_data; 326 if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { 327 OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); 328 brelse(bh); 329 return -EIO; 330 } 331 el = &eb->h_list; 332 } else { 333 spin_unlock(&OCFS2_I(inode)->ip_lock); 334 335 ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), 336 OCFS2_I(inode)->ip_blkno, &bh, 337 OCFS2_BH_CACHED, inode); 338 if (ret) { 339 mlog_errno(ret); 340 if (bh) 341 brelse(bh); 342 return ret; 343 } 344 di = (struct ocfs2_dinode *)bh->b_data; 345 if (!OCFS2_IS_VALID_DINODE(di)) { 346 brelse(bh); 347 OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, di); 348 return -EIO; 349 } 350 el = &di->id2.i_list; 351 } 352 353 ret = ocfs2_extent_map_find_leaf(inode, cpos, clusters, el); 354 brelse(bh); 355 if (ret) { 356 mlog_errno(ret); 357 return ret; 358 } 359 360 ent = ocfs2_extent_map_lookup(em, cpos, clusters, NULL, NULL); 361 if (!ent) { 362 ret = -ESRCH; 363 mlog_errno(ret); 364 return ret; 365 } 366 367 if (ent->e_tree_depth) 368 BUG(); /* FIXME: Make sure this isn't a corruption */ 369 370 *ret_ent = ent; 371 372 return 0; 373 } 374 375 /* 376 * Callers must hold ip_lock. This can insert pieces of the tree, 377 * thus racing lookup if the lock weren't held. 378 */ 379 static int ocfs2_extent_map_insert_entry(struct ocfs2_extent_map *em, 380 struct ocfs2_extent_map_entry *ent) 381 { 382 struct rb_node **p, *parent; 383 struct ocfs2_extent_map_entry *old_ent; 384 385 old_ent = ocfs2_extent_map_lookup(em, le32_to_cpu(ent->e_rec.e_cpos), 386 le32_to_cpu(ent->e_rec.e_clusters), 387 &p, &parent); 388 if (old_ent) 389 return -EEXIST; 390 391 rb_link_node(&ent->e_node, parent, p); 392 rb_insert_color(&ent->e_node, &em->em_extents); 393 394 return 0; 395 } 396 397 398 /* 399 * Simple rule: on any return code other than -EAGAIN, anything left 400 * in the insert_context will be freed. 401 */ 402 static int ocfs2_extent_map_try_insert(struct inode *inode, 403 struct ocfs2_extent_rec *rec, 404 int tree_depth, 405 struct ocfs2_em_insert_context *ctxt) 406 { 407 int ret; 408 struct ocfs2_extent_map *em = &OCFS2_I(inode)->ip_map; 409 struct ocfs2_extent_map_entry *old_ent; 410 411 ctxt->need_left = 0; 412 ctxt->need_right = 0; 413 ctxt->old_ent = NULL; 414 415 spin_lock(&OCFS2_I(inode)->ip_lock); 416 ret = ocfs2_extent_map_insert_entry(em, ctxt->new_ent); 417 if (!ret) { 418 ctxt->new_ent = NULL; 419 goto out_unlock; 420 } 421 422 old_ent = ocfs2_extent_map_lookup(em, le32_to_cpu(rec->e_cpos), 423 le32_to_cpu(rec->e_clusters), NULL, 424 NULL); 425 426 if (!old_ent) 427 BUG(); 428 429 ret = -EEXIST; 430 if (old_ent->e_tree_depth < tree_depth) 431 goto out_unlock; 432 433 if (old_ent->e_tree_depth == tree_depth) { 434 if (!memcmp(rec, &old_ent->e_rec, 435 sizeof(struct ocfs2_extent_rec))) 436 ret = 0; 437 438 /* FIXME: Should this be ESRCH/EBADR??? */ 439 goto out_unlock; 440 } 441 442 /* 443 * We do it in this order specifically so that no actual tree 444 * changes occur until we have all the pieces we need. We 445 * don't want malloc failures to leave an inconsistent tree. 446 * Whenever we drop the lock, another process could be 447 * inserting. Also note that, if another process just beat us 448 * to an insert, we might not need the same pieces we needed 449 * the first go round. In the end, the pieces we need will 450 * be used, and the pieces we don't will be freed. 451 */ 452 ctxt->need_left = !!(le32_to_cpu(rec->e_cpos) > 453 le32_to_cpu(old_ent->e_rec.e_cpos)); 454 ctxt->need_right = !!((le32_to_cpu(old_ent->e_rec.e_cpos) + 455 le32_to_cpu(old_ent->e_rec.e_clusters)) > 456 (le32_to_cpu(rec->e_cpos) + le32_to_cpu(rec->e_clusters))); 457 ret = -EAGAIN; 458 if (ctxt->need_left) { 459 if (!ctxt->left_ent) 460 goto out_unlock; 461 *(ctxt->left_ent) = *old_ent; 462 ctxt->left_ent->e_rec.e_clusters = 463 cpu_to_le32(le32_to_cpu(rec->e_cpos) - 464 le32_to_cpu(ctxt->left_ent->e_rec.e_cpos)); 465 } 466 if (ctxt->need_right) { 467 if (!ctxt->right_ent) 468 goto out_unlock; 469 *(ctxt->right_ent) = *old_ent; 470 ctxt->right_ent->e_rec.e_cpos = 471 cpu_to_le32(le32_to_cpu(rec->e_cpos) + 472 le32_to_cpu(rec->e_clusters)); 473 ctxt->right_ent->e_rec.e_clusters = 474 cpu_to_le32((le32_to_cpu(old_ent->e_rec.e_cpos) + 475 le32_to_cpu(old_ent->e_rec.e_clusters)) - 476 le32_to_cpu(ctxt->right_ent->e_rec.e_cpos)); 477 } 478 479 rb_erase(&old_ent->e_node, &em->em_extents); 480 /* Now that he's erased, set him up for deletion */ 481 ctxt->old_ent = old_ent; 482 483 if (ctxt->need_left) { 484 ret = ocfs2_extent_map_insert_entry(em, 485 ctxt->left_ent); 486 if (ret) 487 goto out_unlock; 488 ctxt->left_ent = NULL; 489 } 490 491 if (ctxt->need_right) { 492 ret = ocfs2_extent_map_insert_entry(em, 493 ctxt->right_ent); 494 if (ret) 495 goto out_unlock; 496 ctxt->right_ent = NULL; 497 } 498 499 ret = ocfs2_extent_map_insert_entry(em, ctxt->new_ent); 500 501 if (!ret) 502 ctxt->new_ent = NULL; 503 504 out_unlock: 505 spin_unlock(&OCFS2_I(inode)->ip_lock); 506 507 return ret; 508 } 509 510 511 static int ocfs2_extent_map_insert(struct inode *inode, 512 struct ocfs2_extent_rec *rec, 513 int tree_depth) 514 { 515 int ret; 516 struct ocfs2_em_insert_context ctxt = {0, }; 517 518 if ((le32_to_cpu(rec->e_cpos) + le32_to_cpu(rec->e_clusters)) > 519 OCFS2_I(inode)->ip_map.em_clusters) { 520 ret = -EBADR; 521 mlog_errno(ret); 522 return ret; 523 } 524 525 /* Zero e_clusters means a truncated tail record. It better be EOF */ 526 if (!rec->e_clusters) { 527 if ((le32_to_cpu(rec->e_cpos) + le32_to_cpu(rec->e_clusters)) != 528 OCFS2_I(inode)->ip_map.em_clusters) { 529 ret = -EBADR; 530 mlog_errno(ret); 531 return ret; 532 } 533 534 /* Ignore the truncated tail */ 535 return 0; 536 } 537 538 ret = -ENOMEM; 539 ctxt.new_ent = kmem_cache_alloc(ocfs2_em_ent_cachep, 540 GFP_KERNEL); 541 if (!ctxt.new_ent) { 542 mlog_errno(ret); 543 return ret; 544 } 545 546 ctxt.new_ent->e_rec = *rec; 547 ctxt.new_ent->e_tree_depth = tree_depth; 548 549 do { 550 ret = -ENOMEM; 551 if (ctxt.need_left && !ctxt.left_ent) { 552 ctxt.left_ent = 553 kmem_cache_alloc(ocfs2_em_ent_cachep, 554 GFP_KERNEL); 555 if (!ctxt.left_ent) 556 break; 557 } 558 if (ctxt.need_right && !ctxt.right_ent) { 559 ctxt.right_ent = 560 kmem_cache_alloc(ocfs2_em_ent_cachep, 561 GFP_KERNEL); 562 if (!ctxt.right_ent) 563 break; 564 } 565 566 ret = ocfs2_extent_map_try_insert(inode, rec, 567 tree_depth, &ctxt); 568 } while (ret == -EAGAIN); 569 570 if (ret < 0) 571 mlog_errno(ret); 572 573 if (ctxt.left_ent) 574 kmem_cache_free(ocfs2_em_ent_cachep, ctxt.left_ent); 575 if (ctxt.right_ent) 576 kmem_cache_free(ocfs2_em_ent_cachep, ctxt.right_ent); 577 if (ctxt.old_ent) 578 kmem_cache_free(ocfs2_em_ent_cachep, ctxt.old_ent); 579 if (ctxt.new_ent) 580 kmem_cache_free(ocfs2_em_ent_cachep, ctxt.new_ent); 581 582 return ret; 583 } 584 585 /* 586 * Append this record to the tail of the extent map. It must be 587 * tree_depth 0. The record might be an extension of an existing 588 * record, and as such that needs to be handled. eg: 589 * 590 * Existing record in the extent map: 591 * 592 * cpos = 10, len = 10 593 * |---------| 594 * 595 * New Record: 596 * 597 * cpos = 10, len = 20 598 * |------------------| 599 * 600 * The passed record is the new on-disk record. The new_clusters value 601 * is how many clusters were added to the file. If the append is a 602 * contiguous append, the new_clusters has been added to 603 * rec->e_clusters. If the append is an entirely new extent, then 604 * rec->e_clusters is == new_clusters. 605 */ 606 int ocfs2_extent_map_append(struct inode *inode, 607 struct ocfs2_extent_rec *rec, 608 u32 new_clusters) 609 { 610 int ret; 611 struct ocfs2_extent_map *em = &OCFS2_I(inode)->ip_map; 612 struct ocfs2_extent_map_entry *ent; 613 struct ocfs2_extent_rec *old; 614 615 BUG_ON(!new_clusters); 616 BUG_ON(le32_to_cpu(rec->e_clusters) < new_clusters); 617 618 if (em->em_clusters < OCFS2_I(inode)->ip_clusters) { 619 /* 620 * Size changed underneath us on disk. Drop any 621 * straddling records and update our idea of 622 * i_clusters 623 */ 624 ocfs2_extent_map_drop(inode, em->em_clusters - 1); 625 em->em_clusters = OCFS2_I(inode)->ip_clusters; 626 } 627 628 mlog_bug_on_msg((le32_to_cpu(rec->e_cpos) + 629 le32_to_cpu(rec->e_clusters)) != 630 (em->em_clusters + new_clusters), 631 "Inode %"MLFu64":\n" 632 "rec->e_cpos = %u + rec->e_clusters = %u = %u\n" 633 "em->em_clusters = %u + new_clusters = %u = %u\n", 634 OCFS2_I(inode)->ip_blkno, 635 le32_to_cpu(rec->e_cpos), le32_to_cpu(rec->e_clusters), 636 le32_to_cpu(rec->e_cpos) + le32_to_cpu(rec->e_clusters), 637 em->em_clusters, new_clusters, 638 em->em_clusters + new_clusters); 639 640 em->em_clusters += new_clusters; 641 642 ret = -ENOENT; 643 if (le32_to_cpu(rec->e_clusters) > new_clusters) { 644 /* This is a contiguous append */ 645 ent = ocfs2_extent_map_lookup(em, le32_to_cpu(rec->e_cpos), 1, 646 NULL, NULL); 647 if (ent) { 648 old = &ent->e_rec; 649 BUG_ON((le32_to_cpu(rec->e_cpos) + 650 le32_to_cpu(rec->e_clusters)) != 651 (le32_to_cpu(old->e_cpos) + 652 le32_to_cpu(old->e_clusters) + 653 new_clusters)); 654 if (ent->e_tree_depth == 0) { 655 BUG_ON(le32_to_cpu(old->e_cpos) != 656 le32_to_cpu(rec->e_cpos)); 657 BUG_ON(le64_to_cpu(old->e_blkno) != 658 le64_to_cpu(rec->e_blkno)); 659 ret = 0; 660 } 661 /* 662 * Let non-leafs fall through as -ENOENT to 663 * force insertion of the new leaf. 664 */ 665 le32_add_cpu(&old->e_clusters, new_clusters); 666 } 667 } 668 669 if (ret == -ENOENT) 670 ret = ocfs2_extent_map_insert(inode, rec, 0); 671 if (ret < 0) 672 mlog_errno(ret); 673 return ret; 674 } 675 676 #if 0 677 /* Code here is included but defined out as it completes the extent 678 * map api and may be used in the future. */ 679 680 /* 681 * Look up the record containing this cluster offset. This record is 682 * part of the extent map. Do not free it. Any changes you make to 683 * it will reflect in the extent map. So, if your last extent 684 * is (cpos = 10, clusters = 10) and you truncate the file by 5 685 * clusters, you can do: 686 * 687 * ret = ocfs2_extent_map_get_rec(em, orig_size - 5, &rec); 688 * rec->e_clusters -= 5; 689 * 690 * The lookup does not read from disk. If the map isn't filled in for 691 * an entry, you won't find it. 692 * 693 * Also note that the returned record is valid until alloc_sem is 694 * dropped. After that, truncate and extend can happen. Caveat Emptor. 695 */ 696 int ocfs2_extent_map_get_rec(struct inode *inode, u32 cpos, 697 struct ocfs2_extent_rec **rec, 698 int *tree_depth) 699 { 700 int ret = -ENOENT; 701 struct ocfs2_extent_map *em = &OCFS2_I(inode)->ip_map; 702 struct ocfs2_extent_map_entry *ent; 703 704 *rec = NULL; 705 706 if (cpos >= OCFS2_I(inode)->ip_clusters) 707 return -EINVAL; 708 709 if (cpos >= em->em_clusters) { 710 /* 711 * Size changed underneath us on disk. Drop any 712 * straddling records and update our idea of 713 * i_clusters 714 */ 715 ocfs2_extent_map_drop(inode, em->em_clusters - 1); 716 em->em_clusters = OCFS2_I(inode)->ip_clusters ; 717 } 718 719 ent = ocfs2_extent_map_lookup(&OCFS2_I(inode)->ip_map, cpos, 1, 720 NULL, NULL); 721 722 if (ent) { 723 *rec = &ent->e_rec; 724 if (tree_depth) 725 *tree_depth = ent->e_tree_depth; 726 ret = 0; 727 } 728 729 return ret; 730 } 731 732 int ocfs2_extent_map_get_clusters(struct inode *inode, 733 u32 v_cpos, int count, 734 u32 *p_cpos, int *ret_count) 735 { 736 int ret; 737 u32 coff, ccount; 738 struct ocfs2_extent_map *em = &OCFS2_I(inode)->ip_map; 739 struct ocfs2_extent_map_entry *ent = NULL; 740 741 *p_cpos = ccount = 0; 742 743 if ((v_cpos + count) > OCFS2_I(inode)->ip_clusters) 744 return -EINVAL; 745 746 if ((v_cpos + count) > em->em_clusters) { 747 /* 748 * Size changed underneath us on disk. Drop any 749 * straddling records and update our idea of 750 * i_clusters 751 */ 752 ocfs2_extent_map_drop(inode, em->em_clusters - 1); 753 em->em_clusters = OCFS2_I(inode)->ip_clusters; 754 } 755 756 757 ret = ocfs2_extent_map_lookup_read(inode, v_cpos, count, &ent); 758 if (ret) 759 return ret; 760 761 if (ent) { 762 /* We should never find ourselves straddling an interval */ 763 if (!ocfs2_extent_rec_contains_clusters(&ent->e_rec, 764 v_cpos, 765 count)) 766 return -ESRCH; 767 768 coff = v_cpos - le32_to_cpu(ent->e_rec.e_cpos); 769 *p_cpos = ocfs2_blocks_to_clusters(inode->i_sb, 770 le64_to_cpu(ent->e_rec.e_blkno)) + 771 coff; 772 773 if (ret_count) 774 *ret_count = le32_to_cpu(ent->e_rec.e_clusters) - coff; 775 776 return 0; 777 } 778 779 780 return -ENOENT; 781 } 782 783 #endif /* 0 */ 784 785 int ocfs2_extent_map_get_blocks(struct inode *inode, 786 u64 v_blkno, int count, 787 u64 *p_blkno, int *ret_count) 788 { 789 int ret; 790 u64 boff; 791 u32 cpos, clusters; 792 int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); 793 struct ocfs2_extent_map_entry *ent = NULL; 794 struct ocfs2_extent_map *em = &OCFS2_I(inode)->ip_map; 795 struct ocfs2_extent_rec *rec; 796 797 *p_blkno = 0; 798 799 cpos = ocfs2_blocks_to_clusters(inode->i_sb, v_blkno); 800 clusters = ocfs2_blocks_to_clusters(inode->i_sb, 801 (u64)count + bpc - 1); 802 if ((cpos + clusters) > OCFS2_I(inode)->ip_clusters) { 803 ret = -EINVAL; 804 mlog_errno(ret); 805 return ret; 806 } 807 808 if ((cpos + clusters) > em->em_clusters) { 809 /* 810 * Size changed underneath us on disk. Drop any 811 * straddling records and update our idea of 812 * i_clusters 813 */ 814 ocfs2_extent_map_drop(inode, em->em_clusters - 1); 815 em->em_clusters = OCFS2_I(inode)->ip_clusters; 816 } 817 818 ret = ocfs2_extent_map_lookup_read(inode, cpos, clusters, &ent); 819 if (ret) { 820 mlog_errno(ret); 821 return ret; 822 } 823 824 if (ent) 825 { 826 rec = &ent->e_rec; 827 828 /* We should never find ourselves straddling an interval */ 829 if (!ocfs2_extent_rec_contains_clusters(rec, cpos, clusters)) { 830 ret = -ESRCH; 831 mlog_errno(ret); 832 return ret; 833 } 834 835 boff = ocfs2_clusters_to_blocks(inode->i_sb, cpos - 836 le32_to_cpu(rec->e_cpos)); 837 boff += (v_blkno & (u64)(bpc - 1)); 838 *p_blkno = le64_to_cpu(rec->e_blkno) + boff; 839 840 if (ret_count) { 841 *ret_count = ocfs2_clusters_to_blocks(inode->i_sb, 842 le32_to_cpu(rec->e_clusters)) - boff; 843 } 844 845 return 0; 846 } 847 848 return -ENOENT; 849 } 850 851 int ocfs2_extent_map_init(struct inode *inode) 852 { 853 struct ocfs2_extent_map *em = &OCFS2_I(inode)->ip_map; 854 855 em->em_extents = RB_ROOT; 856 em->em_clusters = 0; 857 858 return 0; 859 } 860 861 /* Needs the lock */ 862 static void __ocfs2_extent_map_drop(struct inode *inode, 863 u32 new_clusters, 864 struct rb_node **free_head, 865 struct ocfs2_extent_map_entry **tail_ent) 866 { 867 struct rb_node *node, *next; 868 struct ocfs2_extent_map *em = &OCFS2_I(inode)->ip_map; 869 struct ocfs2_extent_map_entry *ent; 870 871 *free_head = NULL; 872 873 ent = NULL; 874 node = rb_last(&em->em_extents); 875 while (node) 876 { 877 next = rb_prev(node); 878 879 ent = rb_entry(node, struct ocfs2_extent_map_entry, 880 e_node); 881 if (le32_to_cpu(ent->e_rec.e_cpos) < new_clusters) 882 break; 883 884 rb_erase(&ent->e_node, &em->em_extents); 885 886 node->rb_right = *free_head; 887 *free_head = node; 888 889 ent = NULL; 890 node = next; 891 } 892 893 /* Do we have an entry straddling new_clusters? */ 894 if (tail_ent) { 895 if (ent && 896 ((le32_to_cpu(ent->e_rec.e_cpos) + 897 le32_to_cpu(ent->e_rec.e_clusters)) > new_clusters)) 898 *tail_ent = ent; 899 else 900 *tail_ent = NULL; 901 } 902 } 903 904 static void __ocfs2_extent_map_drop_cleanup(struct rb_node *free_head) 905 { 906 struct rb_node *node; 907 struct ocfs2_extent_map_entry *ent; 908 909 while (free_head) { 910 node = free_head; 911 free_head = node->rb_right; 912 913 ent = rb_entry(node, struct ocfs2_extent_map_entry, 914 e_node); 915 kmem_cache_free(ocfs2_em_ent_cachep, ent); 916 } 917 } 918 919 /* 920 * Remove all entries past new_clusters, inclusive of an entry that 921 * contains new_clusters. This is effectively a cache forget. 922 * 923 * If you want to also clip the last extent by some number of clusters, 924 * you need to call ocfs2_extent_map_trunc(). 925 * This code does not check or modify ip_clusters. 926 */ 927 int ocfs2_extent_map_drop(struct inode *inode, u32 new_clusters) 928 { 929 struct rb_node *free_head = NULL; 930 struct ocfs2_extent_map *em = &OCFS2_I(inode)->ip_map; 931 struct ocfs2_extent_map_entry *ent; 932 933 spin_lock(&OCFS2_I(inode)->ip_lock); 934 935 __ocfs2_extent_map_drop(inode, new_clusters, &free_head, &ent); 936 937 if (ent) { 938 rb_erase(&ent->e_node, &em->em_extents); 939 ent->e_node.rb_right = free_head; 940 free_head = &ent->e_node; 941 } 942 943 spin_unlock(&OCFS2_I(inode)->ip_lock); 944 945 if (free_head) 946 __ocfs2_extent_map_drop_cleanup(free_head); 947 948 return 0; 949 } 950 951 /* 952 * Remove all entries past new_clusters and also clip any extent 953 * straddling new_clusters, if there is one. This does not check 954 * or modify ip_clusters 955 */ 956 int ocfs2_extent_map_trunc(struct inode *inode, u32 new_clusters) 957 { 958 struct rb_node *free_head = NULL; 959 struct ocfs2_extent_map_entry *ent = NULL; 960 961 spin_lock(&OCFS2_I(inode)->ip_lock); 962 963 __ocfs2_extent_map_drop(inode, new_clusters, &free_head, &ent); 964 965 if (ent) 966 ent->e_rec.e_clusters = cpu_to_le32(new_clusters - 967 le32_to_cpu(ent->e_rec.e_cpos)); 968 969 OCFS2_I(inode)->ip_map.em_clusters = new_clusters; 970 971 spin_unlock(&OCFS2_I(inode)->ip_lock); 972 973 if (free_head) 974 __ocfs2_extent_map_drop_cleanup(free_head); 975 976 return 0; 977 } 978 979 int __init init_ocfs2_extent_maps(void) 980 { 981 ocfs2_em_ent_cachep = 982 kmem_cache_create("ocfs2_em_ent", 983 sizeof(struct ocfs2_extent_map_entry), 984 0, SLAB_HWCACHE_ALIGN, NULL, NULL); 985 if (!ocfs2_em_ent_cachep) 986 return -ENOMEM; 987 988 return 0; 989 } 990 991 void __exit exit_ocfs2_extent_maps(void) 992 { 993 kmem_cache_destroy(ocfs2_em_ent_cachep); 994 } 995