1 /* -*- mode: c; c-basic-offset: 8; -*- 2 * vim: noexpandtab sw=8 ts=8 sts=0: 3 * 4 * refcounttree.c 5 * 6 * Copyright (C) 2009 Oracle. All rights reserved. 7 * 8 * This program is free software; you can redistribute it and/or 9 * modify it under the terms of the GNU General Public 10 * License version 2 as published by the Free Software Foundation. 11 * 12 * This program is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 * General Public License for more details. 16 */ 17 18 #include <linux/sort.h> 19 #define MLOG_MASK_PREFIX ML_REFCOUNT 20 #include <cluster/masklog.h> 21 #include "ocfs2.h" 22 #include "inode.h" 23 #include "alloc.h" 24 #include "suballoc.h" 25 #include "journal.h" 26 #include "uptodate.h" 27 #include "super.h" 28 #include "buffer_head_io.h" 29 #include "blockcheck.h" 30 #include "refcounttree.h" 31 #include "sysfile.h" 32 #include "dlmglue.h" 33 #include "extent_map.h" 34 35 static inline struct ocfs2_refcount_tree * 36 cache_info_to_refcount(struct ocfs2_caching_info *ci) 37 { 38 return container_of(ci, struct ocfs2_refcount_tree, rf_ci); 39 } 40 41 static int ocfs2_validate_refcount_block(struct super_block *sb, 42 struct buffer_head *bh) 43 { 44 int rc; 45 struct ocfs2_refcount_block *rb = 46 (struct ocfs2_refcount_block *)bh->b_data; 47 48 mlog(0, "Validating refcount block %llu\n", 49 (unsigned long long)bh->b_blocknr); 50 51 BUG_ON(!buffer_uptodate(bh)); 52 53 /* 54 * If the ecc fails, we return the error but otherwise 55 * leave the filesystem running. We know any error is 56 * local to this block. 57 */ 58 rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &rb->rf_check); 59 if (rc) { 60 mlog(ML_ERROR, "Checksum failed for refcount block %llu\n", 61 (unsigned long long)bh->b_blocknr); 62 return rc; 63 } 64 65 66 if (!OCFS2_IS_VALID_REFCOUNT_BLOCK(rb)) { 67 ocfs2_error(sb, 68 "Refcount block #%llu has bad signature %.*s", 69 (unsigned long long)bh->b_blocknr, 7, 70 rb->rf_signature); 71 return -EINVAL; 72 } 73 74 if (le64_to_cpu(rb->rf_blkno) != bh->b_blocknr) { 75 ocfs2_error(sb, 76 "Refcount block #%llu has an invalid rf_blkno " 77 "of %llu", 78 (unsigned long long)bh->b_blocknr, 79 (unsigned long long)le64_to_cpu(rb->rf_blkno)); 80 return -EINVAL; 81 } 82 83 if (le32_to_cpu(rb->rf_fs_generation) != OCFS2_SB(sb)->fs_generation) { 84 ocfs2_error(sb, 85 "Refcount block #%llu has an invalid " 86 "rf_fs_generation of #%u", 87 (unsigned long long)bh->b_blocknr, 88 le32_to_cpu(rb->rf_fs_generation)); 89 return -EINVAL; 90 } 91 92 return 0; 93 } 94 95 static int ocfs2_read_refcount_block(struct ocfs2_caching_info *ci, 96 u64 rb_blkno, 97 struct buffer_head **bh) 98 { 99 int rc; 100 struct buffer_head *tmp = *bh; 101 102 rc = ocfs2_read_block(ci, rb_blkno, &tmp, 103 ocfs2_validate_refcount_block); 104 105 /* If ocfs2_read_block() got us a new bh, pass it up. */ 106 if (!rc && !*bh) 107 *bh = tmp; 108 109 return rc; 110 } 111 112 static u64 ocfs2_refcount_cache_owner(struct ocfs2_caching_info *ci) 113 { 114 struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci); 115 116 return rf->rf_blkno; 117 } 118 119 static struct super_block * 120 ocfs2_refcount_cache_get_super(struct ocfs2_caching_info *ci) 121 { 122 struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci); 123 124 return rf->rf_sb; 125 } 126 127 static void ocfs2_refcount_cache_lock(struct ocfs2_caching_info *ci) 128 { 129 struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci); 130 131 spin_lock(&rf->rf_lock); 132 } 133 134 static void ocfs2_refcount_cache_unlock(struct ocfs2_caching_info *ci) 135 { 136 struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci); 137 138 spin_unlock(&rf->rf_lock); 139 } 140 141 static void ocfs2_refcount_cache_io_lock(struct ocfs2_caching_info *ci) 142 { 143 struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci); 144 145 mutex_lock(&rf->rf_io_mutex); 146 } 147 148 static void ocfs2_refcount_cache_io_unlock(struct ocfs2_caching_info *ci) 149 { 150 struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci); 151 152 mutex_unlock(&rf->rf_io_mutex); 153 } 154 155 static const struct ocfs2_caching_operations ocfs2_refcount_caching_ops = { 156 .co_owner = ocfs2_refcount_cache_owner, 157 .co_get_super = ocfs2_refcount_cache_get_super, 158 .co_cache_lock = ocfs2_refcount_cache_lock, 159 .co_cache_unlock = ocfs2_refcount_cache_unlock, 160 .co_io_lock = ocfs2_refcount_cache_io_lock, 161 .co_io_unlock = ocfs2_refcount_cache_io_unlock, 162 }; 163 164 static struct ocfs2_refcount_tree * 165 ocfs2_find_refcount_tree(struct ocfs2_super *osb, u64 blkno) 166 { 167 struct rb_node *n = osb->osb_rf_lock_tree.rb_node; 168 struct ocfs2_refcount_tree *tree = NULL; 169 170 while (n) { 171 tree = rb_entry(n, struct ocfs2_refcount_tree, rf_node); 172 173 if (blkno < tree->rf_blkno) 174 n = n->rb_left; 175 else if (blkno > tree->rf_blkno) 176 n = n->rb_right; 177 else 178 return tree; 179 } 180 181 return NULL; 182 } 183 184 /* osb_lock is already locked. */ 185 static void ocfs2_insert_refcount_tree(struct ocfs2_super *osb, 186 struct ocfs2_refcount_tree *new) 187 { 188 u64 rf_blkno = new->rf_blkno; 189 struct rb_node *parent = NULL; 190 struct rb_node **p = &osb->osb_rf_lock_tree.rb_node; 191 struct ocfs2_refcount_tree *tmp; 192 193 while (*p) { 194 parent = *p; 195 196 tmp = rb_entry(parent, struct ocfs2_refcount_tree, 197 rf_node); 198 199 if (rf_blkno < tmp->rf_blkno) 200 p = &(*p)->rb_left; 201 else if (rf_blkno > tmp->rf_blkno) 202 p = &(*p)->rb_right; 203 else { 204 /* This should never happen! */ 205 mlog(ML_ERROR, "Duplicate refcount block %llu found!\n", 206 (unsigned long long)rf_blkno); 207 BUG(); 208 } 209 } 210 211 rb_link_node(&new->rf_node, parent, p); 212 rb_insert_color(&new->rf_node, &osb->osb_rf_lock_tree); 213 } 214 215 static void ocfs2_free_refcount_tree(struct ocfs2_refcount_tree *tree) 216 { 217 ocfs2_metadata_cache_exit(&tree->rf_ci); 218 ocfs2_simple_drop_lockres(OCFS2_SB(tree->rf_sb), &tree->rf_lockres); 219 ocfs2_lock_res_free(&tree->rf_lockres); 220 kfree(tree); 221 } 222 223 static inline void 224 ocfs2_erase_refcount_tree_from_list_no_lock(struct ocfs2_super *osb, 225 struct ocfs2_refcount_tree *tree) 226 { 227 rb_erase(&tree->rf_node, &osb->osb_rf_lock_tree); 228 if (osb->osb_ref_tree_lru && osb->osb_ref_tree_lru == tree) 229 osb->osb_ref_tree_lru = NULL; 230 } 231 232 static void ocfs2_erase_refcount_tree_from_list(struct ocfs2_super *osb, 233 struct ocfs2_refcount_tree *tree) 234 { 235 spin_lock(&osb->osb_lock); 236 ocfs2_erase_refcount_tree_from_list_no_lock(osb, tree); 237 spin_unlock(&osb->osb_lock); 238 } 239 240 void ocfs2_kref_remove_refcount_tree(struct kref *kref) 241 { 242 struct ocfs2_refcount_tree *tree = 243 container_of(kref, struct ocfs2_refcount_tree, rf_getcnt); 244 245 ocfs2_free_refcount_tree(tree); 246 } 247 248 static inline void 249 ocfs2_refcount_tree_get(struct ocfs2_refcount_tree *tree) 250 { 251 kref_get(&tree->rf_getcnt); 252 } 253 254 static inline void 255 ocfs2_refcount_tree_put(struct ocfs2_refcount_tree *tree) 256 { 257 kref_put(&tree->rf_getcnt, ocfs2_kref_remove_refcount_tree); 258 } 259 260 static inline void ocfs2_init_refcount_tree_ci(struct ocfs2_refcount_tree *new, 261 struct super_block *sb) 262 { 263 ocfs2_metadata_cache_init(&new->rf_ci, &ocfs2_refcount_caching_ops); 264 mutex_init(&new->rf_io_mutex); 265 new->rf_sb = sb; 266 spin_lock_init(&new->rf_lock); 267 } 268 269 static inline void ocfs2_init_refcount_tree_lock(struct ocfs2_super *osb, 270 struct ocfs2_refcount_tree *new, 271 u64 rf_blkno, u32 generation) 272 { 273 init_rwsem(&new->rf_sem); 274 ocfs2_refcount_lock_res_init(&new->rf_lockres, osb, 275 rf_blkno, generation); 276 } 277 278 static struct ocfs2_refcount_tree* 279 ocfs2_allocate_refcount_tree(struct ocfs2_super *osb, u64 rf_blkno) 280 { 281 struct ocfs2_refcount_tree *new; 282 283 new = kzalloc(sizeof(struct ocfs2_refcount_tree), GFP_NOFS); 284 if (!new) 285 return NULL; 286 287 new->rf_blkno = rf_blkno; 288 kref_init(&new->rf_getcnt); 289 ocfs2_init_refcount_tree_ci(new, osb->sb); 290 291 return new; 292 } 293 294 static int ocfs2_get_refcount_tree(struct ocfs2_super *osb, u64 rf_blkno, 295 struct ocfs2_refcount_tree **ret_tree) 296 { 297 int ret = 0; 298 struct ocfs2_refcount_tree *tree, *new = NULL; 299 struct buffer_head *ref_root_bh = NULL; 300 struct ocfs2_refcount_block *ref_rb; 301 302 spin_lock(&osb->osb_lock); 303 if (osb->osb_ref_tree_lru && 304 osb->osb_ref_tree_lru->rf_blkno == rf_blkno) 305 tree = osb->osb_ref_tree_lru; 306 else 307 tree = ocfs2_find_refcount_tree(osb, rf_blkno); 308 if (tree) 309 goto out; 310 311 spin_unlock(&osb->osb_lock); 312 313 new = ocfs2_allocate_refcount_tree(osb, rf_blkno); 314 if (!new) { 315 ret = -ENOMEM; 316 mlog_errno(ret); 317 return ret; 318 } 319 /* 320 * We need the generation to create the refcount tree lock and since 321 * it isn't changed during the tree modification, we are safe here to 322 * read without protection. 323 * We also have to purge the cache after we create the lock since the 324 * refcount block may have the stale data. It can only be trusted when 325 * we hold the refcount lock. 326 */ 327 ret = ocfs2_read_refcount_block(&new->rf_ci, rf_blkno, &ref_root_bh); 328 if (ret) { 329 mlog_errno(ret); 330 ocfs2_metadata_cache_exit(&new->rf_ci); 331 kfree(new); 332 return ret; 333 } 334 335 ref_rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data; 336 new->rf_generation = le32_to_cpu(ref_rb->rf_generation); 337 ocfs2_init_refcount_tree_lock(osb, new, rf_blkno, 338 new->rf_generation); 339 ocfs2_metadata_cache_purge(&new->rf_ci); 340 341 spin_lock(&osb->osb_lock); 342 tree = ocfs2_find_refcount_tree(osb, rf_blkno); 343 if (tree) 344 goto out; 345 346 ocfs2_insert_refcount_tree(osb, new); 347 348 tree = new; 349 new = NULL; 350 351 out: 352 *ret_tree = tree; 353 354 osb->osb_ref_tree_lru = tree; 355 356 spin_unlock(&osb->osb_lock); 357 358 if (new) 359 ocfs2_free_refcount_tree(new); 360 361 brelse(ref_root_bh); 362 return ret; 363 } 364 365 static int ocfs2_get_refcount_block(struct inode *inode, u64 *ref_blkno) 366 { 367 int ret; 368 struct buffer_head *di_bh = NULL; 369 struct ocfs2_dinode *di; 370 371 ret = ocfs2_read_inode_block(inode, &di_bh); 372 if (ret) { 373 mlog_errno(ret); 374 goto out; 375 } 376 377 BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL)); 378 379 di = (struct ocfs2_dinode *)di_bh->b_data; 380 *ref_blkno = le64_to_cpu(di->i_refcount_loc); 381 brelse(di_bh); 382 out: 383 return ret; 384 } 385 386 static int __ocfs2_lock_refcount_tree(struct ocfs2_super *osb, 387 struct ocfs2_refcount_tree *tree, int rw) 388 { 389 int ret; 390 391 ret = ocfs2_refcount_lock(tree, rw); 392 if (ret) { 393 mlog_errno(ret); 394 goto out; 395 } 396 397 if (rw) 398 down_write(&tree->rf_sem); 399 else 400 down_read(&tree->rf_sem); 401 402 out: 403 return ret; 404 } 405 406 /* 407 * Lock the refcount tree pointed by ref_blkno and return the tree. 408 * In most case, we lock the tree and read the refcount block. 409 * So read it here if the caller really needs it. 410 * 411 * If the tree has been re-created by other node, it will free the 412 * old one and re-create it. 413 */ 414 int ocfs2_lock_refcount_tree(struct ocfs2_super *osb, 415 u64 ref_blkno, int rw, 416 struct ocfs2_refcount_tree **ret_tree, 417 struct buffer_head **ref_bh) 418 { 419 int ret, delete_tree = 0; 420 struct ocfs2_refcount_tree *tree = NULL; 421 struct buffer_head *ref_root_bh = NULL; 422 struct ocfs2_refcount_block *rb; 423 424 again: 425 ret = ocfs2_get_refcount_tree(osb, ref_blkno, &tree); 426 if (ret) { 427 mlog_errno(ret); 428 return ret; 429 } 430 431 ocfs2_refcount_tree_get(tree); 432 433 ret = __ocfs2_lock_refcount_tree(osb, tree, rw); 434 if (ret) { 435 mlog_errno(ret); 436 ocfs2_refcount_tree_put(tree); 437 goto out; 438 } 439 440 ret = ocfs2_read_refcount_block(&tree->rf_ci, tree->rf_blkno, 441 &ref_root_bh); 442 if (ret) { 443 mlog_errno(ret); 444 ocfs2_unlock_refcount_tree(osb, tree, rw); 445 ocfs2_refcount_tree_put(tree); 446 goto out; 447 } 448 449 rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data; 450 /* 451 * If the refcount block has been freed and re-created, we may need 452 * to recreate the refcount tree also. 453 * 454 * Here we just remove the tree from the rb-tree, and the last 455 * kref holder will unlock and delete this refcount_tree. 456 * Then we goto "again" and ocfs2_get_refcount_tree will create 457 * the new refcount tree for us. 458 */ 459 if (tree->rf_generation != le32_to_cpu(rb->rf_generation)) { 460 if (!tree->rf_removed) { 461 ocfs2_erase_refcount_tree_from_list(osb, tree); 462 tree->rf_removed = 1; 463 delete_tree = 1; 464 } 465 466 ocfs2_unlock_refcount_tree(osb, tree, rw); 467 /* 468 * We get an extra reference when we create the refcount 469 * tree, so another put will destroy it. 470 */ 471 if (delete_tree) 472 ocfs2_refcount_tree_put(tree); 473 brelse(ref_root_bh); 474 ref_root_bh = NULL; 475 goto again; 476 } 477 478 *ret_tree = tree; 479 if (ref_bh) { 480 *ref_bh = ref_root_bh; 481 ref_root_bh = NULL; 482 } 483 out: 484 brelse(ref_root_bh); 485 return ret; 486 } 487 488 int ocfs2_lock_refcount_tree_by_inode(struct inode *inode, int rw, 489 struct ocfs2_refcount_tree **ret_tree, 490 struct buffer_head **ref_bh) 491 { 492 int ret; 493 u64 ref_blkno; 494 495 ret = ocfs2_get_refcount_block(inode, &ref_blkno); 496 if (ret) { 497 mlog_errno(ret); 498 return ret; 499 } 500 501 return ocfs2_lock_refcount_tree(OCFS2_SB(inode->i_sb), ref_blkno, 502 rw, ret_tree, ref_bh); 503 } 504 505 void ocfs2_unlock_refcount_tree(struct ocfs2_super *osb, 506 struct ocfs2_refcount_tree *tree, int rw) 507 { 508 if (rw) 509 up_write(&tree->rf_sem); 510 else 511 up_read(&tree->rf_sem); 512 513 ocfs2_refcount_unlock(tree, rw); 514 ocfs2_refcount_tree_put(tree); 515 } 516 517 void ocfs2_purge_refcount_trees(struct ocfs2_super *osb) 518 { 519 struct rb_node *node; 520 struct ocfs2_refcount_tree *tree; 521 struct rb_root *root = &osb->osb_rf_lock_tree; 522 523 while ((node = rb_last(root)) != NULL) { 524 tree = rb_entry(node, struct ocfs2_refcount_tree, rf_node); 525 526 mlog(0, "Purge tree %llu\n", 527 (unsigned long long) tree->rf_blkno); 528 529 rb_erase(&tree->rf_node, root); 530 ocfs2_free_refcount_tree(tree); 531 } 532 } 533 534 /* 535 * Create a refcount tree for an inode. 536 * We take for granted that the inode is already locked. 537 */ 538 static int ocfs2_create_refcount_tree(struct inode *inode, 539 struct buffer_head *di_bh) 540 { 541 int ret; 542 handle_t *handle = NULL; 543 struct ocfs2_alloc_context *meta_ac = NULL; 544 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 545 struct ocfs2_inode_info *oi = OCFS2_I(inode); 546 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 547 struct buffer_head *new_bh = NULL; 548 struct ocfs2_refcount_block *rb; 549 struct ocfs2_refcount_tree *new_tree = NULL, *tree = NULL; 550 u16 suballoc_bit_start; 551 u32 num_got; 552 u64 first_blkno; 553 554 BUG_ON(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL); 555 556 mlog(0, "create tree for inode %lu\n", inode->i_ino); 557 558 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &meta_ac); 559 if (ret) { 560 mlog_errno(ret); 561 goto out; 562 } 563 564 handle = ocfs2_start_trans(osb, OCFS2_REFCOUNT_TREE_CREATE_CREDITS); 565 if (IS_ERR(handle)) { 566 ret = PTR_ERR(handle); 567 mlog_errno(ret); 568 goto out; 569 } 570 571 ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh, 572 OCFS2_JOURNAL_ACCESS_WRITE); 573 if (ret) { 574 mlog_errno(ret); 575 goto out_commit; 576 } 577 578 ret = ocfs2_claim_metadata(osb, handle, meta_ac, 1, 579 &suballoc_bit_start, &num_got, 580 &first_blkno); 581 if (ret) { 582 mlog_errno(ret); 583 goto out_commit; 584 } 585 586 new_tree = ocfs2_allocate_refcount_tree(osb, first_blkno); 587 if (!new_tree) { 588 ret = -ENOMEM; 589 mlog_errno(ret); 590 goto out_commit; 591 } 592 593 new_bh = sb_getblk(inode->i_sb, first_blkno); 594 ocfs2_set_new_buffer_uptodate(&new_tree->rf_ci, new_bh); 595 596 ret = ocfs2_journal_access_rb(handle, &new_tree->rf_ci, new_bh, 597 OCFS2_JOURNAL_ACCESS_CREATE); 598 if (ret) { 599 mlog_errno(ret); 600 goto out_commit; 601 } 602 603 /* Initialize ocfs2_refcount_block. */ 604 rb = (struct ocfs2_refcount_block *)new_bh->b_data; 605 memset(rb, 0, inode->i_sb->s_blocksize); 606 strcpy((void *)rb, OCFS2_REFCOUNT_BLOCK_SIGNATURE); 607 rb->rf_suballoc_slot = cpu_to_le16(osb->slot_num); 608 rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start); 609 rb->rf_fs_generation = cpu_to_le32(osb->fs_generation); 610 rb->rf_blkno = cpu_to_le64(first_blkno); 611 rb->rf_count = cpu_to_le32(1); 612 rb->rf_records.rl_count = 613 cpu_to_le16(ocfs2_refcount_recs_per_rb(osb->sb)); 614 spin_lock(&osb->osb_lock); 615 rb->rf_generation = osb->s_next_generation++; 616 spin_unlock(&osb->osb_lock); 617 618 ocfs2_journal_dirty(handle, new_bh); 619 620 spin_lock(&oi->ip_lock); 621 oi->ip_dyn_features |= OCFS2_HAS_REFCOUNT_FL; 622 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features); 623 di->i_refcount_loc = cpu_to_le64(first_blkno); 624 spin_unlock(&oi->ip_lock); 625 626 mlog(0, "created tree for inode %lu, refblock %llu\n", 627 inode->i_ino, (unsigned long long)first_blkno); 628 629 ocfs2_journal_dirty(handle, di_bh); 630 631 /* 632 * We have to init the tree lock here since it will use 633 * the generation number to create it. 634 */ 635 new_tree->rf_generation = le32_to_cpu(rb->rf_generation); 636 ocfs2_init_refcount_tree_lock(osb, new_tree, first_blkno, 637 new_tree->rf_generation); 638 639 spin_lock(&osb->osb_lock); 640 tree = ocfs2_find_refcount_tree(osb, first_blkno); 641 642 /* 643 * We've just created a new refcount tree in this block. If 644 * we found a refcount tree on the ocfs2_super, it must be 645 * one we just deleted. We free the old tree before 646 * inserting the new tree. 647 */ 648 BUG_ON(tree && tree->rf_generation == new_tree->rf_generation); 649 if (tree) 650 ocfs2_erase_refcount_tree_from_list_no_lock(osb, tree); 651 ocfs2_insert_refcount_tree(osb, new_tree); 652 spin_unlock(&osb->osb_lock); 653 new_tree = NULL; 654 if (tree) 655 ocfs2_refcount_tree_put(tree); 656 657 out_commit: 658 ocfs2_commit_trans(osb, handle); 659 660 out: 661 if (new_tree) { 662 ocfs2_metadata_cache_exit(&new_tree->rf_ci); 663 kfree(new_tree); 664 } 665 666 brelse(new_bh); 667 if (meta_ac) 668 ocfs2_free_alloc_context(meta_ac); 669 670 return ret; 671 } 672 673 static int ocfs2_set_refcount_tree(struct inode *inode, 674 struct buffer_head *di_bh, 675 u64 refcount_loc) 676 { 677 int ret; 678 handle_t *handle = NULL; 679 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 680 struct ocfs2_inode_info *oi = OCFS2_I(inode); 681 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 682 struct buffer_head *ref_root_bh = NULL; 683 struct ocfs2_refcount_block *rb; 684 struct ocfs2_refcount_tree *ref_tree; 685 686 BUG_ON(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL); 687 688 ret = ocfs2_lock_refcount_tree(osb, refcount_loc, 1, 689 &ref_tree, &ref_root_bh); 690 if (ret) { 691 mlog_errno(ret); 692 return ret; 693 } 694 695 handle = ocfs2_start_trans(osb, OCFS2_REFCOUNT_TREE_SET_CREDITS); 696 if (IS_ERR(handle)) { 697 ret = PTR_ERR(handle); 698 mlog_errno(ret); 699 goto out; 700 } 701 702 ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh, 703 OCFS2_JOURNAL_ACCESS_WRITE); 704 if (ret) { 705 mlog_errno(ret); 706 goto out_commit; 707 } 708 709 ret = ocfs2_journal_access_rb(handle, &ref_tree->rf_ci, ref_root_bh, 710 OCFS2_JOURNAL_ACCESS_WRITE); 711 if (ret) { 712 mlog_errno(ret); 713 goto out_commit; 714 } 715 716 rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data; 717 le32_add_cpu(&rb->rf_count, 1); 718 719 ocfs2_journal_dirty(handle, ref_root_bh); 720 721 spin_lock(&oi->ip_lock); 722 oi->ip_dyn_features |= OCFS2_HAS_REFCOUNT_FL; 723 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features); 724 di->i_refcount_loc = cpu_to_le64(refcount_loc); 725 spin_unlock(&oi->ip_lock); 726 ocfs2_journal_dirty(handle, di_bh); 727 728 out_commit: 729 ocfs2_commit_trans(osb, handle); 730 out: 731 ocfs2_unlock_refcount_tree(osb, ref_tree, 1); 732 brelse(ref_root_bh); 733 734 return ret; 735 } 736 737 int ocfs2_remove_refcount_tree(struct inode *inode, struct buffer_head *di_bh) 738 { 739 int ret, delete_tree = 0; 740 handle_t *handle = NULL; 741 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 742 struct ocfs2_inode_info *oi = OCFS2_I(inode); 743 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 744 struct ocfs2_refcount_block *rb; 745 struct inode *alloc_inode = NULL; 746 struct buffer_head *alloc_bh = NULL; 747 struct buffer_head *blk_bh = NULL; 748 struct ocfs2_refcount_tree *ref_tree; 749 int credits = OCFS2_REFCOUNT_TREE_REMOVE_CREDITS; 750 u64 blk = 0, bg_blkno = 0, ref_blkno = le64_to_cpu(di->i_refcount_loc); 751 u16 bit = 0; 752 753 if (!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL)) 754 return 0; 755 756 BUG_ON(!ref_blkno); 757 ret = ocfs2_lock_refcount_tree(osb, ref_blkno, 1, &ref_tree, &blk_bh); 758 if (ret) { 759 mlog_errno(ret); 760 return ret; 761 } 762 763 rb = (struct ocfs2_refcount_block *)blk_bh->b_data; 764 765 /* 766 * If we are the last user, we need to free the block. 767 * So lock the allocator ahead. 768 */ 769 if (le32_to_cpu(rb->rf_count) == 1) { 770 blk = le64_to_cpu(rb->rf_blkno); 771 bit = le16_to_cpu(rb->rf_suballoc_bit); 772 bg_blkno = ocfs2_which_suballoc_group(blk, bit); 773 774 alloc_inode = ocfs2_get_system_file_inode(osb, 775 EXTENT_ALLOC_SYSTEM_INODE, 776 le16_to_cpu(rb->rf_suballoc_slot)); 777 if (!alloc_inode) { 778 ret = -ENOMEM; 779 mlog_errno(ret); 780 goto out; 781 } 782 mutex_lock(&alloc_inode->i_mutex); 783 784 ret = ocfs2_inode_lock(alloc_inode, &alloc_bh, 1); 785 if (ret) { 786 mlog_errno(ret); 787 goto out_mutex; 788 } 789 790 credits += OCFS2_SUBALLOC_FREE; 791 } 792 793 handle = ocfs2_start_trans(osb, credits); 794 if (IS_ERR(handle)) { 795 ret = PTR_ERR(handle); 796 mlog_errno(ret); 797 goto out_unlock; 798 } 799 800 ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh, 801 OCFS2_JOURNAL_ACCESS_WRITE); 802 if (ret) { 803 mlog_errno(ret); 804 goto out_commit; 805 } 806 807 ret = ocfs2_journal_access_rb(handle, &ref_tree->rf_ci, blk_bh, 808 OCFS2_JOURNAL_ACCESS_WRITE); 809 if (ret) { 810 mlog_errno(ret); 811 goto out_commit; 812 } 813 814 spin_lock(&oi->ip_lock); 815 oi->ip_dyn_features &= ~OCFS2_HAS_REFCOUNT_FL; 816 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features); 817 di->i_refcount_loc = 0; 818 spin_unlock(&oi->ip_lock); 819 ocfs2_journal_dirty(handle, di_bh); 820 821 le32_add_cpu(&rb->rf_count , -1); 822 ocfs2_journal_dirty(handle, blk_bh); 823 824 if (!rb->rf_count) { 825 delete_tree = 1; 826 ocfs2_erase_refcount_tree_from_list(osb, ref_tree); 827 ret = ocfs2_free_suballoc_bits(handle, alloc_inode, 828 alloc_bh, bit, bg_blkno, 1); 829 if (ret) 830 mlog_errno(ret); 831 } 832 833 out_commit: 834 ocfs2_commit_trans(osb, handle); 835 out_unlock: 836 if (alloc_inode) { 837 ocfs2_inode_unlock(alloc_inode, 1); 838 brelse(alloc_bh); 839 } 840 out_mutex: 841 if (alloc_inode) { 842 mutex_unlock(&alloc_inode->i_mutex); 843 iput(alloc_inode); 844 } 845 out: 846 ocfs2_unlock_refcount_tree(osb, ref_tree, 1); 847 if (delete_tree) 848 ocfs2_refcount_tree_put(ref_tree); 849 brelse(blk_bh); 850 851 return ret; 852 } 853 854 static void ocfs2_find_refcount_rec_in_rl(struct ocfs2_caching_info *ci, 855 struct buffer_head *ref_leaf_bh, 856 u64 cpos, unsigned int len, 857 struct ocfs2_refcount_rec *ret_rec, 858 int *index) 859 { 860 int i = 0; 861 struct ocfs2_refcount_block *rb = 862 (struct ocfs2_refcount_block *)ref_leaf_bh->b_data; 863 struct ocfs2_refcount_rec *rec = NULL; 864 865 for (; i < le16_to_cpu(rb->rf_records.rl_used); i++) { 866 rec = &rb->rf_records.rl_recs[i]; 867 868 if (le64_to_cpu(rec->r_cpos) + 869 le32_to_cpu(rec->r_clusters) <= cpos) 870 continue; 871 else if (le64_to_cpu(rec->r_cpos) > cpos) 872 break; 873 874 /* ok, cpos fail in this rec. Just return. */ 875 if (ret_rec) 876 *ret_rec = *rec; 877 goto out; 878 } 879 880 if (ret_rec) { 881 /* We meet with a hole here, so fake the rec. */ 882 ret_rec->r_cpos = cpu_to_le64(cpos); 883 ret_rec->r_refcount = 0; 884 if (i < le16_to_cpu(rb->rf_records.rl_used) && 885 le64_to_cpu(rec->r_cpos) < cpos + len) 886 ret_rec->r_clusters = 887 cpu_to_le32(le64_to_cpu(rec->r_cpos) - cpos); 888 else 889 ret_rec->r_clusters = cpu_to_le32(len); 890 } 891 892 out: 893 *index = i; 894 } 895 896 /* 897 * Given a cpos and len, try to find the refcount record which contains cpos. 898 * 1. If cpos can be found in one refcount record, return the record. 899 * 2. If cpos can't be found, return a fake record which start from cpos 900 * and end at a small value between cpos+len and start of the next record. 901 * This fake record has r_refcount = 0. 902 */ 903 static int ocfs2_get_refcount_rec(struct ocfs2_caching_info *ci, 904 struct buffer_head *ref_root_bh, 905 u64 cpos, unsigned int len, 906 struct ocfs2_refcount_rec *ret_rec, 907 int *index, 908 struct buffer_head **ret_bh) 909 { 910 int ret = 0, i, found; 911 u32 low_cpos; 912 struct ocfs2_extent_list *el; 913 struct ocfs2_extent_rec *tmp, *rec = NULL; 914 struct ocfs2_extent_block *eb; 915 struct buffer_head *eb_bh = NULL, *ref_leaf_bh = NULL; 916 struct super_block *sb = ocfs2_metadata_cache_get_super(ci); 917 struct ocfs2_refcount_block *rb = 918 (struct ocfs2_refcount_block *)ref_root_bh->b_data; 919 920 if (!(le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)) { 921 ocfs2_find_refcount_rec_in_rl(ci, ref_root_bh, cpos, len, 922 ret_rec, index); 923 *ret_bh = ref_root_bh; 924 get_bh(ref_root_bh); 925 return 0; 926 } 927 928 el = &rb->rf_list; 929 low_cpos = cpos & OCFS2_32BIT_POS_MASK; 930 931 if (el->l_tree_depth) { 932 ret = ocfs2_find_leaf(ci, el, low_cpos, &eb_bh); 933 if (ret) { 934 mlog_errno(ret); 935 goto out; 936 } 937 938 eb = (struct ocfs2_extent_block *) eb_bh->b_data; 939 el = &eb->h_list; 940 941 if (el->l_tree_depth) { 942 ocfs2_error(sb, 943 "refcount tree %llu has non zero tree " 944 "depth in leaf btree tree block %llu\n", 945 (unsigned long long)ocfs2_metadata_cache_owner(ci), 946 (unsigned long long)eb_bh->b_blocknr); 947 ret = -EROFS; 948 goto out; 949 } 950 } 951 952 found = 0; 953 for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) { 954 rec = &el->l_recs[i]; 955 956 if (le32_to_cpu(rec->e_cpos) <= low_cpos) { 957 found = 1; 958 break; 959 } 960 } 961 962 /* adjust len when we have ocfs2_extent_rec after it. */ 963 if (found && i < le16_to_cpu(el->l_next_free_rec) - 1) { 964 tmp = &el->l_recs[i+1]; 965 966 if (le32_to_cpu(tmp->e_cpos) < cpos + len) 967 len = le32_to_cpu(tmp->e_cpos) - cpos; 968 } 969 970 ret = ocfs2_read_refcount_block(ci, le64_to_cpu(rec->e_blkno), 971 &ref_leaf_bh); 972 if (ret) { 973 mlog_errno(ret); 974 goto out; 975 } 976 977 ocfs2_find_refcount_rec_in_rl(ci, ref_leaf_bh, cpos, len, 978 ret_rec, index); 979 *ret_bh = ref_leaf_bh; 980 out: 981 brelse(eb_bh); 982 return ret; 983 } 984 985 enum ocfs2_ref_rec_contig { 986 REF_CONTIG_NONE = 0, 987 REF_CONTIG_LEFT, 988 REF_CONTIG_RIGHT, 989 REF_CONTIG_LEFTRIGHT, 990 }; 991 992 static enum ocfs2_ref_rec_contig 993 ocfs2_refcount_rec_adjacent(struct ocfs2_refcount_block *rb, 994 int index) 995 { 996 if ((rb->rf_records.rl_recs[index].r_refcount == 997 rb->rf_records.rl_recs[index + 1].r_refcount) && 998 (le64_to_cpu(rb->rf_records.rl_recs[index].r_cpos) + 999 le32_to_cpu(rb->rf_records.rl_recs[index].r_clusters) == 1000 le64_to_cpu(rb->rf_records.rl_recs[index + 1].r_cpos))) 1001 return REF_CONTIG_RIGHT; 1002 1003 return REF_CONTIG_NONE; 1004 } 1005 1006 static enum ocfs2_ref_rec_contig 1007 ocfs2_refcount_rec_contig(struct ocfs2_refcount_block *rb, 1008 int index) 1009 { 1010 enum ocfs2_ref_rec_contig ret = REF_CONTIG_NONE; 1011 1012 if (index < le16_to_cpu(rb->rf_records.rl_used) - 1) 1013 ret = ocfs2_refcount_rec_adjacent(rb, index); 1014 1015 if (index > 0) { 1016 enum ocfs2_ref_rec_contig tmp; 1017 1018 tmp = ocfs2_refcount_rec_adjacent(rb, index - 1); 1019 1020 if (tmp == REF_CONTIG_RIGHT) { 1021 if (ret == REF_CONTIG_RIGHT) 1022 ret = REF_CONTIG_LEFTRIGHT; 1023 else 1024 ret = REF_CONTIG_LEFT; 1025 } 1026 } 1027 1028 return ret; 1029 } 1030 1031 static void ocfs2_rotate_refcount_rec_left(struct ocfs2_refcount_block *rb, 1032 int index) 1033 { 1034 BUG_ON(rb->rf_records.rl_recs[index].r_refcount != 1035 rb->rf_records.rl_recs[index+1].r_refcount); 1036 1037 le32_add_cpu(&rb->rf_records.rl_recs[index].r_clusters, 1038 le32_to_cpu(rb->rf_records.rl_recs[index+1].r_clusters)); 1039 1040 if (index < le16_to_cpu(rb->rf_records.rl_used) - 2) 1041 memmove(&rb->rf_records.rl_recs[index + 1], 1042 &rb->rf_records.rl_recs[index + 2], 1043 sizeof(struct ocfs2_refcount_rec) * 1044 (le16_to_cpu(rb->rf_records.rl_used) - index - 2)); 1045 1046 memset(&rb->rf_records.rl_recs[le16_to_cpu(rb->rf_records.rl_used) - 1], 1047 0, sizeof(struct ocfs2_refcount_rec)); 1048 le16_add_cpu(&rb->rf_records.rl_used, -1); 1049 } 1050 1051 /* 1052 * Merge the refcount rec if we are contiguous with the adjacent recs. 1053 */ 1054 static void ocfs2_refcount_rec_merge(struct ocfs2_refcount_block *rb, 1055 int index) 1056 { 1057 enum ocfs2_ref_rec_contig contig = 1058 ocfs2_refcount_rec_contig(rb, index); 1059 1060 if (contig == REF_CONTIG_NONE) 1061 return; 1062 1063 if (contig == REF_CONTIG_LEFT || contig == REF_CONTIG_LEFTRIGHT) { 1064 BUG_ON(index == 0); 1065 index--; 1066 } 1067 1068 ocfs2_rotate_refcount_rec_left(rb, index); 1069 1070 if (contig == REF_CONTIG_LEFTRIGHT) 1071 ocfs2_rotate_refcount_rec_left(rb, index); 1072 } 1073 1074 /* 1075 * Change the refcount indexed by "index" in ref_bh. 1076 * If refcount reaches 0, remove it. 1077 */ 1078 static int ocfs2_change_refcount_rec(handle_t *handle, 1079 struct ocfs2_caching_info *ci, 1080 struct buffer_head *ref_leaf_bh, 1081 int index, int change) 1082 { 1083 int ret; 1084 struct ocfs2_refcount_block *rb = 1085 (struct ocfs2_refcount_block *)ref_leaf_bh->b_data; 1086 struct ocfs2_refcount_list *rl = &rb->rf_records; 1087 struct ocfs2_refcount_rec *rec = &rl->rl_recs[index]; 1088 1089 ret = ocfs2_journal_access_rb(handle, ci, ref_leaf_bh, 1090 OCFS2_JOURNAL_ACCESS_WRITE); 1091 if (ret) { 1092 mlog_errno(ret); 1093 goto out; 1094 } 1095 1096 mlog(0, "change index %d, old count %u, change %d\n", index, 1097 le32_to_cpu(rec->r_refcount), change); 1098 le32_add_cpu(&rec->r_refcount, change); 1099 1100 if (!rec->r_refcount) { 1101 if (index != le16_to_cpu(rl->rl_used) - 1) { 1102 memmove(rec, rec + 1, 1103 (le16_to_cpu(rl->rl_used) - index - 1) * 1104 sizeof(struct ocfs2_refcount_rec)); 1105 memset(&rl->rl_recs[le16_to_cpu(rl->rl_used) - 1], 1106 0, sizeof(struct ocfs2_refcount_rec)); 1107 } 1108 1109 le16_add_cpu(&rl->rl_used, -1); 1110 } else 1111 ocfs2_refcount_rec_merge(rb, index); 1112 1113 ret = ocfs2_journal_dirty(handle, ref_leaf_bh); 1114 if (ret) 1115 mlog_errno(ret); 1116 out: 1117 return ret; 1118 } 1119 1120 static int ocfs2_expand_inline_ref_root(handle_t *handle, 1121 struct ocfs2_caching_info *ci, 1122 struct buffer_head *ref_root_bh, 1123 struct buffer_head **ref_leaf_bh, 1124 struct ocfs2_alloc_context *meta_ac) 1125 { 1126 int ret; 1127 u16 suballoc_bit_start; 1128 u32 num_got; 1129 u64 blkno; 1130 struct super_block *sb = ocfs2_metadata_cache_get_super(ci); 1131 struct buffer_head *new_bh = NULL; 1132 struct ocfs2_refcount_block *new_rb; 1133 struct ocfs2_refcount_block *root_rb = 1134 (struct ocfs2_refcount_block *)ref_root_bh->b_data; 1135 1136 ret = ocfs2_journal_access_rb(handle, ci, ref_root_bh, 1137 OCFS2_JOURNAL_ACCESS_WRITE); 1138 if (ret) { 1139 mlog_errno(ret); 1140 goto out; 1141 } 1142 1143 ret = ocfs2_claim_metadata(OCFS2_SB(sb), handle, meta_ac, 1, 1144 &suballoc_bit_start, &num_got, 1145 &blkno); 1146 if (ret) { 1147 mlog_errno(ret); 1148 goto out; 1149 } 1150 1151 new_bh = sb_getblk(sb, blkno); 1152 if (new_bh == NULL) { 1153 ret = -EIO; 1154 mlog_errno(ret); 1155 goto out; 1156 } 1157 ocfs2_set_new_buffer_uptodate(ci, new_bh); 1158 1159 ret = ocfs2_journal_access_rb(handle, ci, new_bh, 1160 OCFS2_JOURNAL_ACCESS_CREATE); 1161 if (ret) { 1162 mlog_errno(ret); 1163 goto out; 1164 } 1165 1166 /* 1167 * Initialize ocfs2_refcount_block. 1168 * It should contain the same information as the old root. 1169 * so just memcpy it and change the corresponding field. 1170 */ 1171 memcpy(new_bh->b_data, ref_root_bh->b_data, sb->s_blocksize); 1172 1173 new_rb = (struct ocfs2_refcount_block *)new_bh->b_data; 1174 new_rb->rf_suballoc_slot = cpu_to_le16(OCFS2_SB(sb)->slot_num); 1175 new_rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start); 1176 new_rb->rf_blkno = cpu_to_le64(blkno); 1177 new_rb->rf_cpos = cpu_to_le32(0); 1178 new_rb->rf_parent = cpu_to_le64(ref_root_bh->b_blocknr); 1179 new_rb->rf_flags = cpu_to_le32(OCFS2_REFCOUNT_LEAF_FL); 1180 ocfs2_journal_dirty(handle, new_bh); 1181 1182 /* Now change the root. */ 1183 memset(&root_rb->rf_list, 0, sb->s_blocksize - 1184 offsetof(struct ocfs2_refcount_block, rf_list)); 1185 root_rb->rf_list.l_count = cpu_to_le16(ocfs2_extent_recs_per_rb(sb)); 1186 root_rb->rf_clusters = cpu_to_le32(1); 1187 root_rb->rf_list.l_next_free_rec = cpu_to_le16(1); 1188 root_rb->rf_list.l_recs[0].e_blkno = cpu_to_le64(blkno); 1189 root_rb->rf_list.l_recs[0].e_leaf_clusters = cpu_to_le16(1); 1190 root_rb->rf_flags = cpu_to_le32(OCFS2_REFCOUNT_TREE_FL); 1191 1192 ocfs2_journal_dirty(handle, ref_root_bh); 1193 1194 mlog(0, "new leaf block %llu, used %u\n", (unsigned long long)blkno, 1195 le16_to_cpu(new_rb->rf_records.rl_used)); 1196 1197 *ref_leaf_bh = new_bh; 1198 new_bh = NULL; 1199 out: 1200 brelse(new_bh); 1201 return ret; 1202 } 1203 1204 static int ocfs2_refcount_rec_no_intersect(struct ocfs2_refcount_rec *prev, 1205 struct ocfs2_refcount_rec *next) 1206 { 1207 if (ocfs2_get_ref_rec_low_cpos(prev) + le32_to_cpu(prev->r_clusters) <= 1208 ocfs2_get_ref_rec_low_cpos(next)) 1209 return 1; 1210 1211 return 0; 1212 } 1213 1214 static int cmp_refcount_rec_by_low_cpos(const void *a, const void *b) 1215 { 1216 const struct ocfs2_refcount_rec *l = a, *r = b; 1217 u32 l_cpos = ocfs2_get_ref_rec_low_cpos(l); 1218 u32 r_cpos = ocfs2_get_ref_rec_low_cpos(r); 1219 1220 if (l_cpos > r_cpos) 1221 return 1; 1222 if (l_cpos < r_cpos) 1223 return -1; 1224 return 0; 1225 } 1226 1227 static int cmp_refcount_rec_by_cpos(const void *a, const void *b) 1228 { 1229 const struct ocfs2_refcount_rec *l = a, *r = b; 1230 u64 l_cpos = le64_to_cpu(l->r_cpos); 1231 u64 r_cpos = le64_to_cpu(r->r_cpos); 1232 1233 if (l_cpos > r_cpos) 1234 return 1; 1235 if (l_cpos < r_cpos) 1236 return -1; 1237 return 0; 1238 } 1239 1240 static void swap_refcount_rec(void *a, void *b, int size) 1241 { 1242 struct ocfs2_refcount_rec *l = a, *r = b, tmp; 1243 1244 tmp = *(struct ocfs2_refcount_rec *)l; 1245 *(struct ocfs2_refcount_rec *)l = 1246 *(struct ocfs2_refcount_rec *)r; 1247 *(struct ocfs2_refcount_rec *)r = tmp; 1248 } 1249 1250 /* 1251 * The refcount cpos are ordered by their 64bit cpos, 1252 * But we will use the low 32 bit to be the e_cpos in the b-tree. 1253 * So we need to make sure that this pos isn't intersected with others. 1254 * 1255 * Note: The refcount block is already sorted by their low 32 bit cpos, 1256 * So just try the middle pos first, and we will exit when we find 1257 * the good position. 1258 */ 1259 static int ocfs2_find_refcount_split_pos(struct ocfs2_refcount_list *rl, 1260 u32 *split_pos, int *split_index) 1261 { 1262 int num_used = le16_to_cpu(rl->rl_used); 1263 int delta, middle = num_used / 2; 1264 1265 for (delta = 0; delta < middle; delta++) { 1266 /* Let's check delta earlier than middle */ 1267 if (ocfs2_refcount_rec_no_intersect( 1268 &rl->rl_recs[middle - delta - 1], 1269 &rl->rl_recs[middle - delta])) { 1270 *split_index = middle - delta; 1271 break; 1272 } 1273 1274 /* For even counts, don't walk off the end */ 1275 if ((middle + delta + 1) == num_used) 1276 continue; 1277 1278 /* Now try delta past middle */ 1279 if (ocfs2_refcount_rec_no_intersect( 1280 &rl->rl_recs[middle + delta], 1281 &rl->rl_recs[middle + delta + 1])) { 1282 *split_index = middle + delta + 1; 1283 break; 1284 } 1285 } 1286 1287 if (delta >= middle) 1288 return -ENOSPC; 1289 1290 *split_pos = ocfs2_get_ref_rec_low_cpos(&rl->rl_recs[*split_index]); 1291 return 0; 1292 } 1293 1294 static int ocfs2_divide_leaf_refcount_block(struct buffer_head *ref_leaf_bh, 1295 struct buffer_head *new_bh, 1296 u32 *split_cpos) 1297 { 1298 int split_index = 0, num_moved, ret; 1299 u32 cpos = 0; 1300 struct ocfs2_refcount_block *rb = 1301 (struct ocfs2_refcount_block *)ref_leaf_bh->b_data; 1302 struct ocfs2_refcount_list *rl = &rb->rf_records; 1303 struct ocfs2_refcount_block *new_rb = 1304 (struct ocfs2_refcount_block *)new_bh->b_data; 1305 struct ocfs2_refcount_list *new_rl = &new_rb->rf_records; 1306 1307 mlog(0, "split old leaf refcount block %llu, count = %u, used = %u\n", 1308 (unsigned long long)ref_leaf_bh->b_blocknr, 1309 le32_to_cpu(rl->rl_count), le32_to_cpu(rl->rl_used)); 1310 1311 /* 1312 * XXX: Improvement later. 1313 * If we know all the high 32 bit cpos is the same, no need to sort. 1314 * 1315 * In order to make the whole process safe, we do: 1316 * 1. sort the entries by their low 32 bit cpos first so that we can 1317 * find the split cpos easily. 1318 * 2. call ocfs2_insert_extent to insert the new refcount block. 1319 * 3. move the refcount rec to the new block. 1320 * 4. sort the entries by their 64 bit cpos. 1321 * 5. dirty the new_rb and rb. 1322 */ 1323 sort(&rl->rl_recs, le16_to_cpu(rl->rl_used), 1324 sizeof(struct ocfs2_refcount_rec), 1325 cmp_refcount_rec_by_low_cpos, swap_refcount_rec); 1326 1327 ret = ocfs2_find_refcount_split_pos(rl, &cpos, &split_index); 1328 if (ret) { 1329 mlog_errno(ret); 1330 return ret; 1331 } 1332 1333 new_rb->rf_cpos = cpu_to_le32(cpos); 1334 1335 /* move refcount records starting from split_index to the new block. */ 1336 num_moved = le16_to_cpu(rl->rl_used) - split_index; 1337 memcpy(new_rl->rl_recs, &rl->rl_recs[split_index], 1338 num_moved * sizeof(struct ocfs2_refcount_rec)); 1339 1340 /*ok, remove the entries we just moved over to the other block. */ 1341 memset(&rl->rl_recs[split_index], 0, 1342 num_moved * sizeof(struct ocfs2_refcount_rec)); 1343 1344 /* change old and new rl_used accordingly. */ 1345 le16_add_cpu(&rl->rl_used, -num_moved); 1346 new_rl->rl_used = cpu_to_le32(num_moved); 1347 1348 sort(&rl->rl_recs, le16_to_cpu(rl->rl_used), 1349 sizeof(struct ocfs2_refcount_rec), 1350 cmp_refcount_rec_by_cpos, swap_refcount_rec); 1351 1352 sort(&new_rl->rl_recs, le16_to_cpu(new_rl->rl_used), 1353 sizeof(struct ocfs2_refcount_rec), 1354 cmp_refcount_rec_by_cpos, swap_refcount_rec); 1355 1356 *split_cpos = cpos; 1357 return 0; 1358 } 1359 1360 static int ocfs2_new_leaf_refcount_block(handle_t *handle, 1361 struct ocfs2_caching_info *ci, 1362 struct buffer_head *ref_root_bh, 1363 struct buffer_head *ref_leaf_bh, 1364 struct ocfs2_alloc_context *meta_ac) 1365 { 1366 int ret; 1367 u16 suballoc_bit_start; 1368 u32 num_got, new_cpos; 1369 u64 blkno; 1370 struct super_block *sb = ocfs2_metadata_cache_get_super(ci); 1371 struct ocfs2_refcount_block *root_rb = 1372 (struct ocfs2_refcount_block *)ref_root_bh->b_data; 1373 struct buffer_head *new_bh = NULL; 1374 struct ocfs2_refcount_block *new_rb; 1375 struct ocfs2_extent_tree ref_et; 1376 1377 BUG_ON(!(le32_to_cpu(root_rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)); 1378 1379 ret = ocfs2_journal_access_rb(handle, ci, ref_root_bh, 1380 OCFS2_JOURNAL_ACCESS_WRITE); 1381 if (ret) { 1382 mlog_errno(ret); 1383 goto out; 1384 } 1385 1386 ret = ocfs2_journal_access_rb(handle, ci, ref_leaf_bh, 1387 OCFS2_JOURNAL_ACCESS_WRITE); 1388 if (ret) { 1389 mlog_errno(ret); 1390 goto out; 1391 } 1392 1393 ret = ocfs2_claim_metadata(OCFS2_SB(sb), handle, meta_ac, 1, 1394 &suballoc_bit_start, &num_got, 1395 &blkno); 1396 if (ret) { 1397 mlog_errno(ret); 1398 goto out; 1399 } 1400 1401 new_bh = sb_getblk(sb, blkno); 1402 if (new_bh == NULL) { 1403 ret = -EIO; 1404 mlog_errno(ret); 1405 goto out; 1406 } 1407 ocfs2_set_new_buffer_uptodate(ci, new_bh); 1408 1409 ret = ocfs2_journal_access_rb(handle, ci, new_bh, 1410 OCFS2_JOURNAL_ACCESS_CREATE); 1411 if (ret) { 1412 mlog_errno(ret); 1413 goto out; 1414 } 1415 1416 /* Initialize ocfs2_refcount_block. */ 1417 new_rb = (struct ocfs2_refcount_block *)new_bh->b_data; 1418 memset(new_rb, 0, sb->s_blocksize); 1419 strcpy((void *)new_rb, OCFS2_REFCOUNT_BLOCK_SIGNATURE); 1420 new_rb->rf_suballoc_slot = cpu_to_le16(OCFS2_SB(sb)->slot_num); 1421 new_rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start); 1422 new_rb->rf_fs_generation = cpu_to_le32(OCFS2_SB(sb)->fs_generation); 1423 new_rb->rf_blkno = cpu_to_le64(blkno); 1424 new_rb->rf_parent = cpu_to_le64(ref_root_bh->b_blocknr); 1425 new_rb->rf_flags = cpu_to_le32(OCFS2_REFCOUNT_LEAF_FL); 1426 new_rb->rf_records.rl_count = 1427 cpu_to_le16(ocfs2_refcount_recs_per_rb(sb)); 1428 new_rb->rf_generation = root_rb->rf_generation; 1429 1430 ret = ocfs2_divide_leaf_refcount_block(ref_leaf_bh, new_bh, &new_cpos); 1431 if (ret) { 1432 mlog_errno(ret); 1433 goto out; 1434 } 1435 1436 ocfs2_journal_dirty(handle, ref_leaf_bh); 1437 ocfs2_journal_dirty(handle, new_bh); 1438 1439 ocfs2_init_refcount_extent_tree(&ref_et, ci, ref_root_bh); 1440 1441 mlog(0, "insert new leaf block %llu at %u\n", 1442 (unsigned long long)new_bh->b_blocknr, new_cpos); 1443 1444 /* Insert the new leaf block with the specific offset cpos. */ 1445 ret = ocfs2_insert_extent(handle, &ref_et, new_cpos, new_bh->b_blocknr, 1446 1, 0, meta_ac); 1447 if (ret) 1448 mlog_errno(ret); 1449 1450 out: 1451 brelse(new_bh); 1452 return ret; 1453 } 1454 1455 static int ocfs2_expand_refcount_tree(handle_t *handle, 1456 struct ocfs2_caching_info *ci, 1457 struct buffer_head *ref_root_bh, 1458 struct buffer_head *ref_leaf_bh, 1459 struct ocfs2_alloc_context *meta_ac) 1460 { 1461 int ret; 1462 struct buffer_head *expand_bh = NULL; 1463 1464 if (ref_root_bh == ref_leaf_bh) { 1465 /* 1466 * the old root bh hasn't been expanded to a b-tree, 1467 * so expand it first. 1468 */ 1469 ret = ocfs2_expand_inline_ref_root(handle, ci, ref_root_bh, 1470 &expand_bh, meta_ac); 1471 if (ret) { 1472 mlog_errno(ret); 1473 goto out; 1474 } 1475 } else { 1476 expand_bh = ref_leaf_bh; 1477 get_bh(expand_bh); 1478 } 1479 1480 1481 /* Now add a new refcount block into the tree.*/ 1482 ret = ocfs2_new_leaf_refcount_block(handle, ci, ref_root_bh, 1483 expand_bh, meta_ac); 1484 if (ret) 1485 mlog_errno(ret); 1486 out: 1487 brelse(expand_bh); 1488 return ret; 1489 } 1490 1491 /* 1492 * Adjust the extent rec in b-tree representing ref_leaf_bh. 1493 * 1494 * Only called when we have inserted a new refcount rec at index 0 1495 * which means ocfs2_extent_rec.e_cpos may need some change. 1496 */ 1497 static int ocfs2_adjust_refcount_rec(handle_t *handle, 1498 struct ocfs2_caching_info *ci, 1499 struct buffer_head *ref_root_bh, 1500 struct buffer_head *ref_leaf_bh, 1501 struct ocfs2_refcount_rec *rec) 1502 { 1503 int ret = 0, i; 1504 u32 new_cpos, old_cpos; 1505 struct ocfs2_path *path = NULL; 1506 struct ocfs2_extent_tree et; 1507 struct ocfs2_refcount_block *rb = 1508 (struct ocfs2_refcount_block *)ref_root_bh->b_data; 1509 struct ocfs2_extent_list *el; 1510 1511 if (!(le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)) 1512 goto out; 1513 1514 rb = (struct ocfs2_refcount_block *)ref_leaf_bh->b_data; 1515 old_cpos = le32_to_cpu(rb->rf_cpos); 1516 new_cpos = le64_to_cpu(rec->r_cpos) & OCFS2_32BIT_POS_MASK; 1517 if (old_cpos <= new_cpos) 1518 goto out; 1519 1520 ocfs2_init_refcount_extent_tree(&et, ci, ref_root_bh); 1521 1522 path = ocfs2_new_path_from_et(&et); 1523 if (!path) { 1524 ret = -ENOMEM; 1525 mlog_errno(ret); 1526 goto out; 1527 } 1528 1529 ret = ocfs2_find_path(ci, path, old_cpos); 1530 if (ret) { 1531 mlog_errno(ret); 1532 goto out; 1533 } 1534 1535 /* 1536 * 2 more credits, one for the leaf refcount block, one for 1537 * the extent block contains the extent rec. 1538 */ 1539 ret = ocfs2_extend_trans(handle, handle->h_buffer_credits + 2); 1540 if (ret < 0) { 1541 mlog_errno(ret); 1542 goto out; 1543 } 1544 1545 ret = ocfs2_journal_access_rb(handle, ci, ref_leaf_bh, 1546 OCFS2_JOURNAL_ACCESS_WRITE); 1547 if (ret < 0) { 1548 mlog_errno(ret); 1549 goto out; 1550 } 1551 1552 ret = ocfs2_journal_access_eb(handle, ci, path_leaf_bh(path), 1553 OCFS2_JOURNAL_ACCESS_WRITE); 1554 if (ret < 0) { 1555 mlog_errno(ret); 1556 goto out; 1557 } 1558 1559 /* change the leaf extent block first. */ 1560 el = path_leaf_el(path); 1561 1562 for (i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) 1563 if (le32_to_cpu(el->l_recs[i].e_cpos) == old_cpos) 1564 break; 1565 1566 BUG_ON(i == le16_to_cpu(el->l_next_free_rec)); 1567 1568 el->l_recs[i].e_cpos = cpu_to_le32(new_cpos); 1569 1570 /* change the r_cpos in the leaf block. */ 1571 rb->rf_cpos = cpu_to_le32(new_cpos); 1572 1573 ocfs2_journal_dirty(handle, path_leaf_bh(path)); 1574 ocfs2_journal_dirty(handle, ref_leaf_bh); 1575 1576 out: 1577 ocfs2_free_path(path); 1578 return ret; 1579 } 1580 1581 static int ocfs2_insert_refcount_rec(handle_t *handle, 1582 struct ocfs2_caching_info *ci, 1583 struct buffer_head *ref_root_bh, 1584 struct buffer_head *ref_leaf_bh, 1585 struct ocfs2_refcount_rec *rec, 1586 int index, 1587 struct ocfs2_alloc_context *meta_ac) 1588 { 1589 int ret; 1590 struct ocfs2_refcount_block *rb = 1591 (struct ocfs2_refcount_block *)ref_leaf_bh->b_data; 1592 struct ocfs2_refcount_list *rf_list = &rb->rf_records; 1593 struct buffer_head *new_bh = NULL; 1594 1595 BUG_ON(le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL); 1596 1597 if (rf_list->rl_used == rf_list->rl_count) { 1598 u64 cpos = le64_to_cpu(rec->r_cpos); 1599 u32 len = le32_to_cpu(rec->r_clusters); 1600 1601 ret = ocfs2_expand_refcount_tree(handle, ci, ref_root_bh, 1602 ref_leaf_bh, meta_ac); 1603 if (ret) { 1604 mlog_errno(ret); 1605 goto out; 1606 } 1607 1608 ret = ocfs2_get_refcount_rec(ci, ref_root_bh, 1609 cpos, len, NULL, &index, 1610 &new_bh); 1611 if (ret) { 1612 mlog_errno(ret); 1613 goto out; 1614 } 1615 1616 ref_leaf_bh = new_bh; 1617 rb = (struct ocfs2_refcount_block *)ref_leaf_bh->b_data; 1618 rf_list = &rb->rf_records; 1619 } 1620 1621 ret = ocfs2_journal_access_rb(handle, ci, ref_leaf_bh, 1622 OCFS2_JOURNAL_ACCESS_WRITE); 1623 if (ret) { 1624 mlog_errno(ret); 1625 goto out; 1626 } 1627 1628 if (index < le16_to_cpu(rf_list->rl_used)) 1629 memmove(&rf_list->rl_recs[index + 1], 1630 &rf_list->rl_recs[index], 1631 (le16_to_cpu(rf_list->rl_used) - index) * 1632 sizeof(struct ocfs2_refcount_rec)); 1633 1634 mlog(0, "insert refcount record start %llu, len %u, count %u " 1635 "to leaf block %llu at index %d\n", 1636 (unsigned long long)le64_to_cpu(rec->r_cpos), 1637 le32_to_cpu(rec->r_clusters), le32_to_cpu(rec->r_refcount), 1638 (unsigned long long)ref_leaf_bh->b_blocknr, index); 1639 1640 rf_list->rl_recs[index] = *rec; 1641 1642 le16_add_cpu(&rf_list->rl_used, 1); 1643 1644 ocfs2_refcount_rec_merge(rb, index); 1645 1646 ret = ocfs2_journal_dirty(handle, ref_leaf_bh); 1647 if (ret) { 1648 mlog_errno(ret); 1649 goto out; 1650 } 1651 1652 if (index == 0) { 1653 ret = ocfs2_adjust_refcount_rec(handle, ci, 1654 ref_root_bh, 1655 ref_leaf_bh, rec); 1656 if (ret) 1657 mlog_errno(ret); 1658 } 1659 out: 1660 brelse(new_bh); 1661 return ret; 1662 } 1663 1664 /* 1665 * Split the refcount_rec indexed by "index" in ref_leaf_bh. 1666 * This is much simple than our b-tree code. 1667 * split_rec is the new refcount rec we want to insert. 1668 * If split_rec->r_refcount > 0, we are changing the refcount(in case we 1669 * increase refcount or decrease a refcount to non-zero). 1670 * If split_rec->r_refcount == 0, we are punching a hole in current refcount 1671 * rec( in case we decrease a refcount to zero). 1672 */ 1673 static int ocfs2_split_refcount_rec(handle_t *handle, 1674 struct ocfs2_caching_info *ci, 1675 struct buffer_head *ref_root_bh, 1676 struct buffer_head *ref_leaf_bh, 1677 struct ocfs2_refcount_rec *split_rec, 1678 int index, 1679 struct ocfs2_alloc_context *meta_ac, 1680 struct ocfs2_cached_dealloc_ctxt *dealloc) 1681 { 1682 int ret, recs_need; 1683 u32 len; 1684 struct ocfs2_refcount_block *rb = 1685 (struct ocfs2_refcount_block *)ref_leaf_bh->b_data; 1686 struct ocfs2_refcount_list *rf_list = &rb->rf_records; 1687 struct ocfs2_refcount_rec *orig_rec = &rf_list->rl_recs[index]; 1688 struct ocfs2_refcount_rec *tail_rec = NULL; 1689 struct buffer_head *new_bh = NULL; 1690 1691 BUG_ON(le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL); 1692 1693 mlog(0, "original r_pos %llu, cluster %u, split %llu, cluster %u\n", 1694 le64_to_cpu(orig_rec->r_cpos), le32_to_cpu(orig_rec->r_clusters), 1695 le64_to_cpu(split_rec->r_cpos), 1696 le32_to_cpu(split_rec->r_clusters)); 1697 1698 /* 1699 * If we just need to split the header or tail clusters, 1700 * no more recs are needed, just split is OK. 1701 * Otherwise we at least need one new recs. 1702 */ 1703 if (!split_rec->r_refcount && 1704 (split_rec->r_cpos == orig_rec->r_cpos || 1705 le64_to_cpu(split_rec->r_cpos) + 1706 le32_to_cpu(split_rec->r_clusters) == 1707 le64_to_cpu(orig_rec->r_cpos) + le32_to_cpu(orig_rec->r_clusters))) 1708 recs_need = 0; 1709 else 1710 recs_need = 1; 1711 1712 /* 1713 * We need one more rec if we split in the middle and the new rec have 1714 * some refcount in it. 1715 */ 1716 if (split_rec->r_refcount && 1717 (split_rec->r_cpos != orig_rec->r_cpos && 1718 le64_to_cpu(split_rec->r_cpos) + 1719 le32_to_cpu(split_rec->r_clusters) != 1720 le64_to_cpu(orig_rec->r_cpos) + le32_to_cpu(orig_rec->r_clusters))) 1721 recs_need++; 1722 1723 /* If the leaf block don't have enough record, expand it. */ 1724 if (le16_to_cpu(rf_list->rl_used) + recs_need > rf_list->rl_count) { 1725 struct ocfs2_refcount_rec tmp_rec; 1726 u64 cpos = le64_to_cpu(orig_rec->r_cpos); 1727 len = le32_to_cpu(orig_rec->r_clusters); 1728 ret = ocfs2_expand_refcount_tree(handle, ci, ref_root_bh, 1729 ref_leaf_bh, meta_ac); 1730 if (ret) { 1731 mlog_errno(ret); 1732 goto out; 1733 } 1734 1735 /* 1736 * We have to re-get it since now cpos may be moved to 1737 * another leaf block. 1738 */ 1739 ret = ocfs2_get_refcount_rec(ci, ref_root_bh, 1740 cpos, len, &tmp_rec, &index, 1741 &new_bh); 1742 if (ret) { 1743 mlog_errno(ret); 1744 goto out; 1745 } 1746 1747 ref_leaf_bh = new_bh; 1748 rb = (struct ocfs2_refcount_block *)ref_leaf_bh->b_data; 1749 rf_list = &rb->rf_records; 1750 orig_rec = &rf_list->rl_recs[index]; 1751 } 1752 1753 ret = ocfs2_journal_access_rb(handle, ci, ref_leaf_bh, 1754 OCFS2_JOURNAL_ACCESS_WRITE); 1755 if (ret) { 1756 mlog_errno(ret); 1757 goto out; 1758 } 1759 1760 /* 1761 * We have calculated out how many new records we need and store 1762 * in recs_need, so spare enough space first by moving the records 1763 * after "index" to the end. 1764 */ 1765 if (index != le16_to_cpu(rf_list->rl_used) - 1) 1766 memmove(&rf_list->rl_recs[index + 1 + recs_need], 1767 &rf_list->rl_recs[index + 1], 1768 (le16_to_cpu(rf_list->rl_used) - index - 1) * 1769 sizeof(struct ocfs2_refcount_rec)); 1770 1771 len = (le64_to_cpu(orig_rec->r_cpos) + 1772 le32_to_cpu(orig_rec->r_clusters)) - 1773 (le64_to_cpu(split_rec->r_cpos) + 1774 le32_to_cpu(split_rec->r_clusters)); 1775 1776 /* 1777 * If we have "len", the we will split in the tail and move it 1778 * to the end of the space we have just spared. 1779 */ 1780 if (len) { 1781 tail_rec = &rf_list->rl_recs[index + recs_need]; 1782 1783 memcpy(tail_rec, orig_rec, sizeof(struct ocfs2_refcount_rec)); 1784 le64_add_cpu(&tail_rec->r_cpos, 1785 le32_to_cpu(tail_rec->r_clusters) - len); 1786 tail_rec->r_clusters = le32_to_cpu(len); 1787 } 1788 1789 /* 1790 * If the split pos isn't the same as the original one, we need to 1791 * split in the head. 1792 * 1793 * Note: We have the chance that split_rec.r_refcount = 0, 1794 * recs_need = 0 and len > 0, which means we just cut the head from 1795 * the orig_rec and in that case we have done some modification in 1796 * orig_rec above, so the check for r_cpos is faked. 1797 */ 1798 if (split_rec->r_cpos != orig_rec->r_cpos && tail_rec != orig_rec) { 1799 len = le64_to_cpu(split_rec->r_cpos) - 1800 le64_to_cpu(orig_rec->r_cpos); 1801 orig_rec->r_clusters = cpu_to_le32(len); 1802 index++; 1803 } 1804 1805 le16_add_cpu(&rf_list->rl_used, recs_need); 1806 1807 if (split_rec->r_refcount) { 1808 rf_list->rl_recs[index] = *split_rec; 1809 mlog(0, "insert refcount record start %llu, len %u, count %u " 1810 "to leaf block %llu at index %d\n", 1811 (unsigned long long)le64_to_cpu(split_rec->r_cpos), 1812 le32_to_cpu(split_rec->r_clusters), 1813 le32_to_cpu(split_rec->r_refcount), 1814 (unsigned long long)ref_leaf_bh->b_blocknr, index); 1815 1816 ocfs2_refcount_rec_merge(rb, index); 1817 } 1818 1819 ret = ocfs2_journal_dirty(handle, ref_leaf_bh); 1820 if (ret) 1821 mlog_errno(ret); 1822 1823 out: 1824 brelse(new_bh); 1825 return ret; 1826 } 1827 1828 static int __ocfs2_increase_refcount(handle_t *handle, 1829 struct ocfs2_caching_info *ci, 1830 struct buffer_head *ref_root_bh, 1831 u64 cpos, u32 len, 1832 struct ocfs2_alloc_context *meta_ac, 1833 struct ocfs2_cached_dealloc_ctxt *dealloc) 1834 { 1835 int ret = 0, index; 1836 struct buffer_head *ref_leaf_bh = NULL; 1837 struct ocfs2_refcount_rec rec; 1838 unsigned int set_len = 0; 1839 1840 mlog(0, "Tree owner %llu, add refcount start %llu, len %u\n", 1841 (unsigned long long)ocfs2_metadata_cache_owner(ci), 1842 (unsigned long long)cpos, len); 1843 1844 while (len) { 1845 ret = ocfs2_get_refcount_rec(ci, ref_root_bh, 1846 cpos, len, &rec, &index, 1847 &ref_leaf_bh); 1848 if (ret) { 1849 mlog_errno(ret); 1850 goto out; 1851 } 1852 1853 set_len = le32_to_cpu(rec.r_clusters); 1854 1855 /* 1856 * Here we may meet with 3 situations: 1857 * 1858 * 1. If we find an already existing record, and the length 1859 * is the same, cool, we just need to increase the r_refcount 1860 * and it is OK. 1861 * 2. If we find a hole, just insert it with r_refcount = 1. 1862 * 3. If we are in the middle of one extent record, split 1863 * it. 1864 */ 1865 if (rec.r_refcount && le64_to_cpu(rec.r_cpos) == cpos && 1866 set_len <= len) { 1867 mlog(0, "increase refcount rec, start %llu, len %u, " 1868 "count %u\n", (unsigned long long)cpos, set_len, 1869 le32_to_cpu(rec.r_refcount)); 1870 ret = ocfs2_change_refcount_rec(handle, ci, 1871 ref_leaf_bh, index, 1); 1872 if (ret) { 1873 mlog_errno(ret); 1874 goto out; 1875 } 1876 } else if (!rec.r_refcount) { 1877 rec.r_refcount = cpu_to_le32(1); 1878 1879 mlog(0, "insert refcount rec, start %llu, len %u\n", 1880 (unsigned long long)le64_to_cpu(rec.r_cpos), 1881 set_len); 1882 ret = ocfs2_insert_refcount_rec(handle, ci, ref_root_bh, 1883 ref_leaf_bh, 1884 &rec, index, meta_ac); 1885 if (ret) { 1886 mlog_errno(ret); 1887 goto out; 1888 } 1889 } else { 1890 set_len = min((u64)(cpos + len), 1891 le64_to_cpu(rec.r_cpos) + set_len) - cpos; 1892 rec.r_cpos = cpu_to_le64(cpos); 1893 rec.r_clusters = cpu_to_le32(set_len); 1894 le32_add_cpu(&rec.r_refcount, 1); 1895 1896 mlog(0, "split refcount rec, start %llu, " 1897 "len %u, count %u\n", 1898 (unsigned long long)le64_to_cpu(rec.r_cpos), 1899 set_len, le32_to_cpu(rec.r_refcount)); 1900 ret = ocfs2_split_refcount_rec(handle, ci, 1901 ref_root_bh, ref_leaf_bh, 1902 &rec, index, 1903 meta_ac, dealloc); 1904 if (ret) { 1905 mlog_errno(ret); 1906 goto out; 1907 } 1908 } 1909 1910 cpos += set_len; 1911 len -= set_len; 1912 brelse(ref_leaf_bh); 1913 ref_leaf_bh = NULL; 1914 } 1915 1916 out: 1917 brelse(ref_leaf_bh); 1918 return ret; 1919 } 1920 1921 static int ocfs2_remove_refcount_extent(handle_t *handle, 1922 struct ocfs2_caching_info *ci, 1923 struct buffer_head *ref_root_bh, 1924 struct buffer_head *ref_leaf_bh, 1925 struct ocfs2_alloc_context *meta_ac, 1926 struct ocfs2_cached_dealloc_ctxt *dealloc) 1927 { 1928 int ret; 1929 struct super_block *sb = ocfs2_metadata_cache_get_super(ci); 1930 struct ocfs2_refcount_block *rb = 1931 (struct ocfs2_refcount_block *)ref_leaf_bh->b_data; 1932 struct ocfs2_extent_tree et; 1933 1934 BUG_ON(rb->rf_records.rl_used); 1935 1936 ocfs2_init_refcount_extent_tree(&et, ci, ref_root_bh); 1937 ret = ocfs2_remove_extent(handle, &et, le32_to_cpu(rb->rf_cpos), 1938 1, meta_ac, dealloc); 1939 if (ret) { 1940 mlog_errno(ret); 1941 goto out; 1942 } 1943 1944 ocfs2_remove_from_cache(ci, ref_leaf_bh); 1945 1946 /* 1947 * add the freed block to the dealloc so that it will be freed 1948 * when we run dealloc. 1949 */ 1950 ret = ocfs2_cache_block_dealloc(dealloc, EXTENT_ALLOC_SYSTEM_INODE, 1951 le16_to_cpu(rb->rf_suballoc_slot), 1952 le64_to_cpu(rb->rf_blkno), 1953 le16_to_cpu(rb->rf_suballoc_bit)); 1954 if (ret) { 1955 mlog_errno(ret); 1956 goto out; 1957 } 1958 1959 ret = ocfs2_journal_access_rb(handle, ci, ref_root_bh, 1960 OCFS2_JOURNAL_ACCESS_WRITE); 1961 if (ret) { 1962 mlog_errno(ret); 1963 goto out; 1964 } 1965 1966 rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data; 1967 1968 le32_add_cpu(&rb->rf_clusters, -1); 1969 1970 /* 1971 * check whether we need to restore the root refcount block if 1972 * there is no leaf extent block at atll. 1973 */ 1974 if (!rb->rf_list.l_next_free_rec) { 1975 BUG_ON(rb->rf_clusters); 1976 1977 mlog(0, "reset refcount tree root %llu to be a record block.\n", 1978 (unsigned long long)ref_root_bh->b_blocknr); 1979 1980 rb->rf_flags = 0; 1981 rb->rf_parent = 0; 1982 rb->rf_cpos = 0; 1983 memset(&rb->rf_records, 0, sb->s_blocksize - 1984 offsetof(struct ocfs2_refcount_block, rf_records)); 1985 rb->rf_records.rl_count = 1986 cpu_to_le16(ocfs2_refcount_recs_per_rb(sb)); 1987 } 1988 1989 ocfs2_journal_dirty(handle, ref_root_bh); 1990 1991 out: 1992 return ret; 1993 } 1994 1995 static int ocfs2_decrease_refcount_rec(handle_t *handle, 1996 struct ocfs2_caching_info *ci, 1997 struct buffer_head *ref_root_bh, 1998 struct buffer_head *ref_leaf_bh, 1999 int index, u64 cpos, unsigned int len, 2000 struct ocfs2_alloc_context *meta_ac, 2001 struct ocfs2_cached_dealloc_ctxt *dealloc) 2002 { 2003 int ret; 2004 struct ocfs2_refcount_block *rb = 2005 (struct ocfs2_refcount_block *)ref_leaf_bh->b_data; 2006 struct ocfs2_refcount_rec *rec = &rb->rf_records.rl_recs[index]; 2007 2008 BUG_ON(cpos < le64_to_cpu(rec->r_cpos)); 2009 BUG_ON(cpos + len > 2010 le64_to_cpu(rec->r_cpos) + le32_to_cpu(rec->r_clusters)); 2011 2012 if (cpos == le64_to_cpu(rec->r_cpos) && 2013 len == le32_to_cpu(rec->r_clusters)) 2014 ret = ocfs2_change_refcount_rec(handle, ci, 2015 ref_leaf_bh, index, -1); 2016 else { 2017 struct ocfs2_refcount_rec split = *rec; 2018 split.r_cpos = cpu_to_le64(cpos); 2019 split.r_clusters = cpu_to_le32(len); 2020 2021 le32_add_cpu(&split.r_refcount, -1); 2022 2023 mlog(0, "split refcount rec, start %llu, " 2024 "len %u, count %u, original start %llu, len %u\n", 2025 (unsigned long long)le64_to_cpu(split.r_cpos), 2026 len, le32_to_cpu(split.r_refcount), 2027 (unsigned long long)le64_to_cpu(rec->r_cpos), 2028 le32_to_cpu(rec->r_clusters)); 2029 ret = ocfs2_split_refcount_rec(handle, ci, 2030 ref_root_bh, ref_leaf_bh, 2031 &split, index, 2032 meta_ac, dealloc); 2033 } 2034 2035 if (ret) { 2036 mlog_errno(ret); 2037 goto out; 2038 } 2039 2040 /* Remove the leaf refcount block if it contains no refcount record. */ 2041 if (!rb->rf_records.rl_used && ref_leaf_bh != ref_root_bh) { 2042 ret = ocfs2_remove_refcount_extent(handle, ci, ref_root_bh, 2043 ref_leaf_bh, meta_ac, 2044 dealloc); 2045 if (ret) 2046 mlog_errno(ret); 2047 } 2048 2049 out: 2050 return ret; 2051 } 2052 2053 static int __ocfs2_decrease_refcount(handle_t *handle, 2054 struct ocfs2_caching_info *ci, 2055 struct buffer_head *ref_root_bh, 2056 u64 cpos, u32 len, 2057 struct ocfs2_alloc_context *meta_ac, 2058 struct ocfs2_cached_dealloc_ctxt *dealloc) 2059 { 2060 int ret = 0, index = 0; 2061 struct ocfs2_refcount_rec rec; 2062 unsigned int r_count = 0, r_len; 2063 struct super_block *sb = ocfs2_metadata_cache_get_super(ci); 2064 struct buffer_head *ref_leaf_bh = NULL; 2065 2066 mlog(0, "Tree owner %llu, decrease refcount start %llu, len %u\n", 2067 (unsigned long long)ocfs2_metadata_cache_owner(ci), 2068 (unsigned long long)cpos, len); 2069 2070 while (len) { 2071 ret = ocfs2_get_refcount_rec(ci, ref_root_bh, 2072 cpos, len, &rec, &index, 2073 &ref_leaf_bh); 2074 if (ret) { 2075 mlog_errno(ret); 2076 goto out; 2077 } 2078 2079 r_count = le32_to_cpu(rec.r_refcount); 2080 BUG_ON(r_count == 0); 2081 2082 r_len = min((u64)(cpos + len), le64_to_cpu(rec.r_cpos) + 2083 le32_to_cpu(rec.r_clusters)) - cpos; 2084 2085 ret = ocfs2_decrease_refcount_rec(handle, ci, ref_root_bh, 2086 ref_leaf_bh, index, 2087 cpos, r_len, 2088 meta_ac, dealloc); 2089 if (ret) { 2090 mlog_errno(ret); 2091 goto out; 2092 } 2093 2094 if (le32_to_cpu(rec.r_refcount) == 1) { 2095 ret = ocfs2_cache_cluster_dealloc(dealloc, 2096 ocfs2_clusters_to_blocks(sb, cpos), 2097 r_len); 2098 if (ret) { 2099 mlog_errno(ret); 2100 goto out; 2101 } 2102 } 2103 2104 cpos += r_len; 2105 len -= r_len; 2106 brelse(ref_leaf_bh); 2107 ref_leaf_bh = NULL; 2108 } 2109 2110 out: 2111 brelse(ref_leaf_bh); 2112 return ret; 2113 } 2114 2115 /* Caller must hold refcount tree lock. */ 2116 int ocfs2_decrease_refcount(struct inode *inode, 2117 handle_t *handle, u32 cpos, u32 len, 2118 struct ocfs2_alloc_context *meta_ac, 2119 struct ocfs2_cached_dealloc_ctxt *dealloc) 2120 { 2121 int ret; 2122 u64 ref_blkno; 2123 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2124 struct buffer_head *ref_root_bh = NULL; 2125 struct ocfs2_refcount_tree *tree; 2126 2127 BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL)); 2128 2129 ret = ocfs2_get_refcount_block(inode, &ref_blkno); 2130 if (ret) { 2131 mlog_errno(ret); 2132 goto out; 2133 } 2134 2135 ret = ocfs2_get_refcount_tree(OCFS2_SB(inode->i_sb), ref_blkno, &tree); 2136 if (ret) { 2137 mlog_errno(ret); 2138 goto out; 2139 } 2140 2141 ret = ocfs2_read_refcount_block(&tree->rf_ci, tree->rf_blkno, 2142 &ref_root_bh); 2143 if (ret) { 2144 mlog_errno(ret); 2145 goto out; 2146 } 2147 2148 ret = __ocfs2_decrease_refcount(handle, &tree->rf_ci, ref_root_bh, 2149 cpos, len, meta_ac, dealloc); 2150 if (ret) 2151 mlog_errno(ret); 2152 out: 2153 brelse(ref_root_bh); 2154 return ret; 2155 } 2156