1 /* -*- mode: c; c-basic-offset: 8; -*- 2 * vim: noexpandtab sw=8 ts=8 sts=0: 3 * 4 * refcounttree.c 5 * 6 * Copyright (C) 2009 Oracle. All rights reserved. 7 * 8 * This program is free software; you can redistribute it and/or 9 * modify it under the terms of the GNU General Public 10 * License version 2 as published by the Free Software Foundation. 11 * 12 * This program is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 * General Public License for more details. 16 */ 17 18 #include <linux/sort.h> 19 #define MLOG_MASK_PREFIX ML_REFCOUNT 20 #include <cluster/masklog.h> 21 #include "ocfs2.h" 22 #include "inode.h" 23 #include "alloc.h" 24 #include "suballoc.h" 25 #include "journal.h" 26 #include "uptodate.h" 27 #include "super.h" 28 #include "buffer_head_io.h" 29 #include "blockcheck.h" 30 #include "refcounttree.h" 31 #include "sysfile.h" 32 #include "dlmglue.h" 33 #include "extent_map.h" 34 #include "aops.h" 35 #include "xattr.h" 36 37 #include <linux/bio.h> 38 #include <linux/blkdev.h> 39 #include <linux/gfp.h> 40 #include <linux/slab.h> 41 #include <linux/writeback.h> 42 #include <linux/pagevec.h> 43 #include <linux/swap.h> 44 45 struct ocfs2_cow_context { 46 struct inode *inode; 47 u32 cow_start; 48 u32 cow_len; 49 struct ocfs2_extent_tree data_et; 50 struct ocfs2_refcount_tree *ref_tree; 51 struct buffer_head *ref_root_bh; 52 struct ocfs2_alloc_context *meta_ac; 53 struct ocfs2_alloc_context *data_ac; 54 struct ocfs2_cached_dealloc_ctxt dealloc; 55 void *cow_object; 56 struct ocfs2_post_refcount *post_refcount; 57 int extra_credits; 58 int (*get_clusters)(struct ocfs2_cow_context *context, 59 u32 v_cluster, u32 *p_cluster, 60 u32 *num_clusters, 61 unsigned int *extent_flags); 62 int (*cow_duplicate_clusters)(handle_t *handle, 63 struct ocfs2_cow_context *context, 64 u32 cpos, u32 old_cluster, 65 u32 new_cluster, u32 new_len); 66 }; 67 68 static inline struct ocfs2_refcount_tree * 69 cache_info_to_refcount(struct ocfs2_caching_info *ci) 70 { 71 return container_of(ci, struct ocfs2_refcount_tree, rf_ci); 72 } 73 74 static int ocfs2_validate_refcount_block(struct super_block *sb, 75 struct buffer_head *bh) 76 { 77 int rc; 78 struct ocfs2_refcount_block *rb = 79 (struct ocfs2_refcount_block *)bh->b_data; 80 81 mlog(0, "Validating refcount block %llu\n", 82 (unsigned long long)bh->b_blocknr); 83 84 BUG_ON(!buffer_uptodate(bh)); 85 86 /* 87 * If the ecc fails, we return the error but otherwise 88 * leave the filesystem running. We know any error is 89 * local to this block. 90 */ 91 rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &rb->rf_check); 92 if (rc) { 93 mlog(ML_ERROR, "Checksum failed for refcount block %llu\n", 94 (unsigned long long)bh->b_blocknr); 95 return rc; 96 } 97 98 99 if (!OCFS2_IS_VALID_REFCOUNT_BLOCK(rb)) { 100 ocfs2_error(sb, 101 "Refcount block #%llu has bad signature %.*s", 102 (unsigned long long)bh->b_blocknr, 7, 103 rb->rf_signature); 104 return -EINVAL; 105 } 106 107 if (le64_to_cpu(rb->rf_blkno) != bh->b_blocknr) { 108 ocfs2_error(sb, 109 "Refcount block #%llu has an invalid rf_blkno " 110 "of %llu", 111 (unsigned long long)bh->b_blocknr, 112 (unsigned long long)le64_to_cpu(rb->rf_blkno)); 113 return -EINVAL; 114 } 115 116 if (le32_to_cpu(rb->rf_fs_generation) != OCFS2_SB(sb)->fs_generation) { 117 ocfs2_error(sb, 118 "Refcount block #%llu has an invalid " 119 "rf_fs_generation of #%u", 120 (unsigned long long)bh->b_blocknr, 121 le32_to_cpu(rb->rf_fs_generation)); 122 return -EINVAL; 123 } 124 125 return 0; 126 } 127 128 static int ocfs2_read_refcount_block(struct ocfs2_caching_info *ci, 129 u64 rb_blkno, 130 struct buffer_head **bh) 131 { 132 int rc; 133 struct buffer_head *tmp = *bh; 134 135 rc = ocfs2_read_block(ci, rb_blkno, &tmp, 136 ocfs2_validate_refcount_block); 137 138 /* If ocfs2_read_block() got us a new bh, pass it up. */ 139 if (!rc && !*bh) 140 *bh = tmp; 141 142 return rc; 143 } 144 145 static u64 ocfs2_refcount_cache_owner(struct ocfs2_caching_info *ci) 146 { 147 struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci); 148 149 return rf->rf_blkno; 150 } 151 152 static struct super_block * 153 ocfs2_refcount_cache_get_super(struct ocfs2_caching_info *ci) 154 { 155 struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci); 156 157 return rf->rf_sb; 158 } 159 160 static void ocfs2_refcount_cache_lock(struct ocfs2_caching_info *ci) 161 { 162 struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci); 163 164 spin_lock(&rf->rf_lock); 165 } 166 167 static void ocfs2_refcount_cache_unlock(struct ocfs2_caching_info *ci) 168 { 169 struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci); 170 171 spin_unlock(&rf->rf_lock); 172 } 173 174 static void ocfs2_refcount_cache_io_lock(struct ocfs2_caching_info *ci) 175 { 176 struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci); 177 178 mutex_lock(&rf->rf_io_mutex); 179 } 180 181 static void ocfs2_refcount_cache_io_unlock(struct ocfs2_caching_info *ci) 182 { 183 struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci); 184 185 mutex_unlock(&rf->rf_io_mutex); 186 } 187 188 static const struct ocfs2_caching_operations ocfs2_refcount_caching_ops = { 189 .co_owner = ocfs2_refcount_cache_owner, 190 .co_get_super = ocfs2_refcount_cache_get_super, 191 .co_cache_lock = ocfs2_refcount_cache_lock, 192 .co_cache_unlock = ocfs2_refcount_cache_unlock, 193 .co_io_lock = ocfs2_refcount_cache_io_lock, 194 .co_io_unlock = ocfs2_refcount_cache_io_unlock, 195 }; 196 197 static struct ocfs2_refcount_tree * 198 ocfs2_find_refcount_tree(struct ocfs2_super *osb, u64 blkno) 199 { 200 struct rb_node *n = osb->osb_rf_lock_tree.rb_node; 201 struct ocfs2_refcount_tree *tree = NULL; 202 203 while (n) { 204 tree = rb_entry(n, struct ocfs2_refcount_tree, rf_node); 205 206 if (blkno < tree->rf_blkno) 207 n = n->rb_left; 208 else if (blkno > tree->rf_blkno) 209 n = n->rb_right; 210 else 211 return tree; 212 } 213 214 return NULL; 215 } 216 217 /* osb_lock is already locked. */ 218 static void ocfs2_insert_refcount_tree(struct ocfs2_super *osb, 219 struct ocfs2_refcount_tree *new) 220 { 221 u64 rf_blkno = new->rf_blkno; 222 struct rb_node *parent = NULL; 223 struct rb_node **p = &osb->osb_rf_lock_tree.rb_node; 224 struct ocfs2_refcount_tree *tmp; 225 226 while (*p) { 227 parent = *p; 228 229 tmp = rb_entry(parent, struct ocfs2_refcount_tree, 230 rf_node); 231 232 if (rf_blkno < tmp->rf_blkno) 233 p = &(*p)->rb_left; 234 else if (rf_blkno > tmp->rf_blkno) 235 p = &(*p)->rb_right; 236 else { 237 /* This should never happen! */ 238 mlog(ML_ERROR, "Duplicate refcount block %llu found!\n", 239 (unsigned long long)rf_blkno); 240 BUG(); 241 } 242 } 243 244 rb_link_node(&new->rf_node, parent, p); 245 rb_insert_color(&new->rf_node, &osb->osb_rf_lock_tree); 246 } 247 248 static void ocfs2_free_refcount_tree(struct ocfs2_refcount_tree *tree) 249 { 250 ocfs2_metadata_cache_exit(&tree->rf_ci); 251 ocfs2_simple_drop_lockres(OCFS2_SB(tree->rf_sb), &tree->rf_lockres); 252 ocfs2_lock_res_free(&tree->rf_lockres); 253 kfree(tree); 254 } 255 256 static inline void 257 ocfs2_erase_refcount_tree_from_list_no_lock(struct ocfs2_super *osb, 258 struct ocfs2_refcount_tree *tree) 259 { 260 rb_erase(&tree->rf_node, &osb->osb_rf_lock_tree); 261 if (osb->osb_ref_tree_lru && osb->osb_ref_tree_lru == tree) 262 osb->osb_ref_tree_lru = NULL; 263 } 264 265 static void ocfs2_erase_refcount_tree_from_list(struct ocfs2_super *osb, 266 struct ocfs2_refcount_tree *tree) 267 { 268 spin_lock(&osb->osb_lock); 269 ocfs2_erase_refcount_tree_from_list_no_lock(osb, tree); 270 spin_unlock(&osb->osb_lock); 271 } 272 273 void ocfs2_kref_remove_refcount_tree(struct kref *kref) 274 { 275 struct ocfs2_refcount_tree *tree = 276 container_of(kref, struct ocfs2_refcount_tree, rf_getcnt); 277 278 ocfs2_free_refcount_tree(tree); 279 } 280 281 static inline void 282 ocfs2_refcount_tree_get(struct ocfs2_refcount_tree *tree) 283 { 284 kref_get(&tree->rf_getcnt); 285 } 286 287 static inline void 288 ocfs2_refcount_tree_put(struct ocfs2_refcount_tree *tree) 289 { 290 kref_put(&tree->rf_getcnt, ocfs2_kref_remove_refcount_tree); 291 } 292 293 static inline void ocfs2_init_refcount_tree_ci(struct ocfs2_refcount_tree *new, 294 struct super_block *sb) 295 { 296 ocfs2_metadata_cache_init(&new->rf_ci, &ocfs2_refcount_caching_ops); 297 mutex_init(&new->rf_io_mutex); 298 new->rf_sb = sb; 299 spin_lock_init(&new->rf_lock); 300 } 301 302 static inline void ocfs2_init_refcount_tree_lock(struct ocfs2_super *osb, 303 struct ocfs2_refcount_tree *new, 304 u64 rf_blkno, u32 generation) 305 { 306 init_rwsem(&new->rf_sem); 307 ocfs2_refcount_lock_res_init(&new->rf_lockres, osb, 308 rf_blkno, generation); 309 } 310 311 static struct ocfs2_refcount_tree* 312 ocfs2_allocate_refcount_tree(struct ocfs2_super *osb, u64 rf_blkno) 313 { 314 struct ocfs2_refcount_tree *new; 315 316 new = kzalloc(sizeof(struct ocfs2_refcount_tree), GFP_NOFS); 317 if (!new) 318 return NULL; 319 320 new->rf_blkno = rf_blkno; 321 kref_init(&new->rf_getcnt); 322 ocfs2_init_refcount_tree_ci(new, osb->sb); 323 324 return new; 325 } 326 327 static int ocfs2_get_refcount_tree(struct ocfs2_super *osb, u64 rf_blkno, 328 struct ocfs2_refcount_tree **ret_tree) 329 { 330 int ret = 0; 331 struct ocfs2_refcount_tree *tree, *new = NULL; 332 struct buffer_head *ref_root_bh = NULL; 333 struct ocfs2_refcount_block *ref_rb; 334 335 spin_lock(&osb->osb_lock); 336 if (osb->osb_ref_tree_lru && 337 osb->osb_ref_tree_lru->rf_blkno == rf_blkno) 338 tree = osb->osb_ref_tree_lru; 339 else 340 tree = ocfs2_find_refcount_tree(osb, rf_blkno); 341 if (tree) 342 goto out; 343 344 spin_unlock(&osb->osb_lock); 345 346 new = ocfs2_allocate_refcount_tree(osb, rf_blkno); 347 if (!new) { 348 ret = -ENOMEM; 349 mlog_errno(ret); 350 return ret; 351 } 352 /* 353 * We need the generation to create the refcount tree lock and since 354 * it isn't changed during the tree modification, we are safe here to 355 * read without protection. 356 * We also have to purge the cache after we create the lock since the 357 * refcount block may have the stale data. It can only be trusted when 358 * we hold the refcount lock. 359 */ 360 ret = ocfs2_read_refcount_block(&new->rf_ci, rf_blkno, &ref_root_bh); 361 if (ret) { 362 mlog_errno(ret); 363 ocfs2_metadata_cache_exit(&new->rf_ci); 364 kfree(new); 365 return ret; 366 } 367 368 ref_rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data; 369 new->rf_generation = le32_to_cpu(ref_rb->rf_generation); 370 ocfs2_init_refcount_tree_lock(osb, new, rf_blkno, 371 new->rf_generation); 372 ocfs2_metadata_cache_purge(&new->rf_ci); 373 374 spin_lock(&osb->osb_lock); 375 tree = ocfs2_find_refcount_tree(osb, rf_blkno); 376 if (tree) 377 goto out; 378 379 ocfs2_insert_refcount_tree(osb, new); 380 381 tree = new; 382 new = NULL; 383 384 out: 385 *ret_tree = tree; 386 387 osb->osb_ref_tree_lru = tree; 388 389 spin_unlock(&osb->osb_lock); 390 391 if (new) 392 ocfs2_free_refcount_tree(new); 393 394 brelse(ref_root_bh); 395 return ret; 396 } 397 398 static int ocfs2_get_refcount_block(struct inode *inode, u64 *ref_blkno) 399 { 400 int ret; 401 struct buffer_head *di_bh = NULL; 402 struct ocfs2_dinode *di; 403 404 ret = ocfs2_read_inode_block(inode, &di_bh); 405 if (ret) { 406 mlog_errno(ret); 407 goto out; 408 } 409 410 BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL)); 411 412 di = (struct ocfs2_dinode *)di_bh->b_data; 413 *ref_blkno = le64_to_cpu(di->i_refcount_loc); 414 brelse(di_bh); 415 out: 416 return ret; 417 } 418 419 static int __ocfs2_lock_refcount_tree(struct ocfs2_super *osb, 420 struct ocfs2_refcount_tree *tree, int rw) 421 { 422 int ret; 423 424 ret = ocfs2_refcount_lock(tree, rw); 425 if (ret) { 426 mlog_errno(ret); 427 goto out; 428 } 429 430 if (rw) 431 down_write(&tree->rf_sem); 432 else 433 down_read(&tree->rf_sem); 434 435 out: 436 return ret; 437 } 438 439 /* 440 * Lock the refcount tree pointed by ref_blkno and return the tree. 441 * In most case, we lock the tree and read the refcount block. 442 * So read it here if the caller really needs it. 443 * 444 * If the tree has been re-created by other node, it will free the 445 * old one and re-create it. 446 */ 447 int ocfs2_lock_refcount_tree(struct ocfs2_super *osb, 448 u64 ref_blkno, int rw, 449 struct ocfs2_refcount_tree **ret_tree, 450 struct buffer_head **ref_bh) 451 { 452 int ret, delete_tree = 0; 453 struct ocfs2_refcount_tree *tree = NULL; 454 struct buffer_head *ref_root_bh = NULL; 455 struct ocfs2_refcount_block *rb; 456 457 again: 458 ret = ocfs2_get_refcount_tree(osb, ref_blkno, &tree); 459 if (ret) { 460 mlog_errno(ret); 461 return ret; 462 } 463 464 ocfs2_refcount_tree_get(tree); 465 466 ret = __ocfs2_lock_refcount_tree(osb, tree, rw); 467 if (ret) { 468 mlog_errno(ret); 469 ocfs2_refcount_tree_put(tree); 470 goto out; 471 } 472 473 ret = ocfs2_read_refcount_block(&tree->rf_ci, tree->rf_blkno, 474 &ref_root_bh); 475 if (ret) { 476 mlog_errno(ret); 477 ocfs2_unlock_refcount_tree(osb, tree, rw); 478 ocfs2_refcount_tree_put(tree); 479 goto out; 480 } 481 482 rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data; 483 /* 484 * If the refcount block has been freed and re-created, we may need 485 * to recreate the refcount tree also. 486 * 487 * Here we just remove the tree from the rb-tree, and the last 488 * kref holder will unlock and delete this refcount_tree. 489 * Then we goto "again" and ocfs2_get_refcount_tree will create 490 * the new refcount tree for us. 491 */ 492 if (tree->rf_generation != le32_to_cpu(rb->rf_generation)) { 493 if (!tree->rf_removed) { 494 ocfs2_erase_refcount_tree_from_list(osb, tree); 495 tree->rf_removed = 1; 496 delete_tree = 1; 497 } 498 499 ocfs2_unlock_refcount_tree(osb, tree, rw); 500 /* 501 * We get an extra reference when we create the refcount 502 * tree, so another put will destroy it. 503 */ 504 if (delete_tree) 505 ocfs2_refcount_tree_put(tree); 506 brelse(ref_root_bh); 507 ref_root_bh = NULL; 508 goto again; 509 } 510 511 *ret_tree = tree; 512 if (ref_bh) { 513 *ref_bh = ref_root_bh; 514 ref_root_bh = NULL; 515 } 516 out: 517 brelse(ref_root_bh); 518 return ret; 519 } 520 521 int ocfs2_lock_refcount_tree_by_inode(struct inode *inode, int rw, 522 struct ocfs2_refcount_tree **ret_tree, 523 struct buffer_head **ref_bh) 524 { 525 int ret; 526 u64 ref_blkno; 527 528 ret = ocfs2_get_refcount_block(inode, &ref_blkno); 529 if (ret) { 530 mlog_errno(ret); 531 return ret; 532 } 533 534 return ocfs2_lock_refcount_tree(OCFS2_SB(inode->i_sb), ref_blkno, 535 rw, ret_tree, ref_bh); 536 } 537 538 void ocfs2_unlock_refcount_tree(struct ocfs2_super *osb, 539 struct ocfs2_refcount_tree *tree, int rw) 540 { 541 if (rw) 542 up_write(&tree->rf_sem); 543 else 544 up_read(&tree->rf_sem); 545 546 ocfs2_refcount_unlock(tree, rw); 547 ocfs2_refcount_tree_put(tree); 548 } 549 550 void ocfs2_purge_refcount_trees(struct ocfs2_super *osb) 551 { 552 struct rb_node *node; 553 struct ocfs2_refcount_tree *tree; 554 struct rb_root *root = &osb->osb_rf_lock_tree; 555 556 while ((node = rb_last(root)) != NULL) { 557 tree = rb_entry(node, struct ocfs2_refcount_tree, rf_node); 558 559 mlog(0, "Purge tree %llu\n", 560 (unsigned long long) tree->rf_blkno); 561 562 rb_erase(&tree->rf_node, root); 563 ocfs2_free_refcount_tree(tree); 564 } 565 } 566 567 /* 568 * Create a refcount tree for an inode. 569 * We take for granted that the inode is already locked. 570 */ 571 static int ocfs2_create_refcount_tree(struct inode *inode, 572 struct buffer_head *di_bh) 573 { 574 int ret; 575 handle_t *handle = NULL; 576 struct ocfs2_alloc_context *meta_ac = NULL; 577 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 578 struct ocfs2_inode_info *oi = OCFS2_I(inode); 579 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 580 struct buffer_head *new_bh = NULL; 581 struct ocfs2_refcount_block *rb; 582 struct ocfs2_refcount_tree *new_tree = NULL, *tree = NULL; 583 u16 suballoc_bit_start; 584 u32 num_got; 585 u64 first_blkno; 586 587 BUG_ON(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL); 588 589 mlog(0, "create tree for inode %lu\n", inode->i_ino); 590 591 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &meta_ac); 592 if (ret) { 593 mlog_errno(ret); 594 goto out; 595 } 596 597 handle = ocfs2_start_trans(osb, OCFS2_REFCOUNT_TREE_CREATE_CREDITS); 598 if (IS_ERR(handle)) { 599 ret = PTR_ERR(handle); 600 mlog_errno(ret); 601 goto out; 602 } 603 604 ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh, 605 OCFS2_JOURNAL_ACCESS_WRITE); 606 if (ret) { 607 mlog_errno(ret); 608 goto out_commit; 609 } 610 611 ret = ocfs2_claim_metadata(osb, handle, meta_ac, 1, 612 &suballoc_bit_start, &num_got, 613 &first_blkno); 614 if (ret) { 615 mlog_errno(ret); 616 goto out_commit; 617 } 618 619 new_tree = ocfs2_allocate_refcount_tree(osb, first_blkno); 620 if (!new_tree) { 621 ret = -ENOMEM; 622 mlog_errno(ret); 623 goto out_commit; 624 } 625 626 new_bh = sb_getblk(inode->i_sb, first_blkno); 627 ocfs2_set_new_buffer_uptodate(&new_tree->rf_ci, new_bh); 628 629 ret = ocfs2_journal_access_rb(handle, &new_tree->rf_ci, new_bh, 630 OCFS2_JOURNAL_ACCESS_CREATE); 631 if (ret) { 632 mlog_errno(ret); 633 goto out_commit; 634 } 635 636 /* Initialize ocfs2_refcount_block. */ 637 rb = (struct ocfs2_refcount_block *)new_bh->b_data; 638 memset(rb, 0, inode->i_sb->s_blocksize); 639 strcpy((void *)rb, OCFS2_REFCOUNT_BLOCK_SIGNATURE); 640 rb->rf_suballoc_slot = cpu_to_le16(osb->slot_num); 641 rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start); 642 rb->rf_fs_generation = cpu_to_le32(osb->fs_generation); 643 rb->rf_blkno = cpu_to_le64(first_blkno); 644 rb->rf_count = cpu_to_le32(1); 645 rb->rf_records.rl_count = 646 cpu_to_le16(ocfs2_refcount_recs_per_rb(osb->sb)); 647 spin_lock(&osb->osb_lock); 648 rb->rf_generation = osb->s_next_generation++; 649 spin_unlock(&osb->osb_lock); 650 651 ocfs2_journal_dirty(handle, new_bh); 652 653 spin_lock(&oi->ip_lock); 654 oi->ip_dyn_features |= OCFS2_HAS_REFCOUNT_FL; 655 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features); 656 di->i_refcount_loc = cpu_to_le64(first_blkno); 657 spin_unlock(&oi->ip_lock); 658 659 mlog(0, "created tree for inode %lu, refblock %llu\n", 660 inode->i_ino, (unsigned long long)first_blkno); 661 662 ocfs2_journal_dirty(handle, di_bh); 663 664 /* 665 * We have to init the tree lock here since it will use 666 * the generation number to create it. 667 */ 668 new_tree->rf_generation = le32_to_cpu(rb->rf_generation); 669 ocfs2_init_refcount_tree_lock(osb, new_tree, first_blkno, 670 new_tree->rf_generation); 671 672 spin_lock(&osb->osb_lock); 673 tree = ocfs2_find_refcount_tree(osb, first_blkno); 674 675 /* 676 * We've just created a new refcount tree in this block. If 677 * we found a refcount tree on the ocfs2_super, it must be 678 * one we just deleted. We free the old tree before 679 * inserting the new tree. 680 */ 681 BUG_ON(tree && tree->rf_generation == new_tree->rf_generation); 682 if (tree) 683 ocfs2_erase_refcount_tree_from_list_no_lock(osb, tree); 684 ocfs2_insert_refcount_tree(osb, new_tree); 685 spin_unlock(&osb->osb_lock); 686 new_tree = NULL; 687 if (tree) 688 ocfs2_refcount_tree_put(tree); 689 690 out_commit: 691 ocfs2_commit_trans(osb, handle); 692 693 out: 694 if (new_tree) { 695 ocfs2_metadata_cache_exit(&new_tree->rf_ci); 696 kfree(new_tree); 697 } 698 699 brelse(new_bh); 700 if (meta_ac) 701 ocfs2_free_alloc_context(meta_ac); 702 703 return ret; 704 } 705 706 static int ocfs2_set_refcount_tree(struct inode *inode, 707 struct buffer_head *di_bh, 708 u64 refcount_loc) 709 { 710 int ret; 711 handle_t *handle = NULL; 712 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 713 struct ocfs2_inode_info *oi = OCFS2_I(inode); 714 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 715 struct buffer_head *ref_root_bh = NULL; 716 struct ocfs2_refcount_block *rb; 717 struct ocfs2_refcount_tree *ref_tree; 718 719 BUG_ON(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL); 720 721 ret = ocfs2_lock_refcount_tree(osb, refcount_loc, 1, 722 &ref_tree, &ref_root_bh); 723 if (ret) { 724 mlog_errno(ret); 725 return ret; 726 } 727 728 handle = ocfs2_start_trans(osb, OCFS2_REFCOUNT_TREE_SET_CREDITS); 729 if (IS_ERR(handle)) { 730 ret = PTR_ERR(handle); 731 mlog_errno(ret); 732 goto out; 733 } 734 735 ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh, 736 OCFS2_JOURNAL_ACCESS_WRITE); 737 if (ret) { 738 mlog_errno(ret); 739 goto out_commit; 740 } 741 742 ret = ocfs2_journal_access_rb(handle, &ref_tree->rf_ci, ref_root_bh, 743 OCFS2_JOURNAL_ACCESS_WRITE); 744 if (ret) { 745 mlog_errno(ret); 746 goto out_commit; 747 } 748 749 rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data; 750 le32_add_cpu(&rb->rf_count, 1); 751 752 ocfs2_journal_dirty(handle, ref_root_bh); 753 754 spin_lock(&oi->ip_lock); 755 oi->ip_dyn_features |= OCFS2_HAS_REFCOUNT_FL; 756 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features); 757 di->i_refcount_loc = cpu_to_le64(refcount_loc); 758 spin_unlock(&oi->ip_lock); 759 ocfs2_journal_dirty(handle, di_bh); 760 761 out_commit: 762 ocfs2_commit_trans(osb, handle); 763 out: 764 ocfs2_unlock_refcount_tree(osb, ref_tree, 1); 765 brelse(ref_root_bh); 766 767 return ret; 768 } 769 770 int ocfs2_remove_refcount_tree(struct inode *inode, struct buffer_head *di_bh) 771 { 772 int ret, delete_tree = 0; 773 handle_t *handle = NULL; 774 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 775 struct ocfs2_inode_info *oi = OCFS2_I(inode); 776 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 777 struct ocfs2_refcount_block *rb; 778 struct inode *alloc_inode = NULL; 779 struct buffer_head *alloc_bh = NULL; 780 struct buffer_head *blk_bh = NULL; 781 struct ocfs2_refcount_tree *ref_tree; 782 int credits = OCFS2_REFCOUNT_TREE_REMOVE_CREDITS; 783 u64 blk = 0, bg_blkno = 0, ref_blkno = le64_to_cpu(di->i_refcount_loc); 784 u16 bit = 0; 785 786 if (!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL)) 787 return 0; 788 789 BUG_ON(!ref_blkno); 790 ret = ocfs2_lock_refcount_tree(osb, ref_blkno, 1, &ref_tree, &blk_bh); 791 if (ret) { 792 mlog_errno(ret); 793 return ret; 794 } 795 796 rb = (struct ocfs2_refcount_block *)blk_bh->b_data; 797 798 /* 799 * If we are the last user, we need to free the block. 800 * So lock the allocator ahead. 801 */ 802 if (le32_to_cpu(rb->rf_count) == 1) { 803 blk = le64_to_cpu(rb->rf_blkno); 804 bit = le16_to_cpu(rb->rf_suballoc_bit); 805 bg_blkno = ocfs2_which_suballoc_group(blk, bit); 806 807 alloc_inode = ocfs2_get_system_file_inode(osb, 808 EXTENT_ALLOC_SYSTEM_INODE, 809 le16_to_cpu(rb->rf_suballoc_slot)); 810 if (!alloc_inode) { 811 ret = -ENOMEM; 812 mlog_errno(ret); 813 goto out; 814 } 815 mutex_lock(&alloc_inode->i_mutex); 816 817 ret = ocfs2_inode_lock(alloc_inode, &alloc_bh, 1); 818 if (ret) { 819 mlog_errno(ret); 820 goto out_mutex; 821 } 822 823 credits += OCFS2_SUBALLOC_FREE; 824 } 825 826 handle = ocfs2_start_trans(osb, credits); 827 if (IS_ERR(handle)) { 828 ret = PTR_ERR(handle); 829 mlog_errno(ret); 830 goto out_unlock; 831 } 832 833 ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh, 834 OCFS2_JOURNAL_ACCESS_WRITE); 835 if (ret) { 836 mlog_errno(ret); 837 goto out_commit; 838 } 839 840 ret = ocfs2_journal_access_rb(handle, &ref_tree->rf_ci, blk_bh, 841 OCFS2_JOURNAL_ACCESS_WRITE); 842 if (ret) { 843 mlog_errno(ret); 844 goto out_commit; 845 } 846 847 spin_lock(&oi->ip_lock); 848 oi->ip_dyn_features &= ~OCFS2_HAS_REFCOUNT_FL; 849 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features); 850 di->i_refcount_loc = 0; 851 spin_unlock(&oi->ip_lock); 852 ocfs2_journal_dirty(handle, di_bh); 853 854 le32_add_cpu(&rb->rf_count , -1); 855 ocfs2_journal_dirty(handle, blk_bh); 856 857 if (!rb->rf_count) { 858 delete_tree = 1; 859 ocfs2_erase_refcount_tree_from_list(osb, ref_tree); 860 ret = ocfs2_free_suballoc_bits(handle, alloc_inode, 861 alloc_bh, bit, bg_blkno, 1); 862 if (ret) 863 mlog_errno(ret); 864 } 865 866 out_commit: 867 ocfs2_commit_trans(osb, handle); 868 out_unlock: 869 if (alloc_inode) { 870 ocfs2_inode_unlock(alloc_inode, 1); 871 brelse(alloc_bh); 872 } 873 out_mutex: 874 if (alloc_inode) { 875 mutex_unlock(&alloc_inode->i_mutex); 876 iput(alloc_inode); 877 } 878 out: 879 ocfs2_unlock_refcount_tree(osb, ref_tree, 1); 880 if (delete_tree) 881 ocfs2_refcount_tree_put(ref_tree); 882 brelse(blk_bh); 883 884 return ret; 885 } 886 887 static void ocfs2_find_refcount_rec_in_rl(struct ocfs2_caching_info *ci, 888 struct buffer_head *ref_leaf_bh, 889 u64 cpos, unsigned int len, 890 struct ocfs2_refcount_rec *ret_rec, 891 int *index) 892 { 893 int i = 0; 894 struct ocfs2_refcount_block *rb = 895 (struct ocfs2_refcount_block *)ref_leaf_bh->b_data; 896 struct ocfs2_refcount_rec *rec = NULL; 897 898 for (; i < le16_to_cpu(rb->rf_records.rl_used); i++) { 899 rec = &rb->rf_records.rl_recs[i]; 900 901 if (le64_to_cpu(rec->r_cpos) + 902 le32_to_cpu(rec->r_clusters) <= cpos) 903 continue; 904 else if (le64_to_cpu(rec->r_cpos) > cpos) 905 break; 906 907 /* ok, cpos fail in this rec. Just return. */ 908 if (ret_rec) 909 *ret_rec = *rec; 910 goto out; 911 } 912 913 if (ret_rec) { 914 /* We meet with a hole here, so fake the rec. */ 915 ret_rec->r_cpos = cpu_to_le64(cpos); 916 ret_rec->r_refcount = 0; 917 if (i < le16_to_cpu(rb->rf_records.rl_used) && 918 le64_to_cpu(rec->r_cpos) < cpos + len) 919 ret_rec->r_clusters = 920 cpu_to_le32(le64_to_cpu(rec->r_cpos) - cpos); 921 else 922 ret_rec->r_clusters = cpu_to_le32(len); 923 } 924 925 out: 926 *index = i; 927 } 928 929 /* 930 * Given a cpos and len, try to find the refcount record which contains cpos. 931 * 1. If cpos can be found in one refcount record, return the record. 932 * 2. If cpos can't be found, return a fake record which start from cpos 933 * and end at a small value between cpos+len and start of the next record. 934 * This fake record has r_refcount = 0. 935 */ 936 static int ocfs2_get_refcount_rec(struct ocfs2_caching_info *ci, 937 struct buffer_head *ref_root_bh, 938 u64 cpos, unsigned int len, 939 struct ocfs2_refcount_rec *ret_rec, 940 int *index, 941 struct buffer_head **ret_bh) 942 { 943 int ret = 0, i, found; 944 u32 low_cpos; 945 struct ocfs2_extent_list *el; 946 struct ocfs2_extent_rec *tmp, *rec = NULL; 947 struct ocfs2_extent_block *eb; 948 struct buffer_head *eb_bh = NULL, *ref_leaf_bh = NULL; 949 struct super_block *sb = ocfs2_metadata_cache_get_super(ci); 950 struct ocfs2_refcount_block *rb = 951 (struct ocfs2_refcount_block *)ref_root_bh->b_data; 952 953 if (!(le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)) { 954 ocfs2_find_refcount_rec_in_rl(ci, ref_root_bh, cpos, len, 955 ret_rec, index); 956 *ret_bh = ref_root_bh; 957 get_bh(ref_root_bh); 958 return 0; 959 } 960 961 el = &rb->rf_list; 962 low_cpos = cpos & OCFS2_32BIT_POS_MASK; 963 964 if (el->l_tree_depth) { 965 ret = ocfs2_find_leaf(ci, el, low_cpos, &eb_bh); 966 if (ret) { 967 mlog_errno(ret); 968 goto out; 969 } 970 971 eb = (struct ocfs2_extent_block *) eb_bh->b_data; 972 el = &eb->h_list; 973 974 if (el->l_tree_depth) { 975 ocfs2_error(sb, 976 "refcount tree %llu has non zero tree " 977 "depth in leaf btree tree block %llu\n", 978 (unsigned long long)ocfs2_metadata_cache_owner(ci), 979 (unsigned long long)eb_bh->b_blocknr); 980 ret = -EROFS; 981 goto out; 982 } 983 } 984 985 found = 0; 986 for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) { 987 rec = &el->l_recs[i]; 988 989 if (le32_to_cpu(rec->e_cpos) <= low_cpos) { 990 found = 1; 991 break; 992 } 993 } 994 995 /* adjust len when we have ocfs2_extent_rec after it. */ 996 if (found && i < le16_to_cpu(el->l_next_free_rec) - 1) { 997 tmp = &el->l_recs[i+1]; 998 999 if (le32_to_cpu(tmp->e_cpos) < cpos + len) 1000 len = le32_to_cpu(tmp->e_cpos) - cpos; 1001 } 1002 1003 ret = ocfs2_read_refcount_block(ci, le64_to_cpu(rec->e_blkno), 1004 &ref_leaf_bh); 1005 if (ret) { 1006 mlog_errno(ret); 1007 goto out; 1008 } 1009 1010 ocfs2_find_refcount_rec_in_rl(ci, ref_leaf_bh, cpos, len, 1011 ret_rec, index); 1012 *ret_bh = ref_leaf_bh; 1013 out: 1014 brelse(eb_bh); 1015 return ret; 1016 } 1017 1018 enum ocfs2_ref_rec_contig { 1019 REF_CONTIG_NONE = 0, 1020 REF_CONTIG_LEFT, 1021 REF_CONTIG_RIGHT, 1022 REF_CONTIG_LEFTRIGHT, 1023 }; 1024 1025 static enum ocfs2_ref_rec_contig 1026 ocfs2_refcount_rec_adjacent(struct ocfs2_refcount_block *rb, 1027 int index) 1028 { 1029 if ((rb->rf_records.rl_recs[index].r_refcount == 1030 rb->rf_records.rl_recs[index + 1].r_refcount) && 1031 (le64_to_cpu(rb->rf_records.rl_recs[index].r_cpos) + 1032 le32_to_cpu(rb->rf_records.rl_recs[index].r_clusters) == 1033 le64_to_cpu(rb->rf_records.rl_recs[index + 1].r_cpos))) 1034 return REF_CONTIG_RIGHT; 1035 1036 return REF_CONTIG_NONE; 1037 } 1038 1039 static enum ocfs2_ref_rec_contig 1040 ocfs2_refcount_rec_contig(struct ocfs2_refcount_block *rb, 1041 int index) 1042 { 1043 enum ocfs2_ref_rec_contig ret = REF_CONTIG_NONE; 1044 1045 if (index < le16_to_cpu(rb->rf_records.rl_used) - 1) 1046 ret = ocfs2_refcount_rec_adjacent(rb, index); 1047 1048 if (index > 0) { 1049 enum ocfs2_ref_rec_contig tmp; 1050 1051 tmp = ocfs2_refcount_rec_adjacent(rb, index - 1); 1052 1053 if (tmp == REF_CONTIG_RIGHT) { 1054 if (ret == REF_CONTIG_RIGHT) 1055 ret = REF_CONTIG_LEFTRIGHT; 1056 else 1057 ret = REF_CONTIG_LEFT; 1058 } 1059 } 1060 1061 return ret; 1062 } 1063 1064 static void ocfs2_rotate_refcount_rec_left(struct ocfs2_refcount_block *rb, 1065 int index) 1066 { 1067 BUG_ON(rb->rf_records.rl_recs[index].r_refcount != 1068 rb->rf_records.rl_recs[index+1].r_refcount); 1069 1070 le32_add_cpu(&rb->rf_records.rl_recs[index].r_clusters, 1071 le32_to_cpu(rb->rf_records.rl_recs[index+1].r_clusters)); 1072 1073 if (index < le16_to_cpu(rb->rf_records.rl_used) - 2) 1074 memmove(&rb->rf_records.rl_recs[index + 1], 1075 &rb->rf_records.rl_recs[index + 2], 1076 sizeof(struct ocfs2_refcount_rec) * 1077 (le16_to_cpu(rb->rf_records.rl_used) - index - 2)); 1078 1079 memset(&rb->rf_records.rl_recs[le16_to_cpu(rb->rf_records.rl_used) - 1], 1080 0, sizeof(struct ocfs2_refcount_rec)); 1081 le16_add_cpu(&rb->rf_records.rl_used, -1); 1082 } 1083 1084 /* 1085 * Merge the refcount rec if we are contiguous with the adjacent recs. 1086 */ 1087 static void ocfs2_refcount_rec_merge(struct ocfs2_refcount_block *rb, 1088 int index) 1089 { 1090 enum ocfs2_ref_rec_contig contig = 1091 ocfs2_refcount_rec_contig(rb, index); 1092 1093 if (contig == REF_CONTIG_NONE) 1094 return; 1095 1096 if (contig == REF_CONTIG_LEFT || contig == REF_CONTIG_LEFTRIGHT) { 1097 BUG_ON(index == 0); 1098 index--; 1099 } 1100 1101 ocfs2_rotate_refcount_rec_left(rb, index); 1102 1103 if (contig == REF_CONTIG_LEFTRIGHT) 1104 ocfs2_rotate_refcount_rec_left(rb, index); 1105 } 1106 1107 /* 1108 * Change the refcount indexed by "index" in ref_bh. 1109 * If refcount reaches 0, remove it. 1110 */ 1111 static int ocfs2_change_refcount_rec(handle_t *handle, 1112 struct ocfs2_caching_info *ci, 1113 struct buffer_head *ref_leaf_bh, 1114 int index, int change) 1115 { 1116 int ret; 1117 struct ocfs2_refcount_block *rb = 1118 (struct ocfs2_refcount_block *)ref_leaf_bh->b_data; 1119 struct ocfs2_refcount_list *rl = &rb->rf_records; 1120 struct ocfs2_refcount_rec *rec = &rl->rl_recs[index]; 1121 1122 ret = ocfs2_journal_access_rb(handle, ci, ref_leaf_bh, 1123 OCFS2_JOURNAL_ACCESS_WRITE); 1124 if (ret) { 1125 mlog_errno(ret); 1126 goto out; 1127 } 1128 1129 mlog(0, "change index %d, old count %u, change %d\n", index, 1130 le32_to_cpu(rec->r_refcount), change); 1131 le32_add_cpu(&rec->r_refcount, change); 1132 1133 if (!rec->r_refcount) { 1134 if (index != le16_to_cpu(rl->rl_used) - 1) { 1135 memmove(rec, rec + 1, 1136 (le16_to_cpu(rl->rl_used) - index - 1) * 1137 sizeof(struct ocfs2_refcount_rec)); 1138 memset(&rl->rl_recs[le16_to_cpu(rl->rl_used) - 1], 1139 0, sizeof(struct ocfs2_refcount_rec)); 1140 } 1141 1142 le16_add_cpu(&rl->rl_used, -1); 1143 } else 1144 ocfs2_refcount_rec_merge(rb, index); 1145 1146 ret = ocfs2_journal_dirty(handle, ref_leaf_bh); 1147 if (ret) 1148 mlog_errno(ret); 1149 out: 1150 return ret; 1151 } 1152 1153 static int ocfs2_expand_inline_ref_root(handle_t *handle, 1154 struct ocfs2_caching_info *ci, 1155 struct buffer_head *ref_root_bh, 1156 struct buffer_head **ref_leaf_bh, 1157 struct ocfs2_alloc_context *meta_ac) 1158 { 1159 int ret; 1160 u16 suballoc_bit_start; 1161 u32 num_got; 1162 u64 blkno; 1163 struct super_block *sb = ocfs2_metadata_cache_get_super(ci); 1164 struct buffer_head *new_bh = NULL; 1165 struct ocfs2_refcount_block *new_rb; 1166 struct ocfs2_refcount_block *root_rb = 1167 (struct ocfs2_refcount_block *)ref_root_bh->b_data; 1168 1169 ret = ocfs2_journal_access_rb(handle, ci, ref_root_bh, 1170 OCFS2_JOURNAL_ACCESS_WRITE); 1171 if (ret) { 1172 mlog_errno(ret); 1173 goto out; 1174 } 1175 1176 ret = ocfs2_claim_metadata(OCFS2_SB(sb), handle, meta_ac, 1, 1177 &suballoc_bit_start, &num_got, 1178 &blkno); 1179 if (ret) { 1180 mlog_errno(ret); 1181 goto out; 1182 } 1183 1184 new_bh = sb_getblk(sb, blkno); 1185 if (new_bh == NULL) { 1186 ret = -EIO; 1187 mlog_errno(ret); 1188 goto out; 1189 } 1190 ocfs2_set_new_buffer_uptodate(ci, new_bh); 1191 1192 ret = ocfs2_journal_access_rb(handle, ci, new_bh, 1193 OCFS2_JOURNAL_ACCESS_CREATE); 1194 if (ret) { 1195 mlog_errno(ret); 1196 goto out; 1197 } 1198 1199 /* 1200 * Initialize ocfs2_refcount_block. 1201 * It should contain the same information as the old root. 1202 * so just memcpy it and change the corresponding field. 1203 */ 1204 memcpy(new_bh->b_data, ref_root_bh->b_data, sb->s_blocksize); 1205 1206 new_rb = (struct ocfs2_refcount_block *)new_bh->b_data; 1207 new_rb->rf_suballoc_slot = cpu_to_le16(OCFS2_SB(sb)->slot_num); 1208 new_rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start); 1209 new_rb->rf_blkno = cpu_to_le64(blkno); 1210 new_rb->rf_cpos = cpu_to_le32(0); 1211 new_rb->rf_parent = cpu_to_le64(ref_root_bh->b_blocknr); 1212 new_rb->rf_flags = cpu_to_le32(OCFS2_REFCOUNT_LEAF_FL); 1213 ocfs2_journal_dirty(handle, new_bh); 1214 1215 /* Now change the root. */ 1216 memset(&root_rb->rf_list, 0, sb->s_blocksize - 1217 offsetof(struct ocfs2_refcount_block, rf_list)); 1218 root_rb->rf_list.l_count = cpu_to_le16(ocfs2_extent_recs_per_rb(sb)); 1219 root_rb->rf_clusters = cpu_to_le32(1); 1220 root_rb->rf_list.l_next_free_rec = cpu_to_le16(1); 1221 root_rb->rf_list.l_recs[0].e_blkno = cpu_to_le64(blkno); 1222 root_rb->rf_list.l_recs[0].e_leaf_clusters = cpu_to_le16(1); 1223 root_rb->rf_flags = cpu_to_le32(OCFS2_REFCOUNT_TREE_FL); 1224 1225 ocfs2_journal_dirty(handle, ref_root_bh); 1226 1227 mlog(0, "new leaf block %llu, used %u\n", (unsigned long long)blkno, 1228 le16_to_cpu(new_rb->rf_records.rl_used)); 1229 1230 *ref_leaf_bh = new_bh; 1231 new_bh = NULL; 1232 out: 1233 brelse(new_bh); 1234 return ret; 1235 } 1236 1237 static int ocfs2_refcount_rec_no_intersect(struct ocfs2_refcount_rec *prev, 1238 struct ocfs2_refcount_rec *next) 1239 { 1240 if (ocfs2_get_ref_rec_low_cpos(prev) + le32_to_cpu(prev->r_clusters) <= 1241 ocfs2_get_ref_rec_low_cpos(next)) 1242 return 1; 1243 1244 return 0; 1245 } 1246 1247 static int cmp_refcount_rec_by_low_cpos(const void *a, const void *b) 1248 { 1249 const struct ocfs2_refcount_rec *l = a, *r = b; 1250 u32 l_cpos = ocfs2_get_ref_rec_low_cpos(l); 1251 u32 r_cpos = ocfs2_get_ref_rec_low_cpos(r); 1252 1253 if (l_cpos > r_cpos) 1254 return 1; 1255 if (l_cpos < r_cpos) 1256 return -1; 1257 return 0; 1258 } 1259 1260 static int cmp_refcount_rec_by_cpos(const void *a, const void *b) 1261 { 1262 const struct ocfs2_refcount_rec *l = a, *r = b; 1263 u64 l_cpos = le64_to_cpu(l->r_cpos); 1264 u64 r_cpos = le64_to_cpu(r->r_cpos); 1265 1266 if (l_cpos > r_cpos) 1267 return 1; 1268 if (l_cpos < r_cpos) 1269 return -1; 1270 return 0; 1271 } 1272 1273 static void swap_refcount_rec(void *a, void *b, int size) 1274 { 1275 struct ocfs2_refcount_rec *l = a, *r = b, tmp; 1276 1277 tmp = *(struct ocfs2_refcount_rec *)l; 1278 *(struct ocfs2_refcount_rec *)l = 1279 *(struct ocfs2_refcount_rec *)r; 1280 *(struct ocfs2_refcount_rec *)r = tmp; 1281 } 1282 1283 /* 1284 * The refcount cpos are ordered by their 64bit cpos, 1285 * But we will use the low 32 bit to be the e_cpos in the b-tree. 1286 * So we need to make sure that this pos isn't intersected with others. 1287 * 1288 * Note: The refcount block is already sorted by their low 32 bit cpos, 1289 * So just try the middle pos first, and we will exit when we find 1290 * the good position. 1291 */ 1292 static int ocfs2_find_refcount_split_pos(struct ocfs2_refcount_list *rl, 1293 u32 *split_pos, int *split_index) 1294 { 1295 int num_used = le16_to_cpu(rl->rl_used); 1296 int delta, middle = num_used / 2; 1297 1298 for (delta = 0; delta < middle; delta++) { 1299 /* Let's check delta earlier than middle */ 1300 if (ocfs2_refcount_rec_no_intersect( 1301 &rl->rl_recs[middle - delta - 1], 1302 &rl->rl_recs[middle - delta])) { 1303 *split_index = middle - delta; 1304 break; 1305 } 1306 1307 /* For even counts, don't walk off the end */ 1308 if ((middle + delta + 1) == num_used) 1309 continue; 1310 1311 /* Now try delta past middle */ 1312 if (ocfs2_refcount_rec_no_intersect( 1313 &rl->rl_recs[middle + delta], 1314 &rl->rl_recs[middle + delta + 1])) { 1315 *split_index = middle + delta + 1; 1316 break; 1317 } 1318 } 1319 1320 if (delta >= middle) 1321 return -ENOSPC; 1322 1323 *split_pos = ocfs2_get_ref_rec_low_cpos(&rl->rl_recs[*split_index]); 1324 return 0; 1325 } 1326 1327 static int ocfs2_divide_leaf_refcount_block(struct buffer_head *ref_leaf_bh, 1328 struct buffer_head *new_bh, 1329 u32 *split_cpos) 1330 { 1331 int split_index = 0, num_moved, ret; 1332 u32 cpos = 0; 1333 struct ocfs2_refcount_block *rb = 1334 (struct ocfs2_refcount_block *)ref_leaf_bh->b_data; 1335 struct ocfs2_refcount_list *rl = &rb->rf_records; 1336 struct ocfs2_refcount_block *new_rb = 1337 (struct ocfs2_refcount_block *)new_bh->b_data; 1338 struct ocfs2_refcount_list *new_rl = &new_rb->rf_records; 1339 1340 mlog(0, "split old leaf refcount block %llu, count = %u, used = %u\n", 1341 (unsigned long long)ref_leaf_bh->b_blocknr, 1342 le32_to_cpu(rl->rl_count), le32_to_cpu(rl->rl_used)); 1343 1344 /* 1345 * XXX: Improvement later. 1346 * If we know all the high 32 bit cpos is the same, no need to sort. 1347 * 1348 * In order to make the whole process safe, we do: 1349 * 1. sort the entries by their low 32 bit cpos first so that we can 1350 * find the split cpos easily. 1351 * 2. call ocfs2_insert_extent to insert the new refcount block. 1352 * 3. move the refcount rec to the new block. 1353 * 4. sort the entries by their 64 bit cpos. 1354 * 5. dirty the new_rb and rb. 1355 */ 1356 sort(&rl->rl_recs, le16_to_cpu(rl->rl_used), 1357 sizeof(struct ocfs2_refcount_rec), 1358 cmp_refcount_rec_by_low_cpos, swap_refcount_rec); 1359 1360 ret = ocfs2_find_refcount_split_pos(rl, &cpos, &split_index); 1361 if (ret) { 1362 mlog_errno(ret); 1363 return ret; 1364 } 1365 1366 new_rb->rf_cpos = cpu_to_le32(cpos); 1367 1368 /* move refcount records starting from split_index to the new block. */ 1369 num_moved = le16_to_cpu(rl->rl_used) - split_index; 1370 memcpy(new_rl->rl_recs, &rl->rl_recs[split_index], 1371 num_moved * sizeof(struct ocfs2_refcount_rec)); 1372 1373 /*ok, remove the entries we just moved over to the other block. */ 1374 memset(&rl->rl_recs[split_index], 0, 1375 num_moved * sizeof(struct ocfs2_refcount_rec)); 1376 1377 /* change old and new rl_used accordingly. */ 1378 le16_add_cpu(&rl->rl_used, -num_moved); 1379 new_rl->rl_used = cpu_to_le32(num_moved); 1380 1381 sort(&rl->rl_recs, le16_to_cpu(rl->rl_used), 1382 sizeof(struct ocfs2_refcount_rec), 1383 cmp_refcount_rec_by_cpos, swap_refcount_rec); 1384 1385 sort(&new_rl->rl_recs, le16_to_cpu(new_rl->rl_used), 1386 sizeof(struct ocfs2_refcount_rec), 1387 cmp_refcount_rec_by_cpos, swap_refcount_rec); 1388 1389 *split_cpos = cpos; 1390 return 0; 1391 } 1392 1393 static int ocfs2_new_leaf_refcount_block(handle_t *handle, 1394 struct ocfs2_caching_info *ci, 1395 struct buffer_head *ref_root_bh, 1396 struct buffer_head *ref_leaf_bh, 1397 struct ocfs2_alloc_context *meta_ac) 1398 { 1399 int ret; 1400 u16 suballoc_bit_start; 1401 u32 num_got, new_cpos; 1402 u64 blkno; 1403 struct super_block *sb = ocfs2_metadata_cache_get_super(ci); 1404 struct ocfs2_refcount_block *root_rb = 1405 (struct ocfs2_refcount_block *)ref_root_bh->b_data; 1406 struct buffer_head *new_bh = NULL; 1407 struct ocfs2_refcount_block *new_rb; 1408 struct ocfs2_extent_tree ref_et; 1409 1410 BUG_ON(!(le32_to_cpu(root_rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)); 1411 1412 ret = ocfs2_journal_access_rb(handle, ci, ref_root_bh, 1413 OCFS2_JOURNAL_ACCESS_WRITE); 1414 if (ret) { 1415 mlog_errno(ret); 1416 goto out; 1417 } 1418 1419 ret = ocfs2_journal_access_rb(handle, ci, ref_leaf_bh, 1420 OCFS2_JOURNAL_ACCESS_WRITE); 1421 if (ret) { 1422 mlog_errno(ret); 1423 goto out; 1424 } 1425 1426 ret = ocfs2_claim_metadata(OCFS2_SB(sb), handle, meta_ac, 1, 1427 &suballoc_bit_start, &num_got, 1428 &blkno); 1429 if (ret) { 1430 mlog_errno(ret); 1431 goto out; 1432 } 1433 1434 new_bh = sb_getblk(sb, blkno); 1435 if (new_bh == NULL) { 1436 ret = -EIO; 1437 mlog_errno(ret); 1438 goto out; 1439 } 1440 ocfs2_set_new_buffer_uptodate(ci, new_bh); 1441 1442 ret = ocfs2_journal_access_rb(handle, ci, new_bh, 1443 OCFS2_JOURNAL_ACCESS_CREATE); 1444 if (ret) { 1445 mlog_errno(ret); 1446 goto out; 1447 } 1448 1449 /* Initialize ocfs2_refcount_block. */ 1450 new_rb = (struct ocfs2_refcount_block *)new_bh->b_data; 1451 memset(new_rb, 0, sb->s_blocksize); 1452 strcpy((void *)new_rb, OCFS2_REFCOUNT_BLOCK_SIGNATURE); 1453 new_rb->rf_suballoc_slot = cpu_to_le16(OCFS2_SB(sb)->slot_num); 1454 new_rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start); 1455 new_rb->rf_fs_generation = cpu_to_le32(OCFS2_SB(sb)->fs_generation); 1456 new_rb->rf_blkno = cpu_to_le64(blkno); 1457 new_rb->rf_parent = cpu_to_le64(ref_root_bh->b_blocknr); 1458 new_rb->rf_flags = cpu_to_le32(OCFS2_REFCOUNT_LEAF_FL); 1459 new_rb->rf_records.rl_count = 1460 cpu_to_le16(ocfs2_refcount_recs_per_rb(sb)); 1461 new_rb->rf_generation = root_rb->rf_generation; 1462 1463 ret = ocfs2_divide_leaf_refcount_block(ref_leaf_bh, new_bh, &new_cpos); 1464 if (ret) { 1465 mlog_errno(ret); 1466 goto out; 1467 } 1468 1469 ocfs2_journal_dirty(handle, ref_leaf_bh); 1470 ocfs2_journal_dirty(handle, new_bh); 1471 1472 ocfs2_init_refcount_extent_tree(&ref_et, ci, ref_root_bh); 1473 1474 mlog(0, "insert new leaf block %llu at %u\n", 1475 (unsigned long long)new_bh->b_blocknr, new_cpos); 1476 1477 /* Insert the new leaf block with the specific offset cpos. */ 1478 ret = ocfs2_insert_extent(handle, &ref_et, new_cpos, new_bh->b_blocknr, 1479 1, 0, meta_ac); 1480 if (ret) 1481 mlog_errno(ret); 1482 1483 out: 1484 brelse(new_bh); 1485 return ret; 1486 } 1487 1488 static int ocfs2_expand_refcount_tree(handle_t *handle, 1489 struct ocfs2_caching_info *ci, 1490 struct buffer_head *ref_root_bh, 1491 struct buffer_head *ref_leaf_bh, 1492 struct ocfs2_alloc_context *meta_ac) 1493 { 1494 int ret; 1495 struct buffer_head *expand_bh = NULL; 1496 1497 if (ref_root_bh == ref_leaf_bh) { 1498 /* 1499 * the old root bh hasn't been expanded to a b-tree, 1500 * so expand it first. 1501 */ 1502 ret = ocfs2_expand_inline_ref_root(handle, ci, ref_root_bh, 1503 &expand_bh, meta_ac); 1504 if (ret) { 1505 mlog_errno(ret); 1506 goto out; 1507 } 1508 } else { 1509 expand_bh = ref_leaf_bh; 1510 get_bh(expand_bh); 1511 } 1512 1513 1514 /* Now add a new refcount block into the tree.*/ 1515 ret = ocfs2_new_leaf_refcount_block(handle, ci, ref_root_bh, 1516 expand_bh, meta_ac); 1517 if (ret) 1518 mlog_errno(ret); 1519 out: 1520 brelse(expand_bh); 1521 return ret; 1522 } 1523 1524 /* 1525 * Adjust the extent rec in b-tree representing ref_leaf_bh. 1526 * 1527 * Only called when we have inserted a new refcount rec at index 0 1528 * which means ocfs2_extent_rec.e_cpos may need some change. 1529 */ 1530 static int ocfs2_adjust_refcount_rec(handle_t *handle, 1531 struct ocfs2_caching_info *ci, 1532 struct buffer_head *ref_root_bh, 1533 struct buffer_head *ref_leaf_bh, 1534 struct ocfs2_refcount_rec *rec) 1535 { 1536 int ret = 0, i; 1537 u32 new_cpos, old_cpos; 1538 struct ocfs2_path *path = NULL; 1539 struct ocfs2_extent_tree et; 1540 struct ocfs2_refcount_block *rb = 1541 (struct ocfs2_refcount_block *)ref_root_bh->b_data; 1542 struct ocfs2_extent_list *el; 1543 1544 if (!(le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)) 1545 goto out; 1546 1547 rb = (struct ocfs2_refcount_block *)ref_leaf_bh->b_data; 1548 old_cpos = le32_to_cpu(rb->rf_cpos); 1549 new_cpos = le64_to_cpu(rec->r_cpos) & OCFS2_32BIT_POS_MASK; 1550 if (old_cpos <= new_cpos) 1551 goto out; 1552 1553 ocfs2_init_refcount_extent_tree(&et, ci, ref_root_bh); 1554 1555 path = ocfs2_new_path_from_et(&et); 1556 if (!path) { 1557 ret = -ENOMEM; 1558 mlog_errno(ret); 1559 goto out; 1560 } 1561 1562 ret = ocfs2_find_path(ci, path, old_cpos); 1563 if (ret) { 1564 mlog_errno(ret); 1565 goto out; 1566 } 1567 1568 /* 1569 * 2 more credits, one for the leaf refcount block, one for 1570 * the extent block contains the extent rec. 1571 */ 1572 ret = ocfs2_extend_trans(handle, handle->h_buffer_credits + 2); 1573 if (ret < 0) { 1574 mlog_errno(ret); 1575 goto out; 1576 } 1577 1578 ret = ocfs2_journal_access_rb(handle, ci, ref_leaf_bh, 1579 OCFS2_JOURNAL_ACCESS_WRITE); 1580 if (ret < 0) { 1581 mlog_errno(ret); 1582 goto out; 1583 } 1584 1585 ret = ocfs2_journal_access_eb(handle, ci, path_leaf_bh(path), 1586 OCFS2_JOURNAL_ACCESS_WRITE); 1587 if (ret < 0) { 1588 mlog_errno(ret); 1589 goto out; 1590 } 1591 1592 /* change the leaf extent block first. */ 1593 el = path_leaf_el(path); 1594 1595 for (i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) 1596 if (le32_to_cpu(el->l_recs[i].e_cpos) == old_cpos) 1597 break; 1598 1599 BUG_ON(i == le16_to_cpu(el->l_next_free_rec)); 1600 1601 el->l_recs[i].e_cpos = cpu_to_le32(new_cpos); 1602 1603 /* change the r_cpos in the leaf block. */ 1604 rb->rf_cpos = cpu_to_le32(new_cpos); 1605 1606 ocfs2_journal_dirty(handle, path_leaf_bh(path)); 1607 ocfs2_journal_dirty(handle, ref_leaf_bh); 1608 1609 out: 1610 ocfs2_free_path(path); 1611 return ret; 1612 } 1613 1614 static int ocfs2_insert_refcount_rec(handle_t *handle, 1615 struct ocfs2_caching_info *ci, 1616 struct buffer_head *ref_root_bh, 1617 struct buffer_head *ref_leaf_bh, 1618 struct ocfs2_refcount_rec *rec, 1619 int index, 1620 struct ocfs2_alloc_context *meta_ac) 1621 { 1622 int ret; 1623 struct ocfs2_refcount_block *rb = 1624 (struct ocfs2_refcount_block *)ref_leaf_bh->b_data; 1625 struct ocfs2_refcount_list *rf_list = &rb->rf_records; 1626 struct buffer_head *new_bh = NULL; 1627 1628 BUG_ON(le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL); 1629 1630 if (rf_list->rl_used == rf_list->rl_count) { 1631 u64 cpos = le64_to_cpu(rec->r_cpos); 1632 u32 len = le32_to_cpu(rec->r_clusters); 1633 1634 ret = ocfs2_expand_refcount_tree(handle, ci, ref_root_bh, 1635 ref_leaf_bh, meta_ac); 1636 if (ret) { 1637 mlog_errno(ret); 1638 goto out; 1639 } 1640 1641 ret = ocfs2_get_refcount_rec(ci, ref_root_bh, 1642 cpos, len, NULL, &index, 1643 &new_bh); 1644 if (ret) { 1645 mlog_errno(ret); 1646 goto out; 1647 } 1648 1649 ref_leaf_bh = new_bh; 1650 rb = (struct ocfs2_refcount_block *)ref_leaf_bh->b_data; 1651 rf_list = &rb->rf_records; 1652 } 1653 1654 ret = ocfs2_journal_access_rb(handle, ci, ref_leaf_bh, 1655 OCFS2_JOURNAL_ACCESS_WRITE); 1656 if (ret) { 1657 mlog_errno(ret); 1658 goto out; 1659 } 1660 1661 if (index < le16_to_cpu(rf_list->rl_used)) 1662 memmove(&rf_list->rl_recs[index + 1], 1663 &rf_list->rl_recs[index], 1664 (le16_to_cpu(rf_list->rl_used) - index) * 1665 sizeof(struct ocfs2_refcount_rec)); 1666 1667 mlog(0, "insert refcount record start %llu, len %u, count %u " 1668 "to leaf block %llu at index %d\n", 1669 (unsigned long long)le64_to_cpu(rec->r_cpos), 1670 le32_to_cpu(rec->r_clusters), le32_to_cpu(rec->r_refcount), 1671 (unsigned long long)ref_leaf_bh->b_blocknr, index); 1672 1673 rf_list->rl_recs[index] = *rec; 1674 1675 le16_add_cpu(&rf_list->rl_used, 1); 1676 1677 ocfs2_refcount_rec_merge(rb, index); 1678 1679 ret = ocfs2_journal_dirty(handle, ref_leaf_bh); 1680 if (ret) { 1681 mlog_errno(ret); 1682 goto out; 1683 } 1684 1685 if (index == 0) { 1686 ret = ocfs2_adjust_refcount_rec(handle, ci, 1687 ref_root_bh, 1688 ref_leaf_bh, rec); 1689 if (ret) 1690 mlog_errno(ret); 1691 } 1692 out: 1693 brelse(new_bh); 1694 return ret; 1695 } 1696 1697 /* 1698 * Split the refcount_rec indexed by "index" in ref_leaf_bh. 1699 * This is much simple than our b-tree code. 1700 * split_rec is the new refcount rec we want to insert. 1701 * If split_rec->r_refcount > 0, we are changing the refcount(in case we 1702 * increase refcount or decrease a refcount to non-zero). 1703 * If split_rec->r_refcount == 0, we are punching a hole in current refcount 1704 * rec( in case we decrease a refcount to zero). 1705 */ 1706 static int ocfs2_split_refcount_rec(handle_t *handle, 1707 struct ocfs2_caching_info *ci, 1708 struct buffer_head *ref_root_bh, 1709 struct buffer_head *ref_leaf_bh, 1710 struct ocfs2_refcount_rec *split_rec, 1711 int index, 1712 struct ocfs2_alloc_context *meta_ac, 1713 struct ocfs2_cached_dealloc_ctxt *dealloc) 1714 { 1715 int ret, recs_need; 1716 u32 len; 1717 struct ocfs2_refcount_block *rb = 1718 (struct ocfs2_refcount_block *)ref_leaf_bh->b_data; 1719 struct ocfs2_refcount_list *rf_list = &rb->rf_records; 1720 struct ocfs2_refcount_rec *orig_rec = &rf_list->rl_recs[index]; 1721 struct ocfs2_refcount_rec *tail_rec = NULL; 1722 struct buffer_head *new_bh = NULL; 1723 1724 BUG_ON(le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL); 1725 1726 mlog(0, "original r_pos %llu, cluster %u, split %llu, cluster %u\n", 1727 le64_to_cpu(orig_rec->r_cpos), le32_to_cpu(orig_rec->r_clusters), 1728 le64_to_cpu(split_rec->r_cpos), 1729 le32_to_cpu(split_rec->r_clusters)); 1730 1731 /* 1732 * If we just need to split the header or tail clusters, 1733 * no more recs are needed, just split is OK. 1734 * Otherwise we at least need one new recs. 1735 */ 1736 if (!split_rec->r_refcount && 1737 (split_rec->r_cpos == orig_rec->r_cpos || 1738 le64_to_cpu(split_rec->r_cpos) + 1739 le32_to_cpu(split_rec->r_clusters) == 1740 le64_to_cpu(orig_rec->r_cpos) + le32_to_cpu(orig_rec->r_clusters))) 1741 recs_need = 0; 1742 else 1743 recs_need = 1; 1744 1745 /* 1746 * We need one more rec if we split in the middle and the new rec have 1747 * some refcount in it. 1748 */ 1749 if (split_rec->r_refcount && 1750 (split_rec->r_cpos != orig_rec->r_cpos && 1751 le64_to_cpu(split_rec->r_cpos) + 1752 le32_to_cpu(split_rec->r_clusters) != 1753 le64_to_cpu(orig_rec->r_cpos) + le32_to_cpu(orig_rec->r_clusters))) 1754 recs_need++; 1755 1756 /* If the leaf block don't have enough record, expand it. */ 1757 if (le16_to_cpu(rf_list->rl_used) + recs_need > rf_list->rl_count) { 1758 struct ocfs2_refcount_rec tmp_rec; 1759 u64 cpos = le64_to_cpu(orig_rec->r_cpos); 1760 len = le32_to_cpu(orig_rec->r_clusters); 1761 ret = ocfs2_expand_refcount_tree(handle, ci, ref_root_bh, 1762 ref_leaf_bh, meta_ac); 1763 if (ret) { 1764 mlog_errno(ret); 1765 goto out; 1766 } 1767 1768 /* 1769 * We have to re-get it since now cpos may be moved to 1770 * another leaf block. 1771 */ 1772 ret = ocfs2_get_refcount_rec(ci, ref_root_bh, 1773 cpos, len, &tmp_rec, &index, 1774 &new_bh); 1775 if (ret) { 1776 mlog_errno(ret); 1777 goto out; 1778 } 1779 1780 ref_leaf_bh = new_bh; 1781 rb = (struct ocfs2_refcount_block *)ref_leaf_bh->b_data; 1782 rf_list = &rb->rf_records; 1783 orig_rec = &rf_list->rl_recs[index]; 1784 } 1785 1786 ret = ocfs2_journal_access_rb(handle, ci, ref_leaf_bh, 1787 OCFS2_JOURNAL_ACCESS_WRITE); 1788 if (ret) { 1789 mlog_errno(ret); 1790 goto out; 1791 } 1792 1793 /* 1794 * We have calculated out how many new records we need and store 1795 * in recs_need, so spare enough space first by moving the records 1796 * after "index" to the end. 1797 */ 1798 if (index != le16_to_cpu(rf_list->rl_used) - 1) 1799 memmove(&rf_list->rl_recs[index + 1 + recs_need], 1800 &rf_list->rl_recs[index + 1], 1801 (le16_to_cpu(rf_list->rl_used) - index - 1) * 1802 sizeof(struct ocfs2_refcount_rec)); 1803 1804 len = (le64_to_cpu(orig_rec->r_cpos) + 1805 le32_to_cpu(orig_rec->r_clusters)) - 1806 (le64_to_cpu(split_rec->r_cpos) + 1807 le32_to_cpu(split_rec->r_clusters)); 1808 1809 /* 1810 * If we have "len", the we will split in the tail and move it 1811 * to the end of the space we have just spared. 1812 */ 1813 if (len) { 1814 tail_rec = &rf_list->rl_recs[index + recs_need]; 1815 1816 memcpy(tail_rec, orig_rec, sizeof(struct ocfs2_refcount_rec)); 1817 le64_add_cpu(&tail_rec->r_cpos, 1818 le32_to_cpu(tail_rec->r_clusters) - len); 1819 tail_rec->r_clusters = le32_to_cpu(len); 1820 } 1821 1822 /* 1823 * If the split pos isn't the same as the original one, we need to 1824 * split in the head. 1825 * 1826 * Note: We have the chance that split_rec.r_refcount = 0, 1827 * recs_need = 0 and len > 0, which means we just cut the head from 1828 * the orig_rec and in that case we have done some modification in 1829 * orig_rec above, so the check for r_cpos is faked. 1830 */ 1831 if (split_rec->r_cpos != orig_rec->r_cpos && tail_rec != orig_rec) { 1832 len = le64_to_cpu(split_rec->r_cpos) - 1833 le64_to_cpu(orig_rec->r_cpos); 1834 orig_rec->r_clusters = cpu_to_le32(len); 1835 index++; 1836 } 1837 1838 le16_add_cpu(&rf_list->rl_used, recs_need); 1839 1840 if (split_rec->r_refcount) { 1841 rf_list->rl_recs[index] = *split_rec; 1842 mlog(0, "insert refcount record start %llu, len %u, count %u " 1843 "to leaf block %llu at index %d\n", 1844 (unsigned long long)le64_to_cpu(split_rec->r_cpos), 1845 le32_to_cpu(split_rec->r_clusters), 1846 le32_to_cpu(split_rec->r_refcount), 1847 (unsigned long long)ref_leaf_bh->b_blocknr, index); 1848 1849 ocfs2_refcount_rec_merge(rb, index); 1850 } 1851 1852 ret = ocfs2_journal_dirty(handle, ref_leaf_bh); 1853 if (ret) 1854 mlog_errno(ret); 1855 1856 out: 1857 brelse(new_bh); 1858 return ret; 1859 } 1860 1861 static int __ocfs2_increase_refcount(handle_t *handle, 1862 struct ocfs2_caching_info *ci, 1863 struct buffer_head *ref_root_bh, 1864 u64 cpos, u32 len, 1865 struct ocfs2_alloc_context *meta_ac, 1866 struct ocfs2_cached_dealloc_ctxt *dealloc) 1867 { 1868 int ret = 0, index; 1869 struct buffer_head *ref_leaf_bh = NULL; 1870 struct ocfs2_refcount_rec rec; 1871 unsigned int set_len = 0; 1872 1873 mlog(0, "Tree owner %llu, add refcount start %llu, len %u\n", 1874 (unsigned long long)ocfs2_metadata_cache_owner(ci), 1875 (unsigned long long)cpos, len); 1876 1877 while (len) { 1878 ret = ocfs2_get_refcount_rec(ci, ref_root_bh, 1879 cpos, len, &rec, &index, 1880 &ref_leaf_bh); 1881 if (ret) { 1882 mlog_errno(ret); 1883 goto out; 1884 } 1885 1886 set_len = le32_to_cpu(rec.r_clusters); 1887 1888 /* 1889 * Here we may meet with 3 situations: 1890 * 1891 * 1. If we find an already existing record, and the length 1892 * is the same, cool, we just need to increase the r_refcount 1893 * and it is OK. 1894 * 2. If we find a hole, just insert it with r_refcount = 1. 1895 * 3. If we are in the middle of one extent record, split 1896 * it. 1897 */ 1898 if (rec.r_refcount && le64_to_cpu(rec.r_cpos) == cpos && 1899 set_len <= len) { 1900 mlog(0, "increase refcount rec, start %llu, len %u, " 1901 "count %u\n", (unsigned long long)cpos, set_len, 1902 le32_to_cpu(rec.r_refcount)); 1903 ret = ocfs2_change_refcount_rec(handle, ci, 1904 ref_leaf_bh, index, 1); 1905 if (ret) { 1906 mlog_errno(ret); 1907 goto out; 1908 } 1909 } else if (!rec.r_refcount) { 1910 rec.r_refcount = cpu_to_le32(1); 1911 1912 mlog(0, "insert refcount rec, start %llu, len %u\n", 1913 (unsigned long long)le64_to_cpu(rec.r_cpos), 1914 set_len); 1915 ret = ocfs2_insert_refcount_rec(handle, ci, ref_root_bh, 1916 ref_leaf_bh, 1917 &rec, index, meta_ac); 1918 if (ret) { 1919 mlog_errno(ret); 1920 goto out; 1921 } 1922 } else { 1923 set_len = min((u64)(cpos + len), 1924 le64_to_cpu(rec.r_cpos) + set_len) - cpos; 1925 rec.r_cpos = cpu_to_le64(cpos); 1926 rec.r_clusters = cpu_to_le32(set_len); 1927 le32_add_cpu(&rec.r_refcount, 1); 1928 1929 mlog(0, "split refcount rec, start %llu, " 1930 "len %u, count %u\n", 1931 (unsigned long long)le64_to_cpu(rec.r_cpos), 1932 set_len, le32_to_cpu(rec.r_refcount)); 1933 ret = ocfs2_split_refcount_rec(handle, ci, 1934 ref_root_bh, ref_leaf_bh, 1935 &rec, index, 1936 meta_ac, dealloc); 1937 if (ret) { 1938 mlog_errno(ret); 1939 goto out; 1940 } 1941 } 1942 1943 cpos += set_len; 1944 len -= set_len; 1945 brelse(ref_leaf_bh); 1946 ref_leaf_bh = NULL; 1947 } 1948 1949 out: 1950 brelse(ref_leaf_bh); 1951 return ret; 1952 } 1953 1954 static int ocfs2_remove_refcount_extent(handle_t *handle, 1955 struct ocfs2_caching_info *ci, 1956 struct buffer_head *ref_root_bh, 1957 struct buffer_head *ref_leaf_bh, 1958 struct ocfs2_alloc_context *meta_ac, 1959 struct ocfs2_cached_dealloc_ctxt *dealloc) 1960 { 1961 int ret; 1962 struct super_block *sb = ocfs2_metadata_cache_get_super(ci); 1963 struct ocfs2_refcount_block *rb = 1964 (struct ocfs2_refcount_block *)ref_leaf_bh->b_data; 1965 struct ocfs2_extent_tree et; 1966 1967 BUG_ON(rb->rf_records.rl_used); 1968 1969 ocfs2_init_refcount_extent_tree(&et, ci, ref_root_bh); 1970 ret = ocfs2_remove_extent(handle, &et, le32_to_cpu(rb->rf_cpos), 1971 1, meta_ac, dealloc); 1972 if (ret) { 1973 mlog_errno(ret); 1974 goto out; 1975 } 1976 1977 ocfs2_remove_from_cache(ci, ref_leaf_bh); 1978 1979 /* 1980 * add the freed block to the dealloc so that it will be freed 1981 * when we run dealloc. 1982 */ 1983 ret = ocfs2_cache_block_dealloc(dealloc, EXTENT_ALLOC_SYSTEM_INODE, 1984 le16_to_cpu(rb->rf_suballoc_slot), 1985 le64_to_cpu(rb->rf_blkno), 1986 le16_to_cpu(rb->rf_suballoc_bit)); 1987 if (ret) { 1988 mlog_errno(ret); 1989 goto out; 1990 } 1991 1992 ret = ocfs2_journal_access_rb(handle, ci, ref_root_bh, 1993 OCFS2_JOURNAL_ACCESS_WRITE); 1994 if (ret) { 1995 mlog_errno(ret); 1996 goto out; 1997 } 1998 1999 rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data; 2000 2001 le32_add_cpu(&rb->rf_clusters, -1); 2002 2003 /* 2004 * check whether we need to restore the root refcount block if 2005 * there is no leaf extent block at atll. 2006 */ 2007 if (!rb->rf_list.l_next_free_rec) { 2008 BUG_ON(rb->rf_clusters); 2009 2010 mlog(0, "reset refcount tree root %llu to be a record block.\n", 2011 (unsigned long long)ref_root_bh->b_blocknr); 2012 2013 rb->rf_flags = 0; 2014 rb->rf_parent = 0; 2015 rb->rf_cpos = 0; 2016 memset(&rb->rf_records, 0, sb->s_blocksize - 2017 offsetof(struct ocfs2_refcount_block, rf_records)); 2018 rb->rf_records.rl_count = 2019 cpu_to_le16(ocfs2_refcount_recs_per_rb(sb)); 2020 } 2021 2022 ocfs2_journal_dirty(handle, ref_root_bh); 2023 2024 out: 2025 return ret; 2026 } 2027 2028 static int ocfs2_decrease_refcount_rec(handle_t *handle, 2029 struct ocfs2_caching_info *ci, 2030 struct buffer_head *ref_root_bh, 2031 struct buffer_head *ref_leaf_bh, 2032 int index, u64 cpos, unsigned int len, 2033 struct ocfs2_alloc_context *meta_ac, 2034 struct ocfs2_cached_dealloc_ctxt *dealloc) 2035 { 2036 int ret; 2037 struct ocfs2_refcount_block *rb = 2038 (struct ocfs2_refcount_block *)ref_leaf_bh->b_data; 2039 struct ocfs2_refcount_rec *rec = &rb->rf_records.rl_recs[index]; 2040 2041 BUG_ON(cpos < le64_to_cpu(rec->r_cpos)); 2042 BUG_ON(cpos + len > 2043 le64_to_cpu(rec->r_cpos) + le32_to_cpu(rec->r_clusters)); 2044 2045 if (cpos == le64_to_cpu(rec->r_cpos) && 2046 len == le32_to_cpu(rec->r_clusters)) 2047 ret = ocfs2_change_refcount_rec(handle, ci, 2048 ref_leaf_bh, index, -1); 2049 else { 2050 struct ocfs2_refcount_rec split = *rec; 2051 split.r_cpos = cpu_to_le64(cpos); 2052 split.r_clusters = cpu_to_le32(len); 2053 2054 le32_add_cpu(&split.r_refcount, -1); 2055 2056 mlog(0, "split refcount rec, start %llu, " 2057 "len %u, count %u, original start %llu, len %u\n", 2058 (unsigned long long)le64_to_cpu(split.r_cpos), 2059 len, le32_to_cpu(split.r_refcount), 2060 (unsigned long long)le64_to_cpu(rec->r_cpos), 2061 le32_to_cpu(rec->r_clusters)); 2062 ret = ocfs2_split_refcount_rec(handle, ci, 2063 ref_root_bh, ref_leaf_bh, 2064 &split, index, 2065 meta_ac, dealloc); 2066 } 2067 2068 if (ret) { 2069 mlog_errno(ret); 2070 goto out; 2071 } 2072 2073 /* Remove the leaf refcount block if it contains no refcount record. */ 2074 if (!rb->rf_records.rl_used && ref_leaf_bh != ref_root_bh) { 2075 ret = ocfs2_remove_refcount_extent(handle, ci, ref_root_bh, 2076 ref_leaf_bh, meta_ac, 2077 dealloc); 2078 if (ret) 2079 mlog_errno(ret); 2080 } 2081 2082 out: 2083 return ret; 2084 } 2085 2086 static int __ocfs2_decrease_refcount(handle_t *handle, 2087 struct ocfs2_caching_info *ci, 2088 struct buffer_head *ref_root_bh, 2089 u64 cpos, u32 len, 2090 struct ocfs2_alloc_context *meta_ac, 2091 struct ocfs2_cached_dealloc_ctxt *dealloc, 2092 int delete) 2093 { 2094 int ret = 0, index = 0; 2095 struct ocfs2_refcount_rec rec; 2096 unsigned int r_count = 0, r_len; 2097 struct super_block *sb = ocfs2_metadata_cache_get_super(ci); 2098 struct buffer_head *ref_leaf_bh = NULL; 2099 2100 mlog(0, "Tree owner %llu, decrease refcount start %llu, " 2101 "len %u, delete %u\n", 2102 (unsigned long long)ocfs2_metadata_cache_owner(ci), 2103 (unsigned long long)cpos, len, delete); 2104 2105 while (len) { 2106 ret = ocfs2_get_refcount_rec(ci, ref_root_bh, 2107 cpos, len, &rec, &index, 2108 &ref_leaf_bh); 2109 if (ret) { 2110 mlog_errno(ret); 2111 goto out; 2112 } 2113 2114 r_count = le32_to_cpu(rec.r_refcount); 2115 BUG_ON(r_count == 0); 2116 if (!delete) 2117 BUG_ON(r_count > 1); 2118 2119 r_len = min((u64)(cpos + len), le64_to_cpu(rec.r_cpos) + 2120 le32_to_cpu(rec.r_clusters)) - cpos; 2121 2122 ret = ocfs2_decrease_refcount_rec(handle, ci, ref_root_bh, 2123 ref_leaf_bh, index, 2124 cpos, r_len, 2125 meta_ac, dealloc); 2126 if (ret) { 2127 mlog_errno(ret); 2128 goto out; 2129 } 2130 2131 if (le32_to_cpu(rec.r_refcount) == 1 && delete) { 2132 ret = ocfs2_cache_cluster_dealloc(dealloc, 2133 ocfs2_clusters_to_blocks(sb, cpos), 2134 r_len); 2135 if (ret) { 2136 mlog_errno(ret); 2137 goto out; 2138 } 2139 } 2140 2141 cpos += r_len; 2142 len -= r_len; 2143 brelse(ref_leaf_bh); 2144 ref_leaf_bh = NULL; 2145 } 2146 2147 out: 2148 brelse(ref_leaf_bh); 2149 return ret; 2150 } 2151 2152 /* Caller must hold refcount tree lock. */ 2153 int ocfs2_decrease_refcount(struct inode *inode, 2154 handle_t *handle, u32 cpos, u32 len, 2155 struct ocfs2_alloc_context *meta_ac, 2156 struct ocfs2_cached_dealloc_ctxt *dealloc, 2157 int delete) 2158 { 2159 int ret; 2160 u64 ref_blkno; 2161 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2162 struct buffer_head *ref_root_bh = NULL; 2163 struct ocfs2_refcount_tree *tree; 2164 2165 BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL)); 2166 2167 ret = ocfs2_get_refcount_block(inode, &ref_blkno); 2168 if (ret) { 2169 mlog_errno(ret); 2170 goto out; 2171 } 2172 2173 ret = ocfs2_get_refcount_tree(OCFS2_SB(inode->i_sb), ref_blkno, &tree); 2174 if (ret) { 2175 mlog_errno(ret); 2176 goto out; 2177 } 2178 2179 ret = ocfs2_read_refcount_block(&tree->rf_ci, tree->rf_blkno, 2180 &ref_root_bh); 2181 if (ret) { 2182 mlog_errno(ret); 2183 goto out; 2184 } 2185 2186 ret = __ocfs2_decrease_refcount(handle, &tree->rf_ci, ref_root_bh, 2187 cpos, len, meta_ac, dealloc, delete); 2188 if (ret) 2189 mlog_errno(ret); 2190 out: 2191 brelse(ref_root_bh); 2192 return ret; 2193 } 2194 2195 /* 2196 * Mark the already-existing extent at cpos as refcounted for len clusters. 2197 * This adds the refcount extent flag. 2198 * 2199 * If the existing extent is larger than the request, initiate a 2200 * split. An attempt will be made at merging with adjacent extents. 2201 * 2202 * The caller is responsible for passing down meta_ac if we'll need it. 2203 */ 2204 static int ocfs2_mark_extent_refcounted(struct inode *inode, 2205 struct ocfs2_extent_tree *et, 2206 handle_t *handle, u32 cpos, 2207 u32 len, u32 phys, 2208 struct ocfs2_alloc_context *meta_ac, 2209 struct ocfs2_cached_dealloc_ctxt *dealloc) 2210 { 2211 int ret; 2212 2213 mlog(0, "Inode %lu refcount tree cpos %u, len %u, phys cluster %u\n", 2214 inode->i_ino, cpos, len, phys); 2215 2216 if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb))) { 2217 ocfs2_error(inode->i_sb, "Inode %lu want to use refcount " 2218 "tree, but the feature bit is not set in the " 2219 "super block.", inode->i_ino); 2220 ret = -EROFS; 2221 goto out; 2222 } 2223 2224 ret = ocfs2_change_extent_flag(handle, et, cpos, 2225 len, phys, meta_ac, dealloc, 2226 OCFS2_EXT_REFCOUNTED, 0); 2227 if (ret) 2228 mlog_errno(ret); 2229 2230 out: 2231 return ret; 2232 } 2233 2234 /* 2235 * Given some contiguous physical clusters, calculate what we need 2236 * for modifying their refcount. 2237 */ 2238 static int ocfs2_calc_refcount_meta_credits(struct super_block *sb, 2239 struct ocfs2_caching_info *ci, 2240 struct buffer_head *ref_root_bh, 2241 u64 start_cpos, 2242 u32 clusters, 2243 int *meta_add, 2244 int *credits) 2245 { 2246 int ret = 0, index, ref_blocks = 0, recs_add = 0; 2247 u64 cpos = start_cpos; 2248 struct ocfs2_refcount_block *rb; 2249 struct ocfs2_refcount_rec rec; 2250 struct buffer_head *ref_leaf_bh = NULL, *prev_bh = NULL; 2251 u32 len; 2252 2253 mlog(0, "start_cpos %llu, clusters %u\n", 2254 (unsigned long long)start_cpos, clusters); 2255 while (clusters) { 2256 ret = ocfs2_get_refcount_rec(ci, ref_root_bh, 2257 cpos, clusters, &rec, 2258 &index, &ref_leaf_bh); 2259 if (ret) { 2260 mlog_errno(ret); 2261 goto out; 2262 } 2263 2264 if (ref_leaf_bh != prev_bh) { 2265 /* 2266 * Now we encounter a new leaf block, so calculate 2267 * whether we need to extend the old leaf. 2268 */ 2269 if (prev_bh) { 2270 rb = (struct ocfs2_refcount_block *) 2271 prev_bh->b_data; 2272 2273 if (le64_to_cpu(rb->rf_records.rl_used) + 2274 recs_add > 2275 le16_to_cpu(rb->rf_records.rl_count)) 2276 ref_blocks++; 2277 } 2278 2279 recs_add = 0; 2280 *credits += 1; 2281 brelse(prev_bh); 2282 prev_bh = ref_leaf_bh; 2283 get_bh(prev_bh); 2284 } 2285 2286 rb = (struct ocfs2_refcount_block *)ref_leaf_bh->b_data; 2287 2288 mlog(0, "recs_add %d,cpos %llu, clusters %u, rec->r_cpos %llu," 2289 "rec->r_clusters %u, rec->r_refcount %u, index %d\n", 2290 recs_add, (unsigned long long)cpos, clusters, 2291 (unsigned long long)le64_to_cpu(rec.r_cpos), 2292 le32_to_cpu(rec.r_clusters), 2293 le32_to_cpu(rec.r_refcount), index); 2294 2295 len = min((u64)cpos + clusters, le64_to_cpu(rec.r_cpos) + 2296 le32_to_cpu(rec.r_clusters)) - cpos; 2297 /* 2298 * If the refcount rec already exist, cool. We just need 2299 * to check whether there is a split. Otherwise we just need 2300 * to increase the refcount. 2301 * If we will insert one, increases recs_add. 2302 * 2303 * We record all the records which will be inserted to the 2304 * same refcount block, so that we can tell exactly whether 2305 * we need a new refcount block or not. 2306 */ 2307 if (rec.r_refcount) { 2308 /* Check whether we need a split at the beginning. */ 2309 if (cpos == start_cpos && 2310 cpos != le64_to_cpu(rec.r_cpos)) 2311 recs_add++; 2312 2313 /* Check whether we need a split in the end. */ 2314 if (cpos + clusters < le64_to_cpu(rec.r_cpos) + 2315 le32_to_cpu(rec.r_clusters)) 2316 recs_add++; 2317 } else 2318 recs_add++; 2319 2320 brelse(ref_leaf_bh); 2321 ref_leaf_bh = NULL; 2322 clusters -= len; 2323 cpos += len; 2324 } 2325 2326 if (prev_bh) { 2327 rb = (struct ocfs2_refcount_block *)prev_bh->b_data; 2328 2329 if (le64_to_cpu(rb->rf_records.rl_used) + recs_add > 2330 le16_to_cpu(rb->rf_records.rl_count)) 2331 ref_blocks++; 2332 2333 *credits += 1; 2334 } 2335 2336 if (!ref_blocks) 2337 goto out; 2338 2339 mlog(0, "we need ref_blocks %d\n", ref_blocks); 2340 *meta_add += ref_blocks; 2341 *credits += ref_blocks; 2342 2343 /* 2344 * So we may need ref_blocks to insert into the tree. 2345 * That also means we need to change the b-tree and add that number 2346 * of records since we never merge them. 2347 * We need one more block for expansion since the new created leaf 2348 * block is also full and needs split. 2349 */ 2350 rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data; 2351 if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL) { 2352 struct ocfs2_extent_tree et; 2353 2354 ocfs2_init_refcount_extent_tree(&et, ci, ref_root_bh); 2355 *meta_add += ocfs2_extend_meta_needed(et.et_root_el); 2356 *credits += ocfs2_calc_extend_credits(sb, 2357 et.et_root_el, 2358 ref_blocks); 2359 } else { 2360 *credits += OCFS2_EXPAND_REFCOUNT_TREE_CREDITS; 2361 *meta_add += 1; 2362 } 2363 2364 out: 2365 brelse(ref_leaf_bh); 2366 brelse(prev_bh); 2367 return ret; 2368 } 2369 2370 /* 2371 * For refcount tree, we will decrease some contiguous clusters 2372 * refcount count, so just go through it to see how many blocks 2373 * we gonna touch and whether we need to create new blocks. 2374 * 2375 * Normally the refcount blocks store these refcount should be 2376 * continguous also, so that we can get the number easily. 2377 * As for meta_ac, we will at most add split 2 refcount record and 2378 * 2 more refcount block, so just check it in a rough way. 2379 * 2380 * Caller must hold refcount tree lock. 2381 */ 2382 int ocfs2_prepare_refcount_change_for_del(struct inode *inode, 2383 struct buffer_head *di_bh, 2384 u64 phys_blkno, 2385 u32 clusters, 2386 int *credits, 2387 struct ocfs2_alloc_context **meta_ac) 2388 { 2389 int ret, ref_blocks = 0; 2390 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 2391 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2392 struct buffer_head *ref_root_bh = NULL; 2393 struct ocfs2_refcount_tree *tree; 2394 u64 start_cpos = ocfs2_blocks_to_clusters(inode->i_sb, phys_blkno); 2395 2396 if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb))) { 2397 ocfs2_error(inode->i_sb, "Inode %lu want to use refcount " 2398 "tree, but the feature bit is not set in the " 2399 "super block.", inode->i_ino); 2400 ret = -EROFS; 2401 goto out; 2402 } 2403 2404 BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL)); 2405 2406 ret = ocfs2_get_refcount_tree(OCFS2_SB(inode->i_sb), 2407 le64_to_cpu(di->i_refcount_loc), &tree); 2408 if (ret) { 2409 mlog_errno(ret); 2410 goto out; 2411 } 2412 2413 ret = ocfs2_read_refcount_block(&tree->rf_ci, 2414 le64_to_cpu(di->i_refcount_loc), 2415 &ref_root_bh); 2416 if (ret) { 2417 mlog_errno(ret); 2418 goto out; 2419 } 2420 2421 ret = ocfs2_calc_refcount_meta_credits(inode->i_sb, 2422 &tree->rf_ci, 2423 ref_root_bh, 2424 start_cpos, clusters, 2425 &ref_blocks, credits); 2426 if (ret) { 2427 mlog_errno(ret); 2428 goto out; 2429 } 2430 2431 mlog(0, "reserve new metadata %d, credits = %d\n", 2432 ref_blocks, *credits); 2433 2434 if (ref_blocks) { 2435 ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb), 2436 ref_blocks, meta_ac); 2437 if (ret) 2438 mlog_errno(ret); 2439 } 2440 2441 out: 2442 brelse(ref_root_bh); 2443 return ret; 2444 } 2445 2446 #define MAX_CONTIG_BYTES 1048576 2447 2448 static inline unsigned int ocfs2_cow_contig_clusters(struct super_block *sb) 2449 { 2450 return ocfs2_clusters_for_bytes(sb, MAX_CONTIG_BYTES); 2451 } 2452 2453 static inline unsigned int ocfs2_cow_contig_mask(struct super_block *sb) 2454 { 2455 return ~(ocfs2_cow_contig_clusters(sb) - 1); 2456 } 2457 2458 /* 2459 * Given an extent that starts at 'start' and an I/O that starts at 'cpos', 2460 * find an offset (start + (n * contig_clusters)) that is closest to cpos 2461 * while still being less than or equal to it. 2462 * 2463 * The goal is to break the extent at a multiple of contig_clusters. 2464 */ 2465 static inline unsigned int ocfs2_cow_align_start(struct super_block *sb, 2466 unsigned int start, 2467 unsigned int cpos) 2468 { 2469 BUG_ON(start > cpos); 2470 2471 return start + ((cpos - start) & ocfs2_cow_contig_mask(sb)); 2472 } 2473 2474 /* 2475 * Given a cluster count of len, pad it out so that it is a multiple 2476 * of contig_clusters. 2477 */ 2478 static inline unsigned int ocfs2_cow_align_length(struct super_block *sb, 2479 unsigned int len) 2480 { 2481 unsigned int padded = 2482 (len + (ocfs2_cow_contig_clusters(sb) - 1)) & 2483 ocfs2_cow_contig_mask(sb); 2484 2485 /* Did we wrap? */ 2486 if (padded < len) 2487 padded = UINT_MAX; 2488 2489 return padded; 2490 } 2491 2492 /* 2493 * Calculate out the start and number of virtual clusters we need to to CoW. 2494 * 2495 * cpos is vitual start cluster position we want to do CoW in a 2496 * file and write_len is the cluster length. 2497 * max_cpos is the place where we want to stop CoW intentionally. 2498 * 2499 * Normal we will start CoW from the beginning of extent record cotaining cpos. 2500 * We try to break up extents on boundaries of MAX_CONTIG_BYTES so that we 2501 * get good I/O from the resulting extent tree. 2502 */ 2503 static int ocfs2_refcount_cal_cow_clusters(struct inode *inode, 2504 struct ocfs2_extent_list *el, 2505 u32 cpos, 2506 u32 write_len, 2507 u32 max_cpos, 2508 u32 *cow_start, 2509 u32 *cow_len) 2510 { 2511 int ret = 0; 2512 int tree_height = le16_to_cpu(el->l_tree_depth), i; 2513 struct buffer_head *eb_bh = NULL; 2514 struct ocfs2_extent_block *eb = NULL; 2515 struct ocfs2_extent_rec *rec; 2516 unsigned int want_clusters, rec_end = 0; 2517 int contig_clusters = ocfs2_cow_contig_clusters(inode->i_sb); 2518 int leaf_clusters; 2519 2520 BUG_ON(cpos + write_len > max_cpos); 2521 2522 if (tree_height > 0) { 2523 ret = ocfs2_find_leaf(INODE_CACHE(inode), el, cpos, &eb_bh); 2524 if (ret) { 2525 mlog_errno(ret); 2526 goto out; 2527 } 2528 2529 eb = (struct ocfs2_extent_block *) eb_bh->b_data; 2530 el = &eb->h_list; 2531 2532 if (el->l_tree_depth) { 2533 ocfs2_error(inode->i_sb, 2534 "Inode %lu has non zero tree depth in " 2535 "leaf block %llu\n", inode->i_ino, 2536 (unsigned long long)eb_bh->b_blocknr); 2537 ret = -EROFS; 2538 goto out; 2539 } 2540 } 2541 2542 *cow_len = 0; 2543 for (i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) { 2544 rec = &el->l_recs[i]; 2545 2546 if (ocfs2_is_empty_extent(rec)) { 2547 mlog_bug_on_msg(i != 0, "Inode %lu has empty record in " 2548 "index %d\n", inode->i_ino, i); 2549 continue; 2550 } 2551 2552 if (le32_to_cpu(rec->e_cpos) + 2553 le16_to_cpu(rec->e_leaf_clusters) <= cpos) 2554 continue; 2555 2556 if (*cow_len == 0) { 2557 /* 2558 * We should find a refcounted record in the 2559 * first pass. 2560 */ 2561 BUG_ON(!(rec->e_flags & OCFS2_EXT_REFCOUNTED)); 2562 *cow_start = le32_to_cpu(rec->e_cpos); 2563 } 2564 2565 /* 2566 * If we encounter a hole, a non-refcounted record or 2567 * pass the max_cpos, stop the search. 2568 */ 2569 if ((!(rec->e_flags & OCFS2_EXT_REFCOUNTED)) || 2570 (*cow_len && rec_end != le32_to_cpu(rec->e_cpos)) || 2571 (max_cpos <= le32_to_cpu(rec->e_cpos))) 2572 break; 2573 2574 leaf_clusters = le16_to_cpu(rec->e_leaf_clusters); 2575 rec_end = le32_to_cpu(rec->e_cpos) + leaf_clusters; 2576 if (rec_end > max_cpos) { 2577 rec_end = max_cpos; 2578 leaf_clusters = rec_end - le32_to_cpu(rec->e_cpos); 2579 } 2580 2581 /* 2582 * How many clusters do we actually need from 2583 * this extent? First we see how many we actually 2584 * need to complete the write. If that's smaller 2585 * than contig_clusters, we try for contig_clusters. 2586 */ 2587 if (!*cow_len) 2588 want_clusters = write_len; 2589 else 2590 want_clusters = (cpos + write_len) - 2591 (*cow_start + *cow_len); 2592 if (want_clusters < contig_clusters) 2593 want_clusters = contig_clusters; 2594 2595 /* 2596 * If the write does not cover the whole extent, we 2597 * need to calculate how we're going to split the extent. 2598 * We try to do it on contig_clusters boundaries. 2599 * 2600 * Any extent smaller than contig_clusters will be 2601 * CoWed in its entirety. 2602 */ 2603 if (leaf_clusters <= contig_clusters) 2604 *cow_len += leaf_clusters; 2605 else if (*cow_len || (*cow_start == cpos)) { 2606 /* 2607 * This extent needs to be CoW'd from its 2608 * beginning, so all we have to do is compute 2609 * how many clusters to grab. We align 2610 * want_clusters to the edge of contig_clusters 2611 * to get better I/O. 2612 */ 2613 want_clusters = ocfs2_cow_align_length(inode->i_sb, 2614 want_clusters); 2615 2616 if (leaf_clusters < want_clusters) 2617 *cow_len += leaf_clusters; 2618 else 2619 *cow_len += want_clusters; 2620 } else if ((*cow_start + contig_clusters) >= 2621 (cpos + write_len)) { 2622 /* 2623 * Breaking off contig_clusters at the front 2624 * of the extent will cover our write. That's 2625 * easy. 2626 */ 2627 *cow_len = contig_clusters; 2628 } else if ((rec_end - cpos) <= contig_clusters) { 2629 /* 2630 * Breaking off contig_clusters at the tail of 2631 * this extent will cover cpos. 2632 */ 2633 *cow_start = rec_end - contig_clusters; 2634 *cow_len = contig_clusters; 2635 } else if ((rec_end - cpos) <= want_clusters) { 2636 /* 2637 * While we can't fit the entire write in this 2638 * extent, we know that the write goes from cpos 2639 * to the end of the extent. Break that off. 2640 * We try to break it at some multiple of 2641 * contig_clusters from the front of the extent. 2642 * Failing that (ie, cpos is within 2643 * contig_clusters of the front), we'll CoW the 2644 * entire extent. 2645 */ 2646 *cow_start = ocfs2_cow_align_start(inode->i_sb, 2647 *cow_start, cpos); 2648 *cow_len = rec_end - *cow_start; 2649 } else { 2650 /* 2651 * Ok, the entire write lives in the middle of 2652 * this extent. Let's try to slice the extent up 2653 * nicely. Optimally, our CoW region starts at 2654 * m*contig_clusters from the beginning of the 2655 * extent and goes for n*contig_clusters, 2656 * covering the entire write. 2657 */ 2658 *cow_start = ocfs2_cow_align_start(inode->i_sb, 2659 *cow_start, cpos); 2660 2661 want_clusters = (cpos + write_len) - *cow_start; 2662 want_clusters = ocfs2_cow_align_length(inode->i_sb, 2663 want_clusters); 2664 if (*cow_start + want_clusters <= rec_end) 2665 *cow_len = want_clusters; 2666 else 2667 *cow_len = rec_end - *cow_start; 2668 } 2669 2670 /* Have we covered our entire write yet? */ 2671 if ((*cow_start + *cow_len) >= (cpos + write_len)) 2672 break; 2673 2674 /* 2675 * If we reach the end of the extent block and don't get enough 2676 * clusters, continue with the next extent block if possible. 2677 */ 2678 if (i + 1 == le16_to_cpu(el->l_next_free_rec) && 2679 eb && eb->h_next_leaf_blk) { 2680 brelse(eb_bh); 2681 eb_bh = NULL; 2682 2683 ret = ocfs2_read_extent_block(INODE_CACHE(inode), 2684 le64_to_cpu(eb->h_next_leaf_blk), 2685 &eb_bh); 2686 if (ret) { 2687 mlog_errno(ret); 2688 goto out; 2689 } 2690 2691 eb = (struct ocfs2_extent_block *) eb_bh->b_data; 2692 el = &eb->h_list; 2693 i = -1; 2694 } 2695 } 2696 2697 out: 2698 brelse(eb_bh); 2699 return ret; 2700 } 2701 2702 /* 2703 * Prepare meta_ac, data_ac and calculate credits when we want to add some 2704 * num_clusters in data_tree "et" and change the refcount for the old 2705 * clusters(starting form p_cluster) in the refcount tree. 2706 * 2707 * Note: 2708 * 1. since we may split the old tree, so we at most will need num_clusters + 2 2709 * more new leaf records. 2710 * 2. In some case, we may not need to reserve new clusters(e.g, reflink), so 2711 * just give data_ac = NULL. 2712 */ 2713 static int ocfs2_lock_refcount_allocators(struct super_block *sb, 2714 u32 p_cluster, u32 num_clusters, 2715 struct ocfs2_extent_tree *et, 2716 struct ocfs2_caching_info *ref_ci, 2717 struct buffer_head *ref_root_bh, 2718 struct ocfs2_alloc_context **meta_ac, 2719 struct ocfs2_alloc_context **data_ac, 2720 int *credits) 2721 { 2722 int ret = 0, meta_add = 0; 2723 int num_free_extents = ocfs2_num_free_extents(OCFS2_SB(sb), et); 2724 2725 if (num_free_extents < 0) { 2726 ret = num_free_extents; 2727 mlog_errno(ret); 2728 goto out; 2729 } 2730 2731 if (num_free_extents < num_clusters + 2) 2732 meta_add = 2733 ocfs2_extend_meta_needed(et->et_root_el); 2734 2735 *credits += ocfs2_calc_extend_credits(sb, et->et_root_el, 2736 num_clusters + 2); 2737 2738 ret = ocfs2_calc_refcount_meta_credits(sb, ref_ci, ref_root_bh, 2739 p_cluster, num_clusters, 2740 &meta_add, credits); 2741 if (ret) { 2742 mlog_errno(ret); 2743 goto out; 2744 } 2745 2746 mlog(0, "reserve new metadata %d, clusters %u, credits = %d\n", 2747 meta_add, num_clusters, *credits); 2748 ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(sb), meta_add, 2749 meta_ac); 2750 if (ret) { 2751 mlog_errno(ret); 2752 goto out; 2753 } 2754 2755 if (data_ac) { 2756 ret = ocfs2_reserve_clusters(OCFS2_SB(sb), num_clusters, 2757 data_ac); 2758 if (ret) 2759 mlog_errno(ret); 2760 } 2761 2762 out: 2763 if (ret) { 2764 if (*meta_ac) { 2765 ocfs2_free_alloc_context(*meta_ac); 2766 *meta_ac = NULL; 2767 } 2768 } 2769 2770 return ret; 2771 } 2772 2773 static int ocfs2_clear_cow_buffer(handle_t *handle, struct buffer_head *bh) 2774 { 2775 BUG_ON(buffer_dirty(bh)); 2776 2777 clear_buffer_mapped(bh); 2778 2779 return 0; 2780 } 2781 2782 static int ocfs2_duplicate_clusters_by_page(handle_t *handle, 2783 struct ocfs2_cow_context *context, 2784 u32 cpos, u32 old_cluster, 2785 u32 new_cluster, u32 new_len) 2786 { 2787 int ret = 0, partial; 2788 struct ocfs2_caching_info *ci = context->data_et.et_ci; 2789 struct super_block *sb = ocfs2_metadata_cache_get_super(ci); 2790 u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster); 2791 struct page *page; 2792 pgoff_t page_index; 2793 unsigned int from, to; 2794 loff_t offset, end, map_end; 2795 struct address_space *mapping = context->inode->i_mapping; 2796 2797 mlog(0, "old_cluster %u, new %u, len %u at offset %u\n", old_cluster, 2798 new_cluster, new_len, cpos); 2799 2800 offset = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits; 2801 end = offset + (new_len << OCFS2_SB(sb)->s_clustersize_bits); 2802 2803 while (offset < end) { 2804 page_index = offset >> PAGE_CACHE_SHIFT; 2805 map_end = (page_index + 1) << PAGE_CACHE_SHIFT; 2806 if (map_end > end) 2807 map_end = end; 2808 2809 /* from, to is the offset within the page. */ 2810 from = offset & (PAGE_CACHE_SIZE - 1); 2811 to = PAGE_CACHE_SIZE; 2812 if (map_end & (PAGE_CACHE_SIZE - 1)) 2813 to = map_end & (PAGE_CACHE_SIZE - 1); 2814 2815 page = grab_cache_page(mapping, page_index); 2816 2817 /* This page can't be dirtied before we CoW it out. */ 2818 BUG_ON(PageDirty(page)); 2819 2820 if (!PageUptodate(page)) { 2821 ret = block_read_full_page(page, ocfs2_get_block); 2822 if (ret) { 2823 mlog_errno(ret); 2824 goto unlock; 2825 } 2826 lock_page(page); 2827 } 2828 2829 if (page_has_buffers(page)) { 2830 ret = walk_page_buffers(handle, page_buffers(page), 2831 from, to, &partial, 2832 ocfs2_clear_cow_buffer); 2833 if (ret) { 2834 mlog_errno(ret); 2835 goto unlock; 2836 } 2837 } 2838 2839 ocfs2_map_and_dirty_page(context->inode, 2840 handle, from, to, 2841 page, 0, &new_block); 2842 mark_page_accessed(page); 2843 unlock: 2844 unlock_page(page); 2845 page_cache_release(page); 2846 page = NULL; 2847 offset = map_end; 2848 if (ret) 2849 break; 2850 } 2851 2852 return ret; 2853 } 2854 2855 static int ocfs2_duplicate_clusters_by_jbd(handle_t *handle, 2856 struct ocfs2_cow_context *context, 2857 u32 cpos, u32 old_cluster, 2858 u32 new_cluster, u32 new_len) 2859 { 2860 int ret = 0; 2861 struct super_block *sb = context->inode->i_sb; 2862 struct ocfs2_caching_info *ci = context->data_et.et_ci; 2863 int i, blocks = ocfs2_clusters_to_blocks(sb, new_len); 2864 u64 old_block = ocfs2_clusters_to_blocks(sb, old_cluster); 2865 u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster); 2866 struct ocfs2_super *osb = OCFS2_SB(sb); 2867 struct buffer_head *old_bh = NULL; 2868 struct buffer_head *new_bh = NULL; 2869 2870 mlog(0, "old_cluster %u, new %u, len %u\n", old_cluster, 2871 new_cluster, new_len); 2872 2873 for (i = 0; i < blocks; i++, old_block++, new_block++) { 2874 new_bh = sb_getblk(osb->sb, new_block); 2875 if (new_bh == NULL) { 2876 ret = -EIO; 2877 mlog_errno(ret); 2878 break; 2879 } 2880 2881 ocfs2_set_new_buffer_uptodate(ci, new_bh); 2882 2883 ret = ocfs2_read_block(ci, old_block, &old_bh, NULL); 2884 if (ret) { 2885 mlog_errno(ret); 2886 break; 2887 } 2888 2889 ret = ocfs2_journal_access(handle, ci, new_bh, 2890 OCFS2_JOURNAL_ACCESS_CREATE); 2891 if (ret) { 2892 mlog_errno(ret); 2893 break; 2894 } 2895 2896 memcpy(new_bh->b_data, old_bh->b_data, sb->s_blocksize); 2897 ret = ocfs2_journal_dirty(handle, new_bh); 2898 if (ret) { 2899 mlog_errno(ret); 2900 break; 2901 } 2902 2903 brelse(new_bh); 2904 brelse(old_bh); 2905 new_bh = NULL; 2906 old_bh = NULL; 2907 } 2908 2909 brelse(new_bh); 2910 brelse(old_bh); 2911 return ret; 2912 } 2913 2914 static int ocfs2_clear_ext_refcount(handle_t *handle, 2915 struct ocfs2_extent_tree *et, 2916 u32 cpos, u32 p_cluster, u32 len, 2917 unsigned int ext_flags, 2918 struct ocfs2_alloc_context *meta_ac, 2919 struct ocfs2_cached_dealloc_ctxt *dealloc) 2920 { 2921 int ret, index; 2922 struct ocfs2_extent_rec replace_rec; 2923 struct ocfs2_path *path = NULL; 2924 struct ocfs2_extent_list *el; 2925 struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci); 2926 u64 ino = ocfs2_metadata_cache_owner(et->et_ci); 2927 2928 mlog(0, "inode %llu cpos %u, len %u, p_cluster %u, ext_flags %u\n", 2929 (unsigned long long)ino, cpos, len, p_cluster, ext_flags); 2930 2931 memset(&replace_rec, 0, sizeof(replace_rec)); 2932 replace_rec.e_cpos = cpu_to_le32(cpos); 2933 replace_rec.e_leaf_clusters = cpu_to_le16(len); 2934 replace_rec.e_blkno = cpu_to_le64(ocfs2_clusters_to_blocks(sb, 2935 p_cluster)); 2936 replace_rec.e_flags = ext_flags; 2937 replace_rec.e_flags &= ~OCFS2_EXT_REFCOUNTED; 2938 2939 path = ocfs2_new_path_from_et(et); 2940 if (!path) { 2941 ret = -ENOMEM; 2942 mlog_errno(ret); 2943 goto out; 2944 } 2945 2946 ret = ocfs2_find_path(et->et_ci, path, cpos); 2947 if (ret) { 2948 mlog_errno(ret); 2949 goto out; 2950 } 2951 2952 el = path_leaf_el(path); 2953 2954 index = ocfs2_search_extent_list(el, cpos); 2955 if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) { 2956 ocfs2_error(sb, 2957 "Inode %llu has an extent at cpos %u which can no " 2958 "longer be found.\n", 2959 (unsigned long long)ino, cpos); 2960 ret = -EROFS; 2961 goto out; 2962 } 2963 2964 ret = ocfs2_split_extent(handle, et, path, index, 2965 &replace_rec, meta_ac, dealloc); 2966 if (ret) 2967 mlog_errno(ret); 2968 2969 out: 2970 ocfs2_free_path(path); 2971 return ret; 2972 } 2973 2974 static int ocfs2_replace_clusters(handle_t *handle, 2975 struct ocfs2_cow_context *context, 2976 u32 cpos, u32 old, 2977 u32 new, u32 len, 2978 unsigned int ext_flags) 2979 { 2980 int ret; 2981 struct ocfs2_caching_info *ci = context->data_et.et_ci; 2982 u64 ino = ocfs2_metadata_cache_owner(ci); 2983 2984 mlog(0, "inode %llu, cpos %u, old %u, new %u, len %u, ext_flags %u\n", 2985 (unsigned long long)ino, cpos, old, new, len, ext_flags); 2986 2987 /*If the old clusters is unwritten, no need to duplicate. */ 2988 if (!(ext_flags & OCFS2_EXT_UNWRITTEN)) { 2989 ret = context->cow_duplicate_clusters(handle, context, cpos, 2990 old, new, len); 2991 if (ret) { 2992 mlog_errno(ret); 2993 goto out; 2994 } 2995 } 2996 2997 ret = ocfs2_clear_ext_refcount(handle, &context->data_et, 2998 cpos, new, len, ext_flags, 2999 context->meta_ac, &context->dealloc); 3000 if (ret) 3001 mlog_errno(ret); 3002 out: 3003 return ret; 3004 } 3005 3006 static int ocfs2_cow_sync_writeback(struct super_block *sb, 3007 struct ocfs2_cow_context *context, 3008 u32 cpos, u32 num_clusters) 3009 { 3010 int ret = 0; 3011 loff_t offset, end, map_end; 3012 pgoff_t page_index; 3013 struct page *page; 3014 3015 if (ocfs2_should_order_data(context->inode)) 3016 return 0; 3017 3018 offset = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits; 3019 end = offset + (num_clusters << OCFS2_SB(sb)->s_clustersize_bits); 3020 3021 ret = filemap_fdatawrite_range(context->inode->i_mapping, 3022 offset, end - 1); 3023 if (ret < 0) { 3024 mlog_errno(ret); 3025 return ret; 3026 } 3027 3028 while (offset < end) { 3029 page_index = offset >> PAGE_CACHE_SHIFT; 3030 map_end = (page_index + 1) << PAGE_CACHE_SHIFT; 3031 if (map_end > end) 3032 map_end = end; 3033 3034 page = grab_cache_page(context->inode->i_mapping, page_index); 3035 BUG_ON(!page); 3036 3037 wait_on_page_writeback(page); 3038 if (PageError(page)) { 3039 ret = -EIO; 3040 mlog_errno(ret); 3041 } else 3042 mark_page_accessed(page); 3043 3044 unlock_page(page); 3045 page_cache_release(page); 3046 page = NULL; 3047 offset = map_end; 3048 if (ret) 3049 break; 3050 } 3051 3052 return ret; 3053 } 3054 3055 static int ocfs2_di_get_clusters(struct ocfs2_cow_context *context, 3056 u32 v_cluster, u32 *p_cluster, 3057 u32 *num_clusters, 3058 unsigned int *extent_flags) 3059 { 3060 return ocfs2_get_clusters(context->inode, v_cluster, p_cluster, 3061 num_clusters, extent_flags); 3062 } 3063 3064 static int ocfs2_make_clusters_writable(struct super_block *sb, 3065 struct ocfs2_cow_context *context, 3066 u32 cpos, u32 p_cluster, 3067 u32 num_clusters, unsigned int e_flags) 3068 { 3069 int ret, delete, index, credits = 0; 3070 u32 new_bit, new_len; 3071 unsigned int set_len; 3072 struct ocfs2_super *osb = OCFS2_SB(sb); 3073 handle_t *handle; 3074 struct buffer_head *ref_leaf_bh = NULL; 3075 struct ocfs2_caching_info *ref_ci = &context->ref_tree->rf_ci; 3076 struct ocfs2_refcount_rec rec; 3077 3078 mlog(0, "cpos %u, p_cluster %u, num_clusters %u, e_flags %u\n", 3079 cpos, p_cluster, num_clusters, e_flags); 3080 3081 ret = ocfs2_lock_refcount_allocators(sb, p_cluster, num_clusters, 3082 &context->data_et, 3083 ref_ci, 3084 context->ref_root_bh, 3085 &context->meta_ac, 3086 &context->data_ac, &credits); 3087 if (ret) { 3088 mlog_errno(ret); 3089 return ret; 3090 } 3091 3092 if (context->post_refcount) 3093 credits += context->post_refcount->credits; 3094 3095 credits += context->extra_credits; 3096 handle = ocfs2_start_trans(osb, credits); 3097 if (IS_ERR(handle)) { 3098 ret = PTR_ERR(handle); 3099 mlog_errno(ret); 3100 goto out; 3101 } 3102 3103 while (num_clusters) { 3104 ret = ocfs2_get_refcount_rec(ref_ci, context->ref_root_bh, 3105 p_cluster, num_clusters, 3106 &rec, &index, &ref_leaf_bh); 3107 if (ret) { 3108 mlog_errno(ret); 3109 goto out_commit; 3110 } 3111 3112 BUG_ON(!rec.r_refcount); 3113 set_len = min((u64)p_cluster + num_clusters, 3114 le64_to_cpu(rec.r_cpos) + 3115 le32_to_cpu(rec.r_clusters)) - p_cluster; 3116 3117 /* 3118 * There are many different situation here. 3119 * 1. If refcount == 1, remove the flag and don't COW. 3120 * 2. If refcount > 1, allocate clusters. 3121 * Here we may not allocate r_len once at a time, so continue 3122 * until we reach num_clusters. 3123 */ 3124 if (le32_to_cpu(rec.r_refcount) == 1) { 3125 delete = 0; 3126 ret = ocfs2_clear_ext_refcount(handle, 3127 &context->data_et, 3128 cpos, p_cluster, 3129 set_len, e_flags, 3130 context->meta_ac, 3131 &context->dealloc); 3132 if (ret) { 3133 mlog_errno(ret); 3134 goto out_commit; 3135 } 3136 } else { 3137 delete = 1; 3138 3139 ret = __ocfs2_claim_clusters(osb, handle, 3140 context->data_ac, 3141 1, set_len, 3142 &new_bit, &new_len); 3143 if (ret) { 3144 mlog_errno(ret); 3145 goto out_commit; 3146 } 3147 3148 ret = ocfs2_replace_clusters(handle, context, 3149 cpos, p_cluster, new_bit, 3150 new_len, e_flags); 3151 if (ret) { 3152 mlog_errno(ret); 3153 goto out_commit; 3154 } 3155 set_len = new_len; 3156 } 3157 3158 ret = __ocfs2_decrease_refcount(handle, ref_ci, 3159 context->ref_root_bh, 3160 p_cluster, set_len, 3161 context->meta_ac, 3162 &context->dealloc, delete); 3163 if (ret) { 3164 mlog_errno(ret); 3165 goto out_commit; 3166 } 3167 3168 cpos += set_len; 3169 p_cluster += set_len; 3170 num_clusters -= set_len; 3171 brelse(ref_leaf_bh); 3172 ref_leaf_bh = NULL; 3173 } 3174 3175 /* handle any post_cow action. */ 3176 if (context->post_refcount && context->post_refcount->func) { 3177 ret = context->post_refcount->func(context->inode, handle, 3178 context->post_refcount->para); 3179 if (ret) { 3180 mlog_errno(ret); 3181 goto out_commit; 3182 } 3183 } 3184 3185 /* 3186 * Here we should write the new page out first if we are 3187 * in write-back mode. 3188 */ 3189 if (context->get_clusters == ocfs2_di_get_clusters) { 3190 ret = ocfs2_cow_sync_writeback(sb, context, cpos, num_clusters); 3191 if (ret) 3192 mlog_errno(ret); 3193 } 3194 3195 out_commit: 3196 ocfs2_commit_trans(osb, handle); 3197 3198 out: 3199 if (context->data_ac) { 3200 ocfs2_free_alloc_context(context->data_ac); 3201 context->data_ac = NULL; 3202 } 3203 if (context->meta_ac) { 3204 ocfs2_free_alloc_context(context->meta_ac); 3205 context->meta_ac = NULL; 3206 } 3207 brelse(ref_leaf_bh); 3208 3209 return ret; 3210 } 3211 3212 static int ocfs2_replace_cow(struct ocfs2_cow_context *context) 3213 { 3214 int ret = 0; 3215 struct inode *inode = context->inode; 3216 u32 cow_start = context->cow_start, cow_len = context->cow_len; 3217 u32 p_cluster, num_clusters; 3218 unsigned int ext_flags; 3219 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 3220 3221 if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb))) { 3222 ocfs2_error(inode->i_sb, "Inode %lu want to use refcount " 3223 "tree, but the feature bit is not set in the " 3224 "super block.", inode->i_ino); 3225 return -EROFS; 3226 } 3227 3228 ocfs2_init_dealloc_ctxt(&context->dealloc); 3229 3230 while (cow_len) { 3231 ret = context->get_clusters(context, cow_start, &p_cluster, 3232 &num_clusters, &ext_flags); 3233 if (ret) { 3234 mlog_errno(ret); 3235 break; 3236 } 3237 3238 BUG_ON(!(ext_flags & OCFS2_EXT_REFCOUNTED)); 3239 3240 if (cow_len < num_clusters) 3241 num_clusters = cow_len; 3242 3243 ret = ocfs2_make_clusters_writable(inode->i_sb, context, 3244 cow_start, p_cluster, 3245 num_clusters, ext_flags); 3246 if (ret) { 3247 mlog_errno(ret); 3248 break; 3249 } 3250 3251 cow_len -= num_clusters; 3252 cow_start += num_clusters; 3253 } 3254 3255 if (ocfs2_dealloc_has_cluster(&context->dealloc)) { 3256 ocfs2_schedule_truncate_log_flush(osb, 1); 3257 ocfs2_run_deallocs(osb, &context->dealloc); 3258 } 3259 3260 return ret; 3261 } 3262 3263 /* 3264 * Starting at cpos, try to CoW write_len clusters. Don't CoW 3265 * past max_cpos. This will stop when it runs into a hole or an 3266 * unrefcounted extent. 3267 */ 3268 static int ocfs2_refcount_cow_hunk(struct inode *inode, 3269 struct buffer_head *di_bh, 3270 u32 cpos, u32 write_len, u32 max_cpos) 3271 { 3272 int ret; 3273 u32 cow_start = 0, cow_len = 0; 3274 struct ocfs2_inode_info *oi = OCFS2_I(inode); 3275 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 3276 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 3277 struct buffer_head *ref_root_bh = NULL; 3278 struct ocfs2_refcount_tree *ref_tree; 3279 struct ocfs2_cow_context *context = NULL; 3280 3281 BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL)); 3282 3283 ret = ocfs2_refcount_cal_cow_clusters(inode, &di->id2.i_list, 3284 cpos, write_len, max_cpos, 3285 &cow_start, &cow_len); 3286 if (ret) { 3287 mlog_errno(ret); 3288 goto out; 3289 } 3290 3291 mlog(0, "CoW inode %lu, cpos %u, write_len %u, cow_start %u, " 3292 "cow_len %u\n", inode->i_ino, 3293 cpos, write_len, cow_start, cow_len); 3294 3295 BUG_ON(cow_len == 0); 3296 3297 context = kzalloc(sizeof(struct ocfs2_cow_context), GFP_NOFS); 3298 if (!context) { 3299 ret = -ENOMEM; 3300 mlog_errno(ret); 3301 goto out; 3302 } 3303 3304 ret = ocfs2_lock_refcount_tree(osb, le64_to_cpu(di->i_refcount_loc), 3305 1, &ref_tree, &ref_root_bh); 3306 if (ret) { 3307 mlog_errno(ret); 3308 goto out; 3309 } 3310 3311 context->inode = inode; 3312 context->cow_start = cow_start; 3313 context->cow_len = cow_len; 3314 context->ref_tree = ref_tree; 3315 context->ref_root_bh = ref_root_bh; 3316 context->cow_duplicate_clusters = ocfs2_duplicate_clusters_by_page; 3317 context->get_clusters = ocfs2_di_get_clusters; 3318 3319 ocfs2_init_dinode_extent_tree(&context->data_et, 3320 INODE_CACHE(inode), di_bh); 3321 3322 ret = ocfs2_replace_cow(context); 3323 if (ret) 3324 mlog_errno(ret); 3325 3326 /* 3327 * truncate the extent map here since no matter whether we meet with 3328 * any error during the action, we shouldn't trust cached extent map 3329 * any more. 3330 */ 3331 ocfs2_extent_map_trunc(inode, cow_start); 3332 3333 ocfs2_unlock_refcount_tree(osb, ref_tree, 1); 3334 brelse(ref_root_bh); 3335 out: 3336 kfree(context); 3337 return ret; 3338 } 3339 3340 /* 3341 * CoW any and all clusters between cpos and cpos+write_len. 3342 * Don't CoW past max_cpos. If this returns successfully, all 3343 * clusters between cpos and cpos+write_len are safe to modify. 3344 */ 3345 int ocfs2_refcount_cow(struct inode *inode, 3346 struct buffer_head *di_bh, 3347 u32 cpos, u32 write_len, u32 max_cpos) 3348 { 3349 int ret = 0; 3350 u32 p_cluster, num_clusters; 3351 unsigned int ext_flags; 3352 3353 while (write_len) { 3354 ret = ocfs2_get_clusters(inode, cpos, &p_cluster, 3355 &num_clusters, &ext_flags); 3356 if (ret) { 3357 mlog_errno(ret); 3358 break; 3359 } 3360 3361 if (write_len < num_clusters) 3362 num_clusters = write_len; 3363 3364 if (ext_flags & OCFS2_EXT_REFCOUNTED) { 3365 ret = ocfs2_refcount_cow_hunk(inode, di_bh, cpos, 3366 num_clusters, max_cpos); 3367 if (ret) { 3368 mlog_errno(ret); 3369 break; 3370 } 3371 } 3372 3373 write_len -= num_clusters; 3374 cpos += num_clusters; 3375 } 3376 3377 return ret; 3378 } 3379 3380 static int ocfs2_xattr_value_get_clusters(struct ocfs2_cow_context *context, 3381 u32 v_cluster, u32 *p_cluster, 3382 u32 *num_clusters, 3383 unsigned int *extent_flags) 3384 { 3385 struct inode *inode = context->inode; 3386 struct ocfs2_xattr_value_root *xv = context->cow_object; 3387 3388 return ocfs2_xattr_get_clusters(inode, v_cluster, p_cluster, 3389 num_clusters, &xv->xr_list, 3390 extent_flags); 3391 } 3392 3393 /* 3394 * Given a xattr value root, calculate the most meta/credits we need for 3395 * refcount tree change if we truncate it to 0. 3396 */ 3397 int ocfs2_refcounted_xattr_delete_need(struct inode *inode, 3398 struct ocfs2_caching_info *ref_ci, 3399 struct buffer_head *ref_root_bh, 3400 struct ocfs2_xattr_value_root *xv, 3401 int *meta_add, int *credits) 3402 { 3403 int ret = 0, index, ref_blocks = 0; 3404 u32 p_cluster, num_clusters; 3405 u32 cpos = 0, clusters = le32_to_cpu(xv->xr_clusters); 3406 struct ocfs2_refcount_block *rb; 3407 struct ocfs2_refcount_rec rec; 3408 struct buffer_head *ref_leaf_bh = NULL; 3409 3410 while (cpos < clusters) { 3411 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster, 3412 &num_clusters, &xv->xr_list, 3413 NULL); 3414 if (ret) { 3415 mlog_errno(ret); 3416 goto out; 3417 } 3418 3419 cpos += num_clusters; 3420 3421 while (num_clusters) { 3422 ret = ocfs2_get_refcount_rec(ref_ci, ref_root_bh, 3423 p_cluster, num_clusters, 3424 &rec, &index, 3425 &ref_leaf_bh); 3426 if (ret) { 3427 mlog_errno(ret); 3428 goto out; 3429 } 3430 3431 BUG_ON(!rec.r_refcount); 3432 3433 rb = (struct ocfs2_refcount_block *)ref_leaf_bh->b_data; 3434 3435 /* 3436 * We really don't know whether the other clusters is in 3437 * this refcount block or not, so just take the worst 3438 * case that all the clusters are in this block and each 3439 * one will split a refcount rec, so totally we need 3440 * clusters * 2 new refcount rec. 3441 */ 3442 if (le64_to_cpu(rb->rf_records.rl_used) + clusters * 2 > 3443 le16_to_cpu(rb->rf_records.rl_count)) 3444 ref_blocks++; 3445 3446 *credits += 1; 3447 brelse(ref_leaf_bh); 3448 ref_leaf_bh = NULL; 3449 3450 if (num_clusters <= le32_to_cpu(rec.r_clusters)) 3451 break; 3452 else 3453 num_clusters -= le32_to_cpu(rec.r_clusters); 3454 p_cluster += num_clusters; 3455 } 3456 } 3457 3458 *meta_add += ref_blocks; 3459 if (!ref_blocks) 3460 goto out; 3461 3462 rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data; 3463 if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL) 3464 *credits += OCFS2_EXPAND_REFCOUNT_TREE_CREDITS; 3465 else { 3466 struct ocfs2_extent_tree et; 3467 3468 ocfs2_init_refcount_extent_tree(&et, ref_ci, ref_root_bh); 3469 *credits += ocfs2_calc_extend_credits(inode->i_sb, 3470 et.et_root_el, 3471 ref_blocks); 3472 } 3473 3474 out: 3475 brelse(ref_leaf_bh); 3476 return ret; 3477 } 3478 3479 /* 3480 * Do CoW for xattr. 3481 */ 3482 int ocfs2_refcount_cow_xattr(struct inode *inode, 3483 struct ocfs2_dinode *di, 3484 struct ocfs2_xattr_value_buf *vb, 3485 struct ocfs2_refcount_tree *ref_tree, 3486 struct buffer_head *ref_root_bh, 3487 u32 cpos, u32 write_len, 3488 struct ocfs2_post_refcount *post) 3489 { 3490 int ret; 3491 struct ocfs2_xattr_value_root *xv = vb->vb_xv; 3492 struct ocfs2_inode_info *oi = OCFS2_I(inode); 3493 struct ocfs2_cow_context *context = NULL; 3494 u32 cow_start, cow_len; 3495 3496 BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL)); 3497 3498 ret = ocfs2_refcount_cal_cow_clusters(inode, &xv->xr_list, 3499 cpos, write_len, UINT_MAX, 3500 &cow_start, &cow_len); 3501 if (ret) { 3502 mlog_errno(ret); 3503 goto out; 3504 } 3505 3506 BUG_ON(cow_len == 0); 3507 3508 context = kzalloc(sizeof(struct ocfs2_cow_context), GFP_NOFS); 3509 if (!context) { 3510 ret = -ENOMEM; 3511 mlog_errno(ret); 3512 goto out; 3513 } 3514 3515 context->inode = inode; 3516 context->cow_start = cow_start; 3517 context->cow_len = cow_len; 3518 context->ref_tree = ref_tree; 3519 context->ref_root_bh = ref_root_bh;; 3520 context->cow_object = xv; 3521 3522 context->cow_duplicate_clusters = ocfs2_duplicate_clusters_by_jbd; 3523 /* We need the extra credits for duplicate_clusters by jbd. */ 3524 context->extra_credits = 3525 ocfs2_clusters_to_blocks(inode->i_sb, 1) * cow_len; 3526 context->get_clusters = ocfs2_xattr_value_get_clusters; 3527 context->post_refcount = post; 3528 3529 ocfs2_init_xattr_value_extent_tree(&context->data_et, 3530 INODE_CACHE(inode), vb); 3531 3532 ret = ocfs2_replace_cow(context); 3533 if (ret) 3534 mlog_errno(ret); 3535 3536 out: 3537 kfree(context); 3538 return ret; 3539 } 3540 3541 /* 3542 * Insert a new extent into refcount tree and mark a extent rec 3543 * as refcounted in the dinode tree. 3544 */ 3545 int ocfs2_add_refcount_flag(struct inode *inode, 3546 struct ocfs2_extent_tree *data_et, 3547 struct ocfs2_caching_info *ref_ci, 3548 struct buffer_head *ref_root_bh, 3549 u32 cpos, u32 p_cluster, u32 num_clusters, 3550 struct ocfs2_cached_dealloc_ctxt *dealloc, 3551 struct ocfs2_post_refcount *post) 3552 { 3553 int ret; 3554 handle_t *handle; 3555 int credits = 1, ref_blocks = 0; 3556 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 3557 struct ocfs2_alloc_context *meta_ac = NULL; 3558 3559 ret = ocfs2_calc_refcount_meta_credits(inode->i_sb, 3560 ref_ci, ref_root_bh, 3561 p_cluster, num_clusters, 3562 &ref_blocks, &credits); 3563 if (ret) { 3564 mlog_errno(ret); 3565 goto out; 3566 } 3567 3568 mlog(0, "reserve new metadata %d, credits = %d\n", 3569 ref_blocks, credits); 3570 3571 if (ref_blocks) { 3572 ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb), 3573 ref_blocks, &meta_ac); 3574 if (ret) { 3575 mlog_errno(ret); 3576 goto out; 3577 } 3578 } 3579 3580 if (post) 3581 credits += post->credits; 3582 3583 handle = ocfs2_start_trans(osb, credits); 3584 if (IS_ERR(handle)) { 3585 ret = PTR_ERR(handle); 3586 mlog_errno(ret); 3587 goto out; 3588 } 3589 3590 ret = ocfs2_mark_extent_refcounted(inode, data_et, handle, 3591 cpos, num_clusters, p_cluster, 3592 meta_ac, dealloc); 3593 if (ret) { 3594 mlog_errno(ret); 3595 goto out_commit; 3596 } 3597 3598 ret = __ocfs2_increase_refcount(handle, ref_ci, ref_root_bh, 3599 p_cluster, num_clusters, 3600 meta_ac, dealloc); 3601 if (ret) { 3602 mlog_errno(ret); 3603 goto out_commit; 3604 } 3605 3606 if (post && post->func) { 3607 ret = post->func(inode, handle, post->para); 3608 if (ret) 3609 mlog_errno(ret); 3610 } 3611 3612 out_commit: 3613 ocfs2_commit_trans(osb, handle); 3614 out: 3615 if (meta_ac) 3616 ocfs2_free_alloc_context(meta_ac); 3617 return ret; 3618 } 3619 3620 static int ocfs2_change_ctime(struct inode *inode, 3621 struct buffer_head *di_bh) 3622 { 3623 int ret; 3624 handle_t *handle; 3625 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 3626 3627 handle = ocfs2_start_trans(OCFS2_SB(inode->i_sb), 3628 OCFS2_INODE_UPDATE_CREDITS); 3629 if (IS_ERR(handle)) { 3630 ret = PTR_ERR(handle); 3631 mlog_errno(ret); 3632 goto out; 3633 } 3634 3635 ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh, 3636 OCFS2_JOURNAL_ACCESS_WRITE); 3637 if (ret) { 3638 mlog_errno(ret); 3639 goto out_commit; 3640 } 3641 3642 inode->i_ctime = CURRENT_TIME; 3643 di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); 3644 di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); 3645 3646 ocfs2_journal_dirty(handle, di_bh); 3647 3648 out_commit: 3649 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); 3650 out: 3651 return ret; 3652 } 3653 3654 static int ocfs2_attach_refcount_tree(struct inode *inode, 3655 struct buffer_head *di_bh) 3656 { 3657 int ret, data_changed = 0; 3658 struct buffer_head *ref_root_bh = NULL; 3659 struct ocfs2_inode_info *oi = OCFS2_I(inode); 3660 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 3661 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 3662 struct ocfs2_refcount_tree *ref_tree; 3663 unsigned int ext_flags; 3664 loff_t size; 3665 u32 cpos, num_clusters, clusters, p_cluster; 3666 struct ocfs2_cached_dealloc_ctxt dealloc; 3667 struct ocfs2_extent_tree di_et; 3668 3669 ocfs2_init_dealloc_ctxt(&dealloc); 3670 3671 if (!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL)) { 3672 ret = ocfs2_create_refcount_tree(inode, di_bh); 3673 if (ret) { 3674 mlog_errno(ret); 3675 goto out; 3676 } 3677 } 3678 3679 BUG_ON(!di->i_refcount_loc); 3680 ret = ocfs2_lock_refcount_tree(osb, 3681 le64_to_cpu(di->i_refcount_loc), 1, 3682 &ref_tree, &ref_root_bh); 3683 if (ret) { 3684 mlog_errno(ret); 3685 goto out; 3686 } 3687 3688 ocfs2_init_dinode_extent_tree(&di_et, INODE_CACHE(inode), di_bh); 3689 3690 size = i_size_read(inode); 3691 clusters = ocfs2_clusters_for_bytes(inode->i_sb, size); 3692 3693 cpos = 0; 3694 while (cpos < clusters) { 3695 ret = ocfs2_get_clusters(inode, cpos, &p_cluster, 3696 &num_clusters, &ext_flags); 3697 3698 if (p_cluster && !(ext_flags & OCFS2_EXT_REFCOUNTED)) { 3699 ret = ocfs2_add_refcount_flag(inode, &di_et, 3700 &ref_tree->rf_ci, 3701 ref_root_bh, cpos, 3702 p_cluster, num_clusters, 3703 &dealloc, NULL); 3704 if (ret) { 3705 mlog_errno(ret); 3706 goto unlock; 3707 } 3708 3709 data_changed = 1; 3710 } 3711 cpos += num_clusters; 3712 } 3713 3714 if (oi->ip_dyn_features & OCFS2_HAS_XATTR_FL) { 3715 ret = ocfs2_xattr_attach_refcount_tree(inode, di_bh, 3716 &ref_tree->rf_ci, 3717 ref_root_bh, 3718 &dealloc); 3719 if (ret) { 3720 mlog_errno(ret); 3721 goto unlock; 3722 } 3723 } 3724 3725 if (data_changed) { 3726 ret = ocfs2_change_ctime(inode, di_bh); 3727 if (ret) 3728 mlog_errno(ret); 3729 } 3730 3731 unlock: 3732 ocfs2_unlock_refcount_tree(osb, ref_tree, 1); 3733 brelse(ref_root_bh); 3734 3735 if (!ret && ocfs2_dealloc_has_cluster(&dealloc)) { 3736 ocfs2_schedule_truncate_log_flush(osb, 1); 3737 ocfs2_run_deallocs(osb, &dealloc); 3738 } 3739 out: 3740 /* 3741 * Empty the extent map so that we may get the right extent 3742 * record from the disk. 3743 */ 3744 ocfs2_extent_map_trunc(inode, 0); 3745 3746 return ret; 3747 } 3748 3749 static int ocfs2_add_refcounted_extent(struct inode *inode, 3750 struct ocfs2_extent_tree *et, 3751 struct ocfs2_caching_info *ref_ci, 3752 struct buffer_head *ref_root_bh, 3753 u32 cpos, u32 p_cluster, u32 num_clusters, 3754 unsigned int ext_flags, 3755 struct ocfs2_cached_dealloc_ctxt *dealloc) 3756 { 3757 int ret; 3758 handle_t *handle; 3759 int credits = 0; 3760 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 3761 struct ocfs2_alloc_context *meta_ac = NULL; 3762 3763 ret = ocfs2_lock_refcount_allocators(inode->i_sb, 3764 p_cluster, num_clusters, 3765 et, ref_ci, 3766 ref_root_bh, &meta_ac, 3767 NULL, &credits); 3768 if (ret) { 3769 mlog_errno(ret); 3770 goto out; 3771 } 3772 3773 handle = ocfs2_start_trans(osb, credits); 3774 if (IS_ERR(handle)) { 3775 ret = PTR_ERR(handle); 3776 mlog_errno(ret); 3777 goto out; 3778 } 3779 3780 ret = ocfs2_insert_extent(handle, et, cpos, 3781 cpu_to_le64(ocfs2_clusters_to_blocks(inode->i_sb, 3782 p_cluster)), 3783 num_clusters, ext_flags, meta_ac); 3784 if (ret) { 3785 mlog_errno(ret); 3786 goto out_commit; 3787 } 3788 3789 ret = __ocfs2_increase_refcount(handle, ref_ci, ref_root_bh, 3790 p_cluster, num_clusters, 3791 meta_ac, dealloc); 3792 if (ret) 3793 mlog_errno(ret); 3794 3795 out_commit: 3796 ocfs2_commit_trans(osb, handle); 3797 out: 3798 if (meta_ac) 3799 ocfs2_free_alloc_context(meta_ac); 3800 return ret; 3801 } 3802 3803 static int ocfs2_duplicate_extent_list(struct inode *s_inode, 3804 struct inode *t_inode, 3805 struct buffer_head *t_bh, 3806 struct ocfs2_caching_info *ref_ci, 3807 struct buffer_head *ref_root_bh, 3808 struct ocfs2_cached_dealloc_ctxt *dealloc) 3809 { 3810 int ret = 0; 3811 u32 p_cluster, num_clusters, clusters, cpos; 3812 loff_t size; 3813 unsigned int ext_flags; 3814 struct ocfs2_extent_tree et; 3815 3816 ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(t_inode), t_bh); 3817 3818 size = i_size_read(s_inode); 3819 clusters = ocfs2_clusters_for_bytes(s_inode->i_sb, size); 3820 3821 cpos = 0; 3822 while (cpos < clusters) { 3823 ret = ocfs2_get_clusters(s_inode, cpos, &p_cluster, 3824 &num_clusters, &ext_flags); 3825 3826 if (p_cluster) { 3827 ret = ocfs2_add_refcounted_extent(t_inode, &et, 3828 ref_ci, ref_root_bh, 3829 cpos, p_cluster, 3830 num_clusters, 3831 ext_flags, 3832 dealloc); 3833 if (ret) { 3834 mlog_errno(ret); 3835 goto out; 3836 } 3837 } 3838 3839 cpos += num_clusters; 3840 } 3841 3842 out: 3843 return ret; 3844 } 3845 3846 /* 3847 * change the new file's attributes to the src. 3848 * 3849 * reflink creates a snapshot of a file, that means the attributes 3850 * must be identical except for three exceptions - nlink, ino, and ctime. 3851 */ 3852 static int ocfs2_complete_reflink(struct inode *s_inode, 3853 struct buffer_head *s_bh, 3854 struct inode *t_inode, 3855 struct buffer_head *t_bh) 3856 { 3857 int ret; 3858 handle_t *handle; 3859 struct ocfs2_dinode *s_di = (struct ocfs2_dinode *)s_bh->b_data; 3860 struct ocfs2_dinode *di = (struct ocfs2_dinode *)t_bh->b_data; 3861 loff_t size = i_size_read(s_inode); 3862 3863 handle = ocfs2_start_trans(OCFS2_SB(t_inode->i_sb), 3864 OCFS2_INODE_UPDATE_CREDITS); 3865 if (IS_ERR(handle)) { 3866 ret = PTR_ERR(handle); 3867 mlog_errno(ret); 3868 return ret; 3869 } 3870 3871 ret = ocfs2_journal_access_di(handle, INODE_CACHE(t_inode), t_bh, 3872 OCFS2_JOURNAL_ACCESS_WRITE); 3873 if (ret) { 3874 mlog_errno(ret); 3875 goto out_commit; 3876 } 3877 3878 spin_lock(&OCFS2_I(t_inode)->ip_lock); 3879 OCFS2_I(t_inode)->ip_clusters = OCFS2_I(s_inode)->ip_clusters; 3880 OCFS2_I(t_inode)->ip_attr = OCFS2_I(s_inode)->ip_attr; 3881 OCFS2_I(t_inode)->ip_dyn_features = OCFS2_I(s_inode)->ip_dyn_features; 3882 spin_unlock(&OCFS2_I(t_inode)->ip_lock); 3883 i_size_write(t_inode, size); 3884 3885 di->i_xattr_inline_size = s_di->i_xattr_inline_size; 3886 di->i_clusters = s_di->i_clusters; 3887 di->i_size = s_di->i_size; 3888 di->i_dyn_features = s_di->i_dyn_features; 3889 di->i_attr = s_di->i_attr; 3890 di->i_uid = s_di->i_uid; 3891 di->i_gid = s_di->i_gid; 3892 di->i_mode = s_di->i_mode; 3893 3894 /* 3895 * update time. 3896 * we want mtime to appear identical to the source and update ctime. 3897 */ 3898 t_inode->i_ctime = CURRENT_TIME; 3899 3900 di->i_ctime = cpu_to_le64(t_inode->i_ctime.tv_sec); 3901 di->i_ctime_nsec = cpu_to_le32(t_inode->i_ctime.tv_nsec); 3902 3903 t_inode->i_mtime = s_inode->i_mtime; 3904 di->i_mtime = s_di->i_mtime; 3905 di->i_mtime_nsec = s_di->i_mtime_nsec; 3906 3907 ocfs2_journal_dirty(handle, t_bh); 3908 3909 out_commit: 3910 ocfs2_commit_trans(OCFS2_SB(t_inode->i_sb), handle); 3911 return ret; 3912 } 3913 3914 static int ocfs2_create_reflink_node(struct inode *s_inode, 3915 struct buffer_head *s_bh, 3916 struct inode *t_inode, 3917 struct buffer_head *t_bh) 3918 { 3919 int ret; 3920 struct buffer_head *ref_root_bh = NULL; 3921 struct ocfs2_cached_dealloc_ctxt dealloc; 3922 struct ocfs2_super *osb = OCFS2_SB(s_inode->i_sb); 3923 struct ocfs2_refcount_block *rb; 3924 struct ocfs2_dinode *di = (struct ocfs2_dinode *)s_bh->b_data; 3925 struct ocfs2_refcount_tree *ref_tree; 3926 3927 ocfs2_init_dealloc_ctxt(&dealloc); 3928 3929 ret = ocfs2_set_refcount_tree(t_inode, t_bh, 3930 le64_to_cpu(di->i_refcount_loc)); 3931 if (ret) { 3932 mlog_errno(ret); 3933 goto out; 3934 } 3935 3936 ret = ocfs2_lock_refcount_tree(osb, le64_to_cpu(di->i_refcount_loc), 3937 1, &ref_tree, &ref_root_bh); 3938 if (ret) { 3939 mlog_errno(ret); 3940 goto out; 3941 } 3942 rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data; 3943 3944 ret = ocfs2_duplicate_extent_list(s_inode, t_inode, t_bh, 3945 &ref_tree->rf_ci, ref_root_bh, 3946 &dealloc); 3947 if (ret) { 3948 mlog_errno(ret); 3949 goto out_unlock_refcount; 3950 } 3951 3952 ret = ocfs2_complete_reflink(s_inode, s_bh, t_inode, t_bh); 3953 if (ret) 3954 mlog_errno(ret); 3955 3956 out_unlock_refcount: 3957 ocfs2_unlock_refcount_tree(osb, ref_tree, 1); 3958 brelse(ref_root_bh); 3959 out: 3960 if (ocfs2_dealloc_has_cluster(&dealloc)) { 3961 ocfs2_schedule_truncate_log_flush(osb, 1); 3962 ocfs2_run_deallocs(osb, &dealloc); 3963 } 3964 3965 return ret; 3966 } 3967