1f2c870e3STao Ma /* -*- mode: c; c-basic-offset: 8; -*- 2f2c870e3STao Ma * vim: noexpandtab sw=8 ts=8 sts=0: 3f2c870e3STao Ma * 4f2c870e3STao Ma * refcounttree.c 5f2c870e3STao Ma * 6f2c870e3STao Ma * Copyright (C) 2009 Oracle. All rights reserved. 7f2c870e3STao Ma * 8f2c870e3STao Ma * This program is free software; you can redistribute it and/or 9f2c870e3STao Ma * modify it under the terms of the GNU General Public 10f2c870e3STao Ma * License version 2 as published by the Free Software Foundation. 11f2c870e3STao Ma * 12f2c870e3STao Ma * This program is distributed in the hope that it will be useful, 13f2c870e3STao Ma * but WITHOUT ANY WARRANTY; without even the implied warranty of 14f2c870e3STao Ma * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15f2c870e3STao Ma * General Public License for more details. 16f2c870e3STao Ma */ 17f2c870e3STao Ma 18e73a819dSTao Ma #include <linux/sort.h> 19f2c870e3STao Ma #define MLOG_MASK_PREFIX ML_REFCOUNT 20f2c870e3STao Ma #include <cluster/masklog.h> 21f2c870e3STao Ma #include "ocfs2.h" 22f2c870e3STao Ma #include "inode.h" 23f2c870e3STao Ma #include "alloc.h" 24f2c870e3STao Ma #include "suballoc.h" 25f2c870e3STao Ma #include "journal.h" 26f2c870e3STao Ma #include "uptodate.h" 27f2c870e3STao Ma #include "super.h" 28f2c870e3STao Ma #include "buffer_head_io.h" 29f2c870e3STao Ma #include "blockcheck.h" 30c732eb16STao Ma #include "refcounttree.h" 318bf396deSTao Ma #include "sysfile.h" 32374a263eSTao Ma #include "dlmglue.h" 33e73a819dSTao Ma #include "extent_map.h" 34c732eb16STao Ma 35c732eb16STao Ma static inline struct ocfs2_refcount_tree * 36c732eb16STao Ma cache_info_to_refcount(struct ocfs2_caching_info *ci) 37c732eb16STao Ma { 38c732eb16STao Ma return container_of(ci, struct ocfs2_refcount_tree, rf_ci); 39c732eb16STao Ma } 40f2c870e3STao Ma 41f2c870e3STao Ma static int ocfs2_validate_refcount_block(struct super_block *sb, 42f2c870e3STao Ma struct buffer_head *bh) 43f2c870e3STao Ma { 44f2c870e3STao Ma int rc; 45f2c870e3STao Ma struct ocfs2_refcount_block *rb = 46f2c870e3STao Ma (struct ocfs2_refcount_block *)bh->b_data; 47f2c870e3STao Ma 48f2c870e3STao Ma mlog(0, "Validating refcount block %llu\n", 49f2c870e3STao Ma (unsigned long long)bh->b_blocknr); 50f2c870e3STao Ma 51f2c870e3STao Ma BUG_ON(!buffer_uptodate(bh)); 52f2c870e3STao Ma 53f2c870e3STao Ma /* 54f2c870e3STao Ma * If the ecc fails, we return the error but otherwise 55f2c870e3STao Ma * leave the filesystem running. We know any error is 56f2c870e3STao Ma * local to this block. 57f2c870e3STao Ma */ 58f2c870e3STao Ma rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &rb->rf_check); 59f2c870e3STao Ma if (rc) { 60f2c870e3STao Ma mlog(ML_ERROR, "Checksum failed for refcount block %llu\n", 61f2c870e3STao Ma (unsigned long long)bh->b_blocknr); 62f2c870e3STao Ma return rc; 63f2c870e3STao Ma } 64f2c870e3STao Ma 65f2c870e3STao Ma 66f2c870e3STao Ma if (!OCFS2_IS_VALID_REFCOUNT_BLOCK(rb)) { 67f2c870e3STao Ma ocfs2_error(sb, 68f2c870e3STao Ma "Refcount block #%llu has bad signature %.*s", 69f2c870e3STao Ma (unsigned long long)bh->b_blocknr, 7, 70f2c870e3STao Ma rb->rf_signature); 71f2c870e3STao Ma return -EINVAL; 72f2c870e3STao Ma } 73f2c870e3STao Ma 74f2c870e3STao Ma if (le64_to_cpu(rb->rf_blkno) != bh->b_blocknr) { 75f2c870e3STao Ma ocfs2_error(sb, 76f2c870e3STao Ma "Refcount block #%llu has an invalid rf_blkno " 77f2c870e3STao Ma "of %llu", 78f2c870e3STao Ma (unsigned long long)bh->b_blocknr, 79f2c870e3STao Ma (unsigned long long)le64_to_cpu(rb->rf_blkno)); 80f2c870e3STao Ma return -EINVAL; 81f2c870e3STao Ma } 82f2c870e3STao Ma 83f2c870e3STao Ma if (le32_to_cpu(rb->rf_fs_generation) != OCFS2_SB(sb)->fs_generation) { 84f2c870e3STao Ma ocfs2_error(sb, 85f2c870e3STao Ma "Refcount block #%llu has an invalid " 86f2c870e3STao Ma "rf_fs_generation of #%u", 87f2c870e3STao Ma (unsigned long long)bh->b_blocknr, 88f2c870e3STao Ma le32_to_cpu(rb->rf_fs_generation)); 89f2c870e3STao Ma return -EINVAL; 90f2c870e3STao Ma } 91f2c870e3STao Ma 92f2c870e3STao Ma return 0; 93f2c870e3STao Ma } 94f2c870e3STao Ma 95f2c870e3STao Ma static int ocfs2_read_refcount_block(struct ocfs2_caching_info *ci, 96f2c870e3STao Ma u64 rb_blkno, 97f2c870e3STao Ma struct buffer_head **bh) 98f2c870e3STao Ma { 99f2c870e3STao Ma int rc; 100f2c870e3STao Ma struct buffer_head *tmp = *bh; 101f2c870e3STao Ma 102f2c870e3STao Ma rc = ocfs2_read_block(ci, rb_blkno, &tmp, 103f2c870e3STao Ma ocfs2_validate_refcount_block); 104f2c870e3STao Ma 105f2c870e3STao Ma /* If ocfs2_read_block() got us a new bh, pass it up. */ 106f2c870e3STao Ma if (!rc && !*bh) 107f2c870e3STao Ma *bh = tmp; 108f2c870e3STao Ma 109f2c870e3STao Ma return rc; 110f2c870e3STao Ma } 111c732eb16STao Ma 112c732eb16STao Ma static u64 ocfs2_refcount_cache_owner(struct ocfs2_caching_info *ci) 113c732eb16STao Ma { 114c732eb16STao Ma struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci); 115c732eb16STao Ma 116c732eb16STao Ma return rf->rf_blkno; 117c732eb16STao Ma } 118c732eb16STao Ma 119c732eb16STao Ma static struct super_block * 120c732eb16STao Ma ocfs2_refcount_cache_get_super(struct ocfs2_caching_info *ci) 121c732eb16STao Ma { 122c732eb16STao Ma struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci); 123c732eb16STao Ma 124c732eb16STao Ma return rf->rf_sb; 125c732eb16STao Ma } 126c732eb16STao Ma 127c732eb16STao Ma static void ocfs2_refcount_cache_lock(struct ocfs2_caching_info *ci) 128c732eb16STao Ma { 129c732eb16STao Ma struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci); 130c732eb16STao Ma 131c732eb16STao Ma spin_lock(&rf->rf_lock); 132c732eb16STao Ma } 133c732eb16STao Ma 134c732eb16STao Ma static void ocfs2_refcount_cache_unlock(struct ocfs2_caching_info *ci) 135c732eb16STao Ma { 136c732eb16STao Ma struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci); 137c732eb16STao Ma 138c732eb16STao Ma spin_unlock(&rf->rf_lock); 139c732eb16STao Ma } 140c732eb16STao Ma 141c732eb16STao Ma static void ocfs2_refcount_cache_io_lock(struct ocfs2_caching_info *ci) 142c732eb16STao Ma { 143c732eb16STao Ma struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci); 144c732eb16STao Ma 145c732eb16STao Ma mutex_lock(&rf->rf_io_mutex); 146c732eb16STao Ma } 147c732eb16STao Ma 148c732eb16STao Ma static void ocfs2_refcount_cache_io_unlock(struct ocfs2_caching_info *ci) 149c732eb16STao Ma { 150c732eb16STao Ma struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci); 151c732eb16STao Ma 152c732eb16STao Ma mutex_unlock(&rf->rf_io_mutex); 153c732eb16STao Ma } 154c732eb16STao Ma 155c732eb16STao Ma static const struct ocfs2_caching_operations ocfs2_refcount_caching_ops = { 156c732eb16STao Ma .co_owner = ocfs2_refcount_cache_owner, 157c732eb16STao Ma .co_get_super = ocfs2_refcount_cache_get_super, 158c732eb16STao Ma .co_cache_lock = ocfs2_refcount_cache_lock, 159c732eb16STao Ma .co_cache_unlock = ocfs2_refcount_cache_unlock, 160c732eb16STao Ma .co_io_lock = ocfs2_refcount_cache_io_lock, 161c732eb16STao Ma .co_io_unlock = ocfs2_refcount_cache_io_unlock, 162c732eb16STao Ma }; 163374a263eSTao Ma 164374a263eSTao Ma static struct ocfs2_refcount_tree * 165374a263eSTao Ma ocfs2_find_refcount_tree(struct ocfs2_super *osb, u64 blkno) 166374a263eSTao Ma { 167374a263eSTao Ma struct rb_node *n = osb->osb_rf_lock_tree.rb_node; 168374a263eSTao Ma struct ocfs2_refcount_tree *tree = NULL; 169374a263eSTao Ma 170374a263eSTao Ma while (n) { 171374a263eSTao Ma tree = rb_entry(n, struct ocfs2_refcount_tree, rf_node); 172374a263eSTao Ma 173374a263eSTao Ma if (blkno < tree->rf_blkno) 174374a263eSTao Ma n = n->rb_left; 175374a263eSTao Ma else if (blkno > tree->rf_blkno) 176374a263eSTao Ma n = n->rb_right; 177374a263eSTao Ma else 178374a263eSTao Ma return tree; 179374a263eSTao Ma } 180374a263eSTao Ma 181374a263eSTao Ma return NULL; 182374a263eSTao Ma } 183374a263eSTao Ma 184374a263eSTao Ma /* osb_lock is already locked. */ 185374a263eSTao Ma static void ocfs2_insert_refcount_tree(struct ocfs2_super *osb, 186374a263eSTao Ma struct ocfs2_refcount_tree *new) 187374a263eSTao Ma { 188374a263eSTao Ma u64 rf_blkno = new->rf_blkno; 189374a263eSTao Ma struct rb_node *parent = NULL; 190374a263eSTao Ma struct rb_node **p = &osb->osb_rf_lock_tree.rb_node; 191374a263eSTao Ma struct ocfs2_refcount_tree *tmp; 192374a263eSTao Ma 193374a263eSTao Ma while (*p) { 194374a263eSTao Ma parent = *p; 195374a263eSTao Ma 196374a263eSTao Ma tmp = rb_entry(parent, struct ocfs2_refcount_tree, 197374a263eSTao Ma rf_node); 198374a263eSTao Ma 199374a263eSTao Ma if (rf_blkno < tmp->rf_blkno) 200374a263eSTao Ma p = &(*p)->rb_left; 201374a263eSTao Ma else if (rf_blkno > tmp->rf_blkno) 202374a263eSTao Ma p = &(*p)->rb_right; 203374a263eSTao Ma else { 204374a263eSTao Ma /* This should never happen! */ 205374a263eSTao Ma mlog(ML_ERROR, "Duplicate refcount block %llu found!\n", 206374a263eSTao Ma (unsigned long long)rf_blkno); 207374a263eSTao Ma BUG(); 208374a263eSTao Ma } 209374a263eSTao Ma } 210374a263eSTao Ma 211374a263eSTao Ma rb_link_node(&new->rf_node, parent, p); 212374a263eSTao Ma rb_insert_color(&new->rf_node, &osb->osb_rf_lock_tree); 213374a263eSTao Ma } 214374a263eSTao Ma 215374a263eSTao Ma static void ocfs2_free_refcount_tree(struct ocfs2_refcount_tree *tree) 216374a263eSTao Ma { 217374a263eSTao Ma ocfs2_metadata_cache_exit(&tree->rf_ci); 218374a263eSTao Ma ocfs2_simple_drop_lockres(OCFS2_SB(tree->rf_sb), &tree->rf_lockres); 219374a263eSTao Ma ocfs2_lock_res_free(&tree->rf_lockres); 220374a263eSTao Ma kfree(tree); 221374a263eSTao Ma } 222374a263eSTao Ma 223374a263eSTao Ma static inline void 224374a263eSTao Ma ocfs2_erase_refcount_tree_from_list_no_lock(struct ocfs2_super *osb, 225374a263eSTao Ma struct ocfs2_refcount_tree *tree) 226374a263eSTao Ma { 227374a263eSTao Ma rb_erase(&tree->rf_node, &osb->osb_rf_lock_tree); 228374a263eSTao Ma if (osb->osb_ref_tree_lru && osb->osb_ref_tree_lru == tree) 229374a263eSTao Ma osb->osb_ref_tree_lru = NULL; 230374a263eSTao Ma } 231374a263eSTao Ma 232374a263eSTao Ma static void ocfs2_erase_refcount_tree_from_list(struct ocfs2_super *osb, 233374a263eSTao Ma struct ocfs2_refcount_tree *tree) 234374a263eSTao Ma { 235374a263eSTao Ma spin_lock(&osb->osb_lock); 236374a263eSTao Ma ocfs2_erase_refcount_tree_from_list_no_lock(osb, tree); 237374a263eSTao Ma spin_unlock(&osb->osb_lock); 238374a263eSTao Ma } 239374a263eSTao Ma 240374a263eSTao Ma void ocfs2_kref_remove_refcount_tree(struct kref *kref) 241374a263eSTao Ma { 242374a263eSTao Ma struct ocfs2_refcount_tree *tree = 243374a263eSTao Ma container_of(kref, struct ocfs2_refcount_tree, rf_getcnt); 244374a263eSTao Ma 245374a263eSTao Ma ocfs2_free_refcount_tree(tree); 246374a263eSTao Ma } 247374a263eSTao Ma 248374a263eSTao Ma static inline void 249374a263eSTao Ma ocfs2_refcount_tree_get(struct ocfs2_refcount_tree *tree) 250374a263eSTao Ma { 251374a263eSTao Ma kref_get(&tree->rf_getcnt); 252374a263eSTao Ma } 253374a263eSTao Ma 254374a263eSTao Ma static inline void 255374a263eSTao Ma ocfs2_refcount_tree_put(struct ocfs2_refcount_tree *tree) 256374a263eSTao Ma { 257374a263eSTao Ma kref_put(&tree->rf_getcnt, ocfs2_kref_remove_refcount_tree); 258374a263eSTao Ma } 259374a263eSTao Ma 260374a263eSTao Ma static inline void ocfs2_init_refcount_tree_ci(struct ocfs2_refcount_tree *new, 261374a263eSTao Ma struct super_block *sb) 262374a263eSTao Ma { 263374a263eSTao Ma ocfs2_metadata_cache_init(&new->rf_ci, &ocfs2_refcount_caching_ops); 264374a263eSTao Ma mutex_init(&new->rf_io_mutex); 265374a263eSTao Ma new->rf_sb = sb; 266374a263eSTao Ma spin_lock_init(&new->rf_lock); 267374a263eSTao Ma } 268374a263eSTao Ma 269374a263eSTao Ma static inline void ocfs2_init_refcount_tree_lock(struct ocfs2_super *osb, 270374a263eSTao Ma struct ocfs2_refcount_tree *new, 271374a263eSTao Ma u64 rf_blkno, u32 generation) 272374a263eSTao Ma { 273374a263eSTao Ma init_rwsem(&new->rf_sem); 274374a263eSTao Ma ocfs2_refcount_lock_res_init(&new->rf_lockres, osb, 275374a263eSTao Ma rf_blkno, generation); 276374a263eSTao Ma } 277374a263eSTao Ma 2788bf396deSTao Ma static struct ocfs2_refcount_tree* 2798bf396deSTao Ma ocfs2_allocate_refcount_tree(struct ocfs2_super *osb, u64 rf_blkno) 2808bf396deSTao Ma { 2818bf396deSTao Ma struct ocfs2_refcount_tree *new; 2828bf396deSTao Ma 2838bf396deSTao Ma new = kzalloc(sizeof(struct ocfs2_refcount_tree), GFP_NOFS); 2848bf396deSTao Ma if (!new) 2858bf396deSTao Ma return NULL; 2868bf396deSTao Ma 2878bf396deSTao Ma new->rf_blkno = rf_blkno; 2888bf396deSTao Ma kref_init(&new->rf_getcnt); 2898bf396deSTao Ma ocfs2_init_refcount_tree_ci(new, osb->sb); 2908bf396deSTao Ma 2918bf396deSTao Ma return new; 2928bf396deSTao Ma } 2938bf396deSTao Ma 294374a263eSTao Ma static int ocfs2_get_refcount_tree(struct ocfs2_super *osb, u64 rf_blkno, 295374a263eSTao Ma struct ocfs2_refcount_tree **ret_tree) 296374a263eSTao Ma { 297374a263eSTao Ma int ret = 0; 298374a263eSTao Ma struct ocfs2_refcount_tree *tree, *new = NULL; 299374a263eSTao Ma struct buffer_head *ref_root_bh = NULL; 300374a263eSTao Ma struct ocfs2_refcount_block *ref_rb; 301374a263eSTao Ma 302374a263eSTao Ma spin_lock(&osb->osb_lock); 303374a263eSTao Ma if (osb->osb_ref_tree_lru && 304374a263eSTao Ma osb->osb_ref_tree_lru->rf_blkno == rf_blkno) 305374a263eSTao Ma tree = osb->osb_ref_tree_lru; 306374a263eSTao Ma else 307374a263eSTao Ma tree = ocfs2_find_refcount_tree(osb, rf_blkno); 308374a263eSTao Ma if (tree) 309374a263eSTao Ma goto out; 310374a263eSTao Ma 311374a263eSTao Ma spin_unlock(&osb->osb_lock); 312374a263eSTao Ma 3138bf396deSTao Ma new = ocfs2_allocate_refcount_tree(osb, rf_blkno); 314374a263eSTao Ma if (!new) { 315374a263eSTao Ma ret = -ENOMEM; 3168bf396deSTao Ma mlog_errno(ret); 317374a263eSTao Ma return ret; 318374a263eSTao Ma } 319374a263eSTao Ma /* 320374a263eSTao Ma * We need the generation to create the refcount tree lock and since 321374a263eSTao Ma * it isn't changed during the tree modification, we are safe here to 322374a263eSTao Ma * read without protection. 323374a263eSTao Ma * We also have to purge the cache after we create the lock since the 324374a263eSTao Ma * refcount block may have the stale data. It can only be trusted when 325374a263eSTao Ma * we hold the refcount lock. 326374a263eSTao Ma */ 327374a263eSTao Ma ret = ocfs2_read_refcount_block(&new->rf_ci, rf_blkno, &ref_root_bh); 328374a263eSTao Ma if (ret) { 329374a263eSTao Ma mlog_errno(ret); 330374a263eSTao Ma ocfs2_metadata_cache_exit(&new->rf_ci); 331374a263eSTao Ma kfree(new); 332374a263eSTao Ma return ret; 333374a263eSTao Ma } 334374a263eSTao Ma 335374a263eSTao Ma ref_rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data; 336374a263eSTao Ma new->rf_generation = le32_to_cpu(ref_rb->rf_generation); 337374a263eSTao Ma ocfs2_init_refcount_tree_lock(osb, new, rf_blkno, 338374a263eSTao Ma new->rf_generation); 339374a263eSTao Ma ocfs2_metadata_cache_purge(&new->rf_ci); 340374a263eSTao Ma 341374a263eSTao Ma spin_lock(&osb->osb_lock); 342374a263eSTao Ma tree = ocfs2_find_refcount_tree(osb, rf_blkno); 343374a263eSTao Ma if (tree) 344374a263eSTao Ma goto out; 345374a263eSTao Ma 346374a263eSTao Ma ocfs2_insert_refcount_tree(osb, new); 347374a263eSTao Ma 348374a263eSTao Ma tree = new; 349374a263eSTao Ma new = NULL; 350374a263eSTao Ma 351374a263eSTao Ma out: 352374a263eSTao Ma *ret_tree = tree; 353374a263eSTao Ma 354374a263eSTao Ma osb->osb_ref_tree_lru = tree; 355374a263eSTao Ma 356374a263eSTao Ma spin_unlock(&osb->osb_lock); 357374a263eSTao Ma 358374a263eSTao Ma if (new) 359374a263eSTao Ma ocfs2_free_refcount_tree(new); 360374a263eSTao Ma 361374a263eSTao Ma brelse(ref_root_bh); 362374a263eSTao Ma return ret; 363374a263eSTao Ma } 364374a263eSTao Ma 365374a263eSTao Ma static int ocfs2_get_refcount_block(struct inode *inode, u64 *ref_blkno) 366374a263eSTao Ma { 367374a263eSTao Ma int ret; 368374a263eSTao Ma struct buffer_head *di_bh = NULL; 369374a263eSTao Ma struct ocfs2_dinode *di; 370374a263eSTao Ma 371374a263eSTao Ma ret = ocfs2_read_inode_block(inode, &di_bh); 372374a263eSTao Ma if (ret) { 373374a263eSTao Ma mlog_errno(ret); 374374a263eSTao Ma goto out; 375374a263eSTao Ma } 376374a263eSTao Ma 377374a263eSTao Ma BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL)); 378374a263eSTao Ma 379374a263eSTao Ma di = (struct ocfs2_dinode *)di_bh->b_data; 380374a263eSTao Ma *ref_blkno = le64_to_cpu(di->i_refcount_loc); 381374a263eSTao Ma brelse(di_bh); 382374a263eSTao Ma out: 383374a263eSTao Ma return ret; 384374a263eSTao Ma } 385374a263eSTao Ma 386374a263eSTao Ma static int __ocfs2_lock_refcount_tree(struct ocfs2_super *osb, 387374a263eSTao Ma struct ocfs2_refcount_tree *tree, int rw) 388374a263eSTao Ma { 389374a263eSTao Ma int ret; 390374a263eSTao Ma 391374a263eSTao Ma ret = ocfs2_refcount_lock(tree, rw); 392374a263eSTao Ma if (ret) { 393374a263eSTao Ma mlog_errno(ret); 394374a263eSTao Ma goto out; 395374a263eSTao Ma } 396374a263eSTao Ma 397374a263eSTao Ma if (rw) 398374a263eSTao Ma down_write(&tree->rf_sem); 399374a263eSTao Ma else 400374a263eSTao Ma down_read(&tree->rf_sem); 401374a263eSTao Ma 402374a263eSTao Ma out: 403374a263eSTao Ma return ret; 404374a263eSTao Ma } 405374a263eSTao Ma 406374a263eSTao Ma /* 407374a263eSTao Ma * Lock the refcount tree pointed by ref_blkno and return the tree. 408374a263eSTao Ma * In most case, we lock the tree and read the refcount block. 409374a263eSTao Ma * So read it here if the caller really needs it. 410374a263eSTao Ma * 411374a263eSTao Ma * If the tree has been re-created by other node, it will free the 412374a263eSTao Ma * old one and re-create it. 413374a263eSTao Ma */ 414374a263eSTao Ma int ocfs2_lock_refcount_tree(struct ocfs2_super *osb, 415374a263eSTao Ma u64 ref_blkno, int rw, 416374a263eSTao Ma struct ocfs2_refcount_tree **ret_tree, 417374a263eSTao Ma struct buffer_head **ref_bh) 418374a263eSTao Ma { 419374a263eSTao Ma int ret, delete_tree = 0; 420374a263eSTao Ma struct ocfs2_refcount_tree *tree = NULL; 421374a263eSTao Ma struct buffer_head *ref_root_bh = NULL; 422374a263eSTao Ma struct ocfs2_refcount_block *rb; 423374a263eSTao Ma 424374a263eSTao Ma again: 425374a263eSTao Ma ret = ocfs2_get_refcount_tree(osb, ref_blkno, &tree); 426374a263eSTao Ma if (ret) { 427374a263eSTao Ma mlog_errno(ret); 428374a263eSTao Ma return ret; 429374a263eSTao Ma } 430374a263eSTao Ma 431374a263eSTao Ma ocfs2_refcount_tree_get(tree); 432374a263eSTao Ma 433374a263eSTao Ma ret = __ocfs2_lock_refcount_tree(osb, tree, rw); 434374a263eSTao Ma if (ret) { 435374a263eSTao Ma mlog_errno(ret); 436374a263eSTao Ma ocfs2_refcount_tree_put(tree); 437374a263eSTao Ma goto out; 438374a263eSTao Ma } 439374a263eSTao Ma 440374a263eSTao Ma ret = ocfs2_read_refcount_block(&tree->rf_ci, tree->rf_blkno, 441374a263eSTao Ma &ref_root_bh); 442374a263eSTao Ma if (ret) { 443374a263eSTao Ma mlog_errno(ret); 444374a263eSTao Ma ocfs2_unlock_refcount_tree(osb, tree, rw); 445374a263eSTao Ma ocfs2_refcount_tree_put(tree); 446374a263eSTao Ma goto out; 447374a263eSTao Ma } 448374a263eSTao Ma 449374a263eSTao Ma rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data; 450374a263eSTao Ma /* 451374a263eSTao Ma * If the refcount block has been freed and re-created, we may need 452374a263eSTao Ma * to recreate the refcount tree also. 453374a263eSTao Ma * 454374a263eSTao Ma * Here we just remove the tree from the rb-tree, and the last 455374a263eSTao Ma * kref holder will unlock and delete this refcount_tree. 456374a263eSTao Ma * Then we goto "again" and ocfs2_get_refcount_tree will create 457374a263eSTao Ma * the new refcount tree for us. 458374a263eSTao Ma */ 459374a263eSTao Ma if (tree->rf_generation != le32_to_cpu(rb->rf_generation)) { 460374a263eSTao Ma if (!tree->rf_removed) { 461374a263eSTao Ma ocfs2_erase_refcount_tree_from_list(osb, tree); 462374a263eSTao Ma tree->rf_removed = 1; 463374a263eSTao Ma delete_tree = 1; 464374a263eSTao Ma } 465374a263eSTao Ma 466374a263eSTao Ma ocfs2_unlock_refcount_tree(osb, tree, rw); 467374a263eSTao Ma /* 468374a263eSTao Ma * We get an extra reference when we create the refcount 469374a263eSTao Ma * tree, so another put will destroy it. 470374a263eSTao Ma */ 471374a263eSTao Ma if (delete_tree) 472374a263eSTao Ma ocfs2_refcount_tree_put(tree); 473374a263eSTao Ma brelse(ref_root_bh); 474374a263eSTao Ma ref_root_bh = NULL; 475374a263eSTao Ma goto again; 476374a263eSTao Ma } 477374a263eSTao Ma 478374a263eSTao Ma *ret_tree = tree; 479374a263eSTao Ma if (ref_bh) { 480374a263eSTao Ma *ref_bh = ref_root_bh; 481374a263eSTao Ma ref_root_bh = NULL; 482374a263eSTao Ma } 483374a263eSTao Ma out: 484374a263eSTao Ma brelse(ref_root_bh); 485374a263eSTao Ma return ret; 486374a263eSTao Ma } 487374a263eSTao Ma 488374a263eSTao Ma int ocfs2_lock_refcount_tree_by_inode(struct inode *inode, int rw, 489374a263eSTao Ma struct ocfs2_refcount_tree **ret_tree, 490374a263eSTao Ma struct buffer_head **ref_bh) 491374a263eSTao Ma { 492374a263eSTao Ma int ret; 493374a263eSTao Ma u64 ref_blkno; 494374a263eSTao Ma 495374a263eSTao Ma ret = ocfs2_get_refcount_block(inode, &ref_blkno); 496374a263eSTao Ma if (ret) { 497374a263eSTao Ma mlog_errno(ret); 498374a263eSTao Ma return ret; 499374a263eSTao Ma } 500374a263eSTao Ma 501374a263eSTao Ma return ocfs2_lock_refcount_tree(OCFS2_SB(inode->i_sb), ref_blkno, 502374a263eSTao Ma rw, ret_tree, ref_bh); 503374a263eSTao Ma } 504374a263eSTao Ma 505374a263eSTao Ma void ocfs2_unlock_refcount_tree(struct ocfs2_super *osb, 506374a263eSTao Ma struct ocfs2_refcount_tree *tree, int rw) 507374a263eSTao Ma { 508374a263eSTao Ma if (rw) 509374a263eSTao Ma up_write(&tree->rf_sem); 510374a263eSTao Ma else 511374a263eSTao Ma up_read(&tree->rf_sem); 512374a263eSTao Ma 513374a263eSTao Ma ocfs2_refcount_unlock(tree, rw); 514374a263eSTao Ma ocfs2_refcount_tree_put(tree); 515374a263eSTao Ma } 516374a263eSTao Ma 517374a263eSTao Ma void ocfs2_purge_refcount_trees(struct ocfs2_super *osb) 518374a263eSTao Ma { 519374a263eSTao Ma struct rb_node *node; 520374a263eSTao Ma struct ocfs2_refcount_tree *tree; 521374a263eSTao Ma struct rb_root *root = &osb->osb_rf_lock_tree; 522374a263eSTao Ma 523374a263eSTao Ma while ((node = rb_last(root)) != NULL) { 524374a263eSTao Ma tree = rb_entry(node, struct ocfs2_refcount_tree, rf_node); 525374a263eSTao Ma 526374a263eSTao Ma mlog(0, "Purge tree %llu\n", 527374a263eSTao Ma (unsigned long long) tree->rf_blkno); 528374a263eSTao Ma 529374a263eSTao Ma rb_erase(&tree->rf_node, root); 530374a263eSTao Ma ocfs2_free_refcount_tree(tree); 531374a263eSTao Ma } 532374a263eSTao Ma } 5338bf396deSTao Ma 5348bf396deSTao Ma /* 5358bf396deSTao Ma * Create a refcount tree for an inode. 5368bf396deSTao Ma * We take for granted that the inode is already locked. 5378bf396deSTao Ma */ 5388bf396deSTao Ma static int ocfs2_create_refcount_tree(struct inode *inode, 5398bf396deSTao Ma struct buffer_head *di_bh) 5408bf396deSTao Ma { 5418bf396deSTao Ma int ret; 5428bf396deSTao Ma handle_t *handle = NULL; 5438bf396deSTao Ma struct ocfs2_alloc_context *meta_ac = NULL; 5448bf396deSTao Ma struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 5458bf396deSTao Ma struct ocfs2_inode_info *oi = OCFS2_I(inode); 5468bf396deSTao Ma struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5478bf396deSTao Ma struct buffer_head *new_bh = NULL; 5488bf396deSTao Ma struct ocfs2_refcount_block *rb; 5498bf396deSTao Ma struct ocfs2_refcount_tree *new_tree = NULL, *tree = NULL; 5508bf396deSTao Ma u16 suballoc_bit_start; 5518bf396deSTao Ma u32 num_got; 5528bf396deSTao Ma u64 first_blkno; 5538bf396deSTao Ma 5548bf396deSTao Ma BUG_ON(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL); 5558bf396deSTao Ma 5568bf396deSTao Ma mlog(0, "create tree for inode %lu\n", inode->i_ino); 5578bf396deSTao Ma 5588bf396deSTao Ma ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &meta_ac); 5598bf396deSTao Ma if (ret) { 5608bf396deSTao Ma mlog_errno(ret); 5618bf396deSTao Ma goto out; 5628bf396deSTao Ma } 5638bf396deSTao Ma 5648bf396deSTao Ma handle = ocfs2_start_trans(osb, OCFS2_REFCOUNT_TREE_CREATE_CREDITS); 5658bf396deSTao Ma if (IS_ERR(handle)) { 5668bf396deSTao Ma ret = PTR_ERR(handle); 5678bf396deSTao Ma mlog_errno(ret); 5688bf396deSTao Ma goto out; 5698bf396deSTao Ma } 5708bf396deSTao Ma 5718bf396deSTao Ma ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh, 5728bf396deSTao Ma OCFS2_JOURNAL_ACCESS_WRITE); 5738bf396deSTao Ma if (ret) { 5748bf396deSTao Ma mlog_errno(ret); 5758bf396deSTao Ma goto out_commit; 5768bf396deSTao Ma } 5778bf396deSTao Ma 5788bf396deSTao Ma ret = ocfs2_claim_metadata(osb, handle, meta_ac, 1, 5798bf396deSTao Ma &suballoc_bit_start, &num_got, 5808bf396deSTao Ma &first_blkno); 5818bf396deSTao Ma if (ret) { 5828bf396deSTao Ma mlog_errno(ret); 5838bf396deSTao Ma goto out_commit; 5848bf396deSTao Ma } 5858bf396deSTao Ma 5868bf396deSTao Ma new_tree = ocfs2_allocate_refcount_tree(osb, first_blkno); 5878bf396deSTao Ma if (!new_tree) { 5888bf396deSTao Ma ret = -ENOMEM; 5898bf396deSTao Ma mlog_errno(ret); 5908bf396deSTao Ma goto out_commit; 5918bf396deSTao Ma } 5928bf396deSTao Ma 5938bf396deSTao Ma new_bh = sb_getblk(inode->i_sb, first_blkno); 5948bf396deSTao Ma ocfs2_set_new_buffer_uptodate(&new_tree->rf_ci, new_bh); 5958bf396deSTao Ma 5968bf396deSTao Ma ret = ocfs2_journal_access_rb(handle, &new_tree->rf_ci, new_bh, 5978bf396deSTao Ma OCFS2_JOURNAL_ACCESS_CREATE); 5988bf396deSTao Ma if (ret) { 5998bf396deSTao Ma mlog_errno(ret); 6008bf396deSTao Ma goto out_commit; 6018bf396deSTao Ma } 6028bf396deSTao Ma 6038bf396deSTao Ma /* Initialize ocfs2_refcount_block. */ 6048bf396deSTao Ma rb = (struct ocfs2_refcount_block *)new_bh->b_data; 6058bf396deSTao Ma memset(rb, 0, inode->i_sb->s_blocksize); 6068bf396deSTao Ma strcpy((void *)rb, OCFS2_REFCOUNT_BLOCK_SIGNATURE); 6078bf396deSTao Ma rb->rf_suballoc_slot = cpu_to_le16(osb->slot_num); 6088bf396deSTao Ma rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start); 6098bf396deSTao Ma rb->rf_fs_generation = cpu_to_le32(osb->fs_generation); 6108bf396deSTao Ma rb->rf_blkno = cpu_to_le64(first_blkno); 6118bf396deSTao Ma rb->rf_count = cpu_to_le32(1); 6128bf396deSTao Ma rb->rf_records.rl_count = 6138bf396deSTao Ma cpu_to_le16(ocfs2_refcount_recs_per_rb(osb->sb)); 6148bf396deSTao Ma spin_lock(&osb->osb_lock); 6158bf396deSTao Ma rb->rf_generation = osb->s_next_generation++; 6168bf396deSTao Ma spin_unlock(&osb->osb_lock); 6178bf396deSTao Ma 6188bf396deSTao Ma ocfs2_journal_dirty(handle, new_bh); 6198bf396deSTao Ma 6208bf396deSTao Ma spin_lock(&oi->ip_lock); 6218bf396deSTao Ma oi->ip_dyn_features |= OCFS2_HAS_REFCOUNT_FL; 6228bf396deSTao Ma di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features); 6238bf396deSTao Ma di->i_refcount_loc = cpu_to_le64(first_blkno); 6248bf396deSTao Ma spin_unlock(&oi->ip_lock); 6258bf396deSTao Ma 6268bf396deSTao Ma mlog(0, "created tree for inode %lu, refblock %llu\n", 6278bf396deSTao Ma inode->i_ino, (unsigned long long)first_blkno); 6288bf396deSTao Ma 6298bf396deSTao Ma ocfs2_journal_dirty(handle, di_bh); 6308bf396deSTao Ma 6318bf396deSTao Ma /* 6328bf396deSTao Ma * We have to init the tree lock here since it will use 6338bf396deSTao Ma * the generation number to create it. 6348bf396deSTao Ma */ 6358bf396deSTao Ma new_tree->rf_generation = le32_to_cpu(rb->rf_generation); 6368bf396deSTao Ma ocfs2_init_refcount_tree_lock(osb, new_tree, first_blkno, 6378bf396deSTao Ma new_tree->rf_generation); 6388bf396deSTao Ma 6398bf396deSTao Ma spin_lock(&osb->osb_lock); 6408bf396deSTao Ma tree = ocfs2_find_refcount_tree(osb, first_blkno); 6418bf396deSTao Ma 6428bf396deSTao Ma /* 6438bf396deSTao Ma * We've just created a new refcount tree in this block. If 6448bf396deSTao Ma * we found a refcount tree on the ocfs2_super, it must be 6458bf396deSTao Ma * one we just deleted. We free the old tree before 6468bf396deSTao Ma * inserting the new tree. 6478bf396deSTao Ma */ 6488bf396deSTao Ma BUG_ON(tree && tree->rf_generation == new_tree->rf_generation); 6498bf396deSTao Ma if (tree) 6508bf396deSTao Ma ocfs2_erase_refcount_tree_from_list_no_lock(osb, tree); 6518bf396deSTao Ma ocfs2_insert_refcount_tree(osb, new_tree); 6528bf396deSTao Ma spin_unlock(&osb->osb_lock); 6538bf396deSTao Ma new_tree = NULL; 6548bf396deSTao Ma if (tree) 6558bf396deSTao Ma ocfs2_refcount_tree_put(tree); 6568bf396deSTao Ma 6578bf396deSTao Ma out_commit: 6588bf396deSTao Ma ocfs2_commit_trans(osb, handle); 6598bf396deSTao Ma 6608bf396deSTao Ma out: 6618bf396deSTao Ma if (new_tree) { 6628bf396deSTao Ma ocfs2_metadata_cache_exit(&new_tree->rf_ci); 6638bf396deSTao Ma kfree(new_tree); 6648bf396deSTao Ma } 6658bf396deSTao Ma 6668bf396deSTao Ma brelse(new_bh); 6678bf396deSTao Ma if (meta_ac) 6688bf396deSTao Ma ocfs2_free_alloc_context(meta_ac); 6698bf396deSTao Ma 6708bf396deSTao Ma return ret; 6718bf396deSTao Ma } 6728bf396deSTao Ma 6738bf396deSTao Ma static int ocfs2_set_refcount_tree(struct inode *inode, 6748bf396deSTao Ma struct buffer_head *di_bh, 6758bf396deSTao Ma u64 refcount_loc) 6768bf396deSTao Ma { 6778bf396deSTao Ma int ret; 6788bf396deSTao Ma handle_t *handle = NULL; 6798bf396deSTao Ma struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 6808bf396deSTao Ma struct ocfs2_inode_info *oi = OCFS2_I(inode); 6818bf396deSTao Ma struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 6828bf396deSTao Ma struct buffer_head *ref_root_bh = NULL; 6838bf396deSTao Ma struct ocfs2_refcount_block *rb; 6848bf396deSTao Ma struct ocfs2_refcount_tree *ref_tree; 6858bf396deSTao Ma 6868bf396deSTao Ma BUG_ON(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL); 6878bf396deSTao Ma 6888bf396deSTao Ma ret = ocfs2_lock_refcount_tree(osb, refcount_loc, 1, 6898bf396deSTao Ma &ref_tree, &ref_root_bh); 6908bf396deSTao Ma if (ret) { 6918bf396deSTao Ma mlog_errno(ret); 6928bf396deSTao Ma return ret; 6938bf396deSTao Ma } 6948bf396deSTao Ma 6958bf396deSTao Ma handle = ocfs2_start_trans(osb, OCFS2_REFCOUNT_TREE_SET_CREDITS); 6968bf396deSTao Ma if (IS_ERR(handle)) { 6978bf396deSTao Ma ret = PTR_ERR(handle); 6988bf396deSTao Ma mlog_errno(ret); 6998bf396deSTao Ma goto out; 7008bf396deSTao Ma } 7018bf396deSTao Ma 7028bf396deSTao Ma ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh, 7038bf396deSTao Ma OCFS2_JOURNAL_ACCESS_WRITE); 7048bf396deSTao Ma if (ret) { 7058bf396deSTao Ma mlog_errno(ret); 7068bf396deSTao Ma goto out_commit; 7078bf396deSTao Ma } 7088bf396deSTao Ma 7098bf396deSTao Ma ret = ocfs2_journal_access_rb(handle, &ref_tree->rf_ci, ref_root_bh, 7108bf396deSTao Ma OCFS2_JOURNAL_ACCESS_WRITE); 7118bf396deSTao Ma if (ret) { 7128bf396deSTao Ma mlog_errno(ret); 7138bf396deSTao Ma goto out_commit; 7148bf396deSTao Ma } 7158bf396deSTao Ma 7168bf396deSTao Ma rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data; 7178bf396deSTao Ma le32_add_cpu(&rb->rf_count, 1); 7188bf396deSTao Ma 7198bf396deSTao Ma ocfs2_journal_dirty(handle, ref_root_bh); 7208bf396deSTao Ma 7218bf396deSTao Ma spin_lock(&oi->ip_lock); 7228bf396deSTao Ma oi->ip_dyn_features |= OCFS2_HAS_REFCOUNT_FL; 7238bf396deSTao Ma di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features); 7248bf396deSTao Ma di->i_refcount_loc = cpu_to_le64(refcount_loc); 7258bf396deSTao Ma spin_unlock(&oi->ip_lock); 7268bf396deSTao Ma ocfs2_journal_dirty(handle, di_bh); 7278bf396deSTao Ma 7288bf396deSTao Ma out_commit: 7298bf396deSTao Ma ocfs2_commit_trans(osb, handle); 7308bf396deSTao Ma out: 7318bf396deSTao Ma ocfs2_unlock_refcount_tree(osb, ref_tree, 1); 7328bf396deSTao Ma brelse(ref_root_bh); 7338bf396deSTao Ma 7348bf396deSTao Ma return ret; 7358bf396deSTao Ma } 7368bf396deSTao Ma 7378bf396deSTao Ma int ocfs2_remove_refcount_tree(struct inode *inode, struct buffer_head *di_bh) 7388bf396deSTao Ma { 7398bf396deSTao Ma int ret, delete_tree = 0; 7408bf396deSTao Ma handle_t *handle = NULL; 7418bf396deSTao Ma struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 7428bf396deSTao Ma struct ocfs2_inode_info *oi = OCFS2_I(inode); 7438bf396deSTao Ma struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 7448bf396deSTao Ma struct ocfs2_refcount_block *rb; 7458bf396deSTao Ma struct inode *alloc_inode = NULL; 7468bf396deSTao Ma struct buffer_head *alloc_bh = NULL; 7478bf396deSTao Ma struct buffer_head *blk_bh = NULL; 7488bf396deSTao Ma struct ocfs2_refcount_tree *ref_tree; 7498bf396deSTao Ma int credits = OCFS2_REFCOUNT_TREE_REMOVE_CREDITS; 7508bf396deSTao Ma u64 blk = 0, bg_blkno = 0, ref_blkno = le64_to_cpu(di->i_refcount_loc); 7518bf396deSTao Ma u16 bit = 0; 7528bf396deSTao Ma 7538bf396deSTao Ma if (!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL)) 7548bf396deSTao Ma return 0; 7558bf396deSTao Ma 7568bf396deSTao Ma BUG_ON(!ref_blkno); 7578bf396deSTao Ma ret = ocfs2_lock_refcount_tree(osb, ref_blkno, 1, &ref_tree, &blk_bh); 7588bf396deSTao Ma if (ret) { 7598bf396deSTao Ma mlog_errno(ret); 7608bf396deSTao Ma return ret; 7618bf396deSTao Ma } 7628bf396deSTao Ma 7638bf396deSTao Ma rb = (struct ocfs2_refcount_block *)blk_bh->b_data; 7648bf396deSTao Ma 7658bf396deSTao Ma /* 7668bf396deSTao Ma * If we are the last user, we need to free the block. 7678bf396deSTao Ma * So lock the allocator ahead. 7688bf396deSTao Ma */ 7698bf396deSTao Ma if (le32_to_cpu(rb->rf_count) == 1) { 7708bf396deSTao Ma blk = le64_to_cpu(rb->rf_blkno); 7718bf396deSTao Ma bit = le16_to_cpu(rb->rf_suballoc_bit); 7728bf396deSTao Ma bg_blkno = ocfs2_which_suballoc_group(blk, bit); 7738bf396deSTao Ma 7748bf396deSTao Ma alloc_inode = ocfs2_get_system_file_inode(osb, 7758bf396deSTao Ma EXTENT_ALLOC_SYSTEM_INODE, 7768bf396deSTao Ma le16_to_cpu(rb->rf_suballoc_slot)); 7778bf396deSTao Ma if (!alloc_inode) { 7788bf396deSTao Ma ret = -ENOMEM; 7798bf396deSTao Ma mlog_errno(ret); 7808bf396deSTao Ma goto out; 7818bf396deSTao Ma } 7828bf396deSTao Ma mutex_lock(&alloc_inode->i_mutex); 7838bf396deSTao Ma 7848bf396deSTao Ma ret = ocfs2_inode_lock(alloc_inode, &alloc_bh, 1); 7858bf396deSTao Ma if (ret) { 7868bf396deSTao Ma mlog_errno(ret); 7878bf396deSTao Ma goto out_mutex; 7888bf396deSTao Ma } 7898bf396deSTao Ma 7908bf396deSTao Ma credits += OCFS2_SUBALLOC_FREE; 7918bf396deSTao Ma } 7928bf396deSTao Ma 7938bf396deSTao Ma handle = ocfs2_start_trans(osb, credits); 7948bf396deSTao Ma if (IS_ERR(handle)) { 7958bf396deSTao Ma ret = PTR_ERR(handle); 7968bf396deSTao Ma mlog_errno(ret); 7978bf396deSTao Ma goto out_unlock; 7988bf396deSTao Ma } 7998bf396deSTao Ma 8008bf396deSTao Ma ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh, 8018bf396deSTao Ma OCFS2_JOURNAL_ACCESS_WRITE); 8028bf396deSTao Ma if (ret) { 8038bf396deSTao Ma mlog_errno(ret); 8048bf396deSTao Ma goto out_commit; 8058bf396deSTao Ma } 8068bf396deSTao Ma 8078bf396deSTao Ma ret = ocfs2_journal_access_rb(handle, &ref_tree->rf_ci, blk_bh, 8088bf396deSTao Ma OCFS2_JOURNAL_ACCESS_WRITE); 8098bf396deSTao Ma if (ret) { 8108bf396deSTao Ma mlog_errno(ret); 8118bf396deSTao Ma goto out_commit; 8128bf396deSTao Ma } 8138bf396deSTao Ma 8148bf396deSTao Ma spin_lock(&oi->ip_lock); 8158bf396deSTao Ma oi->ip_dyn_features &= ~OCFS2_HAS_REFCOUNT_FL; 8168bf396deSTao Ma di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features); 8178bf396deSTao Ma di->i_refcount_loc = 0; 8188bf396deSTao Ma spin_unlock(&oi->ip_lock); 8198bf396deSTao Ma ocfs2_journal_dirty(handle, di_bh); 8208bf396deSTao Ma 8218bf396deSTao Ma le32_add_cpu(&rb->rf_count , -1); 8228bf396deSTao Ma ocfs2_journal_dirty(handle, blk_bh); 8238bf396deSTao Ma 8248bf396deSTao Ma if (!rb->rf_count) { 8258bf396deSTao Ma delete_tree = 1; 8268bf396deSTao Ma ocfs2_erase_refcount_tree_from_list(osb, ref_tree); 8278bf396deSTao Ma ret = ocfs2_free_suballoc_bits(handle, alloc_inode, 8288bf396deSTao Ma alloc_bh, bit, bg_blkno, 1); 8298bf396deSTao Ma if (ret) 8308bf396deSTao Ma mlog_errno(ret); 8318bf396deSTao Ma } 8328bf396deSTao Ma 8338bf396deSTao Ma out_commit: 8348bf396deSTao Ma ocfs2_commit_trans(osb, handle); 8358bf396deSTao Ma out_unlock: 8368bf396deSTao Ma if (alloc_inode) { 8378bf396deSTao Ma ocfs2_inode_unlock(alloc_inode, 1); 8388bf396deSTao Ma brelse(alloc_bh); 8398bf396deSTao Ma } 8408bf396deSTao Ma out_mutex: 8418bf396deSTao Ma if (alloc_inode) { 8428bf396deSTao Ma mutex_unlock(&alloc_inode->i_mutex); 8438bf396deSTao Ma iput(alloc_inode); 8448bf396deSTao Ma } 8458bf396deSTao Ma out: 8468bf396deSTao Ma ocfs2_unlock_refcount_tree(osb, ref_tree, 1); 8478bf396deSTao Ma if (delete_tree) 8488bf396deSTao Ma ocfs2_refcount_tree_put(ref_tree); 8498bf396deSTao Ma brelse(blk_bh); 8508bf396deSTao Ma 8518bf396deSTao Ma return ret; 8528bf396deSTao Ma } 853e73a819dSTao Ma 854e73a819dSTao Ma static void ocfs2_find_refcount_rec_in_rl(struct ocfs2_caching_info *ci, 855e73a819dSTao Ma struct buffer_head *ref_leaf_bh, 856e73a819dSTao Ma u64 cpos, unsigned int len, 857e73a819dSTao Ma struct ocfs2_refcount_rec *ret_rec, 858e73a819dSTao Ma int *index) 859e73a819dSTao Ma { 860e73a819dSTao Ma int i = 0; 861e73a819dSTao Ma struct ocfs2_refcount_block *rb = 862e73a819dSTao Ma (struct ocfs2_refcount_block *)ref_leaf_bh->b_data; 863e73a819dSTao Ma struct ocfs2_refcount_rec *rec = NULL; 864e73a819dSTao Ma 865e73a819dSTao Ma for (; i < le16_to_cpu(rb->rf_records.rl_used); i++) { 866e73a819dSTao Ma rec = &rb->rf_records.rl_recs[i]; 867e73a819dSTao Ma 868e73a819dSTao Ma if (le64_to_cpu(rec->r_cpos) + 869e73a819dSTao Ma le32_to_cpu(rec->r_clusters) <= cpos) 870e73a819dSTao Ma continue; 871e73a819dSTao Ma else if (le64_to_cpu(rec->r_cpos) > cpos) 872e73a819dSTao Ma break; 873e73a819dSTao Ma 874e73a819dSTao Ma /* ok, cpos fail in this rec. Just return. */ 875e73a819dSTao Ma if (ret_rec) 876e73a819dSTao Ma *ret_rec = *rec; 877e73a819dSTao Ma goto out; 878e73a819dSTao Ma } 879e73a819dSTao Ma 880e73a819dSTao Ma if (ret_rec) { 881e73a819dSTao Ma /* We meet with a hole here, so fake the rec. */ 882e73a819dSTao Ma ret_rec->r_cpos = cpu_to_le64(cpos); 883e73a819dSTao Ma ret_rec->r_refcount = 0; 884e73a819dSTao Ma if (i < le16_to_cpu(rb->rf_records.rl_used) && 885e73a819dSTao Ma le64_to_cpu(rec->r_cpos) < cpos + len) 886e73a819dSTao Ma ret_rec->r_clusters = 887e73a819dSTao Ma cpu_to_le32(le64_to_cpu(rec->r_cpos) - cpos); 888e73a819dSTao Ma else 889e73a819dSTao Ma ret_rec->r_clusters = cpu_to_le32(len); 890e73a819dSTao Ma } 891e73a819dSTao Ma 892e73a819dSTao Ma out: 893e73a819dSTao Ma *index = i; 894e73a819dSTao Ma } 895e73a819dSTao Ma 896e73a819dSTao Ma /* 897e73a819dSTao Ma * Given a cpos and len, try to find the refcount record which contains cpos. 898e73a819dSTao Ma * 1. If cpos can be found in one refcount record, return the record. 899e73a819dSTao Ma * 2. If cpos can't be found, return a fake record which start from cpos 900e73a819dSTao Ma * and end at a small value between cpos+len and start of the next record. 901e73a819dSTao Ma * This fake record has r_refcount = 0. 902e73a819dSTao Ma */ 903e73a819dSTao Ma static int ocfs2_get_refcount_rec(struct ocfs2_caching_info *ci, 904e73a819dSTao Ma struct buffer_head *ref_root_bh, 905e73a819dSTao Ma u64 cpos, unsigned int len, 906e73a819dSTao Ma struct ocfs2_refcount_rec *ret_rec, 907e73a819dSTao Ma int *index, 908e73a819dSTao Ma struct buffer_head **ret_bh) 909e73a819dSTao Ma { 910e73a819dSTao Ma int ret = 0, i, found; 911e73a819dSTao Ma u32 low_cpos; 912e73a819dSTao Ma struct ocfs2_extent_list *el; 913e73a819dSTao Ma struct ocfs2_extent_rec *tmp, *rec = NULL; 914e73a819dSTao Ma struct ocfs2_extent_block *eb; 915e73a819dSTao Ma struct buffer_head *eb_bh = NULL, *ref_leaf_bh = NULL; 916e73a819dSTao Ma struct super_block *sb = ocfs2_metadata_cache_get_super(ci); 917e73a819dSTao Ma struct ocfs2_refcount_block *rb = 918e73a819dSTao Ma (struct ocfs2_refcount_block *)ref_root_bh->b_data; 919e73a819dSTao Ma 920e73a819dSTao Ma if (!(le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)) { 921e73a819dSTao Ma ocfs2_find_refcount_rec_in_rl(ci, ref_root_bh, cpos, len, 922e73a819dSTao Ma ret_rec, index); 923e73a819dSTao Ma *ret_bh = ref_root_bh; 924e73a819dSTao Ma get_bh(ref_root_bh); 925e73a819dSTao Ma return 0; 926e73a819dSTao Ma } 927e73a819dSTao Ma 928e73a819dSTao Ma el = &rb->rf_list; 929e73a819dSTao Ma low_cpos = cpos & OCFS2_32BIT_POS_MASK; 930e73a819dSTao Ma 931e73a819dSTao Ma if (el->l_tree_depth) { 932e73a819dSTao Ma ret = ocfs2_find_leaf(ci, el, low_cpos, &eb_bh); 933e73a819dSTao Ma if (ret) { 934e73a819dSTao Ma mlog_errno(ret); 935e73a819dSTao Ma goto out; 936e73a819dSTao Ma } 937e73a819dSTao Ma 938e73a819dSTao Ma eb = (struct ocfs2_extent_block *) eb_bh->b_data; 939e73a819dSTao Ma el = &eb->h_list; 940e73a819dSTao Ma 941e73a819dSTao Ma if (el->l_tree_depth) { 942e73a819dSTao Ma ocfs2_error(sb, 943e73a819dSTao Ma "refcount tree %llu has non zero tree " 944e73a819dSTao Ma "depth in leaf btree tree block %llu\n", 945e73a819dSTao Ma (unsigned long long)ocfs2_metadata_cache_owner(ci), 946e73a819dSTao Ma (unsigned long long)eb_bh->b_blocknr); 947e73a819dSTao Ma ret = -EROFS; 948e73a819dSTao Ma goto out; 949e73a819dSTao Ma } 950e73a819dSTao Ma } 951e73a819dSTao Ma 952e73a819dSTao Ma found = 0; 953e73a819dSTao Ma for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) { 954e73a819dSTao Ma rec = &el->l_recs[i]; 955e73a819dSTao Ma 956e73a819dSTao Ma if (le32_to_cpu(rec->e_cpos) <= low_cpos) { 957e73a819dSTao Ma found = 1; 958e73a819dSTao Ma break; 959e73a819dSTao Ma } 960e73a819dSTao Ma } 961e73a819dSTao Ma 962e73a819dSTao Ma /* adjust len when we have ocfs2_extent_rec after it. */ 963e73a819dSTao Ma if (found && i < le16_to_cpu(el->l_next_free_rec) - 1) { 964e73a819dSTao Ma tmp = &el->l_recs[i+1]; 965e73a819dSTao Ma 966e73a819dSTao Ma if (le32_to_cpu(tmp->e_cpos) < cpos + len) 967e73a819dSTao Ma len = le32_to_cpu(tmp->e_cpos) - cpos; 968e73a819dSTao Ma } 969e73a819dSTao Ma 970e73a819dSTao Ma ret = ocfs2_read_refcount_block(ci, le64_to_cpu(rec->e_blkno), 971e73a819dSTao Ma &ref_leaf_bh); 972e73a819dSTao Ma if (ret) { 973e73a819dSTao Ma mlog_errno(ret); 974e73a819dSTao Ma goto out; 975e73a819dSTao Ma } 976e73a819dSTao Ma 977e73a819dSTao Ma ocfs2_find_refcount_rec_in_rl(ci, ref_leaf_bh, cpos, len, 978e73a819dSTao Ma ret_rec, index); 979e73a819dSTao Ma *ret_bh = ref_leaf_bh; 980e73a819dSTao Ma out: 981e73a819dSTao Ma brelse(eb_bh); 982e73a819dSTao Ma return ret; 983e73a819dSTao Ma } 984e73a819dSTao Ma 985e73a819dSTao Ma enum ocfs2_ref_rec_contig { 986e73a819dSTao Ma REF_CONTIG_NONE = 0, 987e73a819dSTao Ma REF_CONTIG_LEFT, 988e73a819dSTao Ma REF_CONTIG_RIGHT, 989e73a819dSTao Ma REF_CONTIG_LEFTRIGHT, 990e73a819dSTao Ma }; 991e73a819dSTao Ma 992e73a819dSTao Ma static enum ocfs2_ref_rec_contig 993e73a819dSTao Ma ocfs2_refcount_rec_adjacent(struct ocfs2_refcount_block *rb, 994e73a819dSTao Ma int index) 995e73a819dSTao Ma { 996e73a819dSTao Ma if ((rb->rf_records.rl_recs[index].r_refcount == 997e73a819dSTao Ma rb->rf_records.rl_recs[index + 1].r_refcount) && 998e73a819dSTao Ma (le64_to_cpu(rb->rf_records.rl_recs[index].r_cpos) + 999e73a819dSTao Ma le32_to_cpu(rb->rf_records.rl_recs[index].r_clusters) == 1000e73a819dSTao Ma le64_to_cpu(rb->rf_records.rl_recs[index + 1].r_cpos))) 1001e73a819dSTao Ma return REF_CONTIG_RIGHT; 1002e73a819dSTao Ma 1003e73a819dSTao Ma return REF_CONTIG_NONE; 1004e73a819dSTao Ma } 1005e73a819dSTao Ma 1006e73a819dSTao Ma static enum ocfs2_ref_rec_contig 1007e73a819dSTao Ma ocfs2_refcount_rec_contig(struct ocfs2_refcount_block *rb, 1008e73a819dSTao Ma int index) 1009e73a819dSTao Ma { 1010e73a819dSTao Ma enum ocfs2_ref_rec_contig ret = REF_CONTIG_NONE; 1011e73a819dSTao Ma 1012e73a819dSTao Ma if (index < le16_to_cpu(rb->rf_records.rl_used) - 1) 1013e73a819dSTao Ma ret = ocfs2_refcount_rec_adjacent(rb, index); 1014e73a819dSTao Ma 1015e73a819dSTao Ma if (index > 0) { 1016e73a819dSTao Ma enum ocfs2_ref_rec_contig tmp; 1017e73a819dSTao Ma 1018e73a819dSTao Ma tmp = ocfs2_refcount_rec_adjacent(rb, index - 1); 1019e73a819dSTao Ma 1020e73a819dSTao Ma if (tmp == REF_CONTIG_RIGHT) { 1021e73a819dSTao Ma if (ret == REF_CONTIG_RIGHT) 1022e73a819dSTao Ma ret = REF_CONTIG_LEFTRIGHT; 1023e73a819dSTao Ma else 1024e73a819dSTao Ma ret = REF_CONTIG_LEFT; 1025e73a819dSTao Ma } 1026e73a819dSTao Ma } 1027e73a819dSTao Ma 1028e73a819dSTao Ma return ret; 1029e73a819dSTao Ma } 1030e73a819dSTao Ma 1031e73a819dSTao Ma static void ocfs2_rotate_refcount_rec_left(struct ocfs2_refcount_block *rb, 1032e73a819dSTao Ma int index) 1033e73a819dSTao Ma { 1034e73a819dSTao Ma BUG_ON(rb->rf_records.rl_recs[index].r_refcount != 1035e73a819dSTao Ma rb->rf_records.rl_recs[index+1].r_refcount); 1036e73a819dSTao Ma 1037e73a819dSTao Ma le32_add_cpu(&rb->rf_records.rl_recs[index].r_clusters, 1038e73a819dSTao Ma le32_to_cpu(rb->rf_records.rl_recs[index+1].r_clusters)); 1039e73a819dSTao Ma 1040e73a819dSTao Ma if (index < le16_to_cpu(rb->rf_records.rl_used) - 2) 1041e73a819dSTao Ma memmove(&rb->rf_records.rl_recs[index + 1], 1042e73a819dSTao Ma &rb->rf_records.rl_recs[index + 2], 1043e73a819dSTao Ma sizeof(struct ocfs2_refcount_rec) * 1044e73a819dSTao Ma (le16_to_cpu(rb->rf_records.rl_used) - index - 2)); 1045e73a819dSTao Ma 1046e73a819dSTao Ma memset(&rb->rf_records.rl_recs[le16_to_cpu(rb->rf_records.rl_used) - 1], 1047e73a819dSTao Ma 0, sizeof(struct ocfs2_refcount_rec)); 1048e73a819dSTao Ma le16_add_cpu(&rb->rf_records.rl_used, -1); 1049e73a819dSTao Ma } 1050e73a819dSTao Ma 1051e73a819dSTao Ma /* 1052e73a819dSTao Ma * Merge the refcount rec if we are contiguous with the adjacent recs. 1053e73a819dSTao Ma */ 1054e73a819dSTao Ma static void ocfs2_refcount_rec_merge(struct ocfs2_refcount_block *rb, 1055e73a819dSTao Ma int index) 1056e73a819dSTao Ma { 1057e73a819dSTao Ma enum ocfs2_ref_rec_contig contig = 1058e73a819dSTao Ma ocfs2_refcount_rec_contig(rb, index); 1059e73a819dSTao Ma 1060e73a819dSTao Ma if (contig == REF_CONTIG_NONE) 1061e73a819dSTao Ma return; 1062e73a819dSTao Ma 1063e73a819dSTao Ma if (contig == REF_CONTIG_LEFT || contig == REF_CONTIG_LEFTRIGHT) { 1064e73a819dSTao Ma BUG_ON(index == 0); 1065e73a819dSTao Ma index--; 1066e73a819dSTao Ma } 1067e73a819dSTao Ma 1068e73a819dSTao Ma ocfs2_rotate_refcount_rec_left(rb, index); 1069e73a819dSTao Ma 1070e73a819dSTao Ma if (contig == REF_CONTIG_LEFTRIGHT) 1071e73a819dSTao Ma ocfs2_rotate_refcount_rec_left(rb, index); 1072e73a819dSTao Ma } 1073e73a819dSTao Ma 10741823cb0bSTao Ma /* 10751823cb0bSTao Ma * Change the refcount indexed by "index" in ref_bh. 10761823cb0bSTao Ma * If refcount reaches 0, remove it. 10771823cb0bSTao Ma */ 1078e73a819dSTao Ma static int ocfs2_change_refcount_rec(handle_t *handle, 1079e73a819dSTao Ma struct ocfs2_caching_info *ci, 1080e73a819dSTao Ma struct buffer_head *ref_leaf_bh, 1081e73a819dSTao Ma int index, int change) 1082e73a819dSTao Ma { 1083e73a819dSTao Ma int ret; 1084e73a819dSTao Ma struct ocfs2_refcount_block *rb = 1085e73a819dSTao Ma (struct ocfs2_refcount_block *)ref_leaf_bh->b_data; 10861823cb0bSTao Ma struct ocfs2_refcount_list *rl = &rb->rf_records; 10871823cb0bSTao Ma struct ocfs2_refcount_rec *rec = &rl->rl_recs[index]; 1088e73a819dSTao Ma 1089e73a819dSTao Ma ret = ocfs2_journal_access_rb(handle, ci, ref_leaf_bh, 1090e73a819dSTao Ma OCFS2_JOURNAL_ACCESS_WRITE); 1091e73a819dSTao Ma if (ret) { 1092e73a819dSTao Ma mlog_errno(ret); 1093e73a819dSTao Ma goto out; 1094e73a819dSTao Ma } 1095e73a819dSTao Ma 1096e73a819dSTao Ma mlog(0, "change index %d, old count %u, change %d\n", index, 1097e73a819dSTao Ma le32_to_cpu(rec->r_refcount), change); 1098e73a819dSTao Ma le32_add_cpu(&rec->r_refcount, change); 1099e73a819dSTao Ma 11001823cb0bSTao Ma if (!rec->r_refcount) { 11011823cb0bSTao Ma if (index != le16_to_cpu(rl->rl_used) - 1) { 11021823cb0bSTao Ma memmove(rec, rec + 1, 11031823cb0bSTao Ma (le16_to_cpu(rl->rl_used) - index - 1) * 11041823cb0bSTao Ma sizeof(struct ocfs2_refcount_rec)); 11051823cb0bSTao Ma memset(&rl->rl_recs[le16_to_cpu(rl->rl_used) - 1], 11061823cb0bSTao Ma 0, sizeof(struct ocfs2_refcount_rec)); 11071823cb0bSTao Ma } 11081823cb0bSTao Ma 11091823cb0bSTao Ma le16_add_cpu(&rl->rl_used, -1); 11101823cb0bSTao Ma } else 1111e73a819dSTao Ma ocfs2_refcount_rec_merge(rb, index); 1112e73a819dSTao Ma 1113e73a819dSTao Ma ret = ocfs2_journal_dirty(handle, ref_leaf_bh); 1114e73a819dSTao Ma if (ret) 1115e73a819dSTao Ma mlog_errno(ret); 1116e73a819dSTao Ma out: 1117e73a819dSTao Ma return ret; 1118e73a819dSTao Ma } 1119e73a819dSTao Ma 1120e73a819dSTao Ma static int ocfs2_expand_inline_ref_root(handle_t *handle, 1121e73a819dSTao Ma struct ocfs2_caching_info *ci, 1122e73a819dSTao Ma struct buffer_head *ref_root_bh, 1123e73a819dSTao Ma struct buffer_head **ref_leaf_bh, 1124e73a819dSTao Ma struct ocfs2_alloc_context *meta_ac) 1125e73a819dSTao Ma { 1126e73a819dSTao Ma int ret; 1127e73a819dSTao Ma u16 suballoc_bit_start; 1128e73a819dSTao Ma u32 num_got; 1129e73a819dSTao Ma u64 blkno; 1130e73a819dSTao Ma struct super_block *sb = ocfs2_metadata_cache_get_super(ci); 1131e73a819dSTao Ma struct buffer_head *new_bh = NULL; 1132e73a819dSTao Ma struct ocfs2_refcount_block *new_rb; 1133e73a819dSTao Ma struct ocfs2_refcount_block *root_rb = 1134e73a819dSTao Ma (struct ocfs2_refcount_block *)ref_root_bh->b_data; 1135e73a819dSTao Ma 1136e73a819dSTao Ma ret = ocfs2_journal_access_rb(handle, ci, ref_root_bh, 1137e73a819dSTao Ma OCFS2_JOURNAL_ACCESS_WRITE); 1138e73a819dSTao Ma if (ret) { 1139e73a819dSTao Ma mlog_errno(ret); 1140e73a819dSTao Ma goto out; 1141e73a819dSTao Ma } 1142e73a819dSTao Ma 1143e73a819dSTao Ma ret = ocfs2_claim_metadata(OCFS2_SB(sb), handle, meta_ac, 1, 1144e73a819dSTao Ma &suballoc_bit_start, &num_got, 1145e73a819dSTao Ma &blkno); 1146e73a819dSTao Ma if (ret) { 1147e73a819dSTao Ma mlog_errno(ret); 1148e73a819dSTao Ma goto out; 1149e73a819dSTao Ma } 1150e73a819dSTao Ma 1151e73a819dSTao Ma new_bh = sb_getblk(sb, blkno); 1152e73a819dSTao Ma if (new_bh == NULL) { 1153e73a819dSTao Ma ret = -EIO; 1154e73a819dSTao Ma mlog_errno(ret); 1155e73a819dSTao Ma goto out; 1156e73a819dSTao Ma } 1157e73a819dSTao Ma ocfs2_set_new_buffer_uptodate(ci, new_bh); 1158e73a819dSTao Ma 1159e73a819dSTao Ma ret = ocfs2_journal_access_rb(handle, ci, new_bh, 1160e73a819dSTao Ma OCFS2_JOURNAL_ACCESS_CREATE); 1161e73a819dSTao Ma if (ret) { 1162e73a819dSTao Ma mlog_errno(ret); 1163e73a819dSTao Ma goto out; 1164e73a819dSTao Ma } 1165e73a819dSTao Ma 1166e73a819dSTao Ma /* 1167e73a819dSTao Ma * Initialize ocfs2_refcount_block. 1168e73a819dSTao Ma * It should contain the same information as the old root. 1169e73a819dSTao Ma * so just memcpy it and change the corresponding field. 1170e73a819dSTao Ma */ 1171e73a819dSTao Ma memcpy(new_bh->b_data, ref_root_bh->b_data, sb->s_blocksize); 1172e73a819dSTao Ma 1173e73a819dSTao Ma new_rb = (struct ocfs2_refcount_block *)new_bh->b_data; 1174e73a819dSTao Ma new_rb->rf_suballoc_slot = cpu_to_le16(OCFS2_SB(sb)->slot_num); 1175e73a819dSTao Ma new_rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start); 1176e73a819dSTao Ma new_rb->rf_blkno = cpu_to_le64(blkno); 1177e73a819dSTao Ma new_rb->rf_cpos = cpu_to_le32(0); 1178e73a819dSTao Ma new_rb->rf_parent = cpu_to_le64(ref_root_bh->b_blocknr); 1179e73a819dSTao Ma new_rb->rf_flags = cpu_to_le32(OCFS2_REFCOUNT_LEAF_FL); 1180e73a819dSTao Ma ocfs2_journal_dirty(handle, new_bh); 1181e73a819dSTao Ma 1182e73a819dSTao Ma /* Now change the root. */ 1183e73a819dSTao Ma memset(&root_rb->rf_list, 0, sb->s_blocksize - 1184e73a819dSTao Ma offsetof(struct ocfs2_refcount_block, rf_list)); 1185e73a819dSTao Ma root_rb->rf_list.l_count = cpu_to_le16(ocfs2_extent_recs_per_rb(sb)); 1186e73a819dSTao Ma root_rb->rf_clusters = cpu_to_le32(1); 1187e73a819dSTao Ma root_rb->rf_list.l_next_free_rec = cpu_to_le16(1); 1188e73a819dSTao Ma root_rb->rf_list.l_recs[0].e_blkno = cpu_to_le64(blkno); 1189e73a819dSTao Ma root_rb->rf_list.l_recs[0].e_leaf_clusters = cpu_to_le16(1); 1190e73a819dSTao Ma root_rb->rf_flags = cpu_to_le32(OCFS2_REFCOUNT_TREE_FL); 1191e73a819dSTao Ma 1192e73a819dSTao Ma ocfs2_journal_dirty(handle, ref_root_bh); 1193e73a819dSTao Ma 1194e73a819dSTao Ma mlog(0, "new leaf block %llu, used %u\n", (unsigned long long)blkno, 1195e73a819dSTao Ma le16_to_cpu(new_rb->rf_records.rl_used)); 1196e73a819dSTao Ma 1197e73a819dSTao Ma *ref_leaf_bh = new_bh; 1198e73a819dSTao Ma new_bh = NULL; 1199e73a819dSTao Ma out: 1200e73a819dSTao Ma brelse(new_bh); 1201e73a819dSTao Ma return ret; 1202e73a819dSTao Ma } 1203e73a819dSTao Ma 1204e73a819dSTao Ma static int ocfs2_refcount_rec_no_intersect(struct ocfs2_refcount_rec *prev, 1205e73a819dSTao Ma struct ocfs2_refcount_rec *next) 1206e73a819dSTao Ma { 1207e73a819dSTao Ma if (ocfs2_get_ref_rec_low_cpos(prev) + le32_to_cpu(prev->r_clusters) <= 1208e73a819dSTao Ma ocfs2_get_ref_rec_low_cpos(next)) 1209e73a819dSTao Ma return 1; 1210e73a819dSTao Ma 1211e73a819dSTao Ma return 0; 1212e73a819dSTao Ma } 1213e73a819dSTao Ma 1214e73a819dSTao Ma static int cmp_refcount_rec_by_low_cpos(const void *a, const void *b) 1215e73a819dSTao Ma { 1216e73a819dSTao Ma const struct ocfs2_refcount_rec *l = a, *r = b; 1217e73a819dSTao Ma u32 l_cpos = ocfs2_get_ref_rec_low_cpos(l); 1218e73a819dSTao Ma u32 r_cpos = ocfs2_get_ref_rec_low_cpos(r); 1219e73a819dSTao Ma 1220e73a819dSTao Ma if (l_cpos > r_cpos) 1221e73a819dSTao Ma return 1; 1222e73a819dSTao Ma if (l_cpos < r_cpos) 1223e73a819dSTao Ma return -1; 1224e73a819dSTao Ma return 0; 1225e73a819dSTao Ma } 1226e73a819dSTao Ma 1227e73a819dSTao Ma static int cmp_refcount_rec_by_cpos(const void *a, const void *b) 1228e73a819dSTao Ma { 1229e73a819dSTao Ma const struct ocfs2_refcount_rec *l = a, *r = b; 1230e73a819dSTao Ma u64 l_cpos = le64_to_cpu(l->r_cpos); 1231e73a819dSTao Ma u64 r_cpos = le64_to_cpu(r->r_cpos); 1232e73a819dSTao Ma 1233e73a819dSTao Ma if (l_cpos > r_cpos) 1234e73a819dSTao Ma return 1; 1235e73a819dSTao Ma if (l_cpos < r_cpos) 1236e73a819dSTao Ma return -1; 1237e73a819dSTao Ma return 0; 1238e73a819dSTao Ma } 1239e73a819dSTao Ma 1240e73a819dSTao Ma static void swap_refcount_rec(void *a, void *b, int size) 1241e73a819dSTao Ma { 1242e73a819dSTao Ma struct ocfs2_refcount_rec *l = a, *r = b, tmp; 1243e73a819dSTao Ma 1244e73a819dSTao Ma tmp = *(struct ocfs2_refcount_rec *)l; 1245e73a819dSTao Ma *(struct ocfs2_refcount_rec *)l = 1246e73a819dSTao Ma *(struct ocfs2_refcount_rec *)r; 1247e73a819dSTao Ma *(struct ocfs2_refcount_rec *)r = tmp; 1248e73a819dSTao Ma } 1249e73a819dSTao Ma 1250e73a819dSTao Ma /* 1251e73a819dSTao Ma * The refcount cpos are ordered by their 64bit cpos, 1252e73a819dSTao Ma * But we will use the low 32 bit to be the e_cpos in the b-tree. 1253e73a819dSTao Ma * So we need to make sure that this pos isn't intersected with others. 1254e73a819dSTao Ma * 1255e73a819dSTao Ma * Note: The refcount block is already sorted by their low 32 bit cpos, 1256e73a819dSTao Ma * So just try the middle pos first, and we will exit when we find 1257e73a819dSTao Ma * the good position. 1258e73a819dSTao Ma */ 1259e73a819dSTao Ma static int ocfs2_find_refcount_split_pos(struct ocfs2_refcount_list *rl, 1260e73a819dSTao Ma u32 *split_pos, int *split_index) 1261e73a819dSTao Ma { 1262e73a819dSTao Ma int num_used = le16_to_cpu(rl->rl_used); 1263e73a819dSTao Ma int delta, middle = num_used / 2; 1264e73a819dSTao Ma 1265e73a819dSTao Ma for (delta = 0; delta < middle; delta++) { 1266e73a819dSTao Ma /* Let's check delta earlier than middle */ 1267e73a819dSTao Ma if (ocfs2_refcount_rec_no_intersect( 1268e73a819dSTao Ma &rl->rl_recs[middle - delta - 1], 1269e73a819dSTao Ma &rl->rl_recs[middle - delta])) { 1270e73a819dSTao Ma *split_index = middle - delta; 1271e73a819dSTao Ma break; 1272e73a819dSTao Ma } 1273e73a819dSTao Ma 1274e73a819dSTao Ma /* For even counts, don't walk off the end */ 1275e73a819dSTao Ma if ((middle + delta + 1) == num_used) 1276e73a819dSTao Ma continue; 1277e73a819dSTao Ma 1278e73a819dSTao Ma /* Now try delta past middle */ 1279e73a819dSTao Ma if (ocfs2_refcount_rec_no_intersect( 1280e73a819dSTao Ma &rl->rl_recs[middle + delta], 1281e73a819dSTao Ma &rl->rl_recs[middle + delta + 1])) { 1282e73a819dSTao Ma *split_index = middle + delta + 1; 1283e73a819dSTao Ma break; 1284e73a819dSTao Ma } 1285e73a819dSTao Ma } 1286e73a819dSTao Ma 1287e73a819dSTao Ma if (delta >= middle) 1288e73a819dSTao Ma return -ENOSPC; 1289e73a819dSTao Ma 1290e73a819dSTao Ma *split_pos = ocfs2_get_ref_rec_low_cpos(&rl->rl_recs[*split_index]); 1291e73a819dSTao Ma return 0; 1292e73a819dSTao Ma } 1293e73a819dSTao Ma 1294e73a819dSTao Ma static int ocfs2_divide_leaf_refcount_block(struct buffer_head *ref_leaf_bh, 1295e73a819dSTao Ma struct buffer_head *new_bh, 1296e73a819dSTao Ma u32 *split_cpos) 1297e73a819dSTao Ma { 1298e73a819dSTao Ma int split_index = 0, num_moved, ret; 1299e73a819dSTao Ma u32 cpos = 0; 1300e73a819dSTao Ma struct ocfs2_refcount_block *rb = 1301e73a819dSTao Ma (struct ocfs2_refcount_block *)ref_leaf_bh->b_data; 1302e73a819dSTao Ma struct ocfs2_refcount_list *rl = &rb->rf_records; 1303e73a819dSTao Ma struct ocfs2_refcount_block *new_rb = 1304e73a819dSTao Ma (struct ocfs2_refcount_block *)new_bh->b_data; 1305e73a819dSTao Ma struct ocfs2_refcount_list *new_rl = &new_rb->rf_records; 1306e73a819dSTao Ma 1307e73a819dSTao Ma mlog(0, "split old leaf refcount block %llu, count = %u, used = %u\n", 1308e73a819dSTao Ma (unsigned long long)ref_leaf_bh->b_blocknr, 1309e73a819dSTao Ma le32_to_cpu(rl->rl_count), le32_to_cpu(rl->rl_used)); 1310e73a819dSTao Ma 1311e73a819dSTao Ma /* 1312e73a819dSTao Ma * XXX: Improvement later. 1313e73a819dSTao Ma * If we know all the high 32 bit cpos is the same, no need to sort. 1314e73a819dSTao Ma * 1315e73a819dSTao Ma * In order to make the whole process safe, we do: 1316e73a819dSTao Ma * 1. sort the entries by their low 32 bit cpos first so that we can 1317e73a819dSTao Ma * find the split cpos easily. 1318e73a819dSTao Ma * 2. call ocfs2_insert_extent to insert the new refcount block. 1319e73a819dSTao Ma * 3. move the refcount rec to the new block. 1320e73a819dSTao Ma * 4. sort the entries by their 64 bit cpos. 1321e73a819dSTao Ma * 5. dirty the new_rb and rb. 1322e73a819dSTao Ma */ 1323e73a819dSTao Ma sort(&rl->rl_recs, le16_to_cpu(rl->rl_used), 1324e73a819dSTao Ma sizeof(struct ocfs2_refcount_rec), 1325e73a819dSTao Ma cmp_refcount_rec_by_low_cpos, swap_refcount_rec); 1326e73a819dSTao Ma 1327e73a819dSTao Ma ret = ocfs2_find_refcount_split_pos(rl, &cpos, &split_index); 1328e73a819dSTao Ma if (ret) { 1329e73a819dSTao Ma mlog_errno(ret); 1330e73a819dSTao Ma return ret; 1331e73a819dSTao Ma } 1332e73a819dSTao Ma 1333e73a819dSTao Ma new_rb->rf_cpos = cpu_to_le32(cpos); 1334e73a819dSTao Ma 1335e73a819dSTao Ma /* move refcount records starting from split_index to the new block. */ 1336e73a819dSTao Ma num_moved = le16_to_cpu(rl->rl_used) - split_index; 1337e73a819dSTao Ma memcpy(new_rl->rl_recs, &rl->rl_recs[split_index], 1338e73a819dSTao Ma num_moved * sizeof(struct ocfs2_refcount_rec)); 1339e73a819dSTao Ma 1340e73a819dSTao Ma /*ok, remove the entries we just moved over to the other block. */ 1341e73a819dSTao Ma memset(&rl->rl_recs[split_index], 0, 1342e73a819dSTao Ma num_moved * sizeof(struct ocfs2_refcount_rec)); 1343e73a819dSTao Ma 1344e73a819dSTao Ma /* change old and new rl_used accordingly. */ 1345e73a819dSTao Ma le16_add_cpu(&rl->rl_used, -num_moved); 1346e73a819dSTao Ma new_rl->rl_used = cpu_to_le32(num_moved); 1347e73a819dSTao Ma 1348e73a819dSTao Ma sort(&rl->rl_recs, le16_to_cpu(rl->rl_used), 1349e73a819dSTao Ma sizeof(struct ocfs2_refcount_rec), 1350e73a819dSTao Ma cmp_refcount_rec_by_cpos, swap_refcount_rec); 1351e73a819dSTao Ma 1352e73a819dSTao Ma sort(&new_rl->rl_recs, le16_to_cpu(new_rl->rl_used), 1353e73a819dSTao Ma sizeof(struct ocfs2_refcount_rec), 1354e73a819dSTao Ma cmp_refcount_rec_by_cpos, swap_refcount_rec); 1355e73a819dSTao Ma 1356e73a819dSTao Ma *split_cpos = cpos; 1357e73a819dSTao Ma return 0; 1358e73a819dSTao Ma } 1359e73a819dSTao Ma 1360e73a819dSTao Ma static int ocfs2_new_leaf_refcount_block(handle_t *handle, 1361e73a819dSTao Ma struct ocfs2_caching_info *ci, 1362e73a819dSTao Ma struct buffer_head *ref_root_bh, 1363e73a819dSTao Ma struct buffer_head *ref_leaf_bh, 1364e73a819dSTao Ma struct ocfs2_alloc_context *meta_ac) 1365e73a819dSTao Ma { 1366e73a819dSTao Ma int ret; 1367e73a819dSTao Ma u16 suballoc_bit_start; 1368e73a819dSTao Ma u32 num_got, new_cpos; 1369e73a819dSTao Ma u64 blkno; 1370e73a819dSTao Ma struct super_block *sb = ocfs2_metadata_cache_get_super(ci); 1371e73a819dSTao Ma struct ocfs2_refcount_block *root_rb = 1372e73a819dSTao Ma (struct ocfs2_refcount_block *)ref_root_bh->b_data; 1373e73a819dSTao Ma struct buffer_head *new_bh = NULL; 1374e73a819dSTao Ma struct ocfs2_refcount_block *new_rb; 1375e73a819dSTao Ma struct ocfs2_extent_tree ref_et; 1376e73a819dSTao Ma 1377e73a819dSTao Ma BUG_ON(!(le32_to_cpu(root_rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)); 1378e73a819dSTao Ma 1379e73a819dSTao Ma ret = ocfs2_journal_access_rb(handle, ci, ref_root_bh, 1380e73a819dSTao Ma OCFS2_JOURNAL_ACCESS_WRITE); 1381e73a819dSTao Ma if (ret) { 1382e73a819dSTao Ma mlog_errno(ret); 1383e73a819dSTao Ma goto out; 1384e73a819dSTao Ma } 1385e73a819dSTao Ma 1386e73a819dSTao Ma ret = ocfs2_journal_access_rb(handle, ci, ref_leaf_bh, 1387e73a819dSTao Ma OCFS2_JOURNAL_ACCESS_WRITE); 1388e73a819dSTao Ma if (ret) { 1389e73a819dSTao Ma mlog_errno(ret); 1390e73a819dSTao Ma goto out; 1391e73a819dSTao Ma } 1392e73a819dSTao Ma 1393e73a819dSTao Ma ret = ocfs2_claim_metadata(OCFS2_SB(sb), handle, meta_ac, 1, 1394e73a819dSTao Ma &suballoc_bit_start, &num_got, 1395e73a819dSTao Ma &blkno); 1396e73a819dSTao Ma if (ret) { 1397e73a819dSTao Ma mlog_errno(ret); 1398e73a819dSTao Ma goto out; 1399e73a819dSTao Ma } 1400e73a819dSTao Ma 1401e73a819dSTao Ma new_bh = sb_getblk(sb, blkno); 1402e73a819dSTao Ma if (new_bh == NULL) { 1403e73a819dSTao Ma ret = -EIO; 1404e73a819dSTao Ma mlog_errno(ret); 1405e73a819dSTao Ma goto out; 1406e73a819dSTao Ma } 1407e73a819dSTao Ma ocfs2_set_new_buffer_uptodate(ci, new_bh); 1408e73a819dSTao Ma 1409e73a819dSTao Ma ret = ocfs2_journal_access_rb(handle, ci, new_bh, 1410e73a819dSTao Ma OCFS2_JOURNAL_ACCESS_CREATE); 1411e73a819dSTao Ma if (ret) { 1412e73a819dSTao Ma mlog_errno(ret); 1413e73a819dSTao Ma goto out; 1414e73a819dSTao Ma } 1415e73a819dSTao Ma 1416e73a819dSTao Ma /* Initialize ocfs2_refcount_block. */ 1417e73a819dSTao Ma new_rb = (struct ocfs2_refcount_block *)new_bh->b_data; 1418e73a819dSTao Ma memset(new_rb, 0, sb->s_blocksize); 1419e73a819dSTao Ma strcpy((void *)new_rb, OCFS2_REFCOUNT_BLOCK_SIGNATURE); 1420e73a819dSTao Ma new_rb->rf_suballoc_slot = cpu_to_le16(OCFS2_SB(sb)->slot_num); 1421e73a819dSTao Ma new_rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start); 1422e73a819dSTao Ma new_rb->rf_fs_generation = cpu_to_le32(OCFS2_SB(sb)->fs_generation); 1423e73a819dSTao Ma new_rb->rf_blkno = cpu_to_le64(blkno); 1424e73a819dSTao Ma new_rb->rf_parent = cpu_to_le64(ref_root_bh->b_blocknr); 1425e73a819dSTao Ma new_rb->rf_flags = cpu_to_le32(OCFS2_REFCOUNT_LEAF_FL); 1426e73a819dSTao Ma new_rb->rf_records.rl_count = 1427e73a819dSTao Ma cpu_to_le16(ocfs2_refcount_recs_per_rb(sb)); 1428e73a819dSTao Ma new_rb->rf_generation = root_rb->rf_generation; 1429e73a819dSTao Ma 1430e73a819dSTao Ma ret = ocfs2_divide_leaf_refcount_block(ref_leaf_bh, new_bh, &new_cpos); 1431e73a819dSTao Ma if (ret) { 1432e73a819dSTao Ma mlog_errno(ret); 1433e73a819dSTao Ma goto out; 1434e73a819dSTao Ma } 1435e73a819dSTao Ma 1436e73a819dSTao Ma ocfs2_journal_dirty(handle, ref_leaf_bh); 1437e73a819dSTao Ma ocfs2_journal_dirty(handle, new_bh); 1438e73a819dSTao Ma 1439e73a819dSTao Ma ocfs2_init_refcount_extent_tree(&ref_et, ci, ref_root_bh); 1440e73a819dSTao Ma 1441e73a819dSTao Ma mlog(0, "insert new leaf block %llu at %u\n", 1442e73a819dSTao Ma (unsigned long long)new_bh->b_blocknr, new_cpos); 1443e73a819dSTao Ma 1444e73a819dSTao Ma /* Insert the new leaf block with the specific offset cpos. */ 1445e73a819dSTao Ma ret = ocfs2_insert_extent(handle, &ref_et, new_cpos, new_bh->b_blocknr, 1446e73a819dSTao Ma 1, 0, meta_ac); 1447e73a819dSTao Ma if (ret) 1448e73a819dSTao Ma mlog_errno(ret); 1449e73a819dSTao Ma 1450e73a819dSTao Ma out: 1451e73a819dSTao Ma brelse(new_bh); 1452e73a819dSTao Ma return ret; 1453e73a819dSTao Ma } 1454e73a819dSTao Ma 1455e73a819dSTao Ma static int ocfs2_expand_refcount_tree(handle_t *handle, 1456e73a819dSTao Ma struct ocfs2_caching_info *ci, 1457e73a819dSTao Ma struct buffer_head *ref_root_bh, 1458e73a819dSTao Ma struct buffer_head *ref_leaf_bh, 1459e73a819dSTao Ma struct ocfs2_alloc_context *meta_ac) 1460e73a819dSTao Ma { 1461e73a819dSTao Ma int ret; 1462e73a819dSTao Ma struct buffer_head *expand_bh = NULL; 1463e73a819dSTao Ma 1464e73a819dSTao Ma if (ref_root_bh == ref_leaf_bh) { 1465e73a819dSTao Ma /* 1466e73a819dSTao Ma * the old root bh hasn't been expanded to a b-tree, 1467e73a819dSTao Ma * so expand it first. 1468e73a819dSTao Ma */ 1469e73a819dSTao Ma ret = ocfs2_expand_inline_ref_root(handle, ci, ref_root_bh, 1470e73a819dSTao Ma &expand_bh, meta_ac); 1471e73a819dSTao Ma if (ret) { 1472e73a819dSTao Ma mlog_errno(ret); 1473e73a819dSTao Ma goto out; 1474e73a819dSTao Ma } 1475e73a819dSTao Ma } else { 1476e73a819dSTao Ma expand_bh = ref_leaf_bh; 1477e73a819dSTao Ma get_bh(expand_bh); 1478e73a819dSTao Ma } 1479e73a819dSTao Ma 1480e73a819dSTao Ma 1481e73a819dSTao Ma /* Now add a new refcount block into the tree.*/ 1482e73a819dSTao Ma ret = ocfs2_new_leaf_refcount_block(handle, ci, ref_root_bh, 1483e73a819dSTao Ma expand_bh, meta_ac); 1484e73a819dSTao Ma if (ret) 1485e73a819dSTao Ma mlog_errno(ret); 1486e73a819dSTao Ma out: 1487e73a819dSTao Ma brelse(expand_bh); 1488e73a819dSTao Ma return ret; 1489e73a819dSTao Ma } 1490e73a819dSTao Ma 1491e73a819dSTao Ma /* 1492e73a819dSTao Ma * Adjust the extent rec in b-tree representing ref_leaf_bh. 1493e73a819dSTao Ma * 1494e73a819dSTao Ma * Only called when we have inserted a new refcount rec at index 0 1495e73a819dSTao Ma * which means ocfs2_extent_rec.e_cpos may need some change. 1496e73a819dSTao Ma */ 1497e73a819dSTao Ma static int ocfs2_adjust_refcount_rec(handle_t *handle, 1498e73a819dSTao Ma struct ocfs2_caching_info *ci, 1499e73a819dSTao Ma struct buffer_head *ref_root_bh, 1500e73a819dSTao Ma struct buffer_head *ref_leaf_bh, 1501e73a819dSTao Ma struct ocfs2_refcount_rec *rec) 1502e73a819dSTao Ma { 1503e73a819dSTao Ma int ret = 0, i; 1504e73a819dSTao Ma u32 new_cpos, old_cpos; 1505e73a819dSTao Ma struct ocfs2_path *path = NULL; 1506e73a819dSTao Ma struct ocfs2_extent_tree et; 1507e73a819dSTao Ma struct ocfs2_refcount_block *rb = 1508e73a819dSTao Ma (struct ocfs2_refcount_block *)ref_root_bh->b_data; 1509e73a819dSTao Ma struct ocfs2_extent_list *el; 1510e73a819dSTao Ma 1511e73a819dSTao Ma if (!(le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)) 1512e73a819dSTao Ma goto out; 1513e73a819dSTao Ma 1514e73a819dSTao Ma rb = (struct ocfs2_refcount_block *)ref_leaf_bh->b_data; 1515e73a819dSTao Ma old_cpos = le32_to_cpu(rb->rf_cpos); 1516e73a819dSTao Ma new_cpos = le64_to_cpu(rec->r_cpos) & OCFS2_32BIT_POS_MASK; 1517e73a819dSTao Ma if (old_cpos <= new_cpos) 1518e73a819dSTao Ma goto out; 1519e73a819dSTao Ma 1520e73a819dSTao Ma ocfs2_init_refcount_extent_tree(&et, ci, ref_root_bh); 1521e73a819dSTao Ma 1522e73a819dSTao Ma path = ocfs2_new_path_from_et(&et); 1523e73a819dSTao Ma if (!path) { 1524e73a819dSTao Ma ret = -ENOMEM; 1525e73a819dSTao Ma mlog_errno(ret); 1526e73a819dSTao Ma goto out; 1527e73a819dSTao Ma } 1528e73a819dSTao Ma 1529e73a819dSTao Ma ret = ocfs2_find_path(ci, path, old_cpos); 1530e73a819dSTao Ma if (ret) { 1531e73a819dSTao Ma mlog_errno(ret); 1532e73a819dSTao Ma goto out; 1533e73a819dSTao Ma } 1534e73a819dSTao Ma 1535e73a819dSTao Ma /* 1536e73a819dSTao Ma * 2 more credits, one for the leaf refcount block, one for 1537e73a819dSTao Ma * the extent block contains the extent rec. 1538e73a819dSTao Ma */ 1539e73a819dSTao Ma ret = ocfs2_extend_trans(handle, handle->h_buffer_credits + 2); 1540e73a819dSTao Ma if (ret < 0) { 1541e73a819dSTao Ma mlog_errno(ret); 1542e73a819dSTao Ma goto out; 1543e73a819dSTao Ma } 1544e73a819dSTao Ma 1545e73a819dSTao Ma ret = ocfs2_journal_access_rb(handle, ci, ref_leaf_bh, 1546e73a819dSTao Ma OCFS2_JOURNAL_ACCESS_WRITE); 1547e73a819dSTao Ma if (ret < 0) { 1548e73a819dSTao Ma mlog_errno(ret); 1549e73a819dSTao Ma goto out; 1550e73a819dSTao Ma } 1551e73a819dSTao Ma 1552e73a819dSTao Ma ret = ocfs2_journal_access_eb(handle, ci, path_leaf_bh(path), 1553e73a819dSTao Ma OCFS2_JOURNAL_ACCESS_WRITE); 1554e73a819dSTao Ma if (ret < 0) { 1555e73a819dSTao Ma mlog_errno(ret); 1556e73a819dSTao Ma goto out; 1557e73a819dSTao Ma } 1558e73a819dSTao Ma 1559e73a819dSTao Ma /* change the leaf extent block first. */ 1560e73a819dSTao Ma el = path_leaf_el(path); 1561e73a819dSTao Ma 1562e73a819dSTao Ma for (i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) 1563e73a819dSTao Ma if (le32_to_cpu(el->l_recs[i].e_cpos) == old_cpos) 1564e73a819dSTao Ma break; 1565e73a819dSTao Ma 1566e73a819dSTao Ma BUG_ON(i == le16_to_cpu(el->l_next_free_rec)); 1567e73a819dSTao Ma 1568e73a819dSTao Ma el->l_recs[i].e_cpos = cpu_to_le32(new_cpos); 1569e73a819dSTao Ma 1570e73a819dSTao Ma /* change the r_cpos in the leaf block. */ 1571e73a819dSTao Ma rb->rf_cpos = cpu_to_le32(new_cpos); 1572e73a819dSTao Ma 1573e73a819dSTao Ma ocfs2_journal_dirty(handle, path_leaf_bh(path)); 1574e73a819dSTao Ma ocfs2_journal_dirty(handle, ref_leaf_bh); 1575e73a819dSTao Ma 1576e73a819dSTao Ma out: 1577e73a819dSTao Ma ocfs2_free_path(path); 1578e73a819dSTao Ma return ret; 1579e73a819dSTao Ma } 1580e73a819dSTao Ma 1581e73a819dSTao Ma static int ocfs2_insert_refcount_rec(handle_t *handle, 1582e73a819dSTao Ma struct ocfs2_caching_info *ci, 1583e73a819dSTao Ma struct buffer_head *ref_root_bh, 1584e73a819dSTao Ma struct buffer_head *ref_leaf_bh, 1585e73a819dSTao Ma struct ocfs2_refcount_rec *rec, 1586e73a819dSTao Ma int index, 1587e73a819dSTao Ma struct ocfs2_alloc_context *meta_ac) 1588e73a819dSTao Ma { 1589e73a819dSTao Ma int ret; 1590e73a819dSTao Ma struct ocfs2_refcount_block *rb = 1591e73a819dSTao Ma (struct ocfs2_refcount_block *)ref_leaf_bh->b_data; 1592e73a819dSTao Ma struct ocfs2_refcount_list *rf_list = &rb->rf_records; 1593e73a819dSTao Ma struct buffer_head *new_bh = NULL; 1594e73a819dSTao Ma 1595e73a819dSTao Ma BUG_ON(le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL); 1596e73a819dSTao Ma 1597e73a819dSTao Ma if (rf_list->rl_used == rf_list->rl_count) { 1598e73a819dSTao Ma u64 cpos = le64_to_cpu(rec->r_cpos); 1599e73a819dSTao Ma u32 len = le32_to_cpu(rec->r_clusters); 1600e73a819dSTao Ma 1601e73a819dSTao Ma ret = ocfs2_expand_refcount_tree(handle, ci, ref_root_bh, 1602e73a819dSTao Ma ref_leaf_bh, meta_ac); 1603e73a819dSTao Ma if (ret) { 1604e73a819dSTao Ma mlog_errno(ret); 1605e73a819dSTao Ma goto out; 1606e73a819dSTao Ma } 1607e73a819dSTao Ma 1608e73a819dSTao Ma ret = ocfs2_get_refcount_rec(ci, ref_root_bh, 1609e73a819dSTao Ma cpos, len, NULL, &index, 1610e73a819dSTao Ma &new_bh); 1611e73a819dSTao Ma if (ret) { 1612e73a819dSTao Ma mlog_errno(ret); 1613e73a819dSTao Ma goto out; 1614e73a819dSTao Ma } 1615e73a819dSTao Ma 1616e73a819dSTao Ma ref_leaf_bh = new_bh; 1617e73a819dSTao Ma rb = (struct ocfs2_refcount_block *)ref_leaf_bh->b_data; 1618e73a819dSTao Ma rf_list = &rb->rf_records; 1619e73a819dSTao Ma } 1620e73a819dSTao Ma 1621e73a819dSTao Ma ret = ocfs2_journal_access_rb(handle, ci, ref_leaf_bh, 1622e73a819dSTao Ma OCFS2_JOURNAL_ACCESS_WRITE); 1623e73a819dSTao Ma if (ret) { 1624e73a819dSTao Ma mlog_errno(ret); 1625e73a819dSTao Ma goto out; 1626e73a819dSTao Ma } 1627e73a819dSTao Ma 1628e73a819dSTao Ma if (index < le16_to_cpu(rf_list->rl_used)) 1629e73a819dSTao Ma memmove(&rf_list->rl_recs[index + 1], 1630e73a819dSTao Ma &rf_list->rl_recs[index], 1631e73a819dSTao Ma (le16_to_cpu(rf_list->rl_used) - index) * 1632e73a819dSTao Ma sizeof(struct ocfs2_refcount_rec)); 1633e73a819dSTao Ma 1634e73a819dSTao Ma mlog(0, "insert refcount record start %llu, len %u, count %u " 1635e73a819dSTao Ma "to leaf block %llu at index %d\n", 1636e73a819dSTao Ma (unsigned long long)le64_to_cpu(rec->r_cpos), 1637e73a819dSTao Ma le32_to_cpu(rec->r_clusters), le32_to_cpu(rec->r_refcount), 1638e73a819dSTao Ma (unsigned long long)ref_leaf_bh->b_blocknr, index); 1639e73a819dSTao Ma 1640e73a819dSTao Ma rf_list->rl_recs[index] = *rec; 1641e73a819dSTao Ma 1642e73a819dSTao Ma le16_add_cpu(&rf_list->rl_used, 1); 1643e73a819dSTao Ma 1644e73a819dSTao Ma ocfs2_refcount_rec_merge(rb, index); 1645e73a819dSTao Ma 1646e73a819dSTao Ma ret = ocfs2_journal_dirty(handle, ref_leaf_bh); 1647e73a819dSTao Ma if (ret) { 1648e73a819dSTao Ma mlog_errno(ret); 1649e73a819dSTao Ma goto out; 1650e73a819dSTao Ma } 1651e73a819dSTao Ma 1652e73a819dSTao Ma if (index == 0) { 1653e73a819dSTao Ma ret = ocfs2_adjust_refcount_rec(handle, ci, 1654e73a819dSTao Ma ref_root_bh, 1655e73a819dSTao Ma ref_leaf_bh, rec); 1656e73a819dSTao Ma if (ret) 1657e73a819dSTao Ma mlog_errno(ret); 1658e73a819dSTao Ma } 1659e73a819dSTao Ma out: 1660e73a819dSTao Ma brelse(new_bh); 1661e73a819dSTao Ma return ret; 1662e73a819dSTao Ma } 1663e73a819dSTao Ma 1664e73a819dSTao Ma /* 1665e73a819dSTao Ma * Split the refcount_rec indexed by "index" in ref_leaf_bh. 1666e73a819dSTao Ma * This is much simple than our b-tree code. 1667e73a819dSTao Ma * split_rec is the new refcount rec we want to insert. 1668e73a819dSTao Ma * If split_rec->r_refcount > 0, we are changing the refcount(in case we 1669e73a819dSTao Ma * increase refcount or decrease a refcount to non-zero). 1670e73a819dSTao Ma * If split_rec->r_refcount == 0, we are punching a hole in current refcount 1671e73a819dSTao Ma * rec( in case we decrease a refcount to zero). 1672e73a819dSTao Ma */ 1673e73a819dSTao Ma static int ocfs2_split_refcount_rec(handle_t *handle, 1674e73a819dSTao Ma struct ocfs2_caching_info *ci, 1675e73a819dSTao Ma struct buffer_head *ref_root_bh, 1676e73a819dSTao Ma struct buffer_head *ref_leaf_bh, 1677e73a819dSTao Ma struct ocfs2_refcount_rec *split_rec, 1678e73a819dSTao Ma int index, 1679e73a819dSTao Ma struct ocfs2_alloc_context *meta_ac, 1680e73a819dSTao Ma struct ocfs2_cached_dealloc_ctxt *dealloc) 1681e73a819dSTao Ma { 1682e73a819dSTao Ma int ret, recs_need; 1683e73a819dSTao Ma u32 len; 1684e73a819dSTao Ma struct ocfs2_refcount_block *rb = 1685e73a819dSTao Ma (struct ocfs2_refcount_block *)ref_leaf_bh->b_data; 1686e73a819dSTao Ma struct ocfs2_refcount_list *rf_list = &rb->rf_records; 1687e73a819dSTao Ma struct ocfs2_refcount_rec *orig_rec = &rf_list->rl_recs[index]; 1688e73a819dSTao Ma struct ocfs2_refcount_rec *tail_rec = NULL; 1689e73a819dSTao Ma struct buffer_head *new_bh = NULL; 1690e73a819dSTao Ma 1691e73a819dSTao Ma BUG_ON(le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL); 1692e73a819dSTao Ma 1693e73a819dSTao Ma mlog(0, "original r_pos %llu, cluster %u, split %llu, cluster %u\n", 1694e73a819dSTao Ma le64_to_cpu(orig_rec->r_cpos), le32_to_cpu(orig_rec->r_clusters), 1695e73a819dSTao Ma le64_to_cpu(split_rec->r_cpos), 1696e73a819dSTao Ma le32_to_cpu(split_rec->r_clusters)); 1697e73a819dSTao Ma 1698e73a819dSTao Ma /* 1699e73a819dSTao Ma * If we just need to split the header or tail clusters, 1700e73a819dSTao Ma * no more recs are needed, just split is OK. 1701e73a819dSTao Ma * Otherwise we at least need one new recs. 1702e73a819dSTao Ma */ 1703e73a819dSTao Ma if (!split_rec->r_refcount && 1704e73a819dSTao Ma (split_rec->r_cpos == orig_rec->r_cpos || 1705e73a819dSTao Ma le64_to_cpu(split_rec->r_cpos) + 1706e73a819dSTao Ma le32_to_cpu(split_rec->r_clusters) == 1707e73a819dSTao Ma le64_to_cpu(orig_rec->r_cpos) + le32_to_cpu(orig_rec->r_clusters))) 1708e73a819dSTao Ma recs_need = 0; 1709e73a819dSTao Ma else 1710e73a819dSTao Ma recs_need = 1; 1711e73a819dSTao Ma 1712e73a819dSTao Ma /* 1713e73a819dSTao Ma * We need one more rec if we split in the middle and the new rec have 1714e73a819dSTao Ma * some refcount in it. 1715e73a819dSTao Ma */ 1716e73a819dSTao Ma if (split_rec->r_refcount && 1717e73a819dSTao Ma (split_rec->r_cpos != orig_rec->r_cpos && 1718e73a819dSTao Ma le64_to_cpu(split_rec->r_cpos) + 1719e73a819dSTao Ma le32_to_cpu(split_rec->r_clusters) != 1720e73a819dSTao Ma le64_to_cpu(orig_rec->r_cpos) + le32_to_cpu(orig_rec->r_clusters))) 1721e73a819dSTao Ma recs_need++; 1722e73a819dSTao Ma 1723e73a819dSTao Ma /* If the leaf block don't have enough record, expand it. */ 1724e73a819dSTao Ma if (le16_to_cpu(rf_list->rl_used) + recs_need > rf_list->rl_count) { 1725e73a819dSTao Ma struct ocfs2_refcount_rec tmp_rec; 1726e73a819dSTao Ma u64 cpos = le64_to_cpu(orig_rec->r_cpos); 1727e73a819dSTao Ma len = le32_to_cpu(orig_rec->r_clusters); 1728e73a819dSTao Ma ret = ocfs2_expand_refcount_tree(handle, ci, ref_root_bh, 1729e73a819dSTao Ma ref_leaf_bh, meta_ac); 1730e73a819dSTao Ma if (ret) { 1731e73a819dSTao Ma mlog_errno(ret); 1732e73a819dSTao Ma goto out; 1733e73a819dSTao Ma } 1734e73a819dSTao Ma 1735e73a819dSTao Ma /* 1736e73a819dSTao Ma * We have to re-get it since now cpos may be moved to 1737e73a819dSTao Ma * another leaf block. 1738e73a819dSTao Ma */ 1739e73a819dSTao Ma ret = ocfs2_get_refcount_rec(ci, ref_root_bh, 1740e73a819dSTao Ma cpos, len, &tmp_rec, &index, 1741e73a819dSTao Ma &new_bh); 1742e73a819dSTao Ma if (ret) { 1743e73a819dSTao Ma mlog_errno(ret); 1744e73a819dSTao Ma goto out; 1745e73a819dSTao Ma } 1746e73a819dSTao Ma 1747e73a819dSTao Ma ref_leaf_bh = new_bh; 1748e73a819dSTao Ma rb = (struct ocfs2_refcount_block *)ref_leaf_bh->b_data; 1749e73a819dSTao Ma rf_list = &rb->rf_records; 1750e73a819dSTao Ma orig_rec = &rf_list->rl_recs[index]; 1751e73a819dSTao Ma } 1752e73a819dSTao Ma 1753e73a819dSTao Ma ret = ocfs2_journal_access_rb(handle, ci, ref_leaf_bh, 1754e73a819dSTao Ma OCFS2_JOURNAL_ACCESS_WRITE); 1755e73a819dSTao Ma if (ret) { 1756e73a819dSTao Ma mlog_errno(ret); 1757e73a819dSTao Ma goto out; 1758e73a819dSTao Ma } 1759e73a819dSTao Ma 1760e73a819dSTao Ma /* 1761e73a819dSTao Ma * We have calculated out how many new records we need and store 1762e73a819dSTao Ma * in recs_need, so spare enough space first by moving the records 1763e73a819dSTao Ma * after "index" to the end. 1764e73a819dSTao Ma */ 1765e73a819dSTao Ma if (index != le16_to_cpu(rf_list->rl_used) - 1) 1766e73a819dSTao Ma memmove(&rf_list->rl_recs[index + 1 + recs_need], 1767e73a819dSTao Ma &rf_list->rl_recs[index + 1], 1768e73a819dSTao Ma (le16_to_cpu(rf_list->rl_used) - index - 1) * 1769e73a819dSTao Ma sizeof(struct ocfs2_refcount_rec)); 1770e73a819dSTao Ma 1771e73a819dSTao Ma len = (le64_to_cpu(orig_rec->r_cpos) + 1772e73a819dSTao Ma le32_to_cpu(orig_rec->r_clusters)) - 1773e73a819dSTao Ma (le64_to_cpu(split_rec->r_cpos) + 1774e73a819dSTao Ma le32_to_cpu(split_rec->r_clusters)); 1775e73a819dSTao Ma 1776e73a819dSTao Ma /* 1777e73a819dSTao Ma * If we have "len", the we will split in the tail and move it 1778e73a819dSTao Ma * to the end of the space we have just spared. 1779e73a819dSTao Ma */ 1780e73a819dSTao Ma if (len) { 1781e73a819dSTao Ma tail_rec = &rf_list->rl_recs[index + recs_need]; 1782e73a819dSTao Ma 1783e73a819dSTao Ma memcpy(tail_rec, orig_rec, sizeof(struct ocfs2_refcount_rec)); 1784e73a819dSTao Ma le64_add_cpu(&tail_rec->r_cpos, 1785e73a819dSTao Ma le32_to_cpu(tail_rec->r_clusters) - len); 1786e73a819dSTao Ma tail_rec->r_clusters = le32_to_cpu(len); 1787e73a819dSTao Ma } 1788e73a819dSTao Ma 1789e73a819dSTao Ma /* 1790e73a819dSTao Ma * If the split pos isn't the same as the original one, we need to 1791e73a819dSTao Ma * split in the head. 1792e73a819dSTao Ma * 1793e73a819dSTao Ma * Note: We have the chance that split_rec.r_refcount = 0, 1794e73a819dSTao Ma * recs_need = 0 and len > 0, which means we just cut the head from 1795e73a819dSTao Ma * the orig_rec and in that case we have done some modification in 1796e73a819dSTao Ma * orig_rec above, so the check for r_cpos is faked. 1797e73a819dSTao Ma */ 1798e73a819dSTao Ma if (split_rec->r_cpos != orig_rec->r_cpos && tail_rec != orig_rec) { 1799e73a819dSTao Ma len = le64_to_cpu(split_rec->r_cpos) - 1800e73a819dSTao Ma le64_to_cpu(orig_rec->r_cpos); 1801e73a819dSTao Ma orig_rec->r_clusters = cpu_to_le32(len); 1802e73a819dSTao Ma index++; 1803e73a819dSTao Ma } 1804e73a819dSTao Ma 1805e73a819dSTao Ma le16_add_cpu(&rf_list->rl_used, recs_need); 1806e73a819dSTao Ma 1807e73a819dSTao Ma if (split_rec->r_refcount) { 1808e73a819dSTao Ma rf_list->rl_recs[index] = *split_rec; 1809e73a819dSTao Ma mlog(0, "insert refcount record start %llu, len %u, count %u " 1810e73a819dSTao Ma "to leaf block %llu at index %d\n", 1811e73a819dSTao Ma (unsigned long long)le64_to_cpu(split_rec->r_cpos), 1812e73a819dSTao Ma le32_to_cpu(split_rec->r_clusters), 1813e73a819dSTao Ma le32_to_cpu(split_rec->r_refcount), 1814e73a819dSTao Ma (unsigned long long)ref_leaf_bh->b_blocknr, index); 1815e73a819dSTao Ma 1816e73a819dSTao Ma ocfs2_refcount_rec_merge(rb, index); 1817e73a819dSTao Ma } 1818e73a819dSTao Ma 1819e73a819dSTao Ma ret = ocfs2_journal_dirty(handle, ref_leaf_bh); 1820e73a819dSTao Ma if (ret) 1821e73a819dSTao Ma mlog_errno(ret); 1822e73a819dSTao Ma 1823e73a819dSTao Ma out: 1824e73a819dSTao Ma brelse(new_bh); 1825e73a819dSTao Ma return ret; 1826e73a819dSTao Ma } 1827e73a819dSTao Ma 1828e73a819dSTao Ma static int __ocfs2_increase_refcount(handle_t *handle, 1829e73a819dSTao Ma struct ocfs2_caching_info *ci, 1830e73a819dSTao Ma struct buffer_head *ref_root_bh, 1831e73a819dSTao Ma u64 cpos, u32 len, 1832e73a819dSTao Ma struct ocfs2_alloc_context *meta_ac, 1833e73a819dSTao Ma struct ocfs2_cached_dealloc_ctxt *dealloc) 1834e73a819dSTao Ma { 1835e73a819dSTao Ma int ret = 0, index; 1836e73a819dSTao Ma struct buffer_head *ref_leaf_bh = NULL; 1837e73a819dSTao Ma struct ocfs2_refcount_rec rec; 1838e73a819dSTao Ma unsigned int set_len = 0; 1839e73a819dSTao Ma 1840e73a819dSTao Ma mlog(0, "Tree owner %llu, add refcount start %llu, len %u\n", 1841e73a819dSTao Ma (unsigned long long)ocfs2_metadata_cache_owner(ci), 1842e73a819dSTao Ma (unsigned long long)cpos, len); 1843e73a819dSTao Ma 1844e73a819dSTao Ma while (len) { 1845e73a819dSTao Ma ret = ocfs2_get_refcount_rec(ci, ref_root_bh, 1846e73a819dSTao Ma cpos, len, &rec, &index, 1847e73a819dSTao Ma &ref_leaf_bh); 1848e73a819dSTao Ma if (ret) { 1849e73a819dSTao Ma mlog_errno(ret); 1850e73a819dSTao Ma goto out; 1851e73a819dSTao Ma } 1852e73a819dSTao Ma 1853e73a819dSTao Ma set_len = le32_to_cpu(rec.r_clusters); 1854e73a819dSTao Ma 1855e73a819dSTao Ma /* 1856e73a819dSTao Ma * Here we may meet with 3 situations: 1857e73a819dSTao Ma * 1858e73a819dSTao Ma * 1. If we find an already existing record, and the length 1859e73a819dSTao Ma * is the same, cool, we just need to increase the r_refcount 1860e73a819dSTao Ma * and it is OK. 1861e73a819dSTao Ma * 2. If we find a hole, just insert it with r_refcount = 1. 1862e73a819dSTao Ma * 3. If we are in the middle of one extent record, split 1863e73a819dSTao Ma * it. 1864e73a819dSTao Ma */ 1865e73a819dSTao Ma if (rec.r_refcount && le64_to_cpu(rec.r_cpos) == cpos && 1866e73a819dSTao Ma set_len <= len) { 1867e73a819dSTao Ma mlog(0, "increase refcount rec, start %llu, len %u, " 1868e73a819dSTao Ma "count %u\n", (unsigned long long)cpos, set_len, 1869e73a819dSTao Ma le32_to_cpu(rec.r_refcount)); 1870e73a819dSTao Ma ret = ocfs2_change_refcount_rec(handle, ci, 1871e73a819dSTao Ma ref_leaf_bh, index, 1); 1872e73a819dSTao Ma if (ret) { 1873e73a819dSTao Ma mlog_errno(ret); 1874e73a819dSTao Ma goto out; 1875e73a819dSTao Ma } 1876e73a819dSTao Ma } else if (!rec.r_refcount) { 1877e73a819dSTao Ma rec.r_refcount = cpu_to_le32(1); 1878e73a819dSTao Ma 1879e73a819dSTao Ma mlog(0, "insert refcount rec, start %llu, len %u\n", 1880e73a819dSTao Ma (unsigned long long)le64_to_cpu(rec.r_cpos), 1881e73a819dSTao Ma set_len); 1882e73a819dSTao Ma ret = ocfs2_insert_refcount_rec(handle, ci, ref_root_bh, 1883e73a819dSTao Ma ref_leaf_bh, 1884e73a819dSTao Ma &rec, index, meta_ac); 1885e73a819dSTao Ma if (ret) { 1886e73a819dSTao Ma mlog_errno(ret); 1887e73a819dSTao Ma goto out; 1888e73a819dSTao Ma } 1889e73a819dSTao Ma } else { 1890e73a819dSTao Ma set_len = min((u64)(cpos + len), 1891e73a819dSTao Ma le64_to_cpu(rec.r_cpos) + set_len) - cpos; 1892e73a819dSTao Ma rec.r_cpos = cpu_to_le64(cpos); 1893e73a819dSTao Ma rec.r_clusters = cpu_to_le32(set_len); 1894e73a819dSTao Ma le32_add_cpu(&rec.r_refcount, 1); 1895e73a819dSTao Ma 1896e73a819dSTao Ma mlog(0, "split refcount rec, start %llu, " 1897e73a819dSTao Ma "len %u, count %u\n", 1898e73a819dSTao Ma (unsigned long long)le64_to_cpu(rec.r_cpos), 1899e73a819dSTao Ma set_len, le32_to_cpu(rec.r_refcount)); 1900e73a819dSTao Ma ret = ocfs2_split_refcount_rec(handle, ci, 1901e73a819dSTao Ma ref_root_bh, ref_leaf_bh, 1902e73a819dSTao Ma &rec, index, 1903e73a819dSTao Ma meta_ac, dealloc); 1904e73a819dSTao Ma if (ret) { 1905e73a819dSTao Ma mlog_errno(ret); 1906e73a819dSTao Ma goto out; 1907e73a819dSTao Ma } 1908e73a819dSTao Ma } 1909e73a819dSTao Ma 1910e73a819dSTao Ma cpos += set_len; 1911e73a819dSTao Ma len -= set_len; 1912e73a819dSTao Ma brelse(ref_leaf_bh); 1913e73a819dSTao Ma ref_leaf_bh = NULL; 1914e73a819dSTao Ma } 1915e73a819dSTao Ma 1916e73a819dSTao Ma out: 1917e73a819dSTao Ma brelse(ref_leaf_bh); 1918e73a819dSTao Ma return ret; 1919e73a819dSTao Ma } 19201823cb0bSTao Ma 19211823cb0bSTao Ma static int ocfs2_remove_refcount_extent(handle_t *handle, 19221823cb0bSTao Ma struct ocfs2_caching_info *ci, 19231823cb0bSTao Ma struct buffer_head *ref_root_bh, 19241823cb0bSTao Ma struct buffer_head *ref_leaf_bh, 19251823cb0bSTao Ma struct ocfs2_alloc_context *meta_ac, 19261823cb0bSTao Ma struct ocfs2_cached_dealloc_ctxt *dealloc) 19271823cb0bSTao Ma { 19281823cb0bSTao Ma int ret; 19291823cb0bSTao Ma struct super_block *sb = ocfs2_metadata_cache_get_super(ci); 19301823cb0bSTao Ma struct ocfs2_refcount_block *rb = 19311823cb0bSTao Ma (struct ocfs2_refcount_block *)ref_leaf_bh->b_data; 19321823cb0bSTao Ma struct ocfs2_extent_tree et; 19331823cb0bSTao Ma 19341823cb0bSTao Ma BUG_ON(rb->rf_records.rl_used); 19351823cb0bSTao Ma 19361823cb0bSTao Ma ocfs2_init_refcount_extent_tree(&et, ci, ref_root_bh); 19371823cb0bSTao Ma ret = ocfs2_remove_extent(handle, &et, le32_to_cpu(rb->rf_cpos), 19381823cb0bSTao Ma 1, meta_ac, dealloc); 19391823cb0bSTao Ma if (ret) { 19401823cb0bSTao Ma mlog_errno(ret); 19411823cb0bSTao Ma goto out; 19421823cb0bSTao Ma } 19431823cb0bSTao Ma 19441823cb0bSTao Ma ocfs2_remove_from_cache(ci, ref_leaf_bh); 19451823cb0bSTao Ma 19461823cb0bSTao Ma /* 19471823cb0bSTao Ma * add the freed block to the dealloc so that it will be freed 19481823cb0bSTao Ma * when we run dealloc. 19491823cb0bSTao Ma */ 19501823cb0bSTao Ma ret = ocfs2_cache_block_dealloc(dealloc, EXTENT_ALLOC_SYSTEM_INODE, 19511823cb0bSTao Ma le16_to_cpu(rb->rf_suballoc_slot), 19521823cb0bSTao Ma le64_to_cpu(rb->rf_blkno), 19531823cb0bSTao Ma le16_to_cpu(rb->rf_suballoc_bit)); 19541823cb0bSTao Ma if (ret) { 19551823cb0bSTao Ma mlog_errno(ret); 19561823cb0bSTao Ma goto out; 19571823cb0bSTao Ma } 19581823cb0bSTao Ma 19591823cb0bSTao Ma ret = ocfs2_journal_access_rb(handle, ci, ref_root_bh, 19601823cb0bSTao Ma OCFS2_JOURNAL_ACCESS_WRITE); 19611823cb0bSTao Ma if (ret) { 19621823cb0bSTao Ma mlog_errno(ret); 19631823cb0bSTao Ma goto out; 19641823cb0bSTao Ma } 19651823cb0bSTao Ma 19661823cb0bSTao Ma rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data; 19671823cb0bSTao Ma 19681823cb0bSTao Ma le32_add_cpu(&rb->rf_clusters, -1); 19691823cb0bSTao Ma 19701823cb0bSTao Ma /* 19711823cb0bSTao Ma * check whether we need to restore the root refcount block if 19721823cb0bSTao Ma * there is no leaf extent block at atll. 19731823cb0bSTao Ma */ 19741823cb0bSTao Ma if (!rb->rf_list.l_next_free_rec) { 19751823cb0bSTao Ma BUG_ON(rb->rf_clusters); 19761823cb0bSTao Ma 19771823cb0bSTao Ma mlog(0, "reset refcount tree root %llu to be a record block.\n", 19781823cb0bSTao Ma (unsigned long long)ref_root_bh->b_blocknr); 19791823cb0bSTao Ma 19801823cb0bSTao Ma rb->rf_flags = 0; 19811823cb0bSTao Ma rb->rf_parent = 0; 19821823cb0bSTao Ma rb->rf_cpos = 0; 19831823cb0bSTao Ma memset(&rb->rf_records, 0, sb->s_blocksize - 19841823cb0bSTao Ma offsetof(struct ocfs2_refcount_block, rf_records)); 19851823cb0bSTao Ma rb->rf_records.rl_count = 19861823cb0bSTao Ma cpu_to_le16(ocfs2_refcount_recs_per_rb(sb)); 19871823cb0bSTao Ma } 19881823cb0bSTao Ma 19891823cb0bSTao Ma ocfs2_journal_dirty(handle, ref_root_bh); 19901823cb0bSTao Ma 19911823cb0bSTao Ma out: 19921823cb0bSTao Ma return ret; 19931823cb0bSTao Ma } 19941823cb0bSTao Ma 19951823cb0bSTao Ma static int ocfs2_decrease_refcount_rec(handle_t *handle, 19961823cb0bSTao Ma struct ocfs2_caching_info *ci, 19971823cb0bSTao Ma struct buffer_head *ref_root_bh, 19981823cb0bSTao Ma struct buffer_head *ref_leaf_bh, 19991823cb0bSTao Ma int index, u64 cpos, unsigned int len, 20001823cb0bSTao Ma struct ocfs2_alloc_context *meta_ac, 20011823cb0bSTao Ma struct ocfs2_cached_dealloc_ctxt *dealloc) 20021823cb0bSTao Ma { 20031823cb0bSTao Ma int ret; 20041823cb0bSTao Ma struct ocfs2_refcount_block *rb = 20051823cb0bSTao Ma (struct ocfs2_refcount_block *)ref_leaf_bh->b_data; 20061823cb0bSTao Ma struct ocfs2_refcount_rec *rec = &rb->rf_records.rl_recs[index]; 20071823cb0bSTao Ma 20081823cb0bSTao Ma BUG_ON(cpos < le64_to_cpu(rec->r_cpos)); 20091823cb0bSTao Ma BUG_ON(cpos + len > 20101823cb0bSTao Ma le64_to_cpu(rec->r_cpos) + le32_to_cpu(rec->r_clusters)); 20111823cb0bSTao Ma 20121823cb0bSTao Ma if (cpos == le64_to_cpu(rec->r_cpos) && 20131823cb0bSTao Ma len == le32_to_cpu(rec->r_clusters)) 20141823cb0bSTao Ma ret = ocfs2_change_refcount_rec(handle, ci, 20151823cb0bSTao Ma ref_leaf_bh, index, -1); 20161823cb0bSTao Ma else { 20171823cb0bSTao Ma struct ocfs2_refcount_rec split = *rec; 20181823cb0bSTao Ma split.r_cpos = cpu_to_le64(cpos); 20191823cb0bSTao Ma split.r_clusters = cpu_to_le32(len); 20201823cb0bSTao Ma 20211823cb0bSTao Ma le32_add_cpu(&split.r_refcount, -1); 20221823cb0bSTao Ma 20231823cb0bSTao Ma mlog(0, "split refcount rec, start %llu, " 20241823cb0bSTao Ma "len %u, count %u, original start %llu, len %u\n", 20251823cb0bSTao Ma (unsigned long long)le64_to_cpu(split.r_cpos), 20261823cb0bSTao Ma len, le32_to_cpu(split.r_refcount), 20271823cb0bSTao Ma (unsigned long long)le64_to_cpu(rec->r_cpos), 20281823cb0bSTao Ma le32_to_cpu(rec->r_clusters)); 20291823cb0bSTao Ma ret = ocfs2_split_refcount_rec(handle, ci, 20301823cb0bSTao Ma ref_root_bh, ref_leaf_bh, 20311823cb0bSTao Ma &split, index, 20321823cb0bSTao Ma meta_ac, dealloc); 20331823cb0bSTao Ma } 20341823cb0bSTao Ma 20351823cb0bSTao Ma if (ret) { 20361823cb0bSTao Ma mlog_errno(ret); 20371823cb0bSTao Ma goto out; 20381823cb0bSTao Ma } 20391823cb0bSTao Ma 20401823cb0bSTao Ma /* Remove the leaf refcount block if it contains no refcount record. */ 20411823cb0bSTao Ma if (!rb->rf_records.rl_used && ref_leaf_bh != ref_root_bh) { 20421823cb0bSTao Ma ret = ocfs2_remove_refcount_extent(handle, ci, ref_root_bh, 20431823cb0bSTao Ma ref_leaf_bh, meta_ac, 20441823cb0bSTao Ma dealloc); 20451823cb0bSTao Ma if (ret) 20461823cb0bSTao Ma mlog_errno(ret); 20471823cb0bSTao Ma } 20481823cb0bSTao Ma 20491823cb0bSTao Ma out: 20501823cb0bSTao Ma return ret; 20511823cb0bSTao Ma } 20521823cb0bSTao Ma 20531823cb0bSTao Ma static int __ocfs2_decrease_refcount(handle_t *handle, 20541823cb0bSTao Ma struct ocfs2_caching_info *ci, 20551823cb0bSTao Ma struct buffer_head *ref_root_bh, 20561823cb0bSTao Ma u64 cpos, u32 len, 20571823cb0bSTao Ma struct ocfs2_alloc_context *meta_ac, 20581823cb0bSTao Ma struct ocfs2_cached_dealloc_ctxt *dealloc) 20591823cb0bSTao Ma { 20601823cb0bSTao Ma int ret = 0, index = 0; 20611823cb0bSTao Ma struct ocfs2_refcount_rec rec; 20621823cb0bSTao Ma unsigned int r_count = 0, r_len; 20631823cb0bSTao Ma struct super_block *sb = ocfs2_metadata_cache_get_super(ci); 20641823cb0bSTao Ma struct buffer_head *ref_leaf_bh = NULL; 20651823cb0bSTao Ma 20661823cb0bSTao Ma mlog(0, "Tree owner %llu, decrease refcount start %llu, len %u\n", 20671823cb0bSTao Ma (unsigned long long)ocfs2_metadata_cache_owner(ci), 20681823cb0bSTao Ma (unsigned long long)cpos, len); 20691823cb0bSTao Ma 20701823cb0bSTao Ma while (len) { 20711823cb0bSTao Ma ret = ocfs2_get_refcount_rec(ci, ref_root_bh, 20721823cb0bSTao Ma cpos, len, &rec, &index, 20731823cb0bSTao Ma &ref_leaf_bh); 20741823cb0bSTao Ma if (ret) { 20751823cb0bSTao Ma mlog_errno(ret); 20761823cb0bSTao Ma goto out; 20771823cb0bSTao Ma } 20781823cb0bSTao Ma 20791823cb0bSTao Ma r_count = le32_to_cpu(rec.r_refcount); 20801823cb0bSTao Ma BUG_ON(r_count == 0); 20811823cb0bSTao Ma 20821823cb0bSTao Ma r_len = min((u64)(cpos + len), le64_to_cpu(rec.r_cpos) + 20831823cb0bSTao Ma le32_to_cpu(rec.r_clusters)) - cpos; 20841823cb0bSTao Ma 20851823cb0bSTao Ma ret = ocfs2_decrease_refcount_rec(handle, ci, ref_root_bh, 20861823cb0bSTao Ma ref_leaf_bh, index, 20871823cb0bSTao Ma cpos, r_len, 20881823cb0bSTao Ma meta_ac, dealloc); 20891823cb0bSTao Ma if (ret) { 20901823cb0bSTao Ma mlog_errno(ret); 20911823cb0bSTao Ma goto out; 20921823cb0bSTao Ma } 20931823cb0bSTao Ma 20941823cb0bSTao Ma if (le32_to_cpu(rec.r_refcount) == 1) { 20951823cb0bSTao Ma ret = ocfs2_cache_cluster_dealloc(dealloc, 20961823cb0bSTao Ma ocfs2_clusters_to_blocks(sb, cpos), 20971823cb0bSTao Ma r_len); 20981823cb0bSTao Ma if (ret) { 20991823cb0bSTao Ma mlog_errno(ret); 21001823cb0bSTao Ma goto out; 21011823cb0bSTao Ma } 21021823cb0bSTao Ma } 21031823cb0bSTao Ma 21041823cb0bSTao Ma cpos += r_len; 21051823cb0bSTao Ma len -= r_len; 21061823cb0bSTao Ma brelse(ref_leaf_bh); 21071823cb0bSTao Ma ref_leaf_bh = NULL; 21081823cb0bSTao Ma } 21091823cb0bSTao Ma 21101823cb0bSTao Ma out: 21111823cb0bSTao Ma brelse(ref_leaf_bh); 21121823cb0bSTao Ma return ret; 21131823cb0bSTao Ma } 21141823cb0bSTao Ma 21151823cb0bSTao Ma /* Caller must hold refcount tree lock. */ 21161823cb0bSTao Ma int ocfs2_decrease_refcount(struct inode *inode, 21171823cb0bSTao Ma handle_t *handle, u32 cpos, u32 len, 21181823cb0bSTao Ma struct ocfs2_alloc_context *meta_ac, 21191823cb0bSTao Ma struct ocfs2_cached_dealloc_ctxt *dealloc) 21201823cb0bSTao Ma { 21211823cb0bSTao Ma int ret; 21221823cb0bSTao Ma u64 ref_blkno; 21231823cb0bSTao Ma struct ocfs2_inode_info *oi = OCFS2_I(inode); 21241823cb0bSTao Ma struct buffer_head *ref_root_bh = NULL; 21251823cb0bSTao Ma struct ocfs2_refcount_tree *tree; 21261823cb0bSTao Ma 21271823cb0bSTao Ma BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL)); 21281823cb0bSTao Ma 21291823cb0bSTao Ma ret = ocfs2_get_refcount_block(inode, &ref_blkno); 21301823cb0bSTao Ma if (ret) { 21311823cb0bSTao Ma mlog_errno(ret); 21321823cb0bSTao Ma goto out; 21331823cb0bSTao Ma } 21341823cb0bSTao Ma 21351823cb0bSTao Ma ret = ocfs2_get_refcount_tree(OCFS2_SB(inode->i_sb), ref_blkno, &tree); 21361823cb0bSTao Ma if (ret) { 21371823cb0bSTao Ma mlog_errno(ret); 21381823cb0bSTao Ma goto out; 21391823cb0bSTao Ma } 21401823cb0bSTao Ma 21411823cb0bSTao Ma ret = ocfs2_read_refcount_block(&tree->rf_ci, tree->rf_blkno, 21421823cb0bSTao Ma &ref_root_bh); 21431823cb0bSTao Ma if (ret) { 21441823cb0bSTao Ma mlog_errno(ret); 21451823cb0bSTao Ma goto out; 21461823cb0bSTao Ma } 21471823cb0bSTao Ma 21481823cb0bSTao Ma ret = __ocfs2_decrease_refcount(handle, &tree->rf_ci, ref_root_bh, 21491823cb0bSTao Ma cpos, len, meta_ac, dealloc); 21501823cb0bSTao Ma if (ret) 21511823cb0bSTao Ma mlog_errno(ret); 21521823cb0bSTao Ma out: 21531823cb0bSTao Ma brelse(ref_root_bh); 21541823cb0bSTao Ma return ret; 21551823cb0bSTao Ma } 21561aa75feaSTao Ma 21571aa75feaSTao Ma /* 21581aa75feaSTao Ma * Mark the already-existing extent at cpos as refcounted for len clusters. 21591aa75feaSTao Ma * This adds the refcount extent flag. 21601aa75feaSTao Ma * 21611aa75feaSTao Ma * If the existing extent is larger than the request, initiate a 21621aa75feaSTao Ma * split. An attempt will be made at merging with adjacent extents. 21631aa75feaSTao Ma * 21641aa75feaSTao Ma * The caller is responsible for passing down meta_ac if we'll need it. 21651aa75feaSTao Ma */ 21661aa75feaSTao Ma static int ocfs2_mark_extent_refcounted(struct inode *inode, 21671aa75feaSTao Ma struct ocfs2_extent_tree *et, 21681aa75feaSTao Ma handle_t *handle, u32 cpos, 21691aa75feaSTao Ma u32 len, u32 phys, 21701aa75feaSTao Ma struct ocfs2_alloc_context *meta_ac, 21711aa75feaSTao Ma struct ocfs2_cached_dealloc_ctxt *dealloc) 21721aa75feaSTao Ma { 21731aa75feaSTao Ma int ret; 21741aa75feaSTao Ma 21751aa75feaSTao Ma mlog(0, "Inode %lu refcount tree cpos %u, len %u, phys cluster %u\n", 21761aa75feaSTao Ma inode->i_ino, cpos, len, phys); 21771aa75feaSTao Ma 21781aa75feaSTao Ma if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb))) { 21791aa75feaSTao Ma ocfs2_error(inode->i_sb, "Inode %lu want to use refcount " 21801aa75feaSTao Ma "tree, but the feature bit is not set in the " 21811aa75feaSTao Ma "super block.", inode->i_ino); 21821aa75feaSTao Ma ret = -EROFS; 21831aa75feaSTao Ma goto out; 21841aa75feaSTao Ma } 21851aa75feaSTao Ma 21861aa75feaSTao Ma ret = ocfs2_change_extent_flag(handle, et, cpos, 21871aa75feaSTao Ma len, phys, meta_ac, dealloc, 21881aa75feaSTao Ma OCFS2_EXT_REFCOUNTED, 0); 21891aa75feaSTao Ma if (ret) 21901aa75feaSTao Ma mlog_errno(ret); 21911aa75feaSTao Ma 21921aa75feaSTao Ma out: 21931aa75feaSTao Ma return ret; 21941aa75feaSTao Ma } 2195bcbbb24aSTao Ma 2196bcbbb24aSTao Ma /* 2197bcbbb24aSTao Ma * Given some contiguous physical clusters, calculate what we need 2198bcbbb24aSTao Ma * for modifying their refcount. 2199bcbbb24aSTao Ma */ 2200bcbbb24aSTao Ma static int ocfs2_calc_refcount_meta_credits(struct super_block *sb, 2201bcbbb24aSTao Ma struct ocfs2_caching_info *ci, 2202bcbbb24aSTao Ma struct buffer_head *ref_root_bh, 2203bcbbb24aSTao Ma u64 start_cpos, 2204bcbbb24aSTao Ma u32 clusters, 2205bcbbb24aSTao Ma int *meta_add, 2206bcbbb24aSTao Ma int *credits) 2207bcbbb24aSTao Ma { 2208bcbbb24aSTao Ma int ret = 0, index, ref_blocks = 0, recs_add = 0; 2209bcbbb24aSTao Ma u64 cpos = start_cpos; 2210bcbbb24aSTao Ma struct ocfs2_refcount_block *rb; 2211bcbbb24aSTao Ma struct ocfs2_refcount_rec rec; 2212bcbbb24aSTao Ma struct buffer_head *ref_leaf_bh = NULL, *prev_bh = NULL; 2213bcbbb24aSTao Ma u32 len; 2214bcbbb24aSTao Ma 2215bcbbb24aSTao Ma mlog(0, "start_cpos %llu, clusters %u\n", 2216bcbbb24aSTao Ma (unsigned long long)start_cpos, clusters); 2217bcbbb24aSTao Ma while (clusters) { 2218bcbbb24aSTao Ma ret = ocfs2_get_refcount_rec(ci, ref_root_bh, 2219bcbbb24aSTao Ma cpos, clusters, &rec, 2220bcbbb24aSTao Ma &index, &ref_leaf_bh); 2221bcbbb24aSTao Ma if (ret) { 2222bcbbb24aSTao Ma mlog_errno(ret); 2223bcbbb24aSTao Ma goto out; 2224bcbbb24aSTao Ma } 2225bcbbb24aSTao Ma 2226bcbbb24aSTao Ma if (ref_leaf_bh != prev_bh) { 2227bcbbb24aSTao Ma /* 2228bcbbb24aSTao Ma * Now we encounter a new leaf block, so calculate 2229bcbbb24aSTao Ma * whether we need to extend the old leaf. 2230bcbbb24aSTao Ma */ 2231bcbbb24aSTao Ma if (prev_bh) { 2232bcbbb24aSTao Ma rb = (struct ocfs2_refcount_block *) 2233bcbbb24aSTao Ma prev_bh->b_data; 2234bcbbb24aSTao Ma 2235bcbbb24aSTao Ma if (le64_to_cpu(rb->rf_records.rl_used) + 2236bcbbb24aSTao Ma recs_add > 2237bcbbb24aSTao Ma le16_to_cpu(rb->rf_records.rl_count)) 2238bcbbb24aSTao Ma ref_blocks++; 2239bcbbb24aSTao Ma } 2240bcbbb24aSTao Ma 2241bcbbb24aSTao Ma recs_add = 0; 2242bcbbb24aSTao Ma *credits += 1; 2243bcbbb24aSTao Ma brelse(prev_bh); 2244bcbbb24aSTao Ma prev_bh = ref_leaf_bh; 2245bcbbb24aSTao Ma get_bh(prev_bh); 2246bcbbb24aSTao Ma } 2247bcbbb24aSTao Ma 2248bcbbb24aSTao Ma rb = (struct ocfs2_refcount_block *)ref_leaf_bh->b_data; 2249bcbbb24aSTao Ma 2250bcbbb24aSTao Ma mlog(0, "recs_add %d,cpos %llu, clusters %u, rec->r_cpos %llu," 2251bcbbb24aSTao Ma "rec->r_clusters %u, rec->r_refcount %u, index %d\n", 2252bcbbb24aSTao Ma recs_add, (unsigned long long)cpos, clusters, 2253bcbbb24aSTao Ma (unsigned long long)le64_to_cpu(rec.r_cpos), 2254bcbbb24aSTao Ma le32_to_cpu(rec.r_clusters), 2255bcbbb24aSTao Ma le32_to_cpu(rec.r_refcount), index); 2256bcbbb24aSTao Ma 2257bcbbb24aSTao Ma len = min((u64)cpos + clusters, le64_to_cpu(rec.r_cpos) + 2258bcbbb24aSTao Ma le32_to_cpu(rec.r_clusters)) - cpos; 2259bcbbb24aSTao Ma /* 2260bcbbb24aSTao Ma * If the refcount rec already exist, cool. We just need 2261bcbbb24aSTao Ma * to check whether there is a split. Otherwise we just need 2262bcbbb24aSTao Ma * to increase the refcount. 2263bcbbb24aSTao Ma * If we will insert one, increases recs_add. 2264bcbbb24aSTao Ma * 2265bcbbb24aSTao Ma * We record all the records which will be inserted to the 2266bcbbb24aSTao Ma * same refcount block, so that we can tell exactly whether 2267bcbbb24aSTao Ma * we need a new refcount block or not. 2268bcbbb24aSTao Ma */ 2269bcbbb24aSTao Ma if (rec.r_refcount) { 2270bcbbb24aSTao Ma /* Check whether we need a split at the beginning. */ 2271bcbbb24aSTao Ma if (cpos == start_cpos && 2272bcbbb24aSTao Ma cpos != le64_to_cpu(rec.r_cpos)) 2273bcbbb24aSTao Ma recs_add++; 2274bcbbb24aSTao Ma 2275bcbbb24aSTao Ma /* Check whether we need a split in the end. */ 2276bcbbb24aSTao Ma if (cpos + clusters < le64_to_cpu(rec.r_cpos) + 2277bcbbb24aSTao Ma le32_to_cpu(rec.r_clusters)) 2278bcbbb24aSTao Ma recs_add++; 2279bcbbb24aSTao Ma } else 2280bcbbb24aSTao Ma recs_add++; 2281bcbbb24aSTao Ma 2282bcbbb24aSTao Ma brelse(ref_leaf_bh); 2283bcbbb24aSTao Ma ref_leaf_bh = NULL; 2284bcbbb24aSTao Ma clusters -= len; 2285bcbbb24aSTao Ma cpos += len; 2286bcbbb24aSTao Ma } 2287bcbbb24aSTao Ma 2288bcbbb24aSTao Ma if (prev_bh) { 2289bcbbb24aSTao Ma rb = (struct ocfs2_refcount_block *)prev_bh->b_data; 2290bcbbb24aSTao Ma 2291bcbbb24aSTao Ma if (le64_to_cpu(rb->rf_records.rl_used) + recs_add > 2292bcbbb24aSTao Ma le16_to_cpu(rb->rf_records.rl_count)) 2293bcbbb24aSTao Ma ref_blocks++; 2294bcbbb24aSTao Ma 2295bcbbb24aSTao Ma *credits += 1; 2296bcbbb24aSTao Ma } 2297bcbbb24aSTao Ma 2298bcbbb24aSTao Ma if (!ref_blocks) 2299bcbbb24aSTao Ma goto out; 2300bcbbb24aSTao Ma 2301bcbbb24aSTao Ma mlog(0, "we need ref_blocks %d\n", ref_blocks); 2302bcbbb24aSTao Ma *meta_add += ref_blocks; 2303bcbbb24aSTao Ma *credits += ref_blocks; 2304bcbbb24aSTao Ma 2305bcbbb24aSTao Ma /* 2306bcbbb24aSTao Ma * So we may need ref_blocks to insert into the tree. 2307bcbbb24aSTao Ma * That also means we need to change the b-tree and add that number 2308bcbbb24aSTao Ma * of records since we never merge them. 2309bcbbb24aSTao Ma * We need one more block for expansion since the new created leaf 2310bcbbb24aSTao Ma * block is also full and needs split. 2311bcbbb24aSTao Ma */ 2312bcbbb24aSTao Ma rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data; 2313bcbbb24aSTao Ma if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL) { 2314bcbbb24aSTao Ma struct ocfs2_extent_tree et; 2315bcbbb24aSTao Ma 2316bcbbb24aSTao Ma ocfs2_init_refcount_extent_tree(&et, ci, ref_root_bh); 2317bcbbb24aSTao Ma *meta_add += ocfs2_extend_meta_needed(et.et_root_el); 2318bcbbb24aSTao Ma *credits += ocfs2_calc_extend_credits(sb, 2319bcbbb24aSTao Ma et.et_root_el, 2320bcbbb24aSTao Ma ref_blocks); 2321bcbbb24aSTao Ma } else { 2322bcbbb24aSTao Ma *credits += OCFS2_EXPAND_REFCOUNT_TREE_CREDITS; 2323bcbbb24aSTao Ma *meta_add += 1; 2324bcbbb24aSTao Ma } 2325bcbbb24aSTao Ma 2326bcbbb24aSTao Ma out: 2327bcbbb24aSTao Ma brelse(ref_leaf_bh); 2328bcbbb24aSTao Ma brelse(prev_bh); 2329bcbbb24aSTao Ma return ret; 2330bcbbb24aSTao Ma } 2331bcbbb24aSTao Ma 2332bcbbb24aSTao Ma /* 2333bcbbb24aSTao Ma * For refcount tree, we will decrease some contiguous clusters 2334bcbbb24aSTao Ma * refcount count, so just go through it to see how many blocks 2335bcbbb24aSTao Ma * we gonna touch and whether we need to create new blocks. 2336bcbbb24aSTao Ma * 2337bcbbb24aSTao Ma * Normally the refcount blocks store these refcount should be 2338bcbbb24aSTao Ma * continguous also, so that we can get the number easily. 2339bcbbb24aSTao Ma * As for meta_ac, we will at most add split 2 refcount record and 2340bcbbb24aSTao Ma * 2 more refcount block, so just check it in a rough way. 2341bcbbb24aSTao Ma * 2342bcbbb24aSTao Ma * Caller must hold refcount tree lock. 2343bcbbb24aSTao Ma */ 2344bcbbb24aSTao Ma int ocfs2_prepare_refcount_change_for_del(struct inode *inode, 2345bcbbb24aSTao Ma struct buffer_head *di_bh, 2346bcbbb24aSTao Ma u64 phys_blkno, 2347bcbbb24aSTao Ma u32 clusters, 2348bcbbb24aSTao Ma int *credits, 2349bcbbb24aSTao Ma struct ocfs2_alloc_context **meta_ac) 2350bcbbb24aSTao Ma { 2351bcbbb24aSTao Ma int ret, ref_blocks = 0; 2352bcbbb24aSTao Ma struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 2353bcbbb24aSTao Ma struct ocfs2_inode_info *oi = OCFS2_I(inode); 2354bcbbb24aSTao Ma struct buffer_head *ref_root_bh = NULL; 2355bcbbb24aSTao Ma struct ocfs2_refcount_tree *tree; 2356bcbbb24aSTao Ma u64 start_cpos = ocfs2_blocks_to_clusters(inode->i_sb, phys_blkno); 2357bcbbb24aSTao Ma 2358bcbbb24aSTao Ma if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb))) { 2359bcbbb24aSTao Ma ocfs2_error(inode->i_sb, "Inode %lu want to use refcount " 2360bcbbb24aSTao Ma "tree, but the feature bit is not set in the " 2361bcbbb24aSTao Ma "super block.", inode->i_ino); 2362bcbbb24aSTao Ma ret = -EROFS; 2363bcbbb24aSTao Ma goto out; 2364bcbbb24aSTao Ma } 2365bcbbb24aSTao Ma 2366bcbbb24aSTao Ma BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL)); 2367bcbbb24aSTao Ma 2368bcbbb24aSTao Ma ret = ocfs2_get_refcount_tree(OCFS2_SB(inode->i_sb), 2369bcbbb24aSTao Ma le64_to_cpu(di->i_refcount_loc), &tree); 2370bcbbb24aSTao Ma if (ret) { 2371bcbbb24aSTao Ma mlog_errno(ret); 2372bcbbb24aSTao Ma goto out; 2373bcbbb24aSTao Ma } 2374bcbbb24aSTao Ma 2375bcbbb24aSTao Ma ret = ocfs2_read_refcount_block(&tree->rf_ci, 2376bcbbb24aSTao Ma le64_to_cpu(di->i_refcount_loc), 2377bcbbb24aSTao Ma &ref_root_bh); 2378bcbbb24aSTao Ma if (ret) { 2379bcbbb24aSTao Ma mlog_errno(ret); 2380bcbbb24aSTao Ma goto out; 2381bcbbb24aSTao Ma } 2382bcbbb24aSTao Ma 2383bcbbb24aSTao Ma ret = ocfs2_calc_refcount_meta_credits(inode->i_sb, 2384bcbbb24aSTao Ma &tree->rf_ci, 2385bcbbb24aSTao Ma ref_root_bh, 2386bcbbb24aSTao Ma start_cpos, clusters, 2387bcbbb24aSTao Ma &ref_blocks, credits); 2388bcbbb24aSTao Ma if (ret) { 2389bcbbb24aSTao Ma mlog_errno(ret); 2390bcbbb24aSTao Ma goto out; 2391bcbbb24aSTao Ma } 2392bcbbb24aSTao Ma 2393bcbbb24aSTao Ma mlog(0, "reserve new metadata %d, credits = %d\n", 2394bcbbb24aSTao Ma ref_blocks, *credits); 2395bcbbb24aSTao Ma 2396bcbbb24aSTao Ma if (ref_blocks) { 2397bcbbb24aSTao Ma ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb), 2398bcbbb24aSTao Ma ref_blocks, meta_ac); 2399bcbbb24aSTao Ma if (ret) 2400bcbbb24aSTao Ma mlog_errno(ret); 2401bcbbb24aSTao Ma } 2402bcbbb24aSTao Ma 2403bcbbb24aSTao Ma out: 2404bcbbb24aSTao Ma brelse(ref_root_bh); 2405bcbbb24aSTao Ma return ret; 2406bcbbb24aSTao Ma } 2407