/* -*- mode: c; c-basic-offset: 8; -*-
 * vim: noexpandtab sw=8 ts=8 sts=0:
 *
 * suballoc.c
 *
 * metadata alloc and free
 * Inspired by ext3 block groups.
 *
 * Copyright (C) 2002, 2004 Oracle. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
25*ccd979bdSMark Fasheh */ 26*ccd979bdSMark Fasheh 27*ccd979bdSMark Fasheh #include <linux/fs.h> 28*ccd979bdSMark Fasheh #include <linux/types.h> 29*ccd979bdSMark Fasheh #include <linux/slab.h> 30*ccd979bdSMark Fasheh #include <linux/highmem.h> 31*ccd979bdSMark Fasheh 32*ccd979bdSMark Fasheh #define MLOG_MASK_PREFIX ML_DISK_ALLOC 33*ccd979bdSMark Fasheh #include <cluster/masklog.h> 34*ccd979bdSMark Fasheh 35*ccd979bdSMark Fasheh #include "ocfs2.h" 36*ccd979bdSMark Fasheh 37*ccd979bdSMark Fasheh #include "alloc.h" 38*ccd979bdSMark Fasheh #include "dlmglue.h" 39*ccd979bdSMark Fasheh #include "inode.h" 40*ccd979bdSMark Fasheh #include "journal.h" 41*ccd979bdSMark Fasheh #include "localalloc.h" 42*ccd979bdSMark Fasheh #include "suballoc.h" 43*ccd979bdSMark Fasheh #include "super.h" 44*ccd979bdSMark Fasheh #include "sysfile.h" 45*ccd979bdSMark Fasheh #include "uptodate.h" 46*ccd979bdSMark Fasheh 47*ccd979bdSMark Fasheh #include "buffer_head_io.h" 48*ccd979bdSMark Fasheh 49*ccd979bdSMark Fasheh static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg); 50*ccd979bdSMark Fasheh static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe); 51*ccd979bdSMark Fasheh static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl); 52*ccd979bdSMark Fasheh static int ocfs2_block_group_fill(struct ocfs2_journal_handle *handle, 53*ccd979bdSMark Fasheh struct inode *alloc_inode, 54*ccd979bdSMark Fasheh struct buffer_head *bg_bh, 55*ccd979bdSMark Fasheh u64 group_blkno, 56*ccd979bdSMark Fasheh u16 my_chain, 57*ccd979bdSMark Fasheh struct ocfs2_chain_list *cl); 58*ccd979bdSMark Fasheh static int ocfs2_block_group_alloc(struct ocfs2_super *osb, 59*ccd979bdSMark Fasheh struct inode *alloc_inode, 60*ccd979bdSMark Fasheh struct buffer_head *bh); 61*ccd979bdSMark Fasheh 62*ccd979bdSMark Fasheh static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb, 63*ccd979bdSMark Fasheh struct ocfs2_alloc_context *ac); 64*ccd979bdSMark Fasheh 65*ccd979bdSMark Fasheh static 
int ocfs2_cluster_group_search(struct inode *inode, 66*ccd979bdSMark Fasheh struct buffer_head *group_bh, 67*ccd979bdSMark Fasheh u32 bits_wanted, u32 min_bits, 68*ccd979bdSMark Fasheh u16 *bit_off, u16 *bits_found); 69*ccd979bdSMark Fasheh static int ocfs2_block_group_search(struct inode *inode, 70*ccd979bdSMark Fasheh struct buffer_head *group_bh, 71*ccd979bdSMark Fasheh u32 bits_wanted, u32 min_bits, 72*ccd979bdSMark Fasheh u16 *bit_off, u16 *bits_found); 73*ccd979bdSMark Fasheh static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, 74*ccd979bdSMark Fasheh u32 bits_wanted, 75*ccd979bdSMark Fasheh u32 min_bits, 76*ccd979bdSMark Fasheh u16 *bit_off, 77*ccd979bdSMark Fasheh unsigned int *num_bits, 78*ccd979bdSMark Fasheh u64 *bg_blkno); 79*ccd979bdSMark Fasheh static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, 80*ccd979bdSMark Fasheh struct ocfs2_alloc_context *ac, 81*ccd979bdSMark Fasheh u32 bits_wanted, 82*ccd979bdSMark Fasheh u32 min_bits, 83*ccd979bdSMark Fasheh u16 *bit_off, 84*ccd979bdSMark Fasheh unsigned int *num_bits, 85*ccd979bdSMark Fasheh u64 *bg_blkno); 86*ccd979bdSMark Fasheh static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh, 87*ccd979bdSMark Fasheh int nr); 88*ccd979bdSMark Fasheh static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb, 89*ccd979bdSMark Fasheh struct buffer_head *bg_bh, 90*ccd979bdSMark Fasheh unsigned int bits_wanted, 91*ccd979bdSMark Fasheh u16 *bit_off, 92*ccd979bdSMark Fasheh u16 *bits_found); 93*ccd979bdSMark Fasheh static inline int ocfs2_block_group_set_bits(struct ocfs2_journal_handle *handle, 94*ccd979bdSMark Fasheh struct inode *alloc_inode, 95*ccd979bdSMark Fasheh struct ocfs2_group_desc *bg, 96*ccd979bdSMark Fasheh struct buffer_head *group_bh, 97*ccd979bdSMark Fasheh unsigned int bit_off, 98*ccd979bdSMark Fasheh unsigned int num_bits); 99*ccd979bdSMark Fasheh static inline int ocfs2_block_group_clear_bits(struct ocfs2_journal_handle *handle, 100*ccd979bdSMark Fasheh struct 
inode *alloc_inode, 101*ccd979bdSMark Fasheh struct ocfs2_group_desc *bg, 102*ccd979bdSMark Fasheh struct buffer_head *group_bh, 103*ccd979bdSMark Fasheh unsigned int bit_off, 104*ccd979bdSMark Fasheh unsigned int num_bits); 105*ccd979bdSMark Fasheh 106*ccd979bdSMark Fasheh static int ocfs2_relink_block_group(struct ocfs2_journal_handle *handle, 107*ccd979bdSMark Fasheh struct inode *alloc_inode, 108*ccd979bdSMark Fasheh struct buffer_head *fe_bh, 109*ccd979bdSMark Fasheh struct buffer_head *bg_bh, 110*ccd979bdSMark Fasheh struct buffer_head *prev_bg_bh, 111*ccd979bdSMark Fasheh u16 chain); 112*ccd979bdSMark Fasheh static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg, 113*ccd979bdSMark Fasheh u32 wanted); 114*ccd979bdSMark Fasheh static int ocfs2_free_suballoc_bits(struct ocfs2_journal_handle *handle, 115*ccd979bdSMark Fasheh struct inode *alloc_inode, 116*ccd979bdSMark Fasheh struct buffer_head *alloc_bh, 117*ccd979bdSMark Fasheh unsigned int start_bit, 118*ccd979bdSMark Fasheh u64 bg_blkno, 119*ccd979bdSMark Fasheh unsigned int count); 120*ccd979bdSMark Fasheh static inline u64 ocfs2_which_suballoc_group(u64 block, 121*ccd979bdSMark Fasheh unsigned int bit); 122*ccd979bdSMark Fasheh static inline u32 ocfs2_desc_bitmap_to_cluster_off(struct inode *inode, 123*ccd979bdSMark Fasheh u64 bg_blkno, 124*ccd979bdSMark Fasheh u16 bg_bit_off); 125*ccd979bdSMark Fasheh static inline u64 ocfs2_which_cluster_group(struct inode *inode, 126*ccd979bdSMark Fasheh u32 cluster); 127*ccd979bdSMark Fasheh static inline void ocfs2_block_to_cluster_group(struct inode *inode, 128*ccd979bdSMark Fasheh u64 data_blkno, 129*ccd979bdSMark Fasheh u64 *bg_blkno, 130*ccd979bdSMark Fasheh u16 *bg_bit_off); 131*ccd979bdSMark Fasheh 132*ccd979bdSMark Fasheh void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac) 133*ccd979bdSMark Fasheh { 134*ccd979bdSMark Fasheh if (ac->ac_inode) 135*ccd979bdSMark Fasheh iput(ac->ac_inode); 136*ccd979bdSMark Fasheh if (ac->ac_bh) 
137*ccd979bdSMark Fasheh brelse(ac->ac_bh); 138*ccd979bdSMark Fasheh kfree(ac); 139*ccd979bdSMark Fasheh } 140*ccd979bdSMark Fasheh 141*ccd979bdSMark Fasheh static u32 ocfs2_bits_per_group(struct ocfs2_chain_list *cl) 142*ccd979bdSMark Fasheh { 143*ccd979bdSMark Fasheh return (u32)le16_to_cpu(cl->cl_cpg) * (u32)le16_to_cpu(cl->cl_bpc); 144*ccd979bdSMark Fasheh } 145*ccd979bdSMark Fasheh 146*ccd979bdSMark Fasheh static int ocfs2_block_group_fill(struct ocfs2_journal_handle *handle, 147*ccd979bdSMark Fasheh struct inode *alloc_inode, 148*ccd979bdSMark Fasheh struct buffer_head *bg_bh, 149*ccd979bdSMark Fasheh u64 group_blkno, 150*ccd979bdSMark Fasheh u16 my_chain, 151*ccd979bdSMark Fasheh struct ocfs2_chain_list *cl) 152*ccd979bdSMark Fasheh { 153*ccd979bdSMark Fasheh int status = 0; 154*ccd979bdSMark Fasheh struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; 155*ccd979bdSMark Fasheh struct super_block * sb = alloc_inode->i_sb; 156*ccd979bdSMark Fasheh 157*ccd979bdSMark Fasheh mlog_entry_void(); 158*ccd979bdSMark Fasheh 159*ccd979bdSMark Fasheh if (((unsigned long long) bg_bh->b_blocknr) != group_blkno) { 160*ccd979bdSMark Fasheh ocfs2_error(alloc_inode->i_sb, "group block (%"MLFu64") " 161*ccd979bdSMark Fasheh "!= b_blocknr (%llu)", group_blkno, 162*ccd979bdSMark Fasheh (unsigned long long) bg_bh->b_blocknr); 163*ccd979bdSMark Fasheh status = -EIO; 164*ccd979bdSMark Fasheh goto bail; 165*ccd979bdSMark Fasheh } 166*ccd979bdSMark Fasheh 167*ccd979bdSMark Fasheh status = ocfs2_journal_access(handle, 168*ccd979bdSMark Fasheh alloc_inode, 169*ccd979bdSMark Fasheh bg_bh, 170*ccd979bdSMark Fasheh OCFS2_JOURNAL_ACCESS_CREATE); 171*ccd979bdSMark Fasheh if (status < 0) { 172*ccd979bdSMark Fasheh mlog_errno(status); 173*ccd979bdSMark Fasheh goto bail; 174*ccd979bdSMark Fasheh } 175*ccd979bdSMark Fasheh 176*ccd979bdSMark Fasheh memset(bg, 0, sb->s_blocksize); 177*ccd979bdSMark Fasheh strcpy(bg->bg_signature, OCFS2_GROUP_DESC_SIGNATURE); 178*ccd979bdSMark 
Fasheh bg->bg_generation = cpu_to_le32(OCFS2_SB(sb)->fs_generation); 179*ccd979bdSMark Fasheh bg->bg_size = cpu_to_le16(ocfs2_group_bitmap_size(sb)); 180*ccd979bdSMark Fasheh bg->bg_bits = cpu_to_le16(ocfs2_bits_per_group(cl)); 181*ccd979bdSMark Fasheh bg->bg_chain = cpu_to_le16(my_chain); 182*ccd979bdSMark Fasheh bg->bg_next_group = cl->cl_recs[my_chain].c_blkno; 183*ccd979bdSMark Fasheh bg->bg_parent_dinode = cpu_to_le64(OCFS2_I(alloc_inode)->ip_blkno); 184*ccd979bdSMark Fasheh bg->bg_blkno = cpu_to_le64(group_blkno); 185*ccd979bdSMark Fasheh /* set the 1st bit in the bitmap to account for the descriptor block */ 186*ccd979bdSMark Fasheh ocfs2_set_bit(0, (unsigned long *)bg->bg_bitmap); 187*ccd979bdSMark Fasheh bg->bg_free_bits_count = cpu_to_le16(le16_to_cpu(bg->bg_bits) - 1); 188*ccd979bdSMark Fasheh 189*ccd979bdSMark Fasheh status = ocfs2_journal_dirty(handle, bg_bh); 190*ccd979bdSMark Fasheh if (status < 0) 191*ccd979bdSMark Fasheh mlog_errno(status); 192*ccd979bdSMark Fasheh 193*ccd979bdSMark Fasheh /* There is no need to zero out or otherwise initialize the 194*ccd979bdSMark Fasheh * other blocks in a group - All valid FS metadata in a block 195*ccd979bdSMark Fasheh * group stores the superblock fs_generation value at 196*ccd979bdSMark Fasheh * allocation time. 
*/ 197*ccd979bdSMark Fasheh 198*ccd979bdSMark Fasheh bail: 199*ccd979bdSMark Fasheh mlog_exit(status); 200*ccd979bdSMark Fasheh return status; 201*ccd979bdSMark Fasheh } 202*ccd979bdSMark Fasheh 203*ccd979bdSMark Fasheh static inline u16 ocfs2_find_smallest_chain(struct ocfs2_chain_list *cl) 204*ccd979bdSMark Fasheh { 205*ccd979bdSMark Fasheh u16 curr, best; 206*ccd979bdSMark Fasheh 207*ccd979bdSMark Fasheh best = curr = 0; 208*ccd979bdSMark Fasheh while (curr < le16_to_cpu(cl->cl_count)) { 209*ccd979bdSMark Fasheh if (le32_to_cpu(cl->cl_recs[best].c_total) > 210*ccd979bdSMark Fasheh le32_to_cpu(cl->cl_recs[curr].c_total)) 211*ccd979bdSMark Fasheh best = curr; 212*ccd979bdSMark Fasheh curr++; 213*ccd979bdSMark Fasheh } 214*ccd979bdSMark Fasheh return best; 215*ccd979bdSMark Fasheh } 216*ccd979bdSMark Fasheh 217*ccd979bdSMark Fasheh /* 218*ccd979bdSMark Fasheh * We expect the block group allocator to already be locked. 219*ccd979bdSMark Fasheh */ 220*ccd979bdSMark Fasheh static int ocfs2_block_group_alloc(struct ocfs2_super *osb, 221*ccd979bdSMark Fasheh struct inode *alloc_inode, 222*ccd979bdSMark Fasheh struct buffer_head *bh) 223*ccd979bdSMark Fasheh { 224*ccd979bdSMark Fasheh int status, credits; 225*ccd979bdSMark Fasheh struct ocfs2_dinode *fe = (struct ocfs2_dinode *) bh->b_data; 226*ccd979bdSMark Fasheh struct ocfs2_chain_list *cl; 227*ccd979bdSMark Fasheh struct ocfs2_alloc_context *ac = NULL; 228*ccd979bdSMark Fasheh struct ocfs2_journal_handle *handle = NULL; 229*ccd979bdSMark Fasheh u32 bit_off, num_bits; 230*ccd979bdSMark Fasheh u16 alloc_rec; 231*ccd979bdSMark Fasheh u64 bg_blkno; 232*ccd979bdSMark Fasheh struct buffer_head *bg_bh = NULL; 233*ccd979bdSMark Fasheh struct ocfs2_group_desc *bg; 234*ccd979bdSMark Fasheh 235*ccd979bdSMark Fasheh BUG_ON(ocfs2_is_cluster_bitmap(alloc_inode)); 236*ccd979bdSMark Fasheh 237*ccd979bdSMark Fasheh mlog_entry_void(); 238*ccd979bdSMark Fasheh 239*ccd979bdSMark Fasheh handle = ocfs2_alloc_handle(osb); 240*ccd979bdSMark 
Fasheh if (!handle) { 241*ccd979bdSMark Fasheh status = -ENOMEM; 242*ccd979bdSMark Fasheh mlog_errno(status); 243*ccd979bdSMark Fasheh goto bail; 244*ccd979bdSMark Fasheh } 245*ccd979bdSMark Fasheh 246*ccd979bdSMark Fasheh cl = &fe->id2.i_chain; 247*ccd979bdSMark Fasheh status = ocfs2_reserve_clusters(osb, 248*ccd979bdSMark Fasheh handle, 249*ccd979bdSMark Fasheh le16_to_cpu(cl->cl_cpg), 250*ccd979bdSMark Fasheh &ac); 251*ccd979bdSMark Fasheh if (status < 0) { 252*ccd979bdSMark Fasheh if (status != -ENOSPC) 253*ccd979bdSMark Fasheh mlog_errno(status); 254*ccd979bdSMark Fasheh goto bail; 255*ccd979bdSMark Fasheh } 256*ccd979bdSMark Fasheh 257*ccd979bdSMark Fasheh credits = ocfs2_calc_group_alloc_credits(osb->sb, 258*ccd979bdSMark Fasheh le16_to_cpu(cl->cl_cpg)); 259*ccd979bdSMark Fasheh handle = ocfs2_start_trans(osb, handle, credits); 260*ccd979bdSMark Fasheh if (IS_ERR(handle)) { 261*ccd979bdSMark Fasheh status = PTR_ERR(handle); 262*ccd979bdSMark Fasheh handle = NULL; 263*ccd979bdSMark Fasheh mlog_errno(status); 264*ccd979bdSMark Fasheh goto bail; 265*ccd979bdSMark Fasheh } 266*ccd979bdSMark Fasheh 267*ccd979bdSMark Fasheh status = ocfs2_claim_clusters(osb, 268*ccd979bdSMark Fasheh handle, 269*ccd979bdSMark Fasheh ac, 270*ccd979bdSMark Fasheh le16_to_cpu(cl->cl_cpg), 271*ccd979bdSMark Fasheh &bit_off, 272*ccd979bdSMark Fasheh &num_bits); 273*ccd979bdSMark Fasheh if (status < 0) { 274*ccd979bdSMark Fasheh if (status != -ENOSPC) 275*ccd979bdSMark Fasheh mlog_errno(status); 276*ccd979bdSMark Fasheh goto bail; 277*ccd979bdSMark Fasheh } 278*ccd979bdSMark Fasheh 279*ccd979bdSMark Fasheh alloc_rec = ocfs2_find_smallest_chain(cl); 280*ccd979bdSMark Fasheh 281*ccd979bdSMark Fasheh /* setup the group */ 282*ccd979bdSMark Fasheh bg_blkno = ocfs2_clusters_to_blocks(osb->sb, bit_off); 283*ccd979bdSMark Fasheh mlog(0, "new descriptor, record %u, at block %"MLFu64"\n", 284*ccd979bdSMark Fasheh alloc_rec, bg_blkno); 285*ccd979bdSMark Fasheh 286*ccd979bdSMark Fasheh bg_bh = 
sb_getblk(osb->sb, bg_blkno); 287*ccd979bdSMark Fasheh if (!bg_bh) { 288*ccd979bdSMark Fasheh status = -EIO; 289*ccd979bdSMark Fasheh mlog_errno(status); 290*ccd979bdSMark Fasheh goto bail; 291*ccd979bdSMark Fasheh } 292*ccd979bdSMark Fasheh ocfs2_set_new_buffer_uptodate(alloc_inode, bg_bh); 293*ccd979bdSMark Fasheh 294*ccd979bdSMark Fasheh status = ocfs2_block_group_fill(handle, 295*ccd979bdSMark Fasheh alloc_inode, 296*ccd979bdSMark Fasheh bg_bh, 297*ccd979bdSMark Fasheh bg_blkno, 298*ccd979bdSMark Fasheh alloc_rec, 299*ccd979bdSMark Fasheh cl); 300*ccd979bdSMark Fasheh if (status < 0) { 301*ccd979bdSMark Fasheh mlog_errno(status); 302*ccd979bdSMark Fasheh goto bail; 303*ccd979bdSMark Fasheh } 304*ccd979bdSMark Fasheh 305*ccd979bdSMark Fasheh bg = (struct ocfs2_group_desc *) bg_bh->b_data; 306*ccd979bdSMark Fasheh 307*ccd979bdSMark Fasheh status = ocfs2_journal_access(handle, alloc_inode, 308*ccd979bdSMark Fasheh bh, OCFS2_JOURNAL_ACCESS_WRITE); 309*ccd979bdSMark Fasheh if (status < 0) { 310*ccd979bdSMark Fasheh mlog_errno(status); 311*ccd979bdSMark Fasheh goto bail; 312*ccd979bdSMark Fasheh } 313*ccd979bdSMark Fasheh 314*ccd979bdSMark Fasheh le32_add_cpu(&cl->cl_recs[alloc_rec].c_free, 315*ccd979bdSMark Fasheh le16_to_cpu(bg->bg_free_bits_count)); 316*ccd979bdSMark Fasheh le32_add_cpu(&cl->cl_recs[alloc_rec].c_total, le16_to_cpu(bg->bg_bits)); 317*ccd979bdSMark Fasheh cl->cl_recs[alloc_rec].c_blkno = cpu_to_le64(bg_blkno); 318*ccd979bdSMark Fasheh if (le16_to_cpu(cl->cl_next_free_rec) < le16_to_cpu(cl->cl_count)) 319*ccd979bdSMark Fasheh le16_add_cpu(&cl->cl_next_free_rec, 1); 320*ccd979bdSMark Fasheh 321*ccd979bdSMark Fasheh le32_add_cpu(&fe->id1.bitmap1.i_used, le16_to_cpu(bg->bg_bits) - 322*ccd979bdSMark Fasheh le16_to_cpu(bg->bg_free_bits_count)); 323*ccd979bdSMark Fasheh le32_add_cpu(&fe->id1.bitmap1.i_total, le16_to_cpu(bg->bg_bits)); 324*ccd979bdSMark Fasheh le32_add_cpu(&fe->i_clusters, le16_to_cpu(cl->cl_cpg)); 325*ccd979bdSMark Fasheh 326*ccd979bdSMark 
Fasheh status = ocfs2_journal_dirty(handle, bh); 327*ccd979bdSMark Fasheh if (status < 0) { 328*ccd979bdSMark Fasheh mlog_errno(status); 329*ccd979bdSMark Fasheh goto bail; 330*ccd979bdSMark Fasheh } 331*ccd979bdSMark Fasheh 332*ccd979bdSMark Fasheh spin_lock(&OCFS2_I(alloc_inode)->ip_lock); 333*ccd979bdSMark Fasheh OCFS2_I(alloc_inode)->ip_clusters = le32_to_cpu(fe->i_clusters); 334*ccd979bdSMark Fasheh fe->i_size = cpu_to_le64(ocfs2_clusters_to_bytes(alloc_inode->i_sb, 335*ccd979bdSMark Fasheh le32_to_cpu(fe->i_clusters))); 336*ccd979bdSMark Fasheh spin_unlock(&OCFS2_I(alloc_inode)->ip_lock); 337*ccd979bdSMark Fasheh i_size_write(alloc_inode, le64_to_cpu(fe->i_size)); 338*ccd979bdSMark Fasheh alloc_inode->i_blocks = 339*ccd979bdSMark Fasheh ocfs2_align_bytes_to_sectors(i_size_read(alloc_inode)); 340*ccd979bdSMark Fasheh 341*ccd979bdSMark Fasheh status = 0; 342*ccd979bdSMark Fasheh bail: 343*ccd979bdSMark Fasheh if (handle) 344*ccd979bdSMark Fasheh ocfs2_commit_trans(handle); 345*ccd979bdSMark Fasheh 346*ccd979bdSMark Fasheh if (ac) 347*ccd979bdSMark Fasheh ocfs2_free_alloc_context(ac); 348*ccd979bdSMark Fasheh 349*ccd979bdSMark Fasheh if (bg_bh) 350*ccd979bdSMark Fasheh brelse(bg_bh); 351*ccd979bdSMark Fasheh 352*ccd979bdSMark Fasheh mlog_exit(status); 353*ccd979bdSMark Fasheh return status; 354*ccd979bdSMark Fasheh } 355*ccd979bdSMark Fasheh 356*ccd979bdSMark Fasheh static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb, 357*ccd979bdSMark Fasheh struct ocfs2_alloc_context *ac) 358*ccd979bdSMark Fasheh { 359*ccd979bdSMark Fasheh int status; 360*ccd979bdSMark Fasheh u32 bits_wanted = ac->ac_bits_wanted; 361*ccd979bdSMark Fasheh struct inode *alloc_inode = ac->ac_inode; 362*ccd979bdSMark Fasheh struct buffer_head *bh = NULL; 363*ccd979bdSMark Fasheh struct ocfs2_journal_handle *handle = ac->ac_handle; 364*ccd979bdSMark Fasheh struct ocfs2_dinode *fe; 365*ccd979bdSMark Fasheh u32 free_bits; 366*ccd979bdSMark Fasheh 367*ccd979bdSMark Fasheh mlog_entry_void(); 
368*ccd979bdSMark Fasheh 369*ccd979bdSMark Fasheh BUG_ON(handle->flags & OCFS2_HANDLE_STARTED); 370*ccd979bdSMark Fasheh 371*ccd979bdSMark Fasheh ocfs2_handle_add_inode(handle, alloc_inode); 372*ccd979bdSMark Fasheh status = ocfs2_meta_lock(alloc_inode, handle, &bh, 1); 373*ccd979bdSMark Fasheh if (status < 0) { 374*ccd979bdSMark Fasheh mlog_errno(status); 375*ccd979bdSMark Fasheh goto bail; 376*ccd979bdSMark Fasheh } 377*ccd979bdSMark Fasheh 378*ccd979bdSMark Fasheh fe = (struct ocfs2_dinode *) bh->b_data; 379*ccd979bdSMark Fasheh if (!OCFS2_IS_VALID_DINODE(fe)) { 380*ccd979bdSMark Fasheh OCFS2_RO_ON_INVALID_DINODE(alloc_inode->i_sb, fe); 381*ccd979bdSMark Fasheh status = -EIO; 382*ccd979bdSMark Fasheh goto bail; 383*ccd979bdSMark Fasheh } 384*ccd979bdSMark Fasheh if (!(fe->i_flags & cpu_to_le32(OCFS2_CHAIN_FL))) { 385*ccd979bdSMark Fasheh ocfs2_error(alloc_inode->i_sb, "Invalid chain allocator " 386*ccd979bdSMark Fasheh "# %"MLFu64, le64_to_cpu(fe->i_blkno)); 387*ccd979bdSMark Fasheh status = -EIO; 388*ccd979bdSMark Fasheh goto bail; 389*ccd979bdSMark Fasheh } 390*ccd979bdSMark Fasheh 391*ccd979bdSMark Fasheh free_bits = le32_to_cpu(fe->id1.bitmap1.i_total) - 392*ccd979bdSMark Fasheh le32_to_cpu(fe->id1.bitmap1.i_used); 393*ccd979bdSMark Fasheh 394*ccd979bdSMark Fasheh if (bits_wanted > free_bits) { 395*ccd979bdSMark Fasheh /* cluster bitmap never grows */ 396*ccd979bdSMark Fasheh if (ocfs2_is_cluster_bitmap(alloc_inode)) { 397*ccd979bdSMark Fasheh mlog(0, "Disk Full: wanted=%u, free_bits=%u\n", 398*ccd979bdSMark Fasheh bits_wanted, free_bits); 399*ccd979bdSMark Fasheh status = -ENOSPC; 400*ccd979bdSMark Fasheh goto bail; 401*ccd979bdSMark Fasheh } 402*ccd979bdSMark Fasheh 403*ccd979bdSMark Fasheh status = ocfs2_block_group_alloc(osb, alloc_inode, bh); 404*ccd979bdSMark Fasheh if (status < 0) { 405*ccd979bdSMark Fasheh if (status != -ENOSPC) 406*ccd979bdSMark Fasheh mlog_errno(status); 407*ccd979bdSMark Fasheh goto bail; 408*ccd979bdSMark Fasheh } 
409*ccd979bdSMark Fasheh atomic_inc(&osb->alloc_stats.bg_extends); 410*ccd979bdSMark Fasheh 411*ccd979bdSMark Fasheh /* You should never ask for this much metadata */ 412*ccd979bdSMark Fasheh BUG_ON(bits_wanted > 413*ccd979bdSMark Fasheh (le32_to_cpu(fe->id1.bitmap1.i_total) 414*ccd979bdSMark Fasheh - le32_to_cpu(fe->id1.bitmap1.i_used))); 415*ccd979bdSMark Fasheh } 416*ccd979bdSMark Fasheh 417*ccd979bdSMark Fasheh get_bh(bh); 418*ccd979bdSMark Fasheh ac->ac_bh = bh; 419*ccd979bdSMark Fasheh bail: 420*ccd979bdSMark Fasheh if (bh) 421*ccd979bdSMark Fasheh brelse(bh); 422*ccd979bdSMark Fasheh 423*ccd979bdSMark Fasheh mlog_exit(status); 424*ccd979bdSMark Fasheh return status; 425*ccd979bdSMark Fasheh } 426*ccd979bdSMark Fasheh 427*ccd979bdSMark Fasheh int ocfs2_reserve_new_metadata(struct ocfs2_super *osb, 428*ccd979bdSMark Fasheh struct ocfs2_journal_handle *handle, 429*ccd979bdSMark Fasheh struct ocfs2_dinode *fe, 430*ccd979bdSMark Fasheh struct ocfs2_alloc_context **ac) 431*ccd979bdSMark Fasheh { 432*ccd979bdSMark Fasheh int status; 433*ccd979bdSMark Fasheh struct inode *alloc_inode = NULL; 434*ccd979bdSMark Fasheh 435*ccd979bdSMark Fasheh *ac = kcalloc(1, sizeof(struct ocfs2_alloc_context), GFP_KERNEL); 436*ccd979bdSMark Fasheh if (!(*ac)) { 437*ccd979bdSMark Fasheh status = -ENOMEM; 438*ccd979bdSMark Fasheh mlog_errno(status); 439*ccd979bdSMark Fasheh goto bail; 440*ccd979bdSMark Fasheh } 441*ccd979bdSMark Fasheh 442*ccd979bdSMark Fasheh (*ac)->ac_bits_wanted = ocfs2_extend_meta_needed(fe); 443*ccd979bdSMark Fasheh (*ac)->ac_handle = handle; 444*ccd979bdSMark Fasheh (*ac)->ac_which = OCFS2_AC_USE_META; 445*ccd979bdSMark Fasheh 446*ccd979bdSMark Fasheh #ifndef OCFS2_USE_ALL_METADATA_SUBALLOCATORS 447*ccd979bdSMark Fasheh alloc_inode = ocfs2_get_system_file_inode(osb, 448*ccd979bdSMark Fasheh EXTENT_ALLOC_SYSTEM_INODE, 449*ccd979bdSMark Fasheh 0); 450*ccd979bdSMark Fasheh #else 451*ccd979bdSMark Fasheh alloc_inode = ocfs2_get_system_file_inode(osb, 
452*ccd979bdSMark Fasheh EXTENT_ALLOC_SYSTEM_INODE, 453*ccd979bdSMark Fasheh osb->slot_num); 454*ccd979bdSMark Fasheh #endif 455*ccd979bdSMark Fasheh if (!alloc_inode) { 456*ccd979bdSMark Fasheh status = -ENOMEM; 457*ccd979bdSMark Fasheh mlog_errno(status); 458*ccd979bdSMark Fasheh goto bail; 459*ccd979bdSMark Fasheh } 460*ccd979bdSMark Fasheh 461*ccd979bdSMark Fasheh (*ac)->ac_inode = igrab(alloc_inode); 462*ccd979bdSMark Fasheh (*ac)->ac_group_search = ocfs2_block_group_search; 463*ccd979bdSMark Fasheh 464*ccd979bdSMark Fasheh status = ocfs2_reserve_suballoc_bits(osb, (*ac)); 465*ccd979bdSMark Fasheh if (status < 0) { 466*ccd979bdSMark Fasheh if (status != -ENOSPC) 467*ccd979bdSMark Fasheh mlog_errno(status); 468*ccd979bdSMark Fasheh goto bail; 469*ccd979bdSMark Fasheh } 470*ccd979bdSMark Fasheh 471*ccd979bdSMark Fasheh status = 0; 472*ccd979bdSMark Fasheh bail: 473*ccd979bdSMark Fasheh if ((status < 0) && *ac) { 474*ccd979bdSMark Fasheh ocfs2_free_alloc_context(*ac); 475*ccd979bdSMark Fasheh *ac = NULL; 476*ccd979bdSMark Fasheh } 477*ccd979bdSMark Fasheh 478*ccd979bdSMark Fasheh if (alloc_inode) 479*ccd979bdSMark Fasheh iput(alloc_inode); 480*ccd979bdSMark Fasheh 481*ccd979bdSMark Fasheh mlog_exit(status); 482*ccd979bdSMark Fasheh return status; 483*ccd979bdSMark Fasheh } 484*ccd979bdSMark Fasheh 485*ccd979bdSMark Fasheh int ocfs2_reserve_new_inode(struct ocfs2_super *osb, 486*ccd979bdSMark Fasheh struct ocfs2_journal_handle *handle, 487*ccd979bdSMark Fasheh struct ocfs2_alloc_context **ac) 488*ccd979bdSMark Fasheh { 489*ccd979bdSMark Fasheh int status; 490*ccd979bdSMark Fasheh struct inode *alloc_inode = NULL; 491*ccd979bdSMark Fasheh 492*ccd979bdSMark Fasheh *ac = kcalloc(1, sizeof(struct ocfs2_alloc_context), GFP_KERNEL); 493*ccd979bdSMark Fasheh if (!(*ac)) { 494*ccd979bdSMark Fasheh status = -ENOMEM; 495*ccd979bdSMark Fasheh mlog_errno(status); 496*ccd979bdSMark Fasheh goto bail; 497*ccd979bdSMark Fasheh } 498*ccd979bdSMark Fasheh 499*ccd979bdSMark Fasheh 
(*ac)->ac_bits_wanted = 1; 500*ccd979bdSMark Fasheh (*ac)->ac_handle = handle; 501*ccd979bdSMark Fasheh (*ac)->ac_which = OCFS2_AC_USE_INODE; 502*ccd979bdSMark Fasheh 503*ccd979bdSMark Fasheh alloc_inode = ocfs2_get_system_file_inode(osb, 504*ccd979bdSMark Fasheh INODE_ALLOC_SYSTEM_INODE, 505*ccd979bdSMark Fasheh osb->slot_num); 506*ccd979bdSMark Fasheh if (!alloc_inode) { 507*ccd979bdSMark Fasheh status = -ENOMEM; 508*ccd979bdSMark Fasheh mlog_errno(status); 509*ccd979bdSMark Fasheh goto bail; 510*ccd979bdSMark Fasheh } 511*ccd979bdSMark Fasheh 512*ccd979bdSMark Fasheh (*ac)->ac_inode = igrab(alloc_inode); 513*ccd979bdSMark Fasheh (*ac)->ac_group_search = ocfs2_block_group_search; 514*ccd979bdSMark Fasheh 515*ccd979bdSMark Fasheh status = ocfs2_reserve_suballoc_bits(osb, *ac); 516*ccd979bdSMark Fasheh if (status < 0) { 517*ccd979bdSMark Fasheh if (status != -ENOSPC) 518*ccd979bdSMark Fasheh mlog_errno(status); 519*ccd979bdSMark Fasheh goto bail; 520*ccd979bdSMark Fasheh } 521*ccd979bdSMark Fasheh 522*ccd979bdSMark Fasheh status = 0; 523*ccd979bdSMark Fasheh bail: 524*ccd979bdSMark Fasheh if ((status < 0) && *ac) { 525*ccd979bdSMark Fasheh ocfs2_free_alloc_context(*ac); 526*ccd979bdSMark Fasheh *ac = NULL; 527*ccd979bdSMark Fasheh } 528*ccd979bdSMark Fasheh 529*ccd979bdSMark Fasheh if (alloc_inode) 530*ccd979bdSMark Fasheh iput(alloc_inode); 531*ccd979bdSMark Fasheh 532*ccd979bdSMark Fasheh mlog_exit(status); 533*ccd979bdSMark Fasheh return status; 534*ccd979bdSMark Fasheh } 535*ccd979bdSMark Fasheh 536*ccd979bdSMark Fasheh /* local alloc code has to do the same thing, so rather than do this 537*ccd979bdSMark Fasheh * twice.. 
*/
/*
 * Point @ac at the global cluster bitmap and reserve its bits there.
 *
 * Looks up GLOBAL_BITMAP_SYSTEM_INODE (OCFS2_INVALID_SLOT) and stores
 * the inode in ac->ac_inode, marks the context OCFS2_AC_USE_MAIN with
 * ocfs2_cluster_group_search as its search routine, then calls
 * ocfs2_reserve_suballoc_bits().
 *
 * Returns 0 on success, -EINVAL if the bitmap inode cannot be
 * obtained, or the error from the reservation (-ENOSPC is not
 * logged).  @ac is not freed here on failure; that is left to the
 * caller.
 */
int ocfs2_reserve_cluster_bitmap_bits(struct ocfs2_super *osb,
				      struct ocfs2_alloc_context *ac)
{
	int status;

	ac->ac_inode = ocfs2_get_system_file_inode(osb,
						   GLOBAL_BITMAP_SYSTEM_INODE,
						   OCFS2_INVALID_SLOT);
	if (!ac->ac_inode) {
		status = -EINVAL;
		mlog(ML_ERROR, "Could not get bitmap inode!\n");
		goto bail;
	}
	ac->ac_which = OCFS2_AC_USE_MAIN;
	ac->ac_group_search = ocfs2_cluster_group_search;

	status = ocfs2_reserve_suballoc_bits(osb, ac);
	if (status < 0 && status != -ENOSPC)
		mlog_errno(status);
bail:
	return status;
}

/* Callers don't need to care which bitmap (local alloc or main) to
 * use so we figure it out for them, but unfortunately this clutters
 * things a bit.
*/
/*
 * Build an allocation context for @bits_wanted clusters.
 *
 * @handle must be non-NULL (BUG_ON).  When
 * ocfs2_alloc_should_use_local() says so, the local alloc is tried
 * first; if it reports -ENOSPC, local alloc is switched to
 * OCFS2_LA_DISABLED and the request falls through to the global
 * cluster bitmap.  The global bitmap is also used directly when local
 * alloc is not selected.
 *
 * On success *ac is the new context and 0 is returned.  On failure
 * the context is freed, *ac is NULL and a negative errno is returned;
 * -ENOSPC is not logged.
 */
int ocfs2_reserve_clusters(struct ocfs2_super *osb,
			   struct ocfs2_journal_handle *handle,
			   u32 bits_wanted,
			   struct ocfs2_alloc_context **ac)
{
	int status;

	mlog_entry_void();

	BUG_ON(!handle);

	*ac = kcalloc(1, sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
	if (!(*ac)) {
		status = -ENOMEM;
		mlog_errno(status);
		goto bail;
	}

	(*ac)->ac_bits_wanted = bits_wanted;
	(*ac)->ac_handle = handle;

	/* start at -ENOSPC so the global bitmap is tried when local
	 * alloc is not used at all */
	status = -ENOSPC;
	if (ocfs2_alloc_should_use_local(osb, bits_wanted)) {
		status = ocfs2_reserve_local_alloc_bits(osb,
							handle,
							bits_wanted,
							*ac);
		if ((status < 0) && (status != -ENOSPC)) {
			mlog_errno(status);
			goto bail;
		} else if (status == -ENOSPC) {
			/* reserve_local_bits will return enospc with
			 * the local alloc inode still locked, so we
			 * can change this safely here. */
			mlog(0, "Disabling local alloc\n");
			/* We set to OCFS2_LA_DISABLED so that umount
			 * can clean up what's left of the local
			 * allocation */
			osb->local_alloc_state = OCFS2_LA_DISABLED;
		}
	}

	if (status == -ENOSPC) {
		status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac);
		if (status < 0) {
			if (status != -ENOSPC)
				mlog_errno(status);
			goto bail;
		}
	}

	status = 0;
bail:
	if ((status < 0) && *ac) {
		ocfs2_free_alloc_context(*ac);
		*ac = NULL;
	}

	mlog_exit(status);
	return status;
}

/*
 * More or less lifted from ext3. I'll leave their description below:
 *
 * "For ext3 allocations, we must not reuse any blocks which are
 * allocated in the bitmap buffer's "last committed data" copy. This
 * prevents deletes from freeing up the page for reuse until we have
 * committed the delete transaction.
 *
 * If we didn't do this, then deleting something and reallocating it as
 * data would allow the old block to be overwritten before the
 * transaction committed (because we force data to disk before commit).
637*ccd979bdSMark Fasheh * This would lead to corruption if we crashed between overwriting the 638*ccd979bdSMark Fasheh * data and committing the delete. 639*ccd979bdSMark Fasheh * 640*ccd979bdSMark Fasheh * @@@ We may want to make this allocation behaviour conditional on 641*ccd979bdSMark Fasheh * data-writes at some point, and disable it for metadata allocations or 642*ccd979bdSMark Fasheh * sync-data inodes." 643*ccd979bdSMark Fasheh * 644*ccd979bdSMark Fasheh * Note: OCFS2 already does this differently for metadata vs data 645*ccd979bdSMark Fasheh * allocations, as those bitmaps are seperate and undo access is never 646*ccd979bdSMark Fasheh * called on a metadata group descriptor. 647*ccd979bdSMark Fasheh */ 648*ccd979bdSMark Fasheh static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh, 649*ccd979bdSMark Fasheh int nr) 650*ccd979bdSMark Fasheh { 651*ccd979bdSMark Fasheh struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; 652*ccd979bdSMark Fasheh 653*ccd979bdSMark Fasheh if (ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap)) 654*ccd979bdSMark Fasheh return 0; 655*ccd979bdSMark Fasheh if (!buffer_jbd(bg_bh) || !bh2jh(bg_bh)->b_committed_data) 656*ccd979bdSMark Fasheh return 1; 657*ccd979bdSMark Fasheh 658*ccd979bdSMark Fasheh bg = (struct ocfs2_group_desc *) bh2jh(bg_bh)->b_committed_data; 659*ccd979bdSMark Fasheh return !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap); 660*ccd979bdSMark Fasheh } 661*ccd979bdSMark Fasheh 662*ccd979bdSMark Fasheh static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb, 663*ccd979bdSMark Fasheh struct buffer_head *bg_bh, 664*ccd979bdSMark Fasheh unsigned int bits_wanted, 665*ccd979bdSMark Fasheh u16 *bit_off, 666*ccd979bdSMark Fasheh u16 *bits_found) 667*ccd979bdSMark Fasheh { 668*ccd979bdSMark Fasheh void *bitmap; 669*ccd979bdSMark Fasheh u16 best_offset, best_size; 670*ccd979bdSMark Fasheh int offset, start, found, status = 0; 671*ccd979bdSMark Fasheh struct ocfs2_group_desc *bg 
= (struct ocfs2_group_desc *) bg_bh->b_data; 672*ccd979bdSMark Fasheh 673*ccd979bdSMark Fasheh if (!OCFS2_IS_VALID_GROUP_DESC(bg)) { 674*ccd979bdSMark Fasheh OCFS2_RO_ON_INVALID_GROUP_DESC(osb->sb, bg); 675*ccd979bdSMark Fasheh return -EIO; 676*ccd979bdSMark Fasheh } 677*ccd979bdSMark Fasheh 678*ccd979bdSMark Fasheh found = start = best_offset = best_size = 0; 679*ccd979bdSMark Fasheh bitmap = bg->bg_bitmap; 680*ccd979bdSMark Fasheh 681*ccd979bdSMark Fasheh while((offset = ocfs2_find_next_zero_bit(bitmap, 682*ccd979bdSMark Fasheh le16_to_cpu(bg->bg_bits), 683*ccd979bdSMark Fasheh start)) != -1) { 684*ccd979bdSMark Fasheh if (offset == le16_to_cpu(bg->bg_bits)) 685*ccd979bdSMark Fasheh break; 686*ccd979bdSMark Fasheh 687*ccd979bdSMark Fasheh if (!ocfs2_test_bg_bit_allocatable(bg_bh, offset)) { 688*ccd979bdSMark Fasheh /* We found a zero, but we can't use it as it 689*ccd979bdSMark Fasheh * hasn't been put to disk yet! */ 690*ccd979bdSMark Fasheh found = 0; 691*ccd979bdSMark Fasheh start = offset + 1; 692*ccd979bdSMark Fasheh } else if (offset == start) { 693*ccd979bdSMark Fasheh /* we found a zero */ 694*ccd979bdSMark Fasheh found++; 695*ccd979bdSMark Fasheh /* move start to the next bit to test */ 696*ccd979bdSMark Fasheh start++; 697*ccd979bdSMark Fasheh } else { 698*ccd979bdSMark Fasheh /* got a zero after some ones */ 699*ccd979bdSMark Fasheh found = 1; 700*ccd979bdSMark Fasheh start = offset + 1; 701*ccd979bdSMark Fasheh } 702*ccd979bdSMark Fasheh if (found > best_size) { 703*ccd979bdSMark Fasheh best_size = found; 704*ccd979bdSMark Fasheh best_offset = start - found; 705*ccd979bdSMark Fasheh } 706*ccd979bdSMark Fasheh /* we got everything we needed */ 707*ccd979bdSMark Fasheh if (found == bits_wanted) { 708*ccd979bdSMark Fasheh /* mlog(0, "Found it all!\n"); */ 709*ccd979bdSMark Fasheh break; 710*ccd979bdSMark Fasheh } 711*ccd979bdSMark Fasheh } 712*ccd979bdSMark Fasheh 713*ccd979bdSMark Fasheh /* XXX: I think the first clause is equivalent to the second 
714*ccd979bdSMark Fasheh * - jlbec */ 715*ccd979bdSMark Fasheh if (found == bits_wanted) { 716*ccd979bdSMark Fasheh *bit_off = start - found; 717*ccd979bdSMark Fasheh *bits_found = found; 718*ccd979bdSMark Fasheh } else if (best_size) { 719*ccd979bdSMark Fasheh *bit_off = best_offset; 720*ccd979bdSMark Fasheh *bits_found = best_size; 721*ccd979bdSMark Fasheh } else { 722*ccd979bdSMark Fasheh status = -ENOSPC; 723*ccd979bdSMark Fasheh /* No error log here -- see the comment above 724*ccd979bdSMark Fasheh * ocfs2_test_bg_bit_allocatable */ 725*ccd979bdSMark Fasheh } 726*ccd979bdSMark Fasheh 727*ccd979bdSMark Fasheh return status; 728*ccd979bdSMark Fasheh } 729*ccd979bdSMark Fasheh 730*ccd979bdSMark Fasheh static inline int ocfs2_block_group_set_bits(struct ocfs2_journal_handle *handle, 731*ccd979bdSMark Fasheh struct inode *alloc_inode, 732*ccd979bdSMark Fasheh struct ocfs2_group_desc *bg, 733*ccd979bdSMark Fasheh struct buffer_head *group_bh, 734*ccd979bdSMark Fasheh unsigned int bit_off, 735*ccd979bdSMark Fasheh unsigned int num_bits) 736*ccd979bdSMark Fasheh { 737*ccd979bdSMark Fasheh int status; 738*ccd979bdSMark Fasheh void *bitmap = bg->bg_bitmap; 739*ccd979bdSMark Fasheh int journal_type = OCFS2_JOURNAL_ACCESS_WRITE; 740*ccd979bdSMark Fasheh 741*ccd979bdSMark Fasheh mlog_entry_void(); 742*ccd979bdSMark Fasheh 743*ccd979bdSMark Fasheh if (!OCFS2_IS_VALID_GROUP_DESC(bg)) { 744*ccd979bdSMark Fasheh OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg); 745*ccd979bdSMark Fasheh status = -EIO; 746*ccd979bdSMark Fasheh goto bail; 747*ccd979bdSMark Fasheh } 748*ccd979bdSMark Fasheh BUG_ON(le16_to_cpu(bg->bg_free_bits_count) < num_bits); 749*ccd979bdSMark Fasheh 750*ccd979bdSMark Fasheh mlog(0, "block_group_set_bits: off = %u, num = %u\n", bit_off, 751*ccd979bdSMark Fasheh num_bits); 752*ccd979bdSMark Fasheh 753*ccd979bdSMark Fasheh if (ocfs2_is_cluster_bitmap(alloc_inode)) 754*ccd979bdSMark Fasheh journal_type = OCFS2_JOURNAL_ACCESS_UNDO; 755*ccd979bdSMark Fasheh 
756*ccd979bdSMark Fasheh status = ocfs2_journal_access(handle, 757*ccd979bdSMark Fasheh alloc_inode, 758*ccd979bdSMark Fasheh group_bh, 759*ccd979bdSMark Fasheh journal_type); 760*ccd979bdSMark Fasheh if (status < 0) { 761*ccd979bdSMark Fasheh mlog_errno(status); 762*ccd979bdSMark Fasheh goto bail; 763*ccd979bdSMark Fasheh } 764*ccd979bdSMark Fasheh 765*ccd979bdSMark Fasheh le16_add_cpu(&bg->bg_free_bits_count, -num_bits); 766*ccd979bdSMark Fasheh 767*ccd979bdSMark Fasheh while(num_bits--) 768*ccd979bdSMark Fasheh ocfs2_set_bit(bit_off++, bitmap); 769*ccd979bdSMark Fasheh 770*ccd979bdSMark Fasheh status = ocfs2_journal_dirty(handle, 771*ccd979bdSMark Fasheh group_bh); 772*ccd979bdSMark Fasheh if (status < 0) { 773*ccd979bdSMark Fasheh mlog_errno(status); 774*ccd979bdSMark Fasheh goto bail; 775*ccd979bdSMark Fasheh } 776*ccd979bdSMark Fasheh 777*ccd979bdSMark Fasheh bail: 778*ccd979bdSMark Fasheh mlog_exit(status); 779*ccd979bdSMark Fasheh return status; 780*ccd979bdSMark Fasheh } 781*ccd979bdSMark Fasheh 782*ccd979bdSMark Fasheh /* find the one with the most empty bits */ 783*ccd979bdSMark Fasheh static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl) 784*ccd979bdSMark Fasheh { 785*ccd979bdSMark Fasheh u16 curr, best; 786*ccd979bdSMark Fasheh 787*ccd979bdSMark Fasheh BUG_ON(!cl->cl_next_free_rec); 788*ccd979bdSMark Fasheh 789*ccd979bdSMark Fasheh best = curr = 0; 790*ccd979bdSMark Fasheh while (curr < le16_to_cpu(cl->cl_next_free_rec)) { 791*ccd979bdSMark Fasheh if (le32_to_cpu(cl->cl_recs[curr].c_free) > 792*ccd979bdSMark Fasheh le32_to_cpu(cl->cl_recs[best].c_free)) 793*ccd979bdSMark Fasheh best = curr; 794*ccd979bdSMark Fasheh curr++; 795*ccd979bdSMark Fasheh } 796*ccd979bdSMark Fasheh 797*ccd979bdSMark Fasheh BUG_ON(best >= le16_to_cpu(cl->cl_next_free_rec)); 798*ccd979bdSMark Fasheh return best; 799*ccd979bdSMark Fasheh } 800*ccd979bdSMark Fasheh 801*ccd979bdSMark Fasheh static int ocfs2_relink_block_group(struct ocfs2_journal_handle *handle, 
802*ccd979bdSMark Fasheh struct inode *alloc_inode, 803*ccd979bdSMark Fasheh struct buffer_head *fe_bh, 804*ccd979bdSMark Fasheh struct buffer_head *bg_bh, 805*ccd979bdSMark Fasheh struct buffer_head *prev_bg_bh, 806*ccd979bdSMark Fasheh u16 chain) 807*ccd979bdSMark Fasheh { 808*ccd979bdSMark Fasheh int status; 809*ccd979bdSMark Fasheh /* there is a really tiny chance the journal calls could fail, 810*ccd979bdSMark Fasheh * but we wouldn't want inconsistent blocks in *any* case. */ 811*ccd979bdSMark Fasheh u64 fe_ptr, bg_ptr, prev_bg_ptr; 812*ccd979bdSMark Fasheh struct ocfs2_dinode *fe = (struct ocfs2_dinode *) fe_bh->b_data; 813*ccd979bdSMark Fasheh struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; 814*ccd979bdSMark Fasheh struct ocfs2_group_desc *prev_bg = (struct ocfs2_group_desc *) prev_bg_bh->b_data; 815*ccd979bdSMark Fasheh 816*ccd979bdSMark Fasheh if (!OCFS2_IS_VALID_DINODE(fe)) { 817*ccd979bdSMark Fasheh OCFS2_RO_ON_INVALID_DINODE(alloc_inode->i_sb, fe); 818*ccd979bdSMark Fasheh status = -EIO; 819*ccd979bdSMark Fasheh goto out; 820*ccd979bdSMark Fasheh } 821*ccd979bdSMark Fasheh if (!OCFS2_IS_VALID_GROUP_DESC(bg)) { 822*ccd979bdSMark Fasheh OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg); 823*ccd979bdSMark Fasheh status = -EIO; 824*ccd979bdSMark Fasheh goto out; 825*ccd979bdSMark Fasheh } 826*ccd979bdSMark Fasheh if (!OCFS2_IS_VALID_GROUP_DESC(prev_bg)) { 827*ccd979bdSMark Fasheh OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, prev_bg); 828*ccd979bdSMark Fasheh status = -EIO; 829*ccd979bdSMark Fasheh goto out; 830*ccd979bdSMark Fasheh } 831*ccd979bdSMark Fasheh 832*ccd979bdSMark Fasheh mlog(0, "In suballoc %"MLFu64", chain %u, move group %"MLFu64" to " 833*ccd979bdSMark Fasheh "top, prev = %"MLFu64"\n", 834*ccd979bdSMark Fasheh fe->i_blkno, chain, bg->bg_blkno, prev_bg->bg_blkno); 835*ccd979bdSMark Fasheh 836*ccd979bdSMark Fasheh fe_ptr = le64_to_cpu(fe->id2.i_chain.cl_recs[chain].c_blkno); 837*ccd979bdSMark Fasheh bg_ptr = 
le64_to_cpu(bg->bg_next_group); 838*ccd979bdSMark Fasheh prev_bg_ptr = le64_to_cpu(prev_bg->bg_next_group); 839*ccd979bdSMark Fasheh 840*ccd979bdSMark Fasheh status = ocfs2_journal_access(handle, alloc_inode, prev_bg_bh, 841*ccd979bdSMark Fasheh OCFS2_JOURNAL_ACCESS_WRITE); 842*ccd979bdSMark Fasheh if (status < 0) { 843*ccd979bdSMark Fasheh mlog_errno(status); 844*ccd979bdSMark Fasheh goto out_rollback; 845*ccd979bdSMark Fasheh } 846*ccd979bdSMark Fasheh 847*ccd979bdSMark Fasheh prev_bg->bg_next_group = bg->bg_next_group; 848*ccd979bdSMark Fasheh 849*ccd979bdSMark Fasheh status = ocfs2_journal_dirty(handle, prev_bg_bh); 850*ccd979bdSMark Fasheh if (status < 0) { 851*ccd979bdSMark Fasheh mlog_errno(status); 852*ccd979bdSMark Fasheh goto out_rollback; 853*ccd979bdSMark Fasheh } 854*ccd979bdSMark Fasheh 855*ccd979bdSMark Fasheh status = ocfs2_journal_access(handle, alloc_inode, bg_bh, 856*ccd979bdSMark Fasheh OCFS2_JOURNAL_ACCESS_WRITE); 857*ccd979bdSMark Fasheh if (status < 0) { 858*ccd979bdSMark Fasheh mlog_errno(status); 859*ccd979bdSMark Fasheh goto out_rollback; 860*ccd979bdSMark Fasheh } 861*ccd979bdSMark Fasheh 862*ccd979bdSMark Fasheh bg->bg_next_group = fe->id2.i_chain.cl_recs[chain].c_blkno; 863*ccd979bdSMark Fasheh 864*ccd979bdSMark Fasheh status = ocfs2_journal_dirty(handle, bg_bh); 865*ccd979bdSMark Fasheh if (status < 0) { 866*ccd979bdSMark Fasheh mlog_errno(status); 867*ccd979bdSMark Fasheh goto out_rollback; 868*ccd979bdSMark Fasheh } 869*ccd979bdSMark Fasheh 870*ccd979bdSMark Fasheh status = ocfs2_journal_access(handle, alloc_inode, fe_bh, 871*ccd979bdSMark Fasheh OCFS2_JOURNAL_ACCESS_WRITE); 872*ccd979bdSMark Fasheh if (status < 0) { 873*ccd979bdSMark Fasheh mlog_errno(status); 874*ccd979bdSMark Fasheh goto out_rollback; 875*ccd979bdSMark Fasheh } 876*ccd979bdSMark Fasheh 877*ccd979bdSMark Fasheh fe->id2.i_chain.cl_recs[chain].c_blkno = bg->bg_blkno; 878*ccd979bdSMark Fasheh 879*ccd979bdSMark Fasheh status = ocfs2_journal_dirty(handle, fe_bh); 
880*ccd979bdSMark Fasheh if (status < 0) { 881*ccd979bdSMark Fasheh mlog_errno(status); 882*ccd979bdSMark Fasheh goto out_rollback; 883*ccd979bdSMark Fasheh } 884*ccd979bdSMark Fasheh 885*ccd979bdSMark Fasheh status = 0; 886*ccd979bdSMark Fasheh out_rollback: 887*ccd979bdSMark Fasheh if (status < 0) { 888*ccd979bdSMark Fasheh fe->id2.i_chain.cl_recs[chain].c_blkno = cpu_to_le64(fe_ptr); 889*ccd979bdSMark Fasheh bg->bg_next_group = cpu_to_le64(bg_ptr); 890*ccd979bdSMark Fasheh prev_bg->bg_next_group = cpu_to_le64(prev_bg_ptr); 891*ccd979bdSMark Fasheh } 892*ccd979bdSMark Fasheh out: 893*ccd979bdSMark Fasheh mlog_exit(status); 894*ccd979bdSMark Fasheh return status; 895*ccd979bdSMark Fasheh } 896*ccd979bdSMark Fasheh 897*ccd979bdSMark Fasheh static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg, 898*ccd979bdSMark Fasheh u32 wanted) 899*ccd979bdSMark Fasheh { 900*ccd979bdSMark Fasheh return le16_to_cpu(bg->bg_free_bits_count) > wanted; 901*ccd979bdSMark Fasheh } 902*ccd979bdSMark Fasheh 903*ccd979bdSMark Fasheh /* return 0 on success, -ENOSPC to keep searching and any other < 0 904*ccd979bdSMark Fasheh * value on error. 
*/ 905*ccd979bdSMark Fasheh static int ocfs2_cluster_group_search(struct inode *inode, 906*ccd979bdSMark Fasheh struct buffer_head *group_bh, 907*ccd979bdSMark Fasheh u32 bits_wanted, u32 min_bits, 908*ccd979bdSMark Fasheh u16 *bit_off, u16 *bits_found) 909*ccd979bdSMark Fasheh { 910*ccd979bdSMark Fasheh int search = -ENOSPC; 911*ccd979bdSMark Fasheh int ret; 912*ccd979bdSMark Fasheh struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) group_bh->b_data; 913*ccd979bdSMark Fasheh u16 tmp_off, tmp_found; 914*ccd979bdSMark Fasheh 915*ccd979bdSMark Fasheh BUG_ON(!ocfs2_is_cluster_bitmap(inode)); 916*ccd979bdSMark Fasheh 917*ccd979bdSMark Fasheh if (bg->bg_free_bits_count) { 918*ccd979bdSMark Fasheh ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb), 919*ccd979bdSMark Fasheh group_bh, bits_wanted, 920*ccd979bdSMark Fasheh &tmp_off, &tmp_found); 921*ccd979bdSMark Fasheh if (ret) 922*ccd979bdSMark Fasheh return ret; 923*ccd979bdSMark Fasheh 924*ccd979bdSMark Fasheh /* ocfs2_block_group_find_clear_bits() might 925*ccd979bdSMark Fasheh * return success, but we still want to return 926*ccd979bdSMark Fasheh * -ENOSPC unless it found the minimum number 927*ccd979bdSMark Fasheh * of bits. 
*/ 928*ccd979bdSMark Fasheh if (min_bits <= tmp_found) { 929*ccd979bdSMark Fasheh *bit_off = tmp_off; 930*ccd979bdSMark Fasheh *bits_found = tmp_found; 931*ccd979bdSMark Fasheh search = 0; /* success */ 932*ccd979bdSMark Fasheh } 933*ccd979bdSMark Fasheh } 934*ccd979bdSMark Fasheh 935*ccd979bdSMark Fasheh return search; 936*ccd979bdSMark Fasheh } 937*ccd979bdSMark Fasheh 938*ccd979bdSMark Fasheh static int ocfs2_block_group_search(struct inode *inode, 939*ccd979bdSMark Fasheh struct buffer_head *group_bh, 940*ccd979bdSMark Fasheh u32 bits_wanted, u32 min_bits, 941*ccd979bdSMark Fasheh u16 *bit_off, u16 *bits_found) 942*ccd979bdSMark Fasheh { 943*ccd979bdSMark Fasheh int ret = -ENOSPC; 944*ccd979bdSMark Fasheh struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) group_bh->b_data; 945*ccd979bdSMark Fasheh 946*ccd979bdSMark Fasheh BUG_ON(min_bits != 1); 947*ccd979bdSMark Fasheh BUG_ON(ocfs2_is_cluster_bitmap(inode)); 948*ccd979bdSMark Fasheh 949*ccd979bdSMark Fasheh if (bg->bg_free_bits_count) 950*ccd979bdSMark Fasheh ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb), 951*ccd979bdSMark Fasheh group_bh, bits_wanted, 952*ccd979bdSMark Fasheh bit_off, bits_found); 953*ccd979bdSMark Fasheh 954*ccd979bdSMark Fasheh return ret; 955*ccd979bdSMark Fasheh } 956*ccd979bdSMark Fasheh 957*ccd979bdSMark Fasheh static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, 958*ccd979bdSMark Fasheh u32 bits_wanted, 959*ccd979bdSMark Fasheh u32 min_bits, 960*ccd979bdSMark Fasheh u16 *bit_off, 961*ccd979bdSMark Fasheh unsigned int *num_bits, 962*ccd979bdSMark Fasheh u64 *bg_blkno) 963*ccd979bdSMark Fasheh { 964*ccd979bdSMark Fasheh int status; 965*ccd979bdSMark Fasheh u16 chain, tmp_bits; 966*ccd979bdSMark Fasheh u32 tmp_used; 967*ccd979bdSMark Fasheh u64 next_group; 968*ccd979bdSMark Fasheh struct ocfs2_journal_handle *handle = ac->ac_handle; 969*ccd979bdSMark Fasheh struct inode *alloc_inode = ac->ac_inode; 970*ccd979bdSMark Fasheh struct buffer_head *group_bh = 
NULL; 971*ccd979bdSMark Fasheh struct buffer_head *prev_group_bh = NULL; 972*ccd979bdSMark Fasheh struct ocfs2_dinode *fe = (struct ocfs2_dinode *) ac->ac_bh->b_data; 973*ccd979bdSMark Fasheh struct ocfs2_chain_list *cl = (struct ocfs2_chain_list *) &fe->id2.i_chain; 974*ccd979bdSMark Fasheh struct ocfs2_group_desc *bg; 975*ccd979bdSMark Fasheh 976*ccd979bdSMark Fasheh chain = ac->ac_chain; 977*ccd979bdSMark Fasheh mlog(0, "trying to alloc %u bits from chain %u, inode %"MLFu64"\n", 978*ccd979bdSMark Fasheh bits_wanted, chain, OCFS2_I(alloc_inode)->ip_blkno); 979*ccd979bdSMark Fasheh 980*ccd979bdSMark Fasheh status = ocfs2_read_block(OCFS2_SB(alloc_inode->i_sb), 981*ccd979bdSMark Fasheh le64_to_cpu(cl->cl_recs[chain].c_blkno), 982*ccd979bdSMark Fasheh &group_bh, OCFS2_BH_CACHED, alloc_inode); 983*ccd979bdSMark Fasheh if (status < 0) { 984*ccd979bdSMark Fasheh mlog_errno(status); 985*ccd979bdSMark Fasheh goto bail; 986*ccd979bdSMark Fasheh } 987*ccd979bdSMark Fasheh bg = (struct ocfs2_group_desc *) group_bh->b_data; 988*ccd979bdSMark Fasheh if (!OCFS2_IS_VALID_GROUP_DESC(bg)) { 989*ccd979bdSMark Fasheh OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg); 990*ccd979bdSMark Fasheh status = -EIO; 991*ccd979bdSMark Fasheh goto bail; 992*ccd979bdSMark Fasheh } 993*ccd979bdSMark Fasheh 994*ccd979bdSMark Fasheh status = -ENOSPC; 995*ccd979bdSMark Fasheh /* for now, the chain search is a bit simplistic. We just use 996*ccd979bdSMark Fasheh * the 1st group with any empty bits. 
*/ 997*ccd979bdSMark Fasheh while ((status = ac->ac_group_search(alloc_inode, group_bh, 998*ccd979bdSMark Fasheh bits_wanted, min_bits, bit_off, 999*ccd979bdSMark Fasheh &tmp_bits)) == -ENOSPC) { 1000*ccd979bdSMark Fasheh if (!bg->bg_next_group) 1001*ccd979bdSMark Fasheh break; 1002*ccd979bdSMark Fasheh 1003*ccd979bdSMark Fasheh if (prev_group_bh) { 1004*ccd979bdSMark Fasheh brelse(prev_group_bh); 1005*ccd979bdSMark Fasheh prev_group_bh = NULL; 1006*ccd979bdSMark Fasheh } 1007*ccd979bdSMark Fasheh next_group = le64_to_cpu(bg->bg_next_group); 1008*ccd979bdSMark Fasheh prev_group_bh = group_bh; 1009*ccd979bdSMark Fasheh group_bh = NULL; 1010*ccd979bdSMark Fasheh status = ocfs2_read_block(OCFS2_SB(alloc_inode->i_sb), 1011*ccd979bdSMark Fasheh next_group, &group_bh, 1012*ccd979bdSMark Fasheh OCFS2_BH_CACHED, alloc_inode); 1013*ccd979bdSMark Fasheh if (status < 0) { 1014*ccd979bdSMark Fasheh mlog_errno(status); 1015*ccd979bdSMark Fasheh goto bail; 1016*ccd979bdSMark Fasheh } 1017*ccd979bdSMark Fasheh bg = (struct ocfs2_group_desc *) group_bh->b_data; 1018*ccd979bdSMark Fasheh if (!OCFS2_IS_VALID_GROUP_DESC(bg)) { 1019*ccd979bdSMark Fasheh OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg); 1020*ccd979bdSMark Fasheh status = -EIO; 1021*ccd979bdSMark Fasheh goto bail; 1022*ccd979bdSMark Fasheh } 1023*ccd979bdSMark Fasheh } 1024*ccd979bdSMark Fasheh if (status < 0) { 1025*ccd979bdSMark Fasheh if (status != -ENOSPC) 1026*ccd979bdSMark Fasheh mlog_errno(status); 1027*ccd979bdSMark Fasheh goto bail; 1028*ccd979bdSMark Fasheh } 1029*ccd979bdSMark Fasheh 1030*ccd979bdSMark Fasheh mlog(0, "alloc succeeds: we give %u bits from block group %"MLFu64"\n", 1031*ccd979bdSMark Fasheh tmp_bits, bg->bg_blkno); 1032*ccd979bdSMark Fasheh 1033*ccd979bdSMark Fasheh *num_bits = tmp_bits; 1034*ccd979bdSMark Fasheh 1035*ccd979bdSMark Fasheh BUG_ON(*num_bits == 0); 1036*ccd979bdSMark Fasheh 1037*ccd979bdSMark Fasheh /* 1038*ccd979bdSMark Fasheh * Keep track of previous block descriptor read. 
When 1039*ccd979bdSMark Fasheh * we find a target, if we have read more than X 1040*ccd979bdSMark Fasheh * number of descriptors, and the target is reasonably 1041*ccd979bdSMark Fasheh * empty, relink him to top of his chain. 1042*ccd979bdSMark Fasheh * 1043*ccd979bdSMark Fasheh * We've read 0 extra blocks and only send one more to 1044*ccd979bdSMark Fasheh * the transaction, yet the next guy to search has a 1045*ccd979bdSMark Fasheh * much easier time. 1046*ccd979bdSMark Fasheh * 1047*ccd979bdSMark Fasheh * Do this *after* figuring out how many bits we're taking out 1048*ccd979bdSMark Fasheh * of our target group. 1049*ccd979bdSMark Fasheh */ 1050*ccd979bdSMark Fasheh if (ac->ac_allow_chain_relink && 1051*ccd979bdSMark Fasheh (prev_group_bh) && 1052*ccd979bdSMark Fasheh (ocfs2_block_group_reasonably_empty(bg, *num_bits))) { 1053*ccd979bdSMark Fasheh status = ocfs2_relink_block_group(handle, alloc_inode, 1054*ccd979bdSMark Fasheh ac->ac_bh, group_bh, 1055*ccd979bdSMark Fasheh prev_group_bh, chain); 1056*ccd979bdSMark Fasheh if (status < 0) { 1057*ccd979bdSMark Fasheh mlog_errno(status); 1058*ccd979bdSMark Fasheh goto bail; 1059*ccd979bdSMark Fasheh } 1060*ccd979bdSMark Fasheh } 1061*ccd979bdSMark Fasheh 1062*ccd979bdSMark Fasheh /* Ok, claim our bits now: set the info on dinode, chainlist 1063*ccd979bdSMark Fasheh * and then the group */ 1064*ccd979bdSMark Fasheh status = ocfs2_journal_access(handle, 1065*ccd979bdSMark Fasheh alloc_inode, 1066*ccd979bdSMark Fasheh ac->ac_bh, 1067*ccd979bdSMark Fasheh OCFS2_JOURNAL_ACCESS_WRITE); 1068*ccd979bdSMark Fasheh if (status < 0) { 1069*ccd979bdSMark Fasheh mlog_errno(status); 1070*ccd979bdSMark Fasheh goto bail; 1071*ccd979bdSMark Fasheh } 1072*ccd979bdSMark Fasheh 1073*ccd979bdSMark Fasheh tmp_used = le32_to_cpu(fe->id1.bitmap1.i_used); 1074*ccd979bdSMark Fasheh fe->id1.bitmap1.i_used = cpu_to_le32(*num_bits + tmp_used); 1075*ccd979bdSMark Fasheh le32_add_cpu(&cl->cl_recs[chain].c_free, -(*num_bits)); 1076*ccd979bdSMark 
Fasheh 1077*ccd979bdSMark Fasheh status = ocfs2_journal_dirty(handle, 1078*ccd979bdSMark Fasheh ac->ac_bh); 1079*ccd979bdSMark Fasheh if (status < 0) { 1080*ccd979bdSMark Fasheh mlog_errno(status); 1081*ccd979bdSMark Fasheh goto bail; 1082*ccd979bdSMark Fasheh } 1083*ccd979bdSMark Fasheh 1084*ccd979bdSMark Fasheh status = ocfs2_block_group_set_bits(handle, 1085*ccd979bdSMark Fasheh alloc_inode, 1086*ccd979bdSMark Fasheh bg, 1087*ccd979bdSMark Fasheh group_bh, 1088*ccd979bdSMark Fasheh *bit_off, 1089*ccd979bdSMark Fasheh *num_bits); 1090*ccd979bdSMark Fasheh if (status < 0) { 1091*ccd979bdSMark Fasheh mlog_errno(status); 1092*ccd979bdSMark Fasheh goto bail; 1093*ccd979bdSMark Fasheh } 1094*ccd979bdSMark Fasheh 1095*ccd979bdSMark Fasheh mlog(0, "Allocated %u bits from suballocator %"MLFu64"\n", 1096*ccd979bdSMark Fasheh *num_bits, fe->i_blkno); 1097*ccd979bdSMark Fasheh 1098*ccd979bdSMark Fasheh *bg_blkno = le64_to_cpu(bg->bg_blkno); 1099*ccd979bdSMark Fasheh bail: 1100*ccd979bdSMark Fasheh if (group_bh) 1101*ccd979bdSMark Fasheh brelse(group_bh); 1102*ccd979bdSMark Fasheh if (prev_group_bh) 1103*ccd979bdSMark Fasheh brelse(prev_group_bh); 1104*ccd979bdSMark Fasheh 1105*ccd979bdSMark Fasheh mlog_exit(status); 1106*ccd979bdSMark Fasheh return status; 1107*ccd979bdSMark Fasheh } 1108*ccd979bdSMark Fasheh 1109*ccd979bdSMark Fasheh /* will give out up to bits_wanted contiguous bits. 
*/ 1110*ccd979bdSMark Fasheh static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, 1111*ccd979bdSMark Fasheh struct ocfs2_alloc_context *ac, 1112*ccd979bdSMark Fasheh u32 bits_wanted, 1113*ccd979bdSMark Fasheh u32 min_bits, 1114*ccd979bdSMark Fasheh u16 *bit_off, 1115*ccd979bdSMark Fasheh unsigned int *num_bits, 1116*ccd979bdSMark Fasheh u64 *bg_blkno) 1117*ccd979bdSMark Fasheh { 1118*ccd979bdSMark Fasheh int status; 1119*ccd979bdSMark Fasheh u16 victim, i; 1120*ccd979bdSMark Fasheh struct ocfs2_chain_list *cl; 1121*ccd979bdSMark Fasheh struct ocfs2_dinode *fe; 1122*ccd979bdSMark Fasheh 1123*ccd979bdSMark Fasheh mlog_entry_void(); 1124*ccd979bdSMark Fasheh 1125*ccd979bdSMark Fasheh BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted); 1126*ccd979bdSMark Fasheh BUG_ON(bits_wanted > (ac->ac_bits_wanted - ac->ac_bits_given)); 1127*ccd979bdSMark Fasheh BUG_ON(!ac->ac_bh); 1128*ccd979bdSMark Fasheh 1129*ccd979bdSMark Fasheh fe = (struct ocfs2_dinode *) ac->ac_bh->b_data; 1130*ccd979bdSMark Fasheh if (!OCFS2_IS_VALID_DINODE(fe)) { 1131*ccd979bdSMark Fasheh OCFS2_RO_ON_INVALID_DINODE(osb->sb, fe); 1132*ccd979bdSMark Fasheh status = -EIO; 1133*ccd979bdSMark Fasheh goto bail; 1134*ccd979bdSMark Fasheh } 1135*ccd979bdSMark Fasheh if (le32_to_cpu(fe->id1.bitmap1.i_used) >= 1136*ccd979bdSMark Fasheh le32_to_cpu(fe->id1.bitmap1.i_total)) { 1137*ccd979bdSMark Fasheh ocfs2_error(osb->sb, "Chain allocator dinode %"MLFu64" has %u" 1138*ccd979bdSMark Fasheh "used bits but only %u total.", 1139*ccd979bdSMark Fasheh le64_to_cpu(fe->i_blkno), 1140*ccd979bdSMark Fasheh le32_to_cpu(fe->id1.bitmap1.i_used), 1141*ccd979bdSMark Fasheh le32_to_cpu(fe->id1.bitmap1.i_total)); 1142*ccd979bdSMark Fasheh status = -EIO; 1143*ccd979bdSMark Fasheh goto bail; 1144*ccd979bdSMark Fasheh } 1145*ccd979bdSMark Fasheh 1146*ccd979bdSMark Fasheh cl = (struct ocfs2_chain_list *) &fe->id2.i_chain; 1147*ccd979bdSMark Fasheh 1148*ccd979bdSMark Fasheh victim = ocfs2_find_victim_chain(cl); 1149*ccd979bdSMark 
Fasheh ac->ac_chain = victim; 1150*ccd979bdSMark Fasheh ac->ac_allow_chain_relink = 1; 1151*ccd979bdSMark Fasheh 1152*ccd979bdSMark Fasheh status = ocfs2_search_chain(ac, bits_wanted, min_bits, bit_off, 1153*ccd979bdSMark Fasheh num_bits, bg_blkno); 1154*ccd979bdSMark Fasheh if (!status) 1155*ccd979bdSMark Fasheh goto bail; 1156*ccd979bdSMark Fasheh if (status < 0 && status != -ENOSPC) { 1157*ccd979bdSMark Fasheh mlog_errno(status); 1158*ccd979bdSMark Fasheh goto bail; 1159*ccd979bdSMark Fasheh } 1160*ccd979bdSMark Fasheh 1161*ccd979bdSMark Fasheh mlog(0, "Search of victim chain %u came up with nothing, " 1162*ccd979bdSMark Fasheh "trying all chains now.\n", victim); 1163*ccd979bdSMark Fasheh 1164*ccd979bdSMark Fasheh /* If we didn't pick a good victim, then just default to 1165*ccd979bdSMark Fasheh * searching each chain in order. Don't allow chain relinking 1166*ccd979bdSMark Fasheh * because we only calculate enough journal credits for one 1167*ccd979bdSMark Fasheh * relink per alloc. 
*/ 1168*ccd979bdSMark Fasheh ac->ac_allow_chain_relink = 0; 1169*ccd979bdSMark Fasheh for (i = 0; i < le16_to_cpu(cl->cl_next_free_rec); i ++) { 1170*ccd979bdSMark Fasheh if (i == victim) 1171*ccd979bdSMark Fasheh continue; 1172*ccd979bdSMark Fasheh if (!cl->cl_recs[i].c_free) 1173*ccd979bdSMark Fasheh continue; 1174*ccd979bdSMark Fasheh 1175*ccd979bdSMark Fasheh ac->ac_chain = i; 1176*ccd979bdSMark Fasheh status = ocfs2_search_chain(ac, bits_wanted, min_bits, 1177*ccd979bdSMark Fasheh bit_off, num_bits, 1178*ccd979bdSMark Fasheh bg_blkno); 1179*ccd979bdSMark Fasheh if (!status) 1180*ccd979bdSMark Fasheh break; 1181*ccd979bdSMark Fasheh if (status < 0 && status != -ENOSPC) { 1182*ccd979bdSMark Fasheh mlog_errno(status); 1183*ccd979bdSMark Fasheh goto bail; 1184*ccd979bdSMark Fasheh } 1185*ccd979bdSMark Fasheh } 1186*ccd979bdSMark Fasheh bail: 1187*ccd979bdSMark Fasheh 1188*ccd979bdSMark Fasheh mlog_exit(status); 1189*ccd979bdSMark Fasheh return status; 1190*ccd979bdSMark Fasheh } 1191*ccd979bdSMark Fasheh 1192*ccd979bdSMark Fasheh int ocfs2_claim_metadata(struct ocfs2_super *osb, 1193*ccd979bdSMark Fasheh struct ocfs2_journal_handle *handle, 1194*ccd979bdSMark Fasheh struct ocfs2_alloc_context *ac, 1195*ccd979bdSMark Fasheh u32 bits_wanted, 1196*ccd979bdSMark Fasheh u16 *suballoc_bit_start, 1197*ccd979bdSMark Fasheh unsigned int *num_bits, 1198*ccd979bdSMark Fasheh u64 *blkno_start) 1199*ccd979bdSMark Fasheh { 1200*ccd979bdSMark Fasheh int status; 1201*ccd979bdSMark Fasheh u64 bg_blkno; 1202*ccd979bdSMark Fasheh 1203*ccd979bdSMark Fasheh BUG_ON(!ac); 1204*ccd979bdSMark Fasheh BUG_ON(ac->ac_bits_wanted < (ac->ac_bits_given + bits_wanted)); 1205*ccd979bdSMark Fasheh BUG_ON(ac->ac_which != OCFS2_AC_USE_META); 1206*ccd979bdSMark Fasheh BUG_ON(ac->ac_handle != handle); 1207*ccd979bdSMark Fasheh 1208*ccd979bdSMark Fasheh status = ocfs2_claim_suballoc_bits(osb, 1209*ccd979bdSMark Fasheh ac, 1210*ccd979bdSMark Fasheh bits_wanted, 1211*ccd979bdSMark Fasheh 1, 
1212*ccd979bdSMark Fasheh suballoc_bit_start, 1213*ccd979bdSMark Fasheh num_bits, 1214*ccd979bdSMark Fasheh &bg_blkno); 1215*ccd979bdSMark Fasheh if (status < 0) { 1216*ccd979bdSMark Fasheh mlog_errno(status); 1217*ccd979bdSMark Fasheh goto bail; 1218*ccd979bdSMark Fasheh } 1219*ccd979bdSMark Fasheh atomic_inc(&osb->alloc_stats.bg_allocs); 1220*ccd979bdSMark Fasheh 1221*ccd979bdSMark Fasheh *blkno_start = bg_blkno + (u64) *suballoc_bit_start; 1222*ccd979bdSMark Fasheh ac->ac_bits_given += (*num_bits); 1223*ccd979bdSMark Fasheh status = 0; 1224*ccd979bdSMark Fasheh bail: 1225*ccd979bdSMark Fasheh mlog_exit(status); 1226*ccd979bdSMark Fasheh return status; 1227*ccd979bdSMark Fasheh } 1228*ccd979bdSMark Fasheh 1229*ccd979bdSMark Fasheh int ocfs2_claim_new_inode(struct ocfs2_super *osb, 1230*ccd979bdSMark Fasheh struct ocfs2_journal_handle *handle, 1231*ccd979bdSMark Fasheh struct ocfs2_alloc_context *ac, 1232*ccd979bdSMark Fasheh u16 *suballoc_bit, 1233*ccd979bdSMark Fasheh u64 *fe_blkno) 1234*ccd979bdSMark Fasheh { 1235*ccd979bdSMark Fasheh int status; 1236*ccd979bdSMark Fasheh unsigned int num_bits; 1237*ccd979bdSMark Fasheh u64 bg_blkno; 1238*ccd979bdSMark Fasheh 1239*ccd979bdSMark Fasheh mlog_entry_void(); 1240*ccd979bdSMark Fasheh 1241*ccd979bdSMark Fasheh BUG_ON(!ac); 1242*ccd979bdSMark Fasheh BUG_ON(ac->ac_bits_given != 0); 1243*ccd979bdSMark Fasheh BUG_ON(ac->ac_bits_wanted != 1); 1244*ccd979bdSMark Fasheh BUG_ON(ac->ac_which != OCFS2_AC_USE_INODE); 1245*ccd979bdSMark Fasheh BUG_ON(ac->ac_handle != handle); 1246*ccd979bdSMark Fasheh 1247*ccd979bdSMark Fasheh status = ocfs2_claim_suballoc_bits(osb, 1248*ccd979bdSMark Fasheh ac, 1249*ccd979bdSMark Fasheh 1, 1250*ccd979bdSMark Fasheh 1, 1251*ccd979bdSMark Fasheh suballoc_bit, 1252*ccd979bdSMark Fasheh &num_bits, 1253*ccd979bdSMark Fasheh &bg_blkno); 1254*ccd979bdSMark Fasheh if (status < 0) { 1255*ccd979bdSMark Fasheh mlog_errno(status); 1256*ccd979bdSMark Fasheh goto bail; 1257*ccd979bdSMark Fasheh } 
1258*ccd979bdSMark Fasheh atomic_inc(&osb->alloc_stats.bg_allocs); 1259*ccd979bdSMark Fasheh 1260*ccd979bdSMark Fasheh BUG_ON(num_bits != 1); 1261*ccd979bdSMark Fasheh 1262*ccd979bdSMark Fasheh *fe_blkno = bg_blkno + (u64) (*suballoc_bit); 1263*ccd979bdSMark Fasheh ac->ac_bits_given++; 1264*ccd979bdSMark Fasheh status = 0; 1265*ccd979bdSMark Fasheh bail: 1266*ccd979bdSMark Fasheh mlog_exit(status); 1267*ccd979bdSMark Fasheh return status; 1268*ccd979bdSMark Fasheh } 1269*ccd979bdSMark Fasheh 1270*ccd979bdSMark Fasheh /* translate a group desc. blkno and it's bitmap offset into 1271*ccd979bdSMark Fasheh * disk cluster offset. */ 1272*ccd979bdSMark Fasheh static inline u32 ocfs2_desc_bitmap_to_cluster_off(struct inode *inode, 1273*ccd979bdSMark Fasheh u64 bg_blkno, 1274*ccd979bdSMark Fasheh u16 bg_bit_off) 1275*ccd979bdSMark Fasheh { 1276*ccd979bdSMark Fasheh struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1277*ccd979bdSMark Fasheh u32 cluster = 0; 1278*ccd979bdSMark Fasheh 1279*ccd979bdSMark Fasheh BUG_ON(!ocfs2_is_cluster_bitmap(inode)); 1280*ccd979bdSMark Fasheh 1281*ccd979bdSMark Fasheh if (bg_blkno != osb->first_cluster_group_blkno) 1282*ccd979bdSMark Fasheh cluster = ocfs2_blocks_to_clusters(inode->i_sb, bg_blkno); 1283*ccd979bdSMark Fasheh cluster += (u32) bg_bit_off; 1284*ccd979bdSMark Fasheh return cluster; 1285*ccd979bdSMark Fasheh } 1286*ccd979bdSMark Fasheh 1287*ccd979bdSMark Fasheh /* given a cluster offset, calculate which block group it belongs to 1288*ccd979bdSMark Fasheh * and return that block offset. 
*/ 1289*ccd979bdSMark Fasheh static inline u64 ocfs2_which_cluster_group(struct inode *inode, 1290*ccd979bdSMark Fasheh u32 cluster) 1291*ccd979bdSMark Fasheh { 1292*ccd979bdSMark Fasheh struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1293*ccd979bdSMark Fasheh u32 group_no; 1294*ccd979bdSMark Fasheh 1295*ccd979bdSMark Fasheh BUG_ON(!ocfs2_is_cluster_bitmap(inode)); 1296*ccd979bdSMark Fasheh 1297*ccd979bdSMark Fasheh group_no = cluster / osb->bitmap_cpg; 1298*ccd979bdSMark Fasheh if (!group_no) 1299*ccd979bdSMark Fasheh return osb->first_cluster_group_blkno; 1300*ccd979bdSMark Fasheh return ocfs2_clusters_to_blocks(inode->i_sb, 1301*ccd979bdSMark Fasheh group_no * osb->bitmap_cpg); 1302*ccd979bdSMark Fasheh } 1303*ccd979bdSMark Fasheh 1304*ccd979bdSMark Fasheh /* given the block number of a cluster start, calculate which cluster 1305*ccd979bdSMark Fasheh * group and descriptor bitmap offset that corresponds to. */ 1306*ccd979bdSMark Fasheh static inline void ocfs2_block_to_cluster_group(struct inode *inode, 1307*ccd979bdSMark Fasheh u64 data_blkno, 1308*ccd979bdSMark Fasheh u64 *bg_blkno, 1309*ccd979bdSMark Fasheh u16 *bg_bit_off) 1310*ccd979bdSMark Fasheh { 1311*ccd979bdSMark Fasheh struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1312*ccd979bdSMark Fasheh u32 data_cluster = ocfs2_blocks_to_clusters(osb->sb, data_blkno); 1313*ccd979bdSMark Fasheh 1314*ccd979bdSMark Fasheh BUG_ON(!ocfs2_is_cluster_bitmap(inode)); 1315*ccd979bdSMark Fasheh 1316*ccd979bdSMark Fasheh *bg_blkno = ocfs2_which_cluster_group(inode, 1317*ccd979bdSMark Fasheh data_cluster); 1318*ccd979bdSMark Fasheh 1319*ccd979bdSMark Fasheh if (*bg_blkno == osb->first_cluster_group_blkno) 1320*ccd979bdSMark Fasheh *bg_bit_off = (u16) data_cluster; 1321*ccd979bdSMark Fasheh else 1322*ccd979bdSMark Fasheh *bg_bit_off = (u16) ocfs2_blocks_to_clusters(osb->sb, 1323*ccd979bdSMark Fasheh data_blkno - *bg_blkno); 1324*ccd979bdSMark Fasheh } 1325*ccd979bdSMark Fasheh 1326*ccd979bdSMark Fasheh /* 1327*ccd979bdSMark 
Fasheh * min_bits - minimum contiguous chunk from this total allocation we 1328*ccd979bdSMark Fasheh * can handle. set to what we asked for originally for a full 1329*ccd979bdSMark Fasheh * contig. allocation, set to '1' to indicate we can deal with extents 1330*ccd979bdSMark Fasheh * of any size. 1331*ccd979bdSMark Fasheh */ 1332*ccd979bdSMark Fasheh int ocfs2_claim_clusters(struct ocfs2_super *osb, 1333*ccd979bdSMark Fasheh struct ocfs2_journal_handle *handle, 1334*ccd979bdSMark Fasheh struct ocfs2_alloc_context *ac, 1335*ccd979bdSMark Fasheh u32 min_clusters, 1336*ccd979bdSMark Fasheh u32 *cluster_start, 1337*ccd979bdSMark Fasheh u32 *num_clusters) 1338*ccd979bdSMark Fasheh { 1339*ccd979bdSMark Fasheh int status; 1340*ccd979bdSMark Fasheh unsigned int bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given; 1341*ccd979bdSMark Fasheh u64 bg_blkno; 1342*ccd979bdSMark Fasheh u16 bg_bit_off; 1343*ccd979bdSMark Fasheh 1344*ccd979bdSMark Fasheh mlog_entry_void(); 1345*ccd979bdSMark Fasheh 1346*ccd979bdSMark Fasheh BUG_ON(!ac); 1347*ccd979bdSMark Fasheh BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted); 1348*ccd979bdSMark Fasheh 1349*ccd979bdSMark Fasheh BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL 1350*ccd979bdSMark Fasheh && ac->ac_which != OCFS2_AC_USE_MAIN); 1351*ccd979bdSMark Fasheh BUG_ON(ac->ac_handle != handle); 1352*ccd979bdSMark Fasheh 1353*ccd979bdSMark Fasheh if (ac->ac_which == OCFS2_AC_USE_LOCAL) { 1354*ccd979bdSMark Fasheh status = ocfs2_claim_local_alloc_bits(osb, 1355*ccd979bdSMark Fasheh handle, 1356*ccd979bdSMark Fasheh ac, 1357*ccd979bdSMark Fasheh bits_wanted, 1358*ccd979bdSMark Fasheh cluster_start, 1359*ccd979bdSMark Fasheh num_clusters); 1360*ccd979bdSMark Fasheh if (!status) 1361*ccd979bdSMark Fasheh atomic_inc(&osb->alloc_stats.local_data); 1362*ccd979bdSMark Fasheh } else { 1363*ccd979bdSMark Fasheh if (min_clusters > (osb->bitmap_cpg - 1)) { 1364*ccd979bdSMark Fasheh /* The only paths asking for contiguousness 1365*ccd979bdSMark Fasheh * should know 
about this already. */ 1366*ccd979bdSMark Fasheh mlog(ML_ERROR, "minimum allocation requested exceeds " 1367*ccd979bdSMark Fasheh "group bitmap size!"); 1368*ccd979bdSMark Fasheh status = -ENOSPC; 1369*ccd979bdSMark Fasheh goto bail; 1370*ccd979bdSMark Fasheh } 1371*ccd979bdSMark Fasheh /* clamp the current request down to a realistic size. */ 1372*ccd979bdSMark Fasheh if (bits_wanted > (osb->bitmap_cpg - 1)) 1373*ccd979bdSMark Fasheh bits_wanted = osb->bitmap_cpg - 1; 1374*ccd979bdSMark Fasheh 1375*ccd979bdSMark Fasheh status = ocfs2_claim_suballoc_bits(osb, 1376*ccd979bdSMark Fasheh ac, 1377*ccd979bdSMark Fasheh bits_wanted, 1378*ccd979bdSMark Fasheh min_clusters, 1379*ccd979bdSMark Fasheh &bg_bit_off, 1380*ccd979bdSMark Fasheh num_clusters, 1381*ccd979bdSMark Fasheh &bg_blkno); 1382*ccd979bdSMark Fasheh if (!status) { 1383*ccd979bdSMark Fasheh *cluster_start = 1384*ccd979bdSMark Fasheh ocfs2_desc_bitmap_to_cluster_off(ac->ac_inode, 1385*ccd979bdSMark Fasheh bg_blkno, 1386*ccd979bdSMark Fasheh bg_bit_off); 1387*ccd979bdSMark Fasheh atomic_inc(&osb->alloc_stats.bitmap_data); 1388*ccd979bdSMark Fasheh } 1389*ccd979bdSMark Fasheh } 1390*ccd979bdSMark Fasheh if (status < 0) { 1391*ccd979bdSMark Fasheh if (status != -ENOSPC) 1392*ccd979bdSMark Fasheh mlog_errno(status); 1393*ccd979bdSMark Fasheh goto bail; 1394*ccd979bdSMark Fasheh } 1395*ccd979bdSMark Fasheh 1396*ccd979bdSMark Fasheh ac->ac_bits_given += *num_clusters; 1397*ccd979bdSMark Fasheh 1398*ccd979bdSMark Fasheh bail: 1399*ccd979bdSMark Fasheh mlog_exit(status); 1400*ccd979bdSMark Fasheh return status; 1401*ccd979bdSMark Fasheh } 1402*ccd979bdSMark Fasheh 1403*ccd979bdSMark Fasheh static inline int ocfs2_block_group_clear_bits(struct ocfs2_journal_handle *handle, 1404*ccd979bdSMark Fasheh struct inode *alloc_inode, 1405*ccd979bdSMark Fasheh struct ocfs2_group_desc *bg, 1406*ccd979bdSMark Fasheh struct buffer_head *group_bh, 1407*ccd979bdSMark Fasheh unsigned int bit_off, 1408*ccd979bdSMark Fasheh unsigned int 
num_bits) 1409*ccd979bdSMark Fasheh { 1410*ccd979bdSMark Fasheh int status; 1411*ccd979bdSMark Fasheh unsigned int tmp; 1412*ccd979bdSMark Fasheh int journal_type = OCFS2_JOURNAL_ACCESS_WRITE; 1413*ccd979bdSMark Fasheh struct ocfs2_group_desc *undo_bg = NULL; 1414*ccd979bdSMark Fasheh 1415*ccd979bdSMark Fasheh mlog_entry_void(); 1416*ccd979bdSMark Fasheh 1417*ccd979bdSMark Fasheh if (!OCFS2_IS_VALID_GROUP_DESC(bg)) { 1418*ccd979bdSMark Fasheh OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg); 1419*ccd979bdSMark Fasheh status = -EIO; 1420*ccd979bdSMark Fasheh goto bail; 1421*ccd979bdSMark Fasheh } 1422*ccd979bdSMark Fasheh 1423*ccd979bdSMark Fasheh mlog(0, "off = %u, num = %u\n", bit_off, num_bits); 1424*ccd979bdSMark Fasheh 1425*ccd979bdSMark Fasheh if (ocfs2_is_cluster_bitmap(alloc_inode)) 1426*ccd979bdSMark Fasheh journal_type = OCFS2_JOURNAL_ACCESS_UNDO; 1427*ccd979bdSMark Fasheh 1428*ccd979bdSMark Fasheh status = ocfs2_journal_access(handle, alloc_inode, group_bh, 1429*ccd979bdSMark Fasheh journal_type); 1430*ccd979bdSMark Fasheh if (status < 0) { 1431*ccd979bdSMark Fasheh mlog_errno(status); 1432*ccd979bdSMark Fasheh goto bail; 1433*ccd979bdSMark Fasheh } 1434*ccd979bdSMark Fasheh 1435*ccd979bdSMark Fasheh if (ocfs2_is_cluster_bitmap(alloc_inode)) 1436*ccd979bdSMark Fasheh undo_bg = (struct ocfs2_group_desc *) bh2jh(group_bh)->b_committed_data; 1437*ccd979bdSMark Fasheh 1438*ccd979bdSMark Fasheh tmp = num_bits; 1439*ccd979bdSMark Fasheh while(tmp--) { 1440*ccd979bdSMark Fasheh ocfs2_clear_bit((bit_off + tmp), 1441*ccd979bdSMark Fasheh (unsigned long *) bg->bg_bitmap); 1442*ccd979bdSMark Fasheh if (ocfs2_is_cluster_bitmap(alloc_inode)) 1443*ccd979bdSMark Fasheh ocfs2_set_bit(bit_off + tmp, 1444*ccd979bdSMark Fasheh (unsigned long *) undo_bg->bg_bitmap); 1445*ccd979bdSMark Fasheh } 1446*ccd979bdSMark Fasheh le16_add_cpu(&bg->bg_free_bits_count, num_bits); 1447*ccd979bdSMark Fasheh 1448*ccd979bdSMark Fasheh status = ocfs2_journal_dirty(handle, group_bh); 
1449*ccd979bdSMark Fasheh if (status < 0) 1450*ccd979bdSMark Fasheh mlog_errno(status); 1451*ccd979bdSMark Fasheh bail: 1452*ccd979bdSMark Fasheh return status; 1453*ccd979bdSMark Fasheh } 1454*ccd979bdSMark Fasheh 1455*ccd979bdSMark Fasheh /* 1456*ccd979bdSMark Fasheh * expects the suballoc inode to already be locked. 1457*ccd979bdSMark Fasheh */ 1458*ccd979bdSMark Fasheh static int ocfs2_free_suballoc_bits(struct ocfs2_journal_handle *handle, 1459*ccd979bdSMark Fasheh struct inode *alloc_inode, 1460*ccd979bdSMark Fasheh struct buffer_head *alloc_bh, 1461*ccd979bdSMark Fasheh unsigned int start_bit, 1462*ccd979bdSMark Fasheh u64 bg_blkno, 1463*ccd979bdSMark Fasheh unsigned int count) 1464*ccd979bdSMark Fasheh { 1465*ccd979bdSMark Fasheh int status = 0; 1466*ccd979bdSMark Fasheh u32 tmp_used; 1467*ccd979bdSMark Fasheh struct ocfs2_super *osb = OCFS2_SB(alloc_inode->i_sb); 1468*ccd979bdSMark Fasheh struct ocfs2_dinode *fe = (struct ocfs2_dinode *) alloc_bh->b_data; 1469*ccd979bdSMark Fasheh struct ocfs2_chain_list *cl = &fe->id2.i_chain; 1470*ccd979bdSMark Fasheh struct buffer_head *group_bh = NULL; 1471*ccd979bdSMark Fasheh struct ocfs2_group_desc *group; 1472*ccd979bdSMark Fasheh 1473*ccd979bdSMark Fasheh mlog_entry_void(); 1474*ccd979bdSMark Fasheh 1475*ccd979bdSMark Fasheh if (!OCFS2_IS_VALID_DINODE(fe)) { 1476*ccd979bdSMark Fasheh OCFS2_RO_ON_INVALID_DINODE(alloc_inode->i_sb, fe); 1477*ccd979bdSMark Fasheh status = -EIO; 1478*ccd979bdSMark Fasheh goto bail; 1479*ccd979bdSMark Fasheh } 1480*ccd979bdSMark Fasheh BUG_ON((count + start_bit) > ocfs2_bits_per_group(cl)); 1481*ccd979bdSMark Fasheh 1482*ccd979bdSMark Fasheh mlog(0, "suballocator %"MLFu64": freeing %u bits from group %"MLFu64 1483*ccd979bdSMark Fasheh ", starting at %u\n", 1484*ccd979bdSMark Fasheh OCFS2_I(alloc_inode)->ip_blkno, count, bg_blkno, 1485*ccd979bdSMark Fasheh start_bit); 1486*ccd979bdSMark Fasheh 1487*ccd979bdSMark Fasheh status = ocfs2_read_block(osb, bg_blkno, &group_bh, OCFS2_BH_CACHED, 
1488*ccd979bdSMark Fasheh alloc_inode); 1489*ccd979bdSMark Fasheh if (status < 0) { 1490*ccd979bdSMark Fasheh mlog_errno(status); 1491*ccd979bdSMark Fasheh goto bail; 1492*ccd979bdSMark Fasheh } 1493*ccd979bdSMark Fasheh 1494*ccd979bdSMark Fasheh group = (struct ocfs2_group_desc *) group_bh->b_data; 1495*ccd979bdSMark Fasheh if (!OCFS2_IS_VALID_GROUP_DESC(group)) { 1496*ccd979bdSMark Fasheh OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, group); 1497*ccd979bdSMark Fasheh status = -EIO; 1498*ccd979bdSMark Fasheh goto bail; 1499*ccd979bdSMark Fasheh } 1500*ccd979bdSMark Fasheh BUG_ON((count + start_bit) > le16_to_cpu(group->bg_bits)); 1501*ccd979bdSMark Fasheh 1502*ccd979bdSMark Fasheh status = ocfs2_block_group_clear_bits(handle, alloc_inode, 1503*ccd979bdSMark Fasheh group, group_bh, 1504*ccd979bdSMark Fasheh start_bit, count); 1505*ccd979bdSMark Fasheh if (status < 0) { 1506*ccd979bdSMark Fasheh mlog_errno(status); 1507*ccd979bdSMark Fasheh goto bail; 1508*ccd979bdSMark Fasheh } 1509*ccd979bdSMark Fasheh 1510*ccd979bdSMark Fasheh status = ocfs2_journal_access(handle, alloc_inode, alloc_bh, 1511*ccd979bdSMark Fasheh OCFS2_JOURNAL_ACCESS_WRITE); 1512*ccd979bdSMark Fasheh if (status < 0) { 1513*ccd979bdSMark Fasheh mlog_errno(status); 1514*ccd979bdSMark Fasheh goto bail; 1515*ccd979bdSMark Fasheh } 1516*ccd979bdSMark Fasheh 1517*ccd979bdSMark Fasheh le32_add_cpu(&cl->cl_recs[le16_to_cpu(group->bg_chain)].c_free, 1518*ccd979bdSMark Fasheh count); 1519*ccd979bdSMark Fasheh tmp_used = le32_to_cpu(fe->id1.bitmap1.i_used); 1520*ccd979bdSMark Fasheh fe->id1.bitmap1.i_used = cpu_to_le32(tmp_used - count); 1521*ccd979bdSMark Fasheh 1522*ccd979bdSMark Fasheh status = ocfs2_journal_dirty(handle, alloc_bh); 1523*ccd979bdSMark Fasheh if (status < 0) { 1524*ccd979bdSMark Fasheh mlog_errno(status); 1525*ccd979bdSMark Fasheh goto bail; 1526*ccd979bdSMark Fasheh } 1527*ccd979bdSMark Fasheh 1528*ccd979bdSMark Fasheh bail: 1529*ccd979bdSMark Fasheh if (group_bh) 1530*ccd979bdSMark 
Fasheh brelse(group_bh); 1531*ccd979bdSMark Fasheh 1532*ccd979bdSMark Fasheh mlog_exit(status); 1533*ccd979bdSMark Fasheh return status; 1534*ccd979bdSMark Fasheh } 1535*ccd979bdSMark Fasheh 1536*ccd979bdSMark Fasheh static inline u64 ocfs2_which_suballoc_group(u64 block, unsigned int bit) 1537*ccd979bdSMark Fasheh { 1538*ccd979bdSMark Fasheh u64 group = block - (u64) bit; 1539*ccd979bdSMark Fasheh 1540*ccd979bdSMark Fasheh return group; 1541*ccd979bdSMark Fasheh } 1542*ccd979bdSMark Fasheh 1543*ccd979bdSMark Fasheh int ocfs2_free_dinode(struct ocfs2_journal_handle *handle, 1544*ccd979bdSMark Fasheh struct inode *inode_alloc_inode, 1545*ccd979bdSMark Fasheh struct buffer_head *inode_alloc_bh, 1546*ccd979bdSMark Fasheh struct ocfs2_dinode *di) 1547*ccd979bdSMark Fasheh { 1548*ccd979bdSMark Fasheh u64 blk = le64_to_cpu(di->i_blkno); 1549*ccd979bdSMark Fasheh u16 bit = le16_to_cpu(di->i_suballoc_bit); 1550*ccd979bdSMark Fasheh u64 bg_blkno = ocfs2_which_suballoc_group(blk, bit); 1551*ccd979bdSMark Fasheh 1552*ccd979bdSMark Fasheh return ocfs2_free_suballoc_bits(handle, inode_alloc_inode, 1553*ccd979bdSMark Fasheh inode_alloc_bh, bit, bg_blkno, 1); 1554*ccd979bdSMark Fasheh } 1555*ccd979bdSMark Fasheh 1556*ccd979bdSMark Fasheh int ocfs2_free_extent_block(struct ocfs2_journal_handle *handle, 1557*ccd979bdSMark Fasheh struct inode *eb_alloc_inode, 1558*ccd979bdSMark Fasheh struct buffer_head *eb_alloc_bh, 1559*ccd979bdSMark Fasheh struct ocfs2_extent_block *eb) 1560*ccd979bdSMark Fasheh { 1561*ccd979bdSMark Fasheh u64 blk = le64_to_cpu(eb->h_blkno); 1562*ccd979bdSMark Fasheh u16 bit = le16_to_cpu(eb->h_suballoc_bit); 1563*ccd979bdSMark Fasheh u64 bg_blkno = ocfs2_which_suballoc_group(blk, bit); 1564*ccd979bdSMark Fasheh 1565*ccd979bdSMark Fasheh return ocfs2_free_suballoc_bits(handle, eb_alloc_inode, eb_alloc_bh, 1566*ccd979bdSMark Fasheh bit, bg_blkno, 1); 1567*ccd979bdSMark Fasheh } 1568*ccd979bdSMark Fasheh 1569*ccd979bdSMark Fasheh int ocfs2_free_clusters(struct 
ocfs2_journal_handle *handle, 1570*ccd979bdSMark Fasheh struct inode *bitmap_inode, 1571*ccd979bdSMark Fasheh struct buffer_head *bitmap_bh, 1572*ccd979bdSMark Fasheh u64 start_blk, 1573*ccd979bdSMark Fasheh unsigned int num_clusters) 1574*ccd979bdSMark Fasheh { 1575*ccd979bdSMark Fasheh int status; 1576*ccd979bdSMark Fasheh u16 bg_start_bit; 1577*ccd979bdSMark Fasheh u64 bg_blkno; 1578*ccd979bdSMark Fasheh struct ocfs2_dinode *fe; 1579*ccd979bdSMark Fasheh 1580*ccd979bdSMark Fasheh /* You can't ever have a contiguous set of clusters 1581*ccd979bdSMark Fasheh * bigger than a block group bitmap so we never have to worry 1582*ccd979bdSMark Fasheh * about looping on them. */ 1583*ccd979bdSMark Fasheh 1584*ccd979bdSMark Fasheh mlog_entry_void(); 1585*ccd979bdSMark Fasheh 1586*ccd979bdSMark Fasheh /* This is expensive. We can safely remove once this stuff has 1587*ccd979bdSMark Fasheh * gotten tested really well. */ 1588*ccd979bdSMark Fasheh BUG_ON(start_blk != ocfs2_clusters_to_blocks(bitmap_inode->i_sb, ocfs2_blocks_to_clusters(bitmap_inode->i_sb, start_blk))); 1589*ccd979bdSMark Fasheh 1590*ccd979bdSMark Fasheh fe = (struct ocfs2_dinode *) bitmap_bh->b_data; 1591*ccd979bdSMark Fasheh 1592*ccd979bdSMark Fasheh ocfs2_block_to_cluster_group(bitmap_inode, start_blk, &bg_blkno, 1593*ccd979bdSMark Fasheh &bg_start_bit); 1594*ccd979bdSMark Fasheh 1595*ccd979bdSMark Fasheh mlog(0, "want to free %u clusters starting at block %"MLFu64"\n", 1596*ccd979bdSMark Fasheh num_clusters, start_blk); 1597*ccd979bdSMark Fasheh mlog(0, "bg_blkno = %"MLFu64", bg_start_bit = %u\n", 1598*ccd979bdSMark Fasheh bg_blkno, bg_start_bit); 1599*ccd979bdSMark Fasheh 1600*ccd979bdSMark Fasheh status = ocfs2_free_suballoc_bits(handle, bitmap_inode, bitmap_bh, 1601*ccd979bdSMark Fasheh bg_start_bit, bg_blkno, 1602*ccd979bdSMark Fasheh num_clusters); 1603*ccd979bdSMark Fasheh if (status < 0) 1604*ccd979bdSMark Fasheh mlog_errno(status); 1605*ccd979bdSMark Fasheh 1606*ccd979bdSMark Fasheh 
mlog_exit(status); 1607*ccd979bdSMark Fasheh return status; 1608*ccd979bdSMark Fasheh } 1609*ccd979bdSMark Fasheh 1610*ccd979bdSMark Fasheh static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg) 1611*ccd979bdSMark Fasheh { 1612*ccd979bdSMark Fasheh printk("Block Group:\n"); 1613*ccd979bdSMark Fasheh printk("bg_signature: %s\n", bg->bg_signature); 1614*ccd979bdSMark Fasheh printk("bg_size: %u\n", bg->bg_size); 1615*ccd979bdSMark Fasheh printk("bg_bits: %u\n", bg->bg_bits); 1616*ccd979bdSMark Fasheh printk("bg_free_bits_count: %u\n", bg->bg_free_bits_count); 1617*ccd979bdSMark Fasheh printk("bg_chain: %u\n", bg->bg_chain); 1618*ccd979bdSMark Fasheh printk("bg_generation: %u\n", le32_to_cpu(bg->bg_generation)); 1619*ccd979bdSMark Fasheh printk("bg_next_group: %"MLFu64"\n", bg->bg_next_group); 1620*ccd979bdSMark Fasheh printk("bg_parent_dinode: %"MLFu64"\n", bg->bg_parent_dinode); 1621*ccd979bdSMark Fasheh printk("bg_blkno: %"MLFu64"\n", bg->bg_blkno); 1622*ccd979bdSMark Fasheh } 1623*ccd979bdSMark Fasheh 1624*ccd979bdSMark Fasheh static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe) 1625*ccd979bdSMark Fasheh { 1626*ccd979bdSMark Fasheh int i; 1627*ccd979bdSMark Fasheh 1628*ccd979bdSMark Fasheh printk("Suballoc Inode %"MLFu64":\n", fe->i_blkno); 1629*ccd979bdSMark Fasheh printk("i_signature: %s\n", fe->i_signature); 1630*ccd979bdSMark Fasheh printk("i_size: %"MLFu64"\n", fe->i_size); 1631*ccd979bdSMark Fasheh printk("i_clusters: %u\n", fe->i_clusters); 1632*ccd979bdSMark Fasheh printk("i_generation: %u\n", 1633*ccd979bdSMark Fasheh le32_to_cpu(fe->i_generation)); 1634*ccd979bdSMark Fasheh printk("id1.bitmap1.i_used: %u\n", 1635*ccd979bdSMark Fasheh le32_to_cpu(fe->id1.bitmap1.i_used)); 1636*ccd979bdSMark Fasheh printk("id1.bitmap1.i_total: %u\n", 1637*ccd979bdSMark Fasheh le32_to_cpu(fe->id1.bitmap1.i_total)); 1638*ccd979bdSMark Fasheh printk("id2.i_chain.cl_cpg: %u\n", fe->id2.i_chain.cl_cpg); 1639*ccd979bdSMark Fasheh 
printk("id2.i_chain.cl_bpc: %u\n", fe->id2.i_chain.cl_bpc); 1640*ccd979bdSMark Fasheh printk("id2.i_chain.cl_count: %u\n", fe->id2.i_chain.cl_count); 1641*ccd979bdSMark Fasheh printk("id2.i_chain.cl_next_free_rec: %u\n", 1642*ccd979bdSMark Fasheh fe->id2.i_chain.cl_next_free_rec); 1643*ccd979bdSMark Fasheh for(i = 0; i < fe->id2.i_chain.cl_next_free_rec; i++) { 1644*ccd979bdSMark Fasheh printk("fe->id2.i_chain.cl_recs[%d].c_free: %u\n", i, 1645*ccd979bdSMark Fasheh fe->id2.i_chain.cl_recs[i].c_free); 1646*ccd979bdSMark Fasheh printk("fe->id2.i_chain.cl_recs[%d].c_total: %u\n", i, 1647*ccd979bdSMark Fasheh fe->id2.i_chain.cl_recs[i].c_total); 1648*ccd979bdSMark Fasheh printk("fe->id2.i_chain.cl_recs[%d].c_blkno: %"MLFu64"\n", i, 1649*ccd979bdSMark Fasheh fe->id2.i_chain.cl_recs[i].c_blkno); 1650*ccd979bdSMark Fasheh } 1651*ccd979bdSMark Fasheh } 1652