/* -*- mode: c; c-basic-offset: 8; -*-
 * vim: noexpandtab sw=8 ts=8 sts=0:
 *
 * alloc.c
 *
 * Extent allocs and frees
 *
 * Copyright (C) 2002, 2004 Oracle. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
24*ccd979bdSMark Fasheh */ 25*ccd979bdSMark Fasheh 26*ccd979bdSMark Fasheh #include <linux/fs.h> 27*ccd979bdSMark Fasheh #include <linux/types.h> 28*ccd979bdSMark Fasheh #include <linux/slab.h> 29*ccd979bdSMark Fasheh #include <linux/highmem.h> 30*ccd979bdSMark Fasheh 31*ccd979bdSMark Fasheh #define MLOG_MASK_PREFIX ML_DISK_ALLOC 32*ccd979bdSMark Fasheh #include <cluster/masklog.h> 33*ccd979bdSMark Fasheh 34*ccd979bdSMark Fasheh #include "ocfs2.h" 35*ccd979bdSMark Fasheh 36*ccd979bdSMark Fasheh #include "alloc.h" 37*ccd979bdSMark Fasheh #include "dlmglue.h" 38*ccd979bdSMark Fasheh #include "extent_map.h" 39*ccd979bdSMark Fasheh #include "inode.h" 40*ccd979bdSMark Fasheh #include "journal.h" 41*ccd979bdSMark Fasheh #include "localalloc.h" 42*ccd979bdSMark Fasheh #include "suballoc.h" 43*ccd979bdSMark Fasheh #include "sysfile.h" 44*ccd979bdSMark Fasheh #include "file.h" 45*ccd979bdSMark Fasheh #include "super.h" 46*ccd979bdSMark Fasheh #include "uptodate.h" 47*ccd979bdSMark Fasheh 48*ccd979bdSMark Fasheh #include "buffer_head_io.h" 49*ccd979bdSMark Fasheh 50*ccd979bdSMark Fasheh static int ocfs2_extent_contig(struct inode *inode, 51*ccd979bdSMark Fasheh struct ocfs2_extent_rec *ext, 52*ccd979bdSMark Fasheh u64 blkno); 53*ccd979bdSMark Fasheh 54*ccd979bdSMark Fasheh static int ocfs2_create_new_meta_bhs(struct ocfs2_super *osb, 55*ccd979bdSMark Fasheh struct ocfs2_journal_handle *handle, 56*ccd979bdSMark Fasheh struct inode *inode, 57*ccd979bdSMark Fasheh int wanted, 58*ccd979bdSMark Fasheh struct ocfs2_alloc_context *meta_ac, 59*ccd979bdSMark Fasheh struct buffer_head *bhs[]); 60*ccd979bdSMark Fasheh 61*ccd979bdSMark Fasheh static int ocfs2_add_branch(struct ocfs2_super *osb, 62*ccd979bdSMark Fasheh struct ocfs2_journal_handle *handle, 63*ccd979bdSMark Fasheh struct inode *inode, 64*ccd979bdSMark Fasheh struct buffer_head *fe_bh, 65*ccd979bdSMark Fasheh struct buffer_head *eb_bh, 66*ccd979bdSMark Fasheh struct buffer_head *last_eb_bh, 67*ccd979bdSMark Fasheh struct 
ocfs2_alloc_context *meta_ac); 68*ccd979bdSMark Fasheh 69*ccd979bdSMark Fasheh static int ocfs2_shift_tree_depth(struct ocfs2_super *osb, 70*ccd979bdSMark Fasheh struct ocfs2_journal_handle *handle, 71*ccd979bdSMark Fasheh struct inode *inode, 72*ccd979bdSMark Fasheh struct buffer_head *fe_bh, 73*ccd979bdSMark Fasheh struct ocfs2_alloc_context *meta_ac, 74*ccd979bdSMark Fasheh struct buffer_head **ret_new_eb_bh); 75*ccd979bdSMark Fasheh 76*ccd979bdSMark Fasheh static int ocfs2_do_insert_extent(struct ocfs2_super *osb, 77*ccd979bdSMark Fasheh struct ocfs2_journal_handle *handle, 78*ccd979bdSMark Fasheh struct inode *inode, 79*ccd979bdSMark Fasheh struct buffer_head *fe_bh, 80*ccd979bdSMark Fasheh u64 blkno, 81*ccd979bdSMark Fasheh u32 new_clusters); 82*ccd979bdSMark Fasheh 83*ccd979bdSMark Fasheh static int ocfs2_find_branch_target(struct ocfs2_super *osb, 84*ccd979bdSMark Fasheh struct inode *inode, 85*ccd979bdSMark Fasheh struct buffer_head *fe_bh, 86*ccd979bdSMark Fasheh struct buffer_head **target_bh); 87*ccd979bdSMark Fasheh 88*ccd979bdSMark Fasheh static int ocfs2_find_new_last_ext_blk(struct ocfs2_super *osb, 89*ccd979bdSMark Fasheh struct inode *inode, 90*ccd979bdSMark Fasheh struct ocfs2_dinode *fe, 91*ccd979bdSMark Fasheh unsigned int new_i_clusters, 92*ccd979bdSMark Fasheh struct buffer_head *old_last_eb, 93*ccd979bdSMark Fasheh struct buffer_head **new_last_eb); 94*ccd979bdSMark Fasheh 95*ccd979bdSMark Fasheh static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc); 96*ccd979bdSMark Fasheh 97*ccd979bdSMark Fasheh static int ocfs2_extent_contig(struct inode *inode, 98*ccd979bdSMark Fasheh struct ocfs2_extent_rec *ext, 99*ccd979bdSMark Fasheh u64 blkno) 100*ccd979bdSMark Fasheh { 101*ccd979bdSMark Fasheh return blkno == (le64_to_cpu(ext->e_blkno) + 102*ccd979bdSMark Fasheh ocfs2_clusters_to_blocks(inode->i_sb, 103*ccd979bdSMark Fasheh le32_to_cpu(ext->e_clusters))); 104*ccd979bdSMark Fasheh } 105*ccd979bdSMark Fasheh 106*ccd979bdSMark 
Fasheh /* 107*ccd979bdSMark Fasheh * How many free extents have we got before we need more meta data? 108*ccd979bdSMark Fasheh */ 109*ccd979bdSMark Fasheh int ocfs2_num_free_extents(struct ocfs2_super *osb, 110*ccd979bdSMark Fasheh struct inode *inode, 111*ccd979bdSMark Fasheh struct ocfs2_dinode *fe) 112*ccd979bdSMark Fasheh { 113*ccd979bdSMark Fasheh int retval; 114*ccd979bdSMark Fasheh struct ocfs2_extent_list *el; 115*ccd979bdSMark Fasheh struct ocfs2_extent_block *eb; 116*ccd979bdSMark Fasheh struct buffer_head *eb_bh = NULL; 117*ccd979bdSMark Fasheh 118*ccd979bdSMark Fasheh mlog_entry_void(); 119*ccd979bdSMark Fasheh 120*ccd979bdSMark Fasheh if (!OCFS2_IS_VALID_DINODE(fe)) { 121*ccd979bdSMark Fasheh OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe); 122*ccd979bdSMark Fasheh retval = -EIO; 123*ccd979bdSMark Fasheh goto bail; 124*ccd979bdSMark Fasheh } 125*ccd979bdSMark Fasheh 126*ccd979bdSMark Fasheh if (fe->i_last_eb_blk) { 127*ccd979bdSMark Fasheh retval = ocfs2_read_block(osb, le64_to_cpu(fe->i_last_eb_blk), 128*ccd979bdSMark Fasheh &eb_bh, OCFS2_BH_CACHED, inode); 129*ccd979bdSMark Fasheh if (retval < 0) { 130*ccd979bdSMark Fasheh mlog_errno(retval); 131*ccd979bdSMark Fasheh goto bail; 132*ccd979bdSMark Fasheh } 133*ccd979bdSMark Fasheh eb = (struct ocfs2_extent_block *) eb_bh->b_data; 134*ccd979bdSMark Fasheh el = &eb->h_list; 135*ccd979bdSMark Fasheh } else 136*ccd979bdSMark Fasheh el = &fe->id2.i_list; 137*ccd979bdSMark Fasheh 138*ccd979bdSMark Fasheh BUG_ON(el->l_tree_depth != 0); 139*ccd979bdSMark Fasheh 140*ccd979bdSMark Fasheh retval = le16_to_cpu(el->l_count) - le16_to_cpu(el->l_next_free_rec); 141*ccd979bdSMark Fasheh bail: 142*ccd979bdSMark Fasheh if (eb_bh) 143*ccd979bdSMark Fasheh brelse(eb_bh); 144*ccd979bdSMark Fasheh 145*ccd979bdSMark Fasheh mlog_exit(retval); 146*ccd979bdSMark Fasheh return retval; 147*ccd979bdSMark Fasheh } 148*ccd979bdSMark Fasheh 149*ccd979bdSMark Fasheh /* expects array to already be allocated 150*ccd979bdSMark Fasheh * 
151*ccd979bdSMark Fasheh * sets h_signature, h_blkno, h_suballoc_bit, h_suballoc_slot, and 152*ccd979bdSMark Fasheh * l_count for you 153*ccd979bdSMark Fasheh */ 154*ccd979bdSMark Fasheh static int ocfs2_create_new_meta_bhs(struct ocfs2_super *osb, 155*ccd979bdSMark Fasheh struct ocfs2_journal_handle *handle, 156*ccd979bdSMark Fasheh struct inode *inode, 157*ccd979bdSMark Fasheh int wanted, 158*ccd979bdSMark Fasheh struct ocfs2_alloc_context *meta_ac, 159*ccd979bdSMark Fasheh struct buffer_head *bhs[]) 160*ccd979bdSMark Fasheh { 161*ccd979bdSMark Fasheh int count, status, i; 162*ccd979bdSMark Fasheh u16 suballoc_bit_start; 163*ccd979bdSMark Fasheh u32 num_got; 164*ccd979bdSMark Fasheh u64 first_blkno; 165*ccd979bdSMark Fasheh struct ocfs2_extent_block *eb; 166*ccd979bdSMark Fasheh 167*ccd979bdSMark Fasheh mlog_entry_void(); 168*ccd979bdSMark Fasheh 169*ccd979bdSMark Fasheh count = 0; 170*ccd979bdSMark Fasheh while (count < wanted) { 171*ccd979bdSMark Fasheh status = ocfs2_claim_metadata(osb, 172*ccd979bdSMark Fasheh handle, 173*ccd979bdSMark Fasheh meta_ac, 174*ccd979bdSMark Fasheh wanted - count, 175*ccd979bdSMark Fasheh &suballoc_bit_start, 176*ccd979bdSMark Fasheh &num_got, 177*ccd979bdSMark Fasheh &first_blkno); 178*ccd979bdSMark Fasheh if (status < 0) { 179*ccd979bdSMark Fasheh mlog_errno(status); 180*ccd979bdSMark Fasheh goto bail; 181*ccd979bdSMark Fasheh } 182*ccd979bdSMark Fasheh 183*ccd979bdSMark Fasheh for(i = count; i < (num_got + count); i++) { 184*ccd979bdSMark Fasheh bhs[i] = sb_getblk(osb->sb, first_blkno); 185*ccd979bdSMark Fasheh if (bhs[i] == NULL) { 186*ccd979bdSMark Fasheh status = -EIO; 187*ccd979bdSMark Fasheh mlog_errno(status); 188*ccd979bdSMark Fasheh goto bail; 189*ccd979bdSMark Fasheh } 190*ccd979bdSMark Fasheh ocfs2_set_new_buffer_uptodate(inode, bhs[i]); 191*ccd979bdSMark Fasheh 192*ccd979bdSMark Fasheh status = ocfs2_journal_access(handle, inode, bhs[i], 193*ccd979bdSMark Fasheh OCFS2_JOURNAL_ACCESS_CREATE); 194*ccd979bdSMark Fasheh 
if (status < 0) { 195*ccd979bdSMark Fasheh mlog_errno(status); 196*ccd979bdSMark Fasheh goto bail; 197*ccd979bdSMark Fasheh } 198*ccd979bdSMark Fasheh 199*ccd979bdSMark Fasheh memset(bhs[i]->b_data, 0, osb->sb->s_blocksize); 200*ccd979bdSMark Fasheh eb = (struct ocfs2_extent_block *) bhs[i]->b_data; 201*ccd979bdSMark Fasheh /* Ok, setup the minimal stuff here. */ 202*ccd979bdSMark Fasheh strcpy(eb->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE); 203*ccd979bdSMark Fasheh eb->h_blkno = cpu_to_le64(first_blkno); 204*ccd979bdSMark Fasheh eb->h_fs_generation = cpu_to_le32(osb->fs_generation); 205*ccd979bdSMark Fasheh 206*ccd979bdSMark Fasheh #ifndef OCFS2_USE_ALL_METADATA_SUBALLOCATORS 207*ccd979bdSMark Fasheh /* we always use slot zero's suballocator */ 208*ccd979bdSMark Fasheh eb->h_suballoc_slot = 0; 209*ccd979bdSMark Fasheh #else 210*ccd979bdSMark Fasheh eb->h_suballoc_slot = cpu_to_le16(osb->slot_num); 211*ccd979bdSMark Fasheh #endif 212*ccd979bdSMark Fasheh eb->h_suballoc_bit = cpu_to_le16(suballoc_bit_start); 213*ccd979bdSMark Fasheh eb->h_list.l_count = 214*ccd979bdSMark Fasheh cpu_to_le16(ocfs2_extent_recs_per_eb(osb->sb)); 215*ccd979bdSMark Fasheh 216*ccd979bdSMark Fasheh suballoc_bit_start++; 217*ccd979bdSMark Fasheh first_blkno++; 218*ccd979bdSMark Fasheh 219*ccd979bdSMark Fasheh /* We'll also be dirtied by the caller, so 220*ccd979bdSMark Fasheh * this isn't absolutely necessary. 
*/ 221*ccd979bdSMark Fasheh status = ocfs2_journal_dirty(handle, bhs[i]); 222*ccd979bdSMark Fasheh if (status < 0) { 223*ccd979bdSMark Fasheh mlog_errno(status); 224*ccd979bdSMark Fasheh goto bail; 225*ccd979bdSMark Fasheh } 226*ccd979bdSMark Fasheh } 227*ccd979bdSMark Fasheh 228*ccd979bdSMark Fasheh count += num_got; 229*ccd979bdSMark Fasheh } 230*ccd979bdSMark Fasheh 231*ccd979bdSMark Fasheh status = 0; 232*ccd979bdSMark Fasheh bail: 233*ccd979bdSMark Fasheh if (status < 0) { 234*ccd979bdSMark Fasheh for(i = 0; i < wanted; i++) { 235*ccd979bdSMark Fasheh if (bhs[i]) 236*ccd979bdSMark Fasheh brelse(bhs[i]); 237*ccd979bdSMark Fasheh bhs[i] = NULL; 238*ccd979bdSMark Fasheh } 239*ccd979bdSMark Fasheh } 240*ccd979bdSMark Fasheh mlog_exit(status); 241*ccd979bdSMark Fasheh return status; 242*ccd979bdSMark Fasheh } 243*ccd979bdSMark Fasheh 244*ccd979bdSMark Fasheh /* 245*ccd979bdSMark Fasheh * Add an entire tree branch to our inode. eb_bh is the extent block 246*ccd979bdSMark Fasheh * to start at, if we don't want to start the branch at the dinode 247*ccd979bdSMark Fasheh * structure. 248*ccd979bdSMark Fasheh * 249*ccd979bdSMark Fasheh * last_eb_bh is required as we have to update it's next_leaf pointer 250*ccd979bdSMark Fasheh * for the new last extent block. 251*ccd979bdSMark Fasheh * 252*ccd979bdSMark Fasheh * the new branch will be 'empty' in the sense that every block will 253*ccd979bdSMark Fasheh * contain a single record with e_clusters == 0. 
254*ccd979bdSMark Fasheh */ 255*ccd979bdSMark Fasheh static int ocfs2_add_branch(struct ocfs2_super *osb, 256*ccd979bdSMark Fasheh struct ocfs2_journal_handle *handle, 257*ccd979bdSMark Fasheh struct inode *inode, 258*ccd979bdSMark Fasheh struct buffer_head *fe_bh, 259*ccd979bdSMark Fasheh struct buffer_head *eb_bh, 260*ccd979bdSMark Fasheh struct buffer_head *last_eb_bh, 261*ccd979bdSMark Fasheh struct ocfs2_alloc_context *meta_ac) 262*ccd979bdSMark Fasheh { 263*ccd979bdSMark Fasheh int status, new_blocks, i; 264*ccd979bdSMark Fasheh u64 next_blkno, new_last_eb_blk; 265*ccd979bdSMark Fasheh struct buffer_head *bh; 266*ccd979bdSMark Fasheh struct buffer_head **new_eb_bhs = NULL; 267*ccd979bdSMark Fasheh struct ocfs2_dinode *fe; 268*ccd979bdSMark Fasheh struct ocfs2_extent_block *eb; 269*ccd979bdSMark Fasheh struct ocfs2_extent_list *eb_el; 270*ccd979bdSMark Fasheh struct ocfs2_extent_list *el; 271*ccd979bdSMark Fasheh 272*ccd979bdSMark Fasheh mlog_entry_void(); 273*ccd979bdSMark Fasheh 274*ccd979bdSMark Fasheh BUG_ON(!last_eb_bh); 275*ccd979bdSMark Fasheh 276*ccd979bdSMark Fasheh fe = (struct ocfs2_dinode *) fe_bh->b_data; 277*ccd979bdSMark Fasheh 278*ccd979bdSMark Fasheh if (eb_bh) { 279*ccd979bdSMark Fasheh eb = (struct ocfs2_extent_block *) eb_bh->b_data; 280*ccd979bdSMark Fasheh el = &eb->h_list; 281*ccd979bdSMark Fasheh } else 282*ccd979bdSMark Fasheh el = &fe->id2.i_list; 283*ccd979bdSMark Fasheh 284*ccd979bdSMark Fasheh /* we never add a branch to a leaf. 
*/ 285*ccd979bdSMark Fasheh BUG_ON(!el->l_tree_depth); 286*ccd979bdSMark Fasheh 287*ccd979bdSMark Fasheh new_blocks = le16_to_cpu(el->l_tree_depth); 288*ccd979bdSMark Fasheh 289*ccd979bdSMark Fasheh /* allocate the number of new eb blocks we need */ 290*ccd979bdSMark Fasheh new_eb_bhs = kcalloc(new_blocks, sizeof(struct buffer_head *), 291*ccd979bdSMark Fasheh GFP_KERNEL); 292*ccd979bdSMark Fasheh if (!new_eb_bhs) { 293*ccd979bdSMark Fasheh status = -ENOMEM; 294*ccd979bdSMark Fasheh mlog_errno(status); 295*ccd979bdSMark Fasheh goto bail; 296*ccd979bdSMark Fasheh } 297*ccd979bdSMark Fasheh 298*ccd979bdSMark Fasheh status = ocfs2_create_new_meta_bhs(osb, handle, inode, new_blocks, 299*ccd979bdSMark Fasheh meta_ac, new_eb_bhs); 300*ccd979bdSMark Fasheh if (status < 0) { 301*ccd979bdSMark Fasheh mlog_errno(status); 302*ccd979bdSMark Fasheh goto bail; 303*ccd979bdSMark Fasheh } 304*ccd979bdSMark Fasheh 305*ccd979bdSMark Fasheh /* Note: new_eb_bhs[new_blocks - 1] is the guy which will be 306*ccd979bdSMark Fasheh * linked with the rest of the tree. 307*ccd979bdSMark Fasheh * conversly, new_eb_bhs[0] is the new bottommost leaf. 308*ccd979bdSMark Fasheh * 309*ccd979bdSMark Fasheh * when we leave the loop, new_last_eb_blk will point to the 310*ccd979bdSMark Fasheh * newest leaf, and next_blkno will point to the topmost extent 311*ccd979bdSMark Fasheh * block. 
*/ 312*ccd979bdSMark Fasheh next_blkno = new_last_eb_blk = 0; 313*ccd979bdSMark Fasheh for(i = 0; i < new_blocks; i++) { 314*ccd979bdSMark Fasheh bh = new_eb_bhs[i]; 315*ccd979bdSMark Fasheh eb = (struct ocfs2_extent_block *) bh->b_data; 316*ccd979bdSMark Fasheh if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { 317*ccd979bdSMark Fasheh OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); 318*ccd979bdSMark Fasheh status = -EIO; 319*ccd979bdSMark Fasheh goto bail; 320*ccd979bdSMark Fasheh } 321*ccd979bdSMark Fasheh eb_el = &eb->h_list; 322*ccd979bdSMark Fasheh 323*ccd979bdSMark Fasheh status = ocfs2_journal_access(handle, inode, bh, 324*ccd979bdSMark Fasheh OCFS2_JOURNAL_ACCESS_CREATE); 325*ccd979bdSMark Fasheh if (status < 0) { 326*ccd979bdSMark Fasheh mlog_errno(status); 327*ccd979bdSMark Fasheh goto bail; 328*ccd979bdSMark Fasheh } 329*ccd979bdSMark Fasheh 330*ccd979bdSMark Fasheh eb->h_next_leaf_blk = 0; 331*ccd979bdSMark Fasheh eb_el->l_tree_depth = cpu_to_le16(i); 332*ccd979bdSMark Fasheh eb_el->l_next_free_rec = cpu_to_le16(1); 333*ccd979bdSMark Fasheh eb_el->l_recs[0].e_cpos = fe->i_clusters; 334*ccd979bdSMark Fasheh eb_el->l_recs[0].e_blkno = cpu_to_le64(next_blkno); 335*ccd979bdSMark Fasheh eb_el->l_recs[0].e_clusters = cpu_to_le32(0); 336*ccd979bdSMark Fasheh if (!eb_el->l_tree_depth) 337*ccd979bdSMark Fasheh new_last_eb_blk = le64_to_cpu(eb->h_blkno); 338*ccd979bdSMark Fasheh 339*ccd979bdSMark Fasheh status = ocfs2_journal_dirty(handle, bh); 340*ccd979bdSMark Fasheh if (status < 0) { 341*ccd979bdSMark Fasheh mlog_errno(status); 342*ccd979bdSMark Fasheh goto bail; 343*ccd979bdSMark Fasheh } 344*ccd979bdSMark Fasheh 345*ccd979bdSMark Fasheh next_blkno = le64_to_cpu(eb->h_blkno); 346*ccd979bdSMark Fasheh } 347*ccd979bdSMark Fasheh 348*ccd979bdSMark Fasheh /* This is a bit hairy. We want to update up to three blocks 349*ccd979bdSMark Fasheh * here without leaving any of them in an inconsistent state 350*ccd979bdSMark Fasheh * in case of error. 
We don't have to worry about 351*ccd979bdSMark Fasheh * journal_dirty erroring as it won't unless we've aborted the 352*ccd979bdSMark Fasheh * handle (in which case we would never be here) so reserving 353*ccd979bdSMark Fasheh * the write with journal_access is all we need to do. */ 354*ccd979bdSMark Fasheh status = ocfs2_journal_access(handle, inode, last_eb_bh, 355*ccd979bdSMark Fasheh OCFS2_JOURNAL_ACCESS_WRITE); 356*ccd979bdSMark Fasheh if (status < 0) { 357*ccd979bdSMark Fasheh mlog_errno(status); 358*ccd979bdSMark Fasheh goto bail; 359*ccd979bdSMark Fasheh } 360*ccd979bdSMark Fasheh status = ocfs2_journal_access(handle, inode, fe_bh, 361*ccd979bdSMark Fasheh OCFS2_JOURNAL_ACCESS_WRITE); 362*ccd979bdSMark Fasheh if (status < 0) { 363*ccd979bdSMark Fasheh mlog_errno(status); 364*ccd979bdSMark Fasheh goto bail; 365*ccd979bdSMark Fasheh } 366*ccd979bdSMark Fasheh if (eb_bh) { 367*ccd979bdSMark Fasheh status = ocfs2_journal_access(handle, inode, eb_bh, 368*ccd979bdSMark Fasheh OCFS2_JOURNAL_ACCESS_WRITE); 369*ccd979bdSMark Fasheh if (status < 0) { 370*ccd979bdSMark Fasheh mlog_errno(status); 371*ccd979bdSMark Fasheh goto bail; 372*ccd979bdSMark Fasheh } 373*ccd979bdSMark Fasheh } 374*ccd979bdSMark Fasheh 375*ccd979bdSMark Fasheh /* Link the new branch into the rest of the tree (el will 376*ccd979bdSMark Fasheh * either be on the fe, or the extent block passed in. */ 377*ccd979bdSMark Fasheh i = le16_to_cpu(el->l_next_free_rec); 378*ccd979bdSMark Fasheh el->l_recs[i].e_blkno = cpu_to_le64(next_blkno); 379*ccd979bdSMark Fasheh el->l_recs[i].e_cpos = fe->i_clusters; 380*ccd979bdSMark Fasheh el->l_recs[i].e_clusters = 0; 381*ccd979bdSMark Fasheh le16_add_cpu(&el->l_next_free_rec, 1); 382*ccd979bdSMark Fasheh 383*ccd979bdSMark Fasheh /* fe needs a new last extent block pointer, as does the 384*ccd979bdSMark Fasheh * next_leaf on the previously last-extent-block. 
*/ 385*ccd979bdSMark Fasheh fe->i_last_eb_blk = cpu_to_le64(new_last_eb_blk); 386*ccd979bdSMark Fasheh 387*ccd979bdSMark Fasheh eb = (struct ocfs2_extent_block *) last_eb_bh->b_data; 388*ccd979bdSMark Fasheh eb->h_next_leaf_blk = cpu_to_le64(new_last_eb_blk); 389*ccd979bdSMark Fasheh 390*ccd979bdSMark Fasheh status = ocfs2_journal_dirty(handle, last_eb_bh); 391*ccd979bdSMark Fasheh if (status < 0) 392*ccd979bdSMark Fasheh mlog_errno(status); 393*ccd979bdSMark Fasheh status = ocfs2_journal_dirty(handle, fe_bh); 394*ccd979bdSMark Fasheh if (status < 0) 395*ccd979bdSMark Fasheh mlog_errno(status); 396*ccd979bdSMark Fasheh if (eb_bh) { 397*ccd979bdSMark Fasheh status = ocfs2_journal_dirty(handle, eb_bh); 398*ccd979bdSMark Fasheh if (status < 0) 399*ccd979bdSMark Fasheh mlog_errno(status); 400*ccd979bdSMark Fasheh } 401*ccd979bdSMark Fasheh 402*ccd979bdSMark Fasheh status = 0; 403*ccd979bdSMark Fasheh bail: 404*ccd979bdSMark Fasheh if (new_eb_bhs) { 405*ccd979bdSMark Fasheh for (i = 0; i < new_blocks; i++) 406*ccd979bdSMark Fasheh if (new_eb_bhs[i]) 407*ccd979bdSMark Fasheh brelse(new_eb_bhs[i]); 408*ccd979bdSMark Fasheh kfree(new_eb_bhs); 409*ccd979bdSMark Fasheh } 410*ccd979bdSMark Fasheh 411*ccd979bdSMark Fasheh mlog_exit(status); 412*ccd979bdSMark Fasheh return status; 413*ccd979bdSMark Fasheh } 414*ccd979bdSMark Fasheh 415*ccd979bdSMark Fasheh /* 416*ccd979bdSMark Fasheh * adds another level to the allocation tree. 417*ccd979bdSMark Fasheh * returns back the new extent block so you can add a branch to it 418*ccd979bdSMark Fasheh * after this call. 
419*ccd979bdSMark Fasheh */ 420*ccd979bdSMark Fasheh static int ocfs2_shift_tree_depth(struct ocfs2_super *osb, 421*ccd979bdSMark Fasheh struct ocfs2_journal_handle *handle, 422*ccd979bdSMark Fasheh struct inode *inode, 423*ccd979bdSMark Fasheh struct buffer_head *fe_bh, 424*ccd979bdSMark Fasheh struct ocfs2_alloc_context *meta_ac, 425*ccd979bdSMark Fasheh struct buffer_head **ret_new_eb_bh) 426*ccd979bdSMark Fasheh { 427*ccd979bdSMark Fasheh int status, i; 428*ccd979bdSMark Fasheh struct buffer_head *new_eb_bh = NULL; 429*ccd979bdSMark Fasheh struct ocfs2_dinode *fe; 430*ccd979bdSMark Fasheh struct ocfs2_extent_block *eb; 431*ccd979bdSMark Fasheh struct ocfs2_extent_list *fe_el; 432*ccd979bdSMark Fasheh struct ocfs2_extent_list *eb_el; 433*ccd979bdSMark Fasheh 434*ccd979bdSMark Fasheh mlog_entry_void(); 435*ccd979bdSMark Fasheh 436*ccd979bdSMark Fasheh status = ocfs2_create_new_meta_bhs(osb, handle, inode, 1, meta_ac, 437*ccd979bdSMark Fasheh &new_eb_bh); 438*ccd979bdSMark Fasheh if (status < 0) { 439*ccd979bdSMark Fasheh mlog_errno(status); 440*ccd979bdSMark Fasheh goto bail; 441*ccd979bdSMark Fasheh } 442*ccd979bdSMark Fasheh 443*ccd979bdSMark Fasheh eb = (struct ocfs2_extent_block *) new_eb_bh->b_data; 444*ccd979bdSMark Fasheh if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { 445*ccd979bdSMark Fasheh OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); 446*ccd979bdSMark Fasheh status = -EIO; 447*ccd979bdSMark Fasheh goto bail; 448*ccd979bdSMark Fasheh } 449*ccd979bdSMark Fasheh 450*ccd979bdSMark Fasheh eb_el = &eb->h_list; 451*ccd979bdSMark Fasheh fe = (struct ocfs2_dinode *) fe_bh->b_data; 452*ccd979bdSMark Fasheh fe_el = &fe->id2.i_list; 453*ccd979bdSMark Fasheh 454*ccd979bdSMark Fasheh status = ocfs2_journal_access(handle, inode, new_eb_bh, 455*ccd979bdSMark Fasheh OCFS2_JOURNAL_ACCESS_CREATE); 456*ccd979bdSMark Fasheh if (status < 0) { 457*ccd979bdSMark Fasheh mlog_errno(status); 458*ccd979bdSMark Fasheh goto bail; 459*ccd979bdSMark Fasheh } 460*ccd979bdSMark Fasheh 
461*ccd979bdSMark Fasheh /* copy the fe data into the new extent block */ 462*ccd979bdSMark Fasheh eb_el->l_tree_depth = fe_el->l_tree_depth; 463*ccd979bdSMark Fasheh eb_el->l_next_free_rec = fe_el->l_next_free_rec; 464*ccd979bdSMark Fasheh for(i = 0; i < le16_to_cpu(fe_el->l_next_free_rec); i++) { 465*ccd979bdSMark Fasheh eb_el->l_recs[i].e_cpos = fe_el->l_recs[i].e_cpos; 466*ccd979bdSMark Fasheh eb_el->l_recs[i].e_clusters = fe_el->l_recs[i].e_clusters; 467*ccd979bdSMark Fasheh eb_el->l_recs[i].e_blkno = fe_el->l_recs[i].e_blkno; 468*ccd979bdSMark Fasheh } 469*ccd979bdSMark Fasheh 470*ccd979bdSMark Fasheh status = ocfs2_journal_dirty(handle, new_eb_bh); 471*ccd979bdSMark Fasheh if (status < 0) { 472*ccd979bdSMark Fasheh mlog_errno(status); 473*ccd979bdSMark Fasheh goto bail; 474*ccd979bdSMark Fasheh } 475*ccd979bdSMark Fasheh 476*ccd979bdSMark Fasheh status = ocfs2_journal_access(handle, inode, fe_bh, 477*ccd979bdSMark Fasheh OCFS2_JOURNAL_ACCESS_WRITE); 478*ccd979bdSMark Fasheh if (status < 0) { 479*ccd979bdSMark Fasheh mlog_errno(status); 480*ccd979bdSMark Fasheh goto bail; 481*ccd979bdSMark Fasheh } 482*ccd979bdSMark Fasheh 483*ccd979bdSMark Fasheh /* update fe now */ 484*ccd979bdSMark Fasheh le16_add_cpu(&fe_el->l_tree_depth, 1); 485*ccd979bdSMark Fasheh fe_el->l_recs[0].e_cpos = 0; 486*ccd979bdSMark Fasheh fe_el->l_recs[0].e_blkno = eb->h_blkno; 487*ccd979bdSMark Fasheh fe_el->l_recs[0].e_clusters = fe->i_clusters; 488*ccd979bdSMark Fasheh for(i = 1; i < le16_to_cpu(fe_el->l_next_free_rec); i++) { 489*ccd979bdSMark Fasheh fe_el->l_recs[i].e_cpos = 0; 490*ccd979bdSMark Fasheh fe_el->l_recs[i].e_clusters = 0; 491*ccd979bdSMark Fasheh fe_el->l_recs[i].e_blkno = 0; 492*ccd979bdSMark Fasheh } 493*ccd979bdSMark Fasheh fe_el->l_next_free_rec = cpu_to_le16(1); 494*ccd979bdSMark Fasheh 495*ccd979bdSMark Fasheh /* If this is our 1st tree depth shift, then last_eb_blk 496*ccd979bdSMark Fasheh * becomes the allocated extent block */ 497*ccd979bdSMark Fasheh if 
(fe_el->l_tree_depth == cpu_to_le16(1)) 498*ccd979bdSMark Fasheh fe->i_last_eb_blk = eb->h_blkno; 499*ccd979bdSMark Fasheh 500*ccd979bdSMark Fasheh status = ocfs2_journal_dirty(handle, fe_bh); 501*ccd979bdSMark Fasheh if (status < 0) { 502*ccd979bdSMark Fasheh mlog_errno(status); 503*ccd979bdSMark Fasheh goto bail; 504*ccd979bdSMark Fasheh } 505*ccd979bdSMark Fasheh 506*ccd979bdSMark Fasheh *ret_new_eb_bh = new_eb_bh; 507*ccd979bdSMark Fasheh new_eb_bh = NULL; 508*ccd979bdSMark Fasheh status = 0; 509*ccd979bdSMark Fasheh bail: 510*ccd979bdSMark Fasheh if (new_eb_bh) 511*ccd979bdSMark Fasheh brelse(new_eb_bh); 512*ccd979bdSMark Fasheh 513*ccd979bdSMark Fasheh mlog_exit(status); 514*ccd979bdSMark Fasheh return status; 515*ccd979bdSMark Fasheh } 516*ccd979bdSMark Fasheh 517*ccd979bdSMark Fasheh /* 518*ccd979bdSMark Fasheh * Expects the tree to already have room in the rightmost leaf for the 519*ccd979bdSMark Fasheh * extent. Updates all the extent blocks (and the dinode) on the way 520*ccd979bdSMark Fasheh * down. 
521*ccd979bdSMark Fasheh */ 522*ccd979bdSMark Fasheh static int ocfs2_do_insert_extent(struct ocfs2_super *osb, 523*ccd979bdSMark Fasheh struct ocfs2_journal_handle *handle, 524*ccd979bdSMark Fasheh struct inode *inode, 525*ccd979bdSMark Fasheh struct buffer_head *fe_bh, 526*ccd979bdSMark Fasheh u64 start_blk, 527*ccd979bdSMark Fasheh u32 new_clusters) 528*ccd979bdSMark Fasheh { 529*ccd979bdSMark Fasheh int status, i, num_bhs = 0; 530*ccd979bdSMark Fasheh u64 next_blkno; 531*ccd979bdSMark Fasheh u16 next_free; 532*ccd979bdSMark Fasheh struct buffer_head **eb_bhs = NULL; 533*ccd979bdSMark Fasheh struct ocfs2_dinode *fe; 534*ccd979bdSMark Fasheh struct ocfs2_extent_block *eb; 535*ccd979bdSMark Fasheh struct ocfs2_extent_list *el; 536*ccd979bdSMark Fasheh 537*ccd979bdSMark Fasheh mlog_entry_void(); 538*ccd979bdSMark Fasheh 539*ccd979bdSMark Fasheh status = ocfs2_journal_access(handle, inode, fe_bh, 540*ccd979bdSMark Fasheh OCFS2_JOURNAL_ACCESS_WRITE); 541*ccd979bdSMark Fasheh if (status < 0) { 542*ccd979bdSMark Fasheh mlog_errno(status); 543*ccd979bdSMark Fasheh goto bail; 544*ccd979bdSMark Fasheh } 545*ccd979bdSMark Fasheh 546*ccd979bdSMark Fasheh fe = (struct ocfs2_dinode *) fe_bh->b_data; 547*ccd979bdSMark Fasheh el = &fe->id2.i_list; 548*ccd979bdSMark Fasheh if (el->l_tree_depth) { 549*ccd979bdSMark Fasheh /* This is another operation where we want to be 550*ccd979bdSMark Fasheh * careful about our tree updates. An error here means 551*ccd979bdSMark Fasheh * none of the previous changes we made should roll 552*ccd979bdSMark Fasheh * forward. As a result, we have to record the buffers 553*ccd979bdSMark Fasheh * for this part of the tree in an array and reserve a 554*ccd979bdSMark Fasheh * journal write to them before making any changes. 
*/ 555*ccd979bdSMark Fasheh num_bhs = le16_to_cpu(fe->id2.i_list.l_tree_depth); 556*ccd979bdSMark Fasheh eb_bhs = kcalloc(num_bhs, sizeof(struct buffer_head *), 557*ccd979bdSMark Fasheh GFP_KERNEL); 558*ccd979bdSMark Fasheh if (!eb_bhs) { 559*ccd979bdSMark Fasheh status = -ENOMEM; 560*ccd979bdSMark Fasheh mlog_errno(status); 561*ccd979bdSMark Fasheh goto bail; 562*ccd979bdSMark Fasheh } 563*ccd979bdSMark Fasheh 564*ccd979bdSMark Fasheh i = 0; 565*ccd979bdSMark Fasheh while(el->l_tree_depth) { 566*ccd979bdSMark Fasheh next_free = le16_to_cpu(el->l_next_free_rec); 567*ccd979bdSMark Fasheh if (next_free == 0) { 568*ccd979bdSMark Fasheh ocfs2_error(inode->i_sb, 569*ccd979bdSMark Fasheh "Dinode %"MLFu64" has a bad " 570*ccd979bdSMark Fasheh "extent list", 571*ccd979bdSMark Fasheh OCFS2_I(inode)->ip_blkno); 572*ccd979bdSMark Fasheh status = -EIO; 573*ccd979bdSMark Fasheh goto bail; 574*ccd979bdSMark Fasheh } 575*ccd979bdSMark Fasheh next_blkno = le64_to_cpu(el->l_recs[next_free - 1].e_blkno); 576*ccd979bdSMark Fasheh 577*ccd979bdSMark Fasheh BUG_ON(i >= num_bhs); 578*ccd979bdSMark Fasheh status = ocfs2_read_block(osb, next_blkno, &eb_bhs[i], 579*ccd979bdSMark Fasheh OCFS2_BH_CACHED, inode); 580*ccd979bdSMark Fasheh if (status < 0) { 581*ccd979bdSMark Fasheh mlog_errno(status); 582*ccd979bdSMark Fasheh goto bail; 583*ccd979bdSMark Fasheh } 584*ccd979bdSMark Fasheh eb = (struct ocfs2_extent_block *) eb_bhs[i]->b_data; 585*ccd979bdSMark Fasheh if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { 586*ccd979bdSMark Fasheh OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, 587*ccd979bdSMark Fasheh eb); 588*ccd979bdSMark Fasheh status = -EIO; 589*ccd979bdSMark Fasheh goto bail; 590*ccd979bdSMark Fasheh } 591*ccd979bdSMark Fasheh 592*ccd979bdSMark Fasheh status = ocfs2_journal_access(handle, inode, eb_bhs[i], 593*ccd979bdSMark Fasheh OCFS2_JOURNAL_ACCESS_WRITE); 594*ccd979bdSMark Fasheh if (status < 0) { 595*ccd979bdSMark Fasheh mlog_errno(status); 596*ccd979bdSMark Fasheh goto bail; 
597*ccd979bdSMark Fasheh } 598*ccd979bdSMark Fasheh 599*ccd979bdSMark Fasheh el = &eb->h_list; 600*ccd979bdSMark Fasheh i++; 601*ccd979bdSMark Fasheh /* When we leave this loop, eb_bhs[num_bhs - 1] will 602*ccd979bdSMark Fasheh * hold the bottom-most leaf extent block. */ 603*ccd979bdSMark Fasheh } 604*ccd979bdSMark Fasheh BUG_ON(el->l_tree_depth); 605*ccd979bdSMark Fasheh 606*ccd979bdSMark Fasheh el = &fe->id2.i_list; 607*ccd979bdSMark Fasheh /* If we have tree depth, then the fe update is 608*ccd979bdSMark Fasheh * trivial, and we want to switch el out for the 609*ccd979bdSMark Fasheh * bottom-most leaf in order to update it with the 610*ccd979bdSMark Fasheh * actual extent data below. */ 611*ccd979bdSMark Fasheh next_free = le16_to_cpu(el->l_next_free_rec); 612*ccd979bdSMark Fasheh if (next_free == 0) { 613*ccd979bdSMark Fasheh ocfs2_error(inode->i_sb, 614*ccd979bdSMark Fasheh "Dinode %"MLFu64" has a bad " 615*ccd979bdSMark Fasheh "extent list", 616*ccd979bdSMark Fasheh OCFS2_I(inode)->ip_blkno); 617*ccd979bdSMark Fasheh status = -EIO; 618*ccd979bdSMark Fasheh goto bail; 619*ccd979bdSMark Fasheh } 620*ccd979bdSMark Fasheh le32_add_cpu(&el->l_recs[next_free - 1].e_clusters, 621*ccd979bdSMark Fasheh new_clusters); 622*ccd979bdSMark Fasheh /* (num_bhs - 1) to avoid the leaf */ 623*ccd979bdSMark Fasheh for(i = 0; i < (num_bhs - 1); i++) { 624*ccd979bdSMark Fasheh eb = (struct ocfs2_extent_block *) eb_bhs[i]->b_data; 625*ccd979bdSMark Fasheh el = &eb->h_list; 626*ccd979bdSMark Fasheh 627*ccd979bdSMark Fasheh /* finally, make our actual change to the 628*ccd979bdSMark Fasheh * intermediate extent blocks. 
*/ 629*ccd979bdSMark Fasheh next_free = le16_to_cpu(el->l_next_free_rec); 630*ccd979bdSMark Fasheh le32_add_cpu(&el->l_recs[next_free - 1].e_clusters, 631*ccd979bdSMark Fasheh new_clusters); 632*ccd979bdSMark Fasheh 633*ccd979bdSMark Fasheh status = ocfs2_journal_dirty(handle, eb_bhs[i]); 634*ccd979bdSMark Fasheh if (status < 0) 635*ccd979bdSMark Fasheh mlog_errno(status); 636*ccd979bdSMark Fasheh } 637*ccd979bdSMark Fasheh BUG_ON(i != (num_bhs - 1)); 638*ccd979bdSMark Fasheh /* note that the leaf block wasn't touched in 639*ccd979bdSMark Fasheh * the loop above */ 640*ccd979bdSMark Fasheh eb = (struct ocfs2_extent_block *) eb_bhs[num_bhs - 1]->b_data; 641*ccd979bdSMark Fasheh el = &eb->h_list; 642*ccd979bdSMark Fasheh BUG_ON(el->l_tree_depth); 643*ccd979bdSMark Fasheh } 644*ccd979bdSMark Fasheh 645*ccd979bdSMark Fasheh /* yay, we can finally add the actual extent now! */ 646*ccd979bdSMark Fasheh i = le16_to_cpu(el->l_next_free_rec) - 1; 647*ccd979bdSMark Fasheh if (le16_to_cpu(el->l_next_free_rec) && 648*ccd979bdSMark Fasheh ocfs2_extent_contig(inode, &el->l_recs[i], start_blk)) { 649*ccd979bdSMark Fasheh le32_add_cpu(&el->l_recs[i].e_clusters, new_clusters); 650*ccd979bdSMark Fasheh } else if (le16_to_cpu(el->l_next_free_rec) && 651*ccd979bdSMark Fasheh (le32_to_cpu(el->l_recs[i].e_clusters) == 0)) { 652*ccd979bdSMark Fasheh /* having an empty extent at eof is legal. 
*/ 653*ccd979bdSMark Fasheh if (el->l_recs[i].e_cpos != fe->i_clusters) { 654*ccd979bdSMark Fasheh ocfs2_error(inode->i_sb, 655*ccd979bdSMark Fasheh "Dinode %"MLFu64" trailing extent is bad: " 656*ccd979bdSMark Fasheh "cpos (%u) != number of clusters (%u)", 657*ccd979bdSMark Fasheh le32_to_cpu(el->l_recs[i].e_cpos), 658*ccd979bdSMark Fasheh le32_to_cpu(fe->i_clusters)); 659*ccd979bdSMark Fasheh status = -EIO; 660*ccd979bdSMark Fasheh goto bail; 661*ccd979bdSMark Fasheh } 662*ccd979bdSMark Fasheh el->l_recs[i].e_blkno = cpu_to_le64(start_blk); 663*ccd979bdSMark Fasheh el->l_recs[i].e_clusters = cpu_to_le32(new_clusters); 664*ccd979bdSMark Fasheh } else { 665*ccd979bdSMark Fasheh /* No contiguous record, or no empty record at eof, so 666*ccd979bdSMark Fasheh * we add a new one. */ 667*ccd979bdSMark Fasheh 668*ccd979bdSMark Fasheh BUG_ON(le16_to_cpu(el->l_next_free_rec) >= 669*ccd979bdSMark Fasheh le16_to_cpu(el->l_count)); 670*ccd979bdSMark Fasheh i = le16_to_cpu(el->l_next_free_rec); 671*ccd979bdSMark Fasheh 672*ccd979bdSMark Fasheh el->l_recs[i].e_blkno = cpu_to_le64(start_blk); 673*ccd979bdSMark Fasheh el->l_recs[i].e_clusters = cpu_to_le32(new_clusters); 674*ccd979bdSMark Fasheh el->l_recs[i].e_cpos = fe->i_clusters; 675*ccd979bdSMark Fasheh le16_add_cpu(&el->l_next_free_rec, 1); 676*ccd979bdSMark Fasheh } 677*ccd979bdSMark Fasheh 678*ccd979bdSMark Fasheh /* 679*ccd979bdSMark Fasheh * extent_map errors are not fatal, so they are ignored outside 680*ccd979bdSMark Fasheh * of flushing the thing. 
681*ccd979bdSMark Fasheh */ 682*ccd979bdSMark Fasheh status = ocfs2_extent_map_append(inode, &el->l_recs[i], 683*ccd979bdSMark Fasheh new_clusters); 684*ccd979bdSMark Fasheh if (status) { 685*ccd979bdSMark Fasheh mlog_errno(status); 686*ccd979bdSMark Fasheh ocfs2_extent_map_drop(inode, le32_to_cpu(fe->i_clusters)); 687*ccd979bdSMark Fasheh } 688*ccd979bdSMark Fasheh 689*ccd979bdSMark Fasheh status = ocfs2_journal_dirty(handle, fe_bh); 690*ccd979bdSMark Fasheh if (status < 0) 691*ccd979bdSMark Fasheh mlog_errno(status); 692*ccd979bdSMark Fasheh if (fe->id2.i_list.l_tree_depth) { 693*ccd979bdSMark Fasheh status = ocfs2_journal_dirty(handle, eb_bhs[num_bhs - 1]); 694*ccd979bdSMark Fasheh if (status < 0) 695*ccd979bdSMark Fasheh mlog_errno(status); 696*ccd979bdSMark Fasheh } 697*ccd979bdSMark Fasheh 698*ccd979bdSMark Fasheh status = 0; 699*ccd979bdSMark Fasheh bail: 700*ccd979bdSMark Fasheh if (eb_bhs) { 701*ccd979bdSMark Fasheh for (i = 0; i < num_bhs; i++) 702*ccd979bdSMark Fasheh if (eb_bhs[i]) 703*ccd979bdSMark Fasheh brelse(eb_bhs[i]); 704*ccd979bdSMark Fasheh kfree(eb_bhs); 705*ccd979bdSMark Fasheh } 706*ccd979bdSMark Fasheh 707*ccd979bdSMark Fasheh mlog_exit(status); 708*ccd979bdSMark Fasheh return status; 709*ccd979bdSMark Fasheh } 710*ccd979bdSMark Fasheh 711*ccd979bdSMark Fasheh /* 712*ccd979bdSMark Fasheh * Should only be called when there is no space left in any of the 713*ccd979bdSMark Fasheh * leaf nodes. What we want to do is find the lowest tree depth 714*ccd979bdSMark Fasheh * non-leaf extent block with room for new records. There are three 715*ccd979bdSMark Fasheh * valid results of this search: 716*ccd979bdSMark Fasheh * 717*ccd979bdSMark Fasheh * 1) a lowest extent block is found, then we pass it back in 718*ccd979bdSMark Fasheh * *lowest_eb_bh and return '0' 719*ccd979bdSMark Fasheh * 720*ccd979bdSMark Fasheh * 2) the search fails to find anything, but the dinode has room. 
We 721*ccd979bdSMark Fasheh * pass NULL back in *lowest_eb_bh, but still return '0' 722*ccd979bdSMark Fasheh * 723*ccd979bdSMark Fasheh * 3) the search fails to find anything AND the dinode is full, in 724*ccd979bdSMark Fasheh * which case we return > 0 725*ccd979bdSMark Fasheh * 726*ccd979bdSMark Fasheh * return status < 0 indicates an error. 727*ccd979bdSMark Fasheh */ 728*ccd979bdSMark Fasheh static int ocfs2_find_branch_target(struct ocfs2_super *osb, 729*ccd979bdSMark Fasheh struct inode *inode, 730*ccd979bdSMark Fasheh struct buffer_head *fe_bh, 731*ccd979bdSMark Fasheh struct buffer_head **target_bh) 732*ccd979bdSMark Fasheh { 733*ccd979bdSMark Fasheh int status = 0, i; 734*ccd979bdSMark Fasheh u64 blkno; 735*ccd979bdSMark Fasheh struct ocfs2_dinode *fe; 736*ccd979bdSMark Fasheh struct ocfs2_extent_block *eb; 737*ccd979bdSMark Fasheh struct ocfs2_extent_list *el; 738*ccd979bdSMark Fasheh struct buffer_head *bh = NULL; 739*ccd979bdSMark Fasheh struct buffer_head *lowest_bh = NULL; 740*ccd979bdSMark Fasheh 741*ccd979bdSMark Fasheh mlog_entry_void(); 742*ccd979bdSMark Fasheh 743*ccd979bdSMark Fasheh *target_bh = NULL; 744*ccd979bdSMark Fasheh 745*ccd979bdSMark Fasheh fe = (struct ocfs2_dinode *) fe_bh->b_data; 746*ccd979bdSMark Fasheh el = &fe->id2.i_list; 747*ccd979bdSMark Fasheh 748*ccd979bdSMark Fasheh while(le16_to_cpu(el->l_tree_depth) > 1) { 749*ccd979bdSMark Fasheh if (le16_to_cpu(el->l_next_free_rec) == 0) { 750*ccd979bdSMark Fasheh ocfs2_error(inode->i_sb, "Dinode %"MLFu64" has empty " 751*ccd979bdSMark Fasheh "extent list (next_free_rec == 0)", 752*ccd979bdSMark Fasheh OCFS2_I(inode)->ip_blkno); 753*ccd979bdSMark Fasheh status = -EIO; 754*ccd979bdSMark Fasheh goto bail; 755*ccd979bdSMark Fasheh } 756*ccd979bdSMark Fasheh i = le16_to_cpu(el->l_next_free_rec) - 1; 757*ccd979bdSMark Fasheh blkno = le64_to_cpu(el->l_recs[i].e_blkno); 758*ccd979bdSMark Fasheh if (!blkno) { 759*ccd979bdSMark Fasheh ocfs2_error(inode->i_sb, "Dinode %"MLFu64" has extent " 
760*ccd979bdSMark Fasheh "list where extent # %d has no physical " 761*ccd979bdSMark Fasheh "block start", 762*ccd979bdSMark Fasheh OCFS2_I(inode)->ip_blkno, i); 763*ccd979bdSMark Fasheh status = -EIO; 764*ccd979bdSMark Fasheh goto bail; 765*ccd979bdSMark Fasheh } 766*ccd979bdSMark Fasheh 767*ccd979bdSMark Fasheh if (bh) { 768*ccd979bdSMark Fasheh brelse(bh); 769*ccd979bdSMark Fasheh bh = NULL; 770*ccd979bdSMark Fasheh } 771*ccd979bdSMark Fasheh 772*ccd979bdSMark Fasheh status = ocfs2_read_block(osb, blkno, &bh, OCFS2_BH_CACHED, 773*ccd979bdSMark Fasheh inode); 774*ccd979bdSMark Fasheh if (status < 0) { 775*ccd979bdSMark Fasheh mlog_errno(status); 776*ccd979bdSMark Fasheh goto bail; 777*ccd979bdSMark Fasheh } 778*ccd979bdSMark Fasheh 779*ccd979bdSMark Fasheh eb = (struct ocfs2_extent_block *) bh->b_data; 780*ccd979bdSMark Fasheh if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { 781*ccd979bdSMark Fasheh OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); 782*ccd979bdSMark Fasheh status = -EIO; 783*ccd979bdSMark Fasheh goto bail; 784*ccd979bdSMark Fasheh } 785*ccd979bdSMark Fasheh el = &eb->h_list; 786*ccd979bdSMark Fasheh 787*ccd979bdSMark Fasheh if (le16_to_cpu(el->l_next_free_rec) < 788*ccd979bdSMark Fasheh le16_to_cpu(el->l_count)) { 789*ccd979bdSMark Fasheh if (lowest_bh) 790*ccd979bdSMark Fasheh brelse(lowest_bh); 791*ccd979bdSMark Fasheh lowest_bh = bh; 792*ccd979bdSMark Fasheh get_bh(lowest_bh); 793*ccd979bdSMark Fasheh } 794*ccd979bdSMark Fasheh } 795*ccd979bdSMark Fasheh 796*ccd979bdSMark Fasheh /* If we didn't find one and the fe doesn't have any room, 797*ccd979bdSMark Fasheh * then return '1' */ 798*ccd979bdSMark Fasheh if (!lowest_bh 799*ccd979bdSMark Fasheh && (fe->id2.i_list.l_next_free_rec == fe->id2.i_list.l_count)) 800*ccd979bdSMark Fasheh status = 1; 801*ccd979bdSMark Fasheh 802*ccd979bdSMark Fasheh *target_bh = lowest_bh; 803*ccd979bdSMark Fasheh bail: 804*ccd979bdSMark Fasheh if (bh) 805*ccd979bdSMark Fasheh brelse(bh); 806*ccd979bdSMark Fasheh 
807*ccd979bdSMark Fasheh mlog_exit(status); 808*ccd979bdSMark Fasheh return status; 809*ccd979bdSMark Fasheh } 810*ccd979bdSMark Fasheh 811*ccd979bdSMark Fasheh /* the caller needs to update fe->i_clusters */ 812*ccd979bdSMark Fasheh int ocfs2_insert_extent(struct ocfs2_super *osb, 813*ccd979bdSMark Fasheh struct ocfs2_journal_handle *handle, 814*ccd979bdSMark Fasheh struct inode *inode, 815*ccd979bdSMark Fasheh struct buffer_head *fe_bh, 816*ccd979bdSMark Fasheh u64 start_blk, 817*ccd979bdSMark Fasheh u32 new_clusters, 818*ccd979bdSMark Fasheh struct ocfs2_alloc_context *meta_ac) 819*ccd979bdSMark Fasheh { 820*ccd979bdSMark Fasheh int status, i, shift; 821*ccd979bdSMark Fasheh struct buffer_head *last_eb_bh = NULL; 822*ccd979bdSMark Fasheh struct buffer_head *bh = NULL; 823*ccd979bdSMark Fasheh struct ocfs2_dinode *fe; 824*ccd979bdSMark Fasheh struct ocfs2_extent_block *eb; 825*ccd979bdSMark Fasheh struct ocfs2_extent_list *el; 826*ccd979bdSMark Fasheh 827*ccd979bdSMark Fasheh mlog_entry_void(); 828*ccd979bdSMark Fasheh 829*ccd979bdSMark Fasheh mlog(0, "add %u clusters starting at block %"MLFu64" to " 830*ccd979bdSMark Fasheh "inode %"MLFu64"\n", 831*ccd979bdSMark Fasheh new_clusters, start_blk, OCFS2_I(inode)->ip_blkno); 832*ccd979bdSMark Fasheh 833*ccd979bdSMark Fasheh fe = (struct ocfs2_dinode *) fe_bh->b_data; 834*ccd979bdSMark Fasheh el = &fe->id2.i_list; 835*ccd979bdSMark Fasheh 836*ccd979bdSMark Fasheh if (el->l_tree_depth) { 837*ccd979bdSMark Fasheh /* jump to end of tree */ 838*ccd979bdSMark Fasheh status = ocfs2_read_block(osb, le64_to_cpu(fe->i_last_eb_blk), 839*ccd979bdSMark Fasheh &last_eb_bh, OCFS2_BH_CACHED, inode); 840*ccd979bdSMark Fasheh if (status < 0) { 841*ccd979bdSMark Fasheh mlog_exit(status); 842*ccd979bdSMark Fasheh goto bail; 843*ccd979bdSMark Fasheh } 844*ccd979bdSMark Fasheh eb = (struct ocfs2_extent_block *) last_eb_bh->b_data; 845*ccd979bdSMark Fasheh el = &eb->h_list; 846*ccd979bdSMark Fasheh } 847*ccd979bdSMark Fasheh 
848*ccd979bdSMark Fasheh /* Can we allocate without adding/shifting tree bits? */ 849*ccd979bdSMark Fasheh i = le16_to_cpu(el->l_next_free_rec) - 1; 850*ccd979bdSMark Fasheh if (le16_to_cpu(el->l_next_free_rec) == 0 851*ccd979bdSMark Fasheh || (le16_to_cpu(el->l_next_free_rec) < le16_to_cpu(el->l_count)) 852*ccd979bdSMark Fasheh || le32_to_cpu(el->l_recs[i].e_clusters) == 0 853*ccd979bdSMark Fasheh || ocfs2_extent_contig(inode, &el->l_recs[i], start_blk)) 854*ccd979bdSMark Fasheh goto out_add; 855*ccd979bdSMark Fasheh 856*ccd979bdSMark Fasheh mlog(0, "ocfs2_allocate_extent: couldn't do a simple add, traversing " 857*ccd979bdSMark Fasheh "tree now.\n"); 858*ccd979bdSMark Fasheh 859*ccd979bdSMark Fasheh shift = ocfs2_find_branch_target(osb, inode, fe_bh, &bh); 860*ccd979bdSMark Fasheh if (shift < 0) { 861*ccd979bdSMark Fasheh status = shift; 862*ccd979bdSMark Fasheh mlog_errno(status); 863*ccd979bdSMark Fasheh goto bail; 864*ccd979bdSMark Fasheh } 865*ccd979bdSMark Fasheh 866*ccd979bdSMark Fasheh /* We traveled all the way to the bottom of the allocation tree 867*ccd979bdSMark Fasheh * and didn't find room for any more extents - we need to add 868*ccd979bdSMark Fasheh * another tree level */ 869*ccd979bdSMark Fasheh if (shift) { 870*ccd979bdSMark Fasheh /* if we hit a leaf, we'd better be empty :) */ 871*ccd979bdSMark Fasheh BUG_ON(le16_to_cpu(el->l_next_free_rec) != 872*ccd979bdSMark Fasheh le16_to_cpu(el->l_count)); 873*ccd979bdSMark Fasheh BUG_ON(bh); 874*ccd979bdSMark Fasheh mlog(0, "ocfs2_allocate_extent: need to shift tree depth " 875*ccd979bdSMark Fasheh "(current = %u)\n", 876*ccd979bdSMark Fasheh le16_to_cpu(fe->id2.i_list.l_tree_depth)); 877*ccd979bdSMark Fasheh 878*ccd979bdSMark Fasheh /* ocfs2_shift_tree_depth will return us a buffer with 879*ccd979bdSMark Fasheh * the new extent block (so we can pass that to 880*ccd979bdSMark Fasheh * ocfs2_add_branch). 
*/ 881*ccd979bdSMark Fasheh status = ocfs2_shift_tree_depth(osb, handle, inode, fe_bh, 882*ccd979bdSMark Fasheh meta_ac, &bh); 883*ccd979bdSMark Fasheh if (status < 0) { 884*ccd979bdSMark Fasheh mlog_errno(status); 885*ccd979bdSMark Fasheh goto bail; 886*ccd979bdSMark Fasheh } 887*ccd979bdSMark Fasheh /* Special case: we have room now if we shifted from 888*ccd979bdSMark Fasheh * tree_depth 0 */ 889*ccd979bdSMark Fasheh if (fe->id2.i_list.l_tree_depth == cpu_to_le16(1)) 890*ccd979bdSMark Fasheh goto out_add; 891*ccd979bdSMark Fasheh } 892*ccd979bdSMark Fasheh 893*ccd979bdSMark Fasheh /* call ocfs2_add_branch to add the final part of the tree with 894*ccd979bdSMark Fasheh * the new data. */ 895*ccd979bdSMark Fasheh mlog(0, "ocfs2_allocate_extent: add branch. bh = %p\n", bh); 896*ccd979bdSMark Fasheh status = ocfs2_add_branch(osb, handle, inode, fe_bh, bh, last_eb_bh, 897*ccd979bdSMark Fasheh meta_ac); 898*ccd979bdSMark Fasheh if (status < 0) { 899*ccd979bdSMark Fasheh mlog_errno(status); 900*ccd979bdSMark Fasheh goto bail; 901*ccd979bdSMark Fasheh } 902*ccd979bdSMark Fasheh 903*ccd979bdSMark Fasheh out_add: 904*ccd979bdSMark Fasheh /* Finally, we can add clusters. 
*/ 905*ccd979bdSMark Fasheh status = ocfs2_do_insert_extent(osb, handle, inode, fe_bh, 906*ccd979bdSMark Fasheh start_blk, new_clusters); 907*ccd979bdSMark Fasheh if (status < 0) 908*ccd979bdSMark Fasheh mlog_errno(status); 909*ccd979bdSMark Fasheh 910*ccd979bdSMark Fasheh bail: 911*ccd979bdSMark Fasheh if (bh) 912*ccd979bdSMark Fasheh brelse(bh); 913*ccd979bdSMark Fasheh 914*ccd979bdSMark Fasheh if (last_eb_bh) 915*ccd979bdSMark Fasheh brelse(last_eb_bh); 916*ccd979bdSMark Fasheh 917*ccd979bdSMark Fasheh mlog_exit(status); 918*ccd979bdSMark Fasheh return status; 919*ccd979bdSMark Fasheh } 920*ccd979bdSMark Fasheh 921*ccd979bdSMark Fasheh static inline int ocfs2_truncate_log_needs_flush(struct ocfs2_super *osb) 922*ccd979bdSMark Fasheh { 923*ccd979bdSMark Fasheh struct buffer_head *tl_bh = osb->osb_tl_bh; 924*ccd979bdSMark Fasheh struct ocfs2_dinode *di; 925*ccd979bdSMark Fasheh struct ocfs2_truncate_log *tl; 926*ccd979bdSMark Fasheh 927*ccd979bdSMark Fasheh di = (struct ocfs2_dinode *) tl_bh->b_data; 928*ccd979bdSMark Fasheh tl = &di->id2.i_dealloc; 929*ccd979bdSMark Fasheh 930*ccd979bdSMark Fasheh mlog_bug_on_msg(le16_to_cpu(tl->tl_used) > le16_to_cpu(tl->tl_count), 931*ccd979bdSMark Fasheh "slot %d, invalid truncate log parameters: used = " 932*ccd979bdSMark Fasheh "%u, count = %u\n", osb->slot_num, 933*ccd979bdSMark Fasheh le16_to_cpu(tl->tl_used), le16_to_cpu(tl->tl_count)); 934*ccd979bdSMark Fasheh return le16_to_cpu(tl->tl_used) == le16_to_cpu(tl->tl_count); 935*ccd979bdSMark Fasheh } 936*ccd979bdSMark Fasheh 937*ccd979bdSMark Fasheh static int ocfs2_truncate_log_can_coalesce(struct ocfs2_truncate_log *tl, 938*ccd979bdSMark Fasheh unsigned int new_start) 939*ccd979bdSMark Fasheh { 940*ccd979bdSMark Fasheh unsigned int tail_index; 941*ccd979bdSMark Fasheh unsigned int current_tail; 942*ccd979bdSMark Fasheh 943*ccd979bdSMark Fasheh /* No records, nothing to coalesce */ 944*ccd979bdSMark Fasheh if (!le16_to_cpu(tl->tl_used)) 945*ccd979bdSMark Fasheh return 0; 
946*ccd979bdSMark Fasheh 947*ccd979bdSMark Fasheh tail_index = le16_to_cpu(tl->tl_used) - 1; 948*ccd979bdSMark Fasheh current_tail = le32_to_cpu(tl->tl_recs[tail_index].t_start); 949*ccd979bdSMark Fasheh current_tail += le32_to_cpu(tl->tl_recs[tail_index].t_clusters); 950*ccd979bdSMark Fasheh 951*ccd979bdSMark Fasheh return current_tail == new_start; 952*ccd979bdSMark Fasheh } 953*ccd979bdSMark Fasheh 954*ccd979bdSMark Fasheh static int ocfs2_truncate_log_append(struct ocfs2_super *osb, 955*ccd979bdSMark Fasheh struct ocfs2_journal_handle *handle, 956*ccd979bdSMark Fasheh u64 start_blk, 957*ccd979bdSMark Fasheh unsigned int num_clusters) 958*ccd979bdSMark Fasheh { 959*ccd979bdSMark Fasheh int status, index; 960*ccd979bdSMark Fasheh unsigned int start_cluster, tl_count; 961*ccd979bdSMark Fasheh struct inode *tl_inode = osb->osb_tl_inode; 962*ccd979bdSMark Fasheh struct buffer_head *tl_bh = osb->osb_tl_bh; 963*ccd979bdSMark Fasheh struct ocfs2_dinode *di; 964*ccd979bdSMark Fasheh struct ocfs2_truncate_log *tl; 965*ccd979bdSMark Fasheh 966*ccd979bdSMark Fasheh mlog_entry("start_blk = %"MLFu64", num_clusters = %u\n", start_blk, 967*ccd979bdSMark Fasheh num_clusters); 968*ccd979bdSMark Fasheh 969*ccd979bdSMark Fasheh BUG_ON(!down_trylock(&tl_inode->i_sem)); 970*ccd979bdSMark Fasheh 971*ccd979bdSMark Fasheh start_cluster = ocfs2_blocks_to_clusters(osb->sb, start_blk); 972*ccd979bdSMark Fasheh 973*ccd979bdSMark Fasheh di = (struct ocfs2_dinode *) tl_bh->b_data; 974*ccd979bdSMark Fasheh tl = &di->id2.i_dealloc; 975*ccd979bdSMark Fasheh if (!OCFS2_IS_VALID_DINODE(di)) { 976*ccd979bdSMark Fasheh OCFS2_RO_ON_INVALID_DINODE(osb->sb, di); 977*ccd979bdSMark Fasheh status = -EIO; 978*ccd979bdSMark Fasheh goto bail; 979*ccd979bdSMark Fasheh } 980*ccd979bdSMark Fasheh 981*ccd979bdSMark Fasheh tl_count = le16_to_cpu(tl->tl_count); 982*ccd979bdSMark Fasheh mlog_bug_on_msg(tl_count > ocfs2_truncate_recs_per_inode(osb->sb) || 983*ccd979bdSMark Fasheh tl_count == 0, 984*ccd979bdSMark 
Fasheh "Truncate record count on #%"MLFu64" invalid (" 985*ccd979bdSMark Fasheh "wanted %u, actual %u\n", OCFS2_I(tl_inode)->ip_blkno, 986*ccd979bdSMark Fasheh ocfs2_truncate_recs_per_inode(osb->sb), 987*ccd979bdSMark Fasheh le16_to_cpu(tl->tl_count)); 988*ccd979bdSMark Fasheh 989*ccd979bdSMark Fasheh /* Caller should have known to flush before calling us. */ 990*ccd979bdSMark Fasheh index = le16_to_cpu(tl->tl_used); 991*ccd979bdSMark Fasheh if (index >= tl_count) { 992*ccd979bdSMark Fasheh status = -ENOSPC; 993*ccd979bdSMark Fasheh mlog_errno(status); 994*ccd979bdSMark Fasheh goto bail; 995*ccd979bdSMark Fasheh } 996*ccd979bdSMark Fasheh 997*ccd979bdSMark Fasheh status = ocfs2_journal_access(handle, tl_inode, tl_bh, 998*ccd979bdSMark Fasheh OCFS2_JOURNAL_ACCESS_WRITE); 999*ccd979bdSMark Fasheh if (status < 0) { 1000*ccd979bdSMark Fasheh mlog_errno(status); 1001*ccd979bdSMark Fasheh goto bail; 1002*ccd979bdSMark Fasheh } 1003*ccd979bdSMark Fasheh 1004*ccd979bdSMark Fasheh mlog(0, "Log truncate of %u clusters starting at cluster %u to " 1005*ccd979bdSMark Fasheh "%"MLFu64" (index = %d)\n", num_clusters, start_cluster, 1006*ccd979bdSMark Fasheh OCFS2_I(tl_inode)->ip_blkno, index); 1007*ccd979bdSMark Fasheh 1008*ccd979bdSMark Fasheh if (ocfs2_truncate_log_can_coalesce(tl, start_cluster)) { 1009*ccd979bdSMark Fasheh /* 1010*ccd979bdSMark Fasheh * Move index back to the record we are coalescing with. 
1011*ccd979bdSMark Fasheh * ocfs2_truncate_log_can_coalesce() guarantees nonzero 1012*ccd979bdSMark Fasheh */ 1013*ccd979bdSMark Fasheh index--; 1014*ccd979bdSMark Fasheh 1015*ccd979bdSMark Fasheh num_clusters += le32_to_cpu(tl->tl_recs[index].t_clusters); 1016*ccd979bdSMark Fasheh mlog(0, "Coalesce with index %u (start = %u, clusters = %u)\n", 1017*ccd979bdSMark Fasheh index, le32_to_cpu(tl->tl_recs[index].t_start), 1018*ccd979bdSMark Fasheh num_clusters); 1019*ccd979bdSMark Fasheh } else { 1020*ccd979bdSMark Fasheh tl->tl_recs[index].t_start = cpu_to_le32(start_cluster); 1021*ccd979bdSMark Fasheh tl->tl_used = cpu_to_le16(index + 1); 1022*ccd979bdSMark Fasheh } 1023*ccd979bdSMark Fasheh tl->tl_recs[index].t_clusters = cpu_to_le32(num_clusters); 1024*ccd979bdSMark Fasheh 1025*ccd979bdSMark Fasheh status = ocfs2_journal_dirty(handle, tl_bh); 1026*ccd979bdSMark Fasheh if (status < 0) { 1027*ccd979bdSMark Fasheh mlog_errno(status); 1028*ccd979bdSMark Fasheh goto bail; 1029*ccd979bdSMark Fasheh } 1030*ccd979bdSMark Fasheh 1031*ccd979bdSMark Fasheh bail: 1032*ccd979bdSMark Fasheh mlog_exit(status); 1033*ccd979bdSMark Fasheh return status; 1034*ccd979bdSMark Fasheh } 1035*ccd979bdSMark Fasheh 1036*ccd979bdSMark Fasheh static int ocfs2_replay_truncate_records(struct ocfs2_super *osb, 1037*ccd979bdSMark Fasheh struct ocfs2_journal_handle *handle, 1038*ccd979bdSMark Fasheh struct inode *data_alloc_inode, 1039*ccd979bdSMark Fasheh struct buffer_head *data_alloc_bh) 1040*ccd979bdSMark Fasheh { 1041*ccd979bdSMark Fasheh int status = 0; 1042*ccd979bdSMark Fasheh int i; 1043*ccd979bdSMark Fasheh unsigned int num_clusters; 1044*ccd979bdSMark Fasheh u64 start_blk; 1045*ccd979bdSMark Fasheh struct ocfs2_truncate_rec rec; 1046*ccd979bdSMark Fasheh struct ocfs2_dinode *di; 1047*ccd979bdSMark Fasheh struct ocfs2_truncate_log *tl; 1048*ccd979bdSMark Fasheh struct inode *tl_inode = osb->osb_tl_inode; 1049*ccd979bdSMark Fasheh struct buffer_head *tl_bh = osb->osb_tl_bh; 
1050*ccd979bdSMark Fasheh 1051*ccd979bdSMark Fasheh mlog_entry_void(); 1052*ccd979bdSMark Fasheh 1053*ccd979bdSMark Fasheh di = (struct ocfs2_dinode *) tl_bh->b_data; 1054*ccd979bdSMark Fasheh tl = &di->id2.i_dealloc; 1055*ccd979bdSMark Fasheh i = le16_to_cpu(tl->tl_used) - 1; 1056*ccd979bdSMark Fasheh while (i >= 0) { 1057*ccd979bdSMark Fasheh /* Caller has given us at least enough credits to 1058*ccd979bdSMark Fasheh * update the truncate log dinode */ 1059*ccd979bdSMark Fasheh status = ocfs2_journal_access(handle, tl_inode, tl_bh, 1060*ccd979bdSMark Fasheh OCFS2_JOURNAL_ACCESS_WRITE); 1061*ccd979bdSMark Fasheh if (status < 0) { 1062*ccd979bdSMark Fasheh mlog_errno(status); 1063*ccd979bdSMark Fasheh goto bail; 1064*ccd979bdSMark Fasheh } 1065*ccd979bdSMark Fasheh 1066*ccd979bdSMark Fasheh tl->tl_used = cpu_to_le16(i); 1067*ccd979bdSMark Fasheh 1068*ccd979bdSMark Fasheh status = ocfs2_journal_dirty(handle, tl_bh); 1069*ccd979bdSMark Fasheh if (status < 0) { 1070*ccd979bdSMark Fasheh mlog_errno(status); 1071*ccd979bdSMark Fasheh goto bail; 1072*ccd979bdSMark Fasheh } 1073*ccd979bdSMark Fasheh 1074*ccd979bdSMark Fasheh /* TODO: Perhaps we can calculate the bulk of the 1075*ccd979bdSMark Fasheh * credits up front rather than extending like 1076*ccd979bdSMark Fasheh * this. */ 1077*ccd979bdSMark Fasheh status = ocfs2_extend_trans(handle, 1078*ccd979bdSMark Fasheh OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC); 1079*ccd979bdSMark Fasheh if (status < 0) { 1080*ccd979bdSMark Fasheh mlog_errno(status); 1081*ccd979bdSMark Fasheh goto bail; 1082*ccd979bdSMark Fasheh } 1083*ccd979bdSMark Fasheh 1084*ccd979bdSMark Fasheh rec = tl->tl_recs[i]; 1085*ccd979bdSMark Fasheh start_blk = ocfs2_clusters_to_blocks(data_alloc_inode->i_sb, 1086*ccd979bdSMark Fasheh le32_to_cpu(rec.t_start)); 1087*ccd979bdSMark Fasheh num_clusters = le32_to_cpu(rec.t_clusters); 1088*ccd979bdSMark Fasheh 1089*ccd979bdSMark Fasheh /* if start_blk is not set, we ignore the record as 1090*ccd979bdSMark Fasheh * invalid. 
*/ 1091*ccd979bdSMark Fasheh if (start_blk) { 1092*ccd979bdSMark Fasheh mlog(0, "free record %d, start = %u, clusters = %u\n", 1093*ccd979bdSMark Fasheh i, le32_to_cpu(rec.t_start), num_clusters); 1094*ccd979bdSMark Fasheh 1095*ccd979bdSMark Fasheh status = ocfs2_free_clusters(handle, data_alloc_inode, 1096*ccd979bdSMark Fasheh data_alloc_bh, start_blk, 1097*ccd979bdSMark Fasheh num_clusters); 1098*ccd979bdSMark Fasheh if (status < 0) { 1099*ccd979bdSMark Fasheh mlog_errno(status); 1100*ccd979bdSMark Fasheh goto bail; 1101*ccd979bdSMark Fasheh } 1102*ccd979bdSMark Fasheh } 1103*ccd979bdSMark Fasheh i--; 1104*ccd979bdSMark Fasheh } 1105*ccd979bdSMark Fasheh 1106*ccd979bdSMark Fasheh bail: 1107*ccd979bdSMark Fasheh mlog_exit(status); 1108*ccd979bdSMark Fasheh return status; 1109*ccd979bdSMark Fasheh } 1110*ccd979bdSMark Fasheh 1111*ccd979bdSMark Fasheh /* Expects you to already be holding tl_inode->i_sem */ 1112*ccd979bdSMark Fasheh static int __ocfs2_flush_truncate_log(struct ocfs2_super *osb) 1113*ccd979bdSMark Fasheh { 1114*ccd979bdSMark Fasheh int status; 1115*ccd979bdSMark Fasheh unsigned int num_to_flush; 1116*ccd979bdSMark Fasheh struct ocfs2_journal_handle *handle = NULL; 1117*ccd979bdSMark Fasheh struct inode *tl_inode = osb->osb_tl_inode; 1118*ccd979bdSMark Fasheh struct inode *data_alloc_inode = NULL; 1119*ccd979bdSMark Fasheh struct buffer_head *tl_bh = osb->osb_tl_bh; 1120*ccd979bdSMark Fasheh struct buffer_head *data_alloc_bh = NULL; 1121*ccd979bdSMark Fasheh struct ocfs2_dinode *di; 1122*ccd979bdSMark Fasheh struct ocfs2_truncate_log *tl; 1123*ccd979bdSMark Fasheh 1124*ccd979bdSMark Fasheh mlog_entry_void(); 1125*ccd979bdSMark Fasheh 1126*ccd979bdSMark Fasheh BUG_ON(!down_trylock(&tl_inode->i_sem)); 1127*ccd979bdSMark Fasheh 1128*ccd979bdSMark Fasheh di = (struct ocfs2_dinode *) tl_bh->b_data; 1129*ccd979bdSMark Fasheh tl = &di->id2.i_dealloc; 1130*ccd979bdSMark Fasheh if (!OCFS2_IS_VALID_DINODE(di)) { 1131*ccd979bdSMark Fasheh 
OCFS2_RO_ON_INVALID_DINODE(osb->sb, di); 1132*ccd979bdSMark Fasheh status = -EIO; 1133*ccd979bdSMark Fasheh goto bail; 1134*ccd979bdSMark Fasheh } 1135*ccd979bdSMark Fasheh 1136*ccd979bdSMark Fasheh num_to_flush = le16_to_cpu(tl->tl_used); 1137*ccd979bdSMark Fasheh mlog(0, "Flush %u records from truncate log #%"MLFu64"\n", 1138*ccd979bdSMark Fasheh num_to_flush, OCFS2_I(tl_inode)->ip_blkno); 1139*ccd979bdSMark Fasheh if (!num_to_flush) { 1140*ccd979bdSMark Fasheh status = 0; 1141*ccd979bdSMark Fasheh goto bail; 1142*ccd979bdSMark Fasheh } 1143*ccd979bdSMark Fasheh 1144*ccd979bdSMark Fasheh handle = ocfs2_alloc_handle(osb); 1145*ccd979bdSMark Fasheh if (!handle) { 1146*ccd979bdSMark Fasheh status = -ENOMEM; 1147*ccd979bdSMark Fasheh mlog_errno(status); 1148*ccd979bdSMark Fasheh goto bail; 1149*ccd979bdSMark Fasheh } 1150*ccd979bdSMark Fasheh 1151*ccd979bdSMark Fasheh data_alloc_inode = ocfs2_get_system_file_inode(osb, 1152*ccd979bdSMark Fasheh GLOBAL_BITMAP_SYSTEM_INODE, 1153*ccd979bdSMark Fasheh OCFS2_INVALID_SLOT); 1154*ccd979bdSMark Fasheh if (!data_alloc_inode) { 1155*ccd979bdSMark Fasheh status = -EINVAL; 1156*ccd979bdSMark Fasheh mlog(ML_ERROR, "Could not get bitmap inode!\n"); 1157*ccd979bdSMark Fasheh goto bail; 1158*ccd979bdSMark Fasheh } 1159*ccd979bdSMark Fasheh 1160*ccd979bdSMark Fasheh ocfs2_handle_add_inode(handle, data_alloc_inode); 1161*ccd979bdSMark Fasheh status = ocfs2_meta_lock(data_alloc_inode, handle, &data_alloc_bh, 1); 1162*ccd979bdSMark Fasheh if (status < 0) { 1163*ccd979bdSMark Fasheh mlog_errno(status); 1164*ccd979bdSMark Fasheh goto bail; 1165*ccd979bdSMark Fasheh } 1166*ccd979bdSMark Fasheh 1167*ccd979bdSMark Fasheh handle = ocfs2_start_trans(osb, handle, OCFS2_TRUNCATE_LOG_UPDATE); 1168*ccd979bdSMark Fasheh if (IS_ERR(handle)) { 1169*ccd979bdSMark Fasheh status = PTR_ERR(handle); 1170*ccd979bdSMark Fasheh handle = NULL; 1171*ccd979bdSMark Fasheh mlog_errno(status); 1172*ccd979bdSMark Fasheh goto bail; 1173*ccd979bdSMark Fasheh } 
1174*ccd979bdSMark Fasheh 1175*ccd979bdSMark Fasheh status = ocfs2_replay_truncate_records(osb, handle, data_alloc_inode, 1176*ccd979bdSMark Fasheh data_alloc_bh); 1177*ccd979bdSMark Fasheh if (status < 0) { 1178*ccd979bdSMark Fasheh mlog_errno(status); 1179*ccd979bdSMark Fasheh goto bail; 1180*ccd979bdSMark Fasheh } 1181*ccd979bdSMark Fasheh 1182*ccd979bdSMark Fasheh bail: 1183*ccd979bdSMark Fasheh if (handle) 1184*ccd979bdSMark Fasheh ocfs2_commit_trans(handle); 1185*ccd979bdSMark Fasheh 1186*ccd979bdSMark Fasheh if (data_alloc_inode) 1187*ccd979bdSMark Fasheh iput(data_alloc_inode); 1188*ccd979bdSMark Fasheh 1189*ccd979bdSMark Fasheh if (data_alloc_bh) 1190*ccd979bdSMark Fasheh brelse(data_alloc_bh); 1191*ccd979bdSMark Fasheh 1192*ccd979bdSMark Fasheh mlog_exit(status); 1193*ccd979bdSMark Fasheh return status; 1194*ccd979bdSMark Fasheh } 1195*ccd979bdSMark Fasheh 1196*ccd979bdSMark Fasheh int ocfs2_flush_truncate_log(struct ocfs2_super *osb) 1197*ccd979bdSMark Fasheh { 1198*ccd979bdSMark Fasheh int status; 1199*ccd979bdSMark Fasheh struct inode *tl_inode = osb->osb_tl_inode; 1200*ccd979bdSMark Fasheh 1201*ccd979bdSMark Fasheh down(&tl_inode->i_sem); 1202*ccd979bdSMark Fasheh status = __ocfs2_flush_truncate_log(osb); 1203*ccd979bdSMark Fasheh up(&tl_inode->i_sem); 1204*ccd979bdSMark Fasheh 1205*ccd979bdSMark Fasheh return status; 1206*ccd979bdSMark Fasheh } 1207*ccd979bdSMark Fasheh 1208*ccd979bdSMark Fasheh static void ocfs2_truncate_log_worker(void *data) 1209*ccd979bdSMark Fasheh { 1210*ccd979bdSMark Fasheh int status; 1211*ccd979bdSMark Fasheh struct ocfs2_super *osb = data; 1212*ccd979bdSMark Fasheh 1213*ccd979bdSMark Fasheh mlog_entry_void(); 1214*ccd979bdSMark Fasheh 1215*ccd979bdSMark Fasheh status = ocfs2_flush_truncate_log(osb); 1216*ccd979bdSMark Fasheh if (status < 0) 1217*ccd979bdSMark Fasheh mlog_errno(status); 1218*ccd979bdSMark Fasheh 1219*ccd979bdSMark Fasheh mlog_exit(status); 1220*ccd979bdSMark Fasheh } 1221*ccd979bdSMark Fasheh 
1222*ccd979bdSMark Fasheh #define OCFS2_TRUNCATE_LOG_FLUSH_INTERVAL (2 * HZ) 1223*ccd979bdSMark Fasheh void ocfs2_schedule_truncate_log_flush(struct ocfs2_super *osb, 1224*ccd979bdSMark Fasheh int cancel) 1225*ccd979bdSMark Fasheh { 1226*ccd979bdSMark Fasheh if (osb->osb_tl_inode) { 1227*ccd979bdSMark Fasheh /* We want to push off log flushes while truncates are 1228*ccd979bdSMark Fasheh * still running. */ 1229*ccd979bdSMark Fasheh if (cancel) 1230*ccd979bdSMark Fasheh cancel_delayed_work(&osb->osb_truncate_log_wq); 1231*ccd979bdSMark Fasheh 1232*ccd979bdSMark Fasheh queue_delayed_work(ocfs2_wq, &osb->osb_truncate_log_wq, 1233*ccd979bdSMark Fasheh OCFS2_TRUNCATE_LOG_FLUSH_INTERVAL); 1234*ccd979bdSMark Fasheh } 1235*ccd979bdSMark Fasheh } 1236*ccd979bdSMark Fasheh 1237*ccd979bdSMark Fasheh static int ocfs2_get_truncate_log_info(struct ocfs2_super *osb, 1238*ccd979bdSMark Fasheh int slot_num, 1239*ccd979bdSMark Fasheh struct inode **tl_inode, 1240*ccd979bdSMark Fasheh struct buffer_head **tl_bh) 1241*ccd979bdSMark Fasheh { 1242*ccd979bdSMark Fasheh int status; 1243*ccd979bdSMark Fasheh struct inode *inode = NULL; 1244*ccd979bdSMark Fasheh struct buffer_head *bh = NULL; 1245*ccd979bdSMark Fasheh 1246*ccd979bdSMark Fasheh inode = ocfs2_get_system_file_inode(osb, 1247*ccd979bdSMark Fasheh TRUNCATE_LOG_SYSTEM_INODE, 1248*ccd979bdSMark Fasheh slot_num); 1249*ccd979bdSMark Fasheh if (!inode) { 1250*ccd979bdSMark Fasheh status = -EINVAL; 1251*ccd979bdSMark Fasheh mlog(ML_ERROR, "Could not get load truncate log inode!\n"); 1252*ccd979bdSMark Fasheh goto bail; 1253*ccd979bdSMark Fasheh } 1254*ccd979bdSMark Fasheh 1255*ccd979bdSMark Fasheh status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &bh, 1256*ccd979bdSMark Fasheh OCFS2_BH_CACHED, inode); 1257*ccd979bdSMark Fasheh if (status < 0) { 1258*ccd979bdSMark Fasheh iput(inode); 1259*ccd979bdSMark Fasheh mlog_errno(status); 1260*ccd979bdSMark Fasheh goto bail; 1261*ccd979bdSMark Fasheh } 1262*ccd979bdSMark Fasheh 
1263*ccd979bdSMark Fasheh *tl_inode = inode; 1264*ccd979bdSMark Fasheh *tl_bh = bh; 1265*ccd979bdSMark Fasheh bail: 1266*ccd979bdSMark Fasheh mlog_exit(status); 1267*ccd979bdSMark Fasheh return status; 1268*ccd979bdSMark Fasheh } 1269*ccd979bdSMark Fasheh 1270*ccd979bdSMark Fasheh /* called during the 1st stage of node recovery. we stamp a clean 1271*ccd979bdSMark Fasheh * truncate log and pass back a copy for processing later. if the 1272*ccd979bdSMark Fasheh * truncate log does not require processing, a *tl_copy is set to 1273*ccd979bdSMark Fasheh * NULL. */ 1274*ccd979bdSMark Fasheh int ocfs2_begin_truncate_log_recovery(struct ocfs2_super *osb, 1275*ccd979bdSMark Fasheh int slot_num, 1276*ccd979bdSMark Fasheh struct ocfs2_dinode **tl_copy) 1277*ccd979bdSMark Fasheh { 1278*ccd979bdSMark Fasheh int status; 1279*ccd979bdSMark Fasheh struct inode *tl_inode = NULL; 1280*ccd979bdSMark Fasheh struct buffer_head *tl_bh = NULL; 1281*ccd979bdSMark Fasheh struct ocfs2_dinode *di; 1282*ccd979bdSMark Fasheh struct ocfs2_truncate_log *tl; 1283*ccd979bdSMark Fasheh 1284*ccd979bdSMark Fasheh *tl_copy = NULL; 1285*ccd979bdSMark Fasheh 1286*ccd979bdSMark Fasheh mlog(0, "recover truncate log from slot %d\n", slot_num); 1287*ccd979bdSMark Fasheh 1288*ccd979bdSMark Fasheh status = ocfs2_get_truncate_log_info(osb, slot_num, &tl_inode, &tl_bh); 1289*ccd979bdSMark Fasheh if (status < 0) { 1290*ccd979bdSMark Fasheh mlog_errno(status); 1291*ccd979bdSMark Fasheh goto bail; 1292*ccd979bdSMark Fasheh } 1293*ccd979bdSMark Fasheh 1294*ccd979bdSMark Fasheh di = (struct ocfs2_dinode *) tl_bh->b_data; 1295*ccd979bdSMark Fasheh tl = &di->id2.i_dealloc; 1296*ccd979bdSMark Fasheh if (!OCFS2_IS_VALID_DINODE(di)) { 1297*ccd979bdSMark Fasheh OCFS2_RO_ON_INVALID_DINODE(tl_inode->i_sb, di); 1298*ccd979bdSMark Fasheh status = -EIO; 1299*ccd979bdSMark Fasheh goto bail; 1300*ccd979bdSMark Fasheh } 1301*ccd979bdSMark Fasheh 1302*ccd979bdSMark Fasheh if (le16_to_cpu(tl->tl_used)) { 1303*ccd979bdSMark Fasheh 
mlog(0, "We'll have %u logs to recover\n", 1304*ccd979bdSMark Fasheh le16_to_cpu(tl->tl_used)); 1305*ccd979bdSMark Fasheh 1306*ccd979bdSMark Fasheh *tl_copy = kmalloc(tl_bh->b_size, GFP_KERNEL); 1307*ccd979bdSMark Fasheh if (!(*tl_copy)) { 1308*ccd979bdSMark Fasheh status = -ENOMEM; 1309*ccd979bdSMark Fasheh mlog_errno(status); 1310*ccd979bdSMark Fasheh goto bail; 1311*ccd979bdSMark Fasheh } 1312*ccd979bdSMark Fasheh 1313*ccd979bdSMark Fasheh /* Assuming the write-out below goes well, this copy 1314*ccd979bdSMark Fasheh * will be passed back to recovery for processing. */ 1315*ccd979bdSMark Fasheh memcpy(*tl_copy, tl_bh->b_data, tl_bh->b_size); 1316*ccd979bdSMark Fasheh 1317*ccd979bdSMark Fasheh /* All we need to do to clear the truncate log is set 1318*ccd979bdSMark Fasheh * tl_used. */ 1319*ccd979bdSMark Fasheh tl->tl_used = 0; 1320*ccd979bdSMark Fasheh 1321*ccd979bdSMark Fasheh status = ocfs2_write_block(osb, tl_bh, tl_inode); 1322*ccd979bdSMark Fasheh if (status < 0) { 1323*ccd979bdSMark Fasheh mlog_errno(status); 1324*ccd979bdSMark Fasheh goto bail; 1325*ccd979bdSMark Fasheh } 1326*ccd979bdSMark Fasheh } 1327*ccd979bdSMark Fasheh 1328*ccd979bdSMark Fasheh bail: 1329*ccd979bdSMark Fasheh if (tl_inode) 1330*ccd979bdSMark Fasheh iput(tl_inode); 1331*ccd979bdSMark Fasheh if (tl_bh) 1332*ccd979bdSMark Fasheh brelse(tl_bh); 1333*ccd979bdSMark Fasheh 1334*ccd979bdSMark Fasheh if (status < 0 && (*tl_copy)) { 1335*ccd979bdSMark Fasheh kfree(*tl_copy); 1336*ccd979bdSMark Fasheh *tl_copy = NULL; 1337*ccd979bdSMark Fasheh } 1338*ccd979bdSMark Fasheh 1339*ccd979bdSMark Fasheh mlog_exit(status); 1340*ccd979bdSMark Fasheh return status; 1341*ccd979bdSMark Fasheh } 1342*ccd979bdSMark Fasheh 1343*ccd979bdSMark Fasheh int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb, 1344*ccd979bdSMark Fasheh struct ocfs2_dinode *tl_copy) 1345*ccd979bdSMark Fasheh { 1346*ccd979bdSMark Fasheh int status = 0; 1347*ccd979bdSMark Fasheh int i; 1348*ccd979bdSMark Fasheh unsigned int 
clusters, num_recs, start_cluster; 1349*ccd979bdSMark Fasheh u64 start_blk; 1350*ccd979bdSMark Fasheh struct ocfs2_journal_handle *handle; 1351*ccd979bdSMark Fasheh struct inode *tl_inode = osb->osb_tl_inode; 1352*ccd979bdSMark Fasheh struct ocfs2_truncate_log *tl; 1353*ccd979bdSMark Fasheh 1354*ccd979bdSMark Fasheh mlog_entry_void(); 1355*ccd979bdSMark Fasheh 1356*ccd979bdSMark Fasheh if (OCFS2_I(tl_inode)->ip_blkno == le64_to_cpu(tl_copy->i_blkno)) { 1357*ccd979bdSMark Fasheh mlog(ML_ERROR, "Asked to recover my own truncate log!\n"); 1358*ccd979bdSMark Fasheh return -EINVAL; 1359*ccd979bdSMark Fasheh } 1360*ccd979bdSMark Fasheh 1361*ccd979bdSMark Fasheh tl = &tl_copy->id2.i_dealloc; 1362*ccd979bdSMark Fasheh num_recs = le16_to_cpu(tl->tl_used); 1363*ccd979bdSMark Fasheh mlog(0, "cleanup %u records from %"MLFu64"\n", num_recs, 1364*ccd979bdSMark Fasheh tl_copy->i_blkno); 1365*ccd979bdSMark Fasheh 1366*ccd979bdSMark Fasheh down(&tl_inode->i_sem); 1367*ccd979bdSMark Fasheh for(i = 0; i < num_recs; i++) { 1368*ccd979bdSMark Fasheh if (ocfs2_truncate_log_needs_flush(osb)) { 1369*ccd979bdSMark Fasheh status = __ocfs2_flush_truncate_log(osb); 1370*ccd979bdSMark Fasheh if (status < 0) { 1371*ccd979bdSMark Fasheh mlog_errno(status); 1372*ccd979bdSMark Fasheh goto bail_up; 1373*ccd979bdSMark Fasheh } 1374*ccd979bdSMark Fasheh } 1375*ccd979bdSMark Fasheh 1376*ccd979bdSMark Fasheh handle = ocfs2_start_trans(osb, NULL, 1377*ccd979bdSMark Fasheh OCFS2_TRUNCATE_LOG_UPDATE); 1378*ccd979bdSMark Fasheh if (IS_ERR(handle)) { 1379*ccd979bdSMark Fasheh status = PTR_ERR(handle); 1380*ccd979bdSMark Fasheh mlog_errno(status); 1381*ccd979bdSMark Fasheh goto bail_up; 1382*ccd979bdSMark Fasheh } 1383*ccd979bdSMark Fasheh 1384*ccd979bdSMark Fasheh clusters = le32_to_cpu(tl->tl_recs[i].t_clusters); 1385*ccd979bdSMark Fasheh start_cluster = le32_to_cpu(tl->tl_recs[i].t_start); 1386*ccd979bdSMark Fasheh start_blk = ocfs2_clusters_to_blocks(osb->sb, start_cluster); 1387*ccd979bdSMark Fasheh 
1388*ccd979bdSMark Fasheh status = ocfs2_truncate_log_append(osb, handle, 1389*ccd979bdSMark Fasheh start_blk, clusters); 1390*ccd979bdSMark Fasheh ocfs2_commit_trans(handle); 1391*ccd979bdSMark Fasheh if (status < 0) { 1392*ccd979bdSMark Fasheh mlog_errno(status); 1393*ccd979bdSMark Fasheh goto bail_up; 1394*ccd979bdSMark Fasheh } 1395*ccd979bdSMark Fasheh } 1396*ccd979bdSMark Fasheh 1397*ccd979bdSMark Fasheh bail_up: 1398*ccd979bdSMark Fasheh up(&tl_inode->i_sem); 1399*ccd979bdSMark Fasheh 1400*ccd979bdSMark Fasheh mlog_exit(status); 1401*ccd979bdSMark Fasheh return status; 1402*ccd979bdSMark Fasheh } 1403*ccd979bdSMark Fasheh 1404*ccd979bdSMark Fasheh void ocfs2_truncate_log_shutdown(struct ocfs2_super *osb) 1405*ccd979bdSMark Fasheh { 1406*ccd979bdSMark Fasheh int status; 1407*ccd979bdSMark Fasheh struct inode *tl_inode = osb->osb_tl_inode; 1408*ccd979bdSMark Fasheh 1409*ccd979bdSMark Fasheh mlog_entry_void(); 1410*ccd979bdSMark Fasheh 1411*ccd979bdSMark Fasheh if (tl_inode) { 1412*ccd979bdSMark Fasheh cancel_delayed_work(&osb->osb_truncate_log_wq); 1413*ccd979bdSMark Fasheh flush_workqueue(ocfs2_wq); 1414*ccd979bdSMark Fasheh 1415*ccd979bdSMark Fasheh status = ocfs2_flush_truncate_log(osb); 1416*ccd979bdSMark Fasheh if (status < 0) 1417*ccd979bdSMark Fasheh mlog_errno(status); 1418*ccd979bdSMark Fasheh 1419*ccd979bdSMark Fasheh brelse(osb->osb_tl_bh); 1420*ccd979bdSMark Fasheh iput(osb->osb_tl_inode); 1421*ccd979bdSMark Fasheh } 1422*ccd979bdSMark Fasheh 1423*ccd979bdSMark Fasheh mlog_exit_void(); 1424*ccd979bdSMark Fasheh } 1425*ccd979bdSMark Fasheh 1426*ccd979bdSMark Fasheh int ocfs2_truncate_log_init(struct ocfs2_super *osb) 1427*ccd979bdSMark Fasheh { 1428*ccd979bdSMark Fasheh int status; 1429*ccd979bdSMark Fasheh struct inode *tl_inode = NULL; 1430*ccd979bdSMark Fasheh struct buffer_head *tl_bh = NULL; 1431*ccd979bdSMark Fasheh 1432*ccd979bdSMark Fasheh mlog_entry_void(); 1433*ccd979bdSMark Fasheh 1434*ccd979bdSMark Fasheh status = 
ocfs2_get_truncate_log_info(osb, 1435*ccd979bdSMark Fasheh osb->slot_num, 1436*ccd979bdSMark Fasheh &tl_inode, 1437*ccd979bdSMark Fasheh &tl_bh); 1438*ccd979bdSMark Fasheh if (status < 0) 1439*ccd979bdSMark Fasheh mlog_errno(status); 1440*ccd979bdSMark Fasheh 1441*ccd979bdSMark Fasheh /* ocfs2_truncate_log_shutdown keys on the existence of 1442*ccd979bdSMark Fasheh * osb->osb_tl_inode so we don't set any of the osb variables 1443*ccd979bdSMark Fasheh * until we're sure all is well. */ 1444*ccd979bdSMark Fasheh INIT_WORK(&osb->osb_truncate_log_wq, ocfs2_truncate_log_worker, osb); 1445*ccd979bdSMark Fasheh osb->osb_tl_bh = tl_bh; 1446*ccd979bdSMark Fasheh osb->osb_tl_inode = tl_inode; 1447*ccd979bdSMark Fasheh 1448*ccd979bdSMark Fasheh mlog_exit(status); 1449*ccd979bdSMark Fasheh return status; 1450*ccd979bdSMark Fasheh } 1451*ccd979bdSMark Fasheh 1452*ccd979bdSMark Fasheh /* This function will figure out whether the currently last extent 1453*ccd979bdSMark Fasheh * block will be deleted, and if it will, what the new last extent 1454*ccd979bdSMark Fasheh * block will be so we can update his h_next_leaf_blk field, as well 1455*ccd979bdSMark Fasheh * as the dinodes i_last_eb_blk */ 1456*ccd979bdSMark Fasheh static int ocfs2_find_new_last_ext_blk(struct ocfs2_super *osb, 1457*ccd979bdSMark Fasheh struct inode *inode, 1458*ccd979bdSMark Fasheh struct ocfs2_dinode *fe, 1459*ccd979bdSMark Fasheh u32 new_i_clusters, 1460*ccd979bdSMark Fasheh struct buffer_head *old_last_eb, 1461*ccd979bdSMark Fasheh struct buffer_head **new_last_eb) 1462*ccd979bdSMark Fasheh { 1463*ccd979bdSMark Fasheh int i, status = 0; 1464*ccd979bdSMark Fasheh u64 block = 0; 1465*ccd979bdSMark Fasheh struct ocfs2_extent_block *eb; 1466*ccd979bdSMark Fasheh struct ocfs2_extent_list *el; 1467*ccd979bdSMark Fasheh struct buffer_head *bh = NULL; 1468*ccd979bdSMark Fasheh 1469*ccd979bdSMark Fasheh *new_last_eb = NULL; 1470*ccd979bdSMark Fasheh 1471*ccd979bdSMark Fasheh if (!OCFS2_IS_VALID_DINODE(fe)) { 
1472*ccd979bdSMark Fasheh OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe); 1473*ccd979bdSMark Fasheh status = -EIO; 1474*ccd979bdSMark Fasheh goto bail; 1475*ccd979bdSMark Fasheh } 1476*ccd979bdSMark Fasheh 1477*ccd979bdSMark Fasheh /* we have no tree, so of course, no last_eb. */ 1478*ccd979bdSMark Fasheh if (!fe->id2.i_list.l_tree_depth) 1479*ccd979bdSMark Fasheh goto bail; 1480*ccd979bdSMark Fasheh 1481*ccd979bdSMark Fasheh /* trunc to zero special case - this makes tree_depth = 0 1482*ccd979bdSMark Fasheh * regardless of what it is. */ 1483*ccd979bdSMark Fasheh if (!new_i_clusters) 1484*ccd979bdSMark Fasheh goto bail; 1485*ccd979bdSMark Fasheh 1486*ccd979bdSMark Fasheh eb = (struct ocfs2_extent_block *) old_last_eb->b_data; 1487*ccd979bdSMark Fasheh el = &(eb->h_list); 1488*ccd979bdSMark Fasheh BUG_ON(!el->l_next_free_rec); 1489*ccd979bdSMark Fasheh 1490*ccd979bdSMark Fasheh /* Make sure that this guy will actually be empty after we 1491*ccd979bdSMark Fasheh * clear away the data. */ 1492*ccd979bdSMark Fasheh if (le32_to_cpu(el->l_recs[0].e_cpos) < new_i_clusters) 1493*ccd979bdSMark Fasheh goto bail; 1494*ccd979bdSMark Fasheh 1495*ccd979bdSMark Fasheh /* Ok, at this point, we know that last_eb will definitely 1496*ccd979bdSMark Fasheh * change, so lets traverse the tree and find the second to 1497*ccd979bdSMark Fasheh * last extent block. 
*/ 1498*ccd979bdSMark Fasheh el = &(fe->id2.i_list); 1499*ccd979bdSMark Fasheh /* go down the tree, */ 1500*ccd979bdSMark Fasheh do { 1501*ccd979bdSMark Fasheh for(i = (le16_to_cpu(el->l_next_free_rec) - 1); i >= 0; i--) { 1502*ccd979bdSMark Fasheh if (le32_to_cpu(el->l_recs[i].e_cpos) < 1503*ccd979bdSMark Fasheh new_i_clusters) { 1504*ccd979bdSMark Fasheh block = le64_to_cpu(el->l_recs[i].e_blkno); 1505*ccd979bdSMark Fasheh break; 1506*ccd979bdSMark Fasheh } 1507*ccd979bdSMark Fasheh } 1508*ccd979bdSMark Fasheh BUG_ON(i < 0); 1509*ccd979bdSMark Fasheh 1510*ccd979bdSMark Fasheh if (bh) { 1511*ccd979bdSMark Fasheh brelse(bh); 1512*ccd979bdSMark Fasheh bh = NULL; 1513*ccd979bdSMark Fasheh } 1514*ccd979bdSMark Fasheh 1515*ccd979bdSMark Fasheh status = ocfs2_read_block(osb, block, &bh, OCFS2_BH_CACHED, 1516*ccd979bdSMark Fasheh inode); 1517*ccd979bdSMark Fasheh if (status < 0) { 1518*ccd979bdSMark Fasheh mlog_errno(status); 1519*ccd979bdSMark Fasheh goto bail; 1520*ccd979bdSMark Fasheh } 1521*ccd979bdSMark Fasheh eb = (struct ocfs2_extent_block *) bh->b_data; 1522*ccd979bdSMark Fasheh el = &eb->h_list; 1523*ccd979bdSMark Fasheh if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { 1524*ccd979bdSMark Fasheh OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); 1525*ccd979bdSMark Fasheh status = -EIO; 1526*ccd979bdSMark Fasheh goto bail; 1527*ccd979bdSMark Fasheh } 1528*ccd979bdSMark Fasheh } while (el->l_tree_depth); 1529*ccd979bdSMark Fasheh 1530*ccd979bdSMark Fasheh *new_last_eb = bh; 1531*ccd979bdSMark Fasheh get_bh(*new_last_eb); 1532*ccd979bdSMark Fasheh mlog(0, "returning block %"MLFu64"\n", le64_to_cpu(eb->h_blkno)); 1533*ccd979bdSMark Fasheh bail: 1534*ccd979bdSMark Fasheh if (bh) 1535*ccd979bdSMark Fasheh brelse(bh); 1536*ccd979bdSMark Fasheh 1537*ccd979bdSMark Fasheh return status; 1538*ccd979bdSMark Fasheh } 1539*ccd979bdSMark Fasheh 1540*ccd979bdSMark Fasheh static int ocfs2_do_truncate(struct ocfs2_super *osb, 1541*ccd979bdSMark Fasheh unsigned int clusters_to_del, 
1542*ccd979bdSMark Fasheh struct inode *inode, 1543*ccd979bdSMark Fasheh struct buffer_head *fe_bh, 1544*ccd979bdSMark Fasheh struct buffer_head *old_last_eb_bh, 1545*ccd979bdSMark Fasheh struct ocfs2_journal_handle *handle, 1546*ccd979bdSMark Fasheh struct ocfs2_truncate_context *tc) 1547*ccd979bdSMark Fasheh { 1548*ccd979bdSMark Fasheh int status, i, depth; 1549*ccd979bdSMark Fasheh struct ocfs2_dinode *fe; 1550*ccd979bdSMark Fasheh struct ocfs2_extent_block *eb; 1551*ccd979bdSMark Fasheh struct ocfs2_extent_block *last_eb = NULL; 1552*ccd979bdSMark Fasheh struct ocfs2_extent_list *el; 1553*ccd979bdSMark Fasheh struct buffer_head *eb_bh = NULL; 1554*ccd979bdSMark Fasheh struct buffer_head *last_eb_bh = NULL; 1555*ccd979bdSMark Fasheh u64 next_eb = 0; 1556*ccd979bdSMark Fasheh u64 delete_blk = 0; 1557*ccd979bdSMark Fasheh 1558*ccd979bdSMark Fasheh fe = (struct ocfs2_dinode *) fe_bh->b_data; 1559*ccd979bdSMark Fasheh 1560*ccd979bdSMark Fasheh status = ocfs2_find_new_last_ext_blk(osb, 1561*ccd979bdSMark Fasheh inode, 1562*ccd979bdSMark Fasheh fe, 1563*ccd979bdSMark Fasheh le32_to_cpu(fe->i_clusters) - 1564*ccd979bdSMark Fasheh clusters_to_del, 1565*ccd979bdSMark Fasheh old_last_eb_bh, 1566*ccd979bdSMark Fasheh &last_eb_bh); 1567*ccd979bdSMark Fasheh if (status < 0) { 1568*ccd979bdSMark Fasheh mlog_errno(status); 1569*ccd979bdSMark Fasheh goto bail; 1570*ccd979bdSMark Fasheh } 1571*ccd979bdSMark Fasheh if (last_eb_bh) 1572*ccd979bdSMark Fasheh last_eb = (struct ocfs2_extent_block *) last_eb_bh->b_data; 1573*ccd979bdSMark Fasheh 1574*ccd979bdSMark Fasheh status = ocfs2_journal_access(handle, inode, fe_bh, 1575*ccd979bdSMark Fasheh OCFS2_JOURNAL_ACCESS_WRITE); 1576*ccd979bdSMark Fasheh if (status < 0) { 1577*ccd979bdSMark Fasheh mlog_errno(status); 1578*ccd979bdSMark Fasheh goto bail; 1579*ccd979bdSMark Fasheh } 1580*ccd979bdSMark Fasheh el = &(fe->id2.i_list); 1581*ccd979bdSMark Fasheh 1582*ccd979bdSMark Fasheh spin_lock(&OCFS2_I(inode)->ip_lock); 1583*ccd979bdSMark 
Fasheh OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters) - 1584*ccd979bdSMark Fasheh clusters_to_del; 1585*ccd979bdSMark Fasheh spin_unlock(&OCFS2_I(inode)->ip_lock); 1586*ccd979bdSMark Fasheh le32_add_cpu(&fe->i_clusters, -clusters_to_del); 1587*ccd979bdSMark Fasheh fe->i_mtime = cpu_to_le64(CURRENT_TIME.tv_sec); 1588*ccd979bdSMark Fasheh fe->i_mtime_nsec = cpu_to_le32(CURRENT_TIME.tv_nsec); 1589*ccd979bdSMark Fasheh 1590*ccd979bdSMark Fasheh i = le16_to_cpu(el->l_next_free_rec) - 1; 1591*ccd979bdSMark Fasheh 1592*ccd979bdSMark Fasheh BUG_ON(le32_to_cpu(el->l_recs[i].e_clusters) < clusters_to_del); 1593*ccd979bdSMark Fasheh le32_add_cpu(&el->l_recs[i].e_clusters, -clusters_to_del); 1594*ccd979bdSMark Fasheh /* tree depth zero, we can just delete the clusters, otherwise 1595*ccd979bdSMark Fasheh * we need to record the offset of the next level extent block 1596*ccd979bdSMark Fasheh * as we may overwrite it. */ 1597*ccd979bdSMark Fasheh if (!el->l_tree_depth) 1598*ccd979bdSMark Fasheh delete_blk = le64_to_cpu(el->l_recs[i].e_blkno) 1599*ccd979bdSMark Fasheh + ocfs2_clusters_to_blocks(osb->sb, 1600*ccd979bdSMark Fasheh le32_to_cpu(el->l_recs[i].e_clusters)); 1601*ccd979bdSMark Fasheh else 1602*ccd979bdSMark Fasheh next_eb = le64_to_cpu(el->l_recs[i].e_blkno); 1603*ccd979bdSMark Fasheh 1604*ccd979bdSMark Fasheh if (!el->l_recs[i].e_clusters) { 1605*ccd979bdSMark Fasheh /* if we deleted the whole extent record, then clear 1606*ccd979bdSMark Fasheh * out the other fields and update the extent 1607*ccd979bdSMark Fasheh * list. 
For depth > 0 trees, we've already recorded 1608*ccd979bdSMark Fasheh * the extent block in 'next_eb' */ 1609*ccd979bdSMark Fasheh el->l_recs[i].e_cpos = 0; 1610*ccd979bdSMark Fasheh el->l_recs[i].e_blkno = 0; 1611*ccd979bdSMark Fasheh BUG_ON(!el->l_next_free_rec); 1612*ccd979bdSMark Fasheh le16_add_cpu(&el->l_next_free_rec, -1); 1613*ccd979bdSMark Fasheh } 1614*ccd979bdSMark Fasheh 1615*ccd979bdSMark Fasheh depth = le16_to_cpu(el->l_tree_depth); 1616*ccd979bdSMark Fasheh if (!fe->i_clusters) { 1617*ccd979bdSMark Fasheh /* trunc to zero is a special case. */ 1618*ccd979bdSMark Fasheh el->l_tree_depth = 0; 1619*ccd979bdSMark Fasheh fe->i_last_eb_blk = 0; 1620*ccd979bdSMark Fasheh } else if (last_eb) 1621*ccd979bdSMark Fasheh fe->i_last_eb_blk = last_eb->h_blkno; 1622*ccd979bdSMark Fasheh 1623*ccd979bdSMark Fasheh status = ocfs2_journal_dirty(handle, fe_bh); 1624*ccd979bdSMark Fasheh if (status < 0) { 1625*ccd979bdSMark Fasheh mlog_errno(status); 1626*ccd979bdSMark Fasheh goto bail; 1627*ccd979bdSMark Fasheh } 1628*ccd979bdSMark Fasheh 1629*ccd979bdSMark Fasheh if (last_eb) { 1630*ccd979bdSMark Fasheh /* If there will be a new last extent block, then by 1631*ccd979bdSMark Fasheh * definition, there cannot be any leaves to the right of 1632*ccd979bdSMark Fasheh * him. 
*/ 1633*ccd979bdSMark Fasheh status = ocfs2_journal_access(handle, inode, last_eb_bh, 1634*ccd979bdSMark Fasheh OCFS2_JOURNAL_ACCESS_WRITE); 1635*ccd979bdSMark Fasheh if (status < 0) { 1636*ccd979bdSMark Fasheh mlog_errno(status); 1637*ccd979bdSMark Fasheh goto bail; 1638*ccd979bdSMark Fasheh } 1639*ccd979bdSMark Fasheh last_eb->h_next_leaf_blk = 0; 1640*ccd979bdSMark Fasheh status = ocfs2_journal_dirty(handle, last_eb_bh); 1641*ccd979bdSMark Fasheh if (status < 0) { 1642*ccd979bdSMark Fasheh mlog_errno(status); 1643*ccd979bdSMark Fasheh goto bail; 1644*ccd979bdSMark Fasheh } 1645*ccd979bdSMark Fasheh } 1646*ccd979bdSMark Fasheh 1647*ccd979bdSMark Fasheh /* if our tree depth > 0, update all the tree blocks below us. */ 1648*ccd979bdSMark Fasheh while (depth) { 1649*ccd979bdSMark Fasheh mlog(0, "traveling tree (depth = %d, next_eb = %"MLFu64")\n", 1650*ccd979bdSMark Fasheh depth, next_eb); 1651*ccd979bdSMark Fasheh status = ocfs2_read_block(osb, next_eb, &eb_bh, 1652*ccd979bdSMark Fasheh OCFS2_BH_CACHED, inode); 1653*ccd979bdSMark Fasheh if (status < 0) { 1654*ccd979bdSMark Fasheh mlog_errno(status); 1655*ccd979bdSMark Fasheh goto bail; 1656*ccd979bdSMark Fasheh } 1657*ccd979bdSMark Fasheh eb = (struct ocfs2_extent_block *)eb_bh->b_data; 1658*ccd979bdSMark Fasheh if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { 1659*ccd979bdSMark Fasheh OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); 1660*ccd979bdSMark Fasheh status = -EIO; 1661*ccd979bdSMark Fasheh goto bail; 1662*ccd979bdSMark Fasheh } 1663*ccd979bdSMark Fasheh el = &(eb->h_list); 1664*ccd979bdSMark Fasheh 1665*ccd979bdSMark Fasheh status = ocfs2_journal_access(handle, inode, eb_bh, 1666*ccd979bdSMark Fasheh OCFS2_JOURNAL_ACCESS_WRITE); 1667*ccd979bdSMark Fasheh if (status < 0) { 1668*ccd979bdSMark Fasheh mlog_errno(status); 1669*ccd979bdSMark Fasheh goto bail; 1670*ccd979bdSMark Fasheh } 1671*ccd979bdSMark Fasheh 1672*ccd979bdSMark Fasheh BUG_ON(le16_to_cpu(el->l_next_free_rec) == 0); 1673*ccd979bdSMark Fasheh 
BUG_ON(depth != (le16_to_cpu(el->l_tree_depth) + 1)); 1674*ccd979bdSMark Fasheh 1675*ccd979bdSMark Fasheh i = le16_to_cpu(el->l_next_free_rec) - 1; 1676*ccd979bdSMark Fasheh 1677*ccd979bdSMark Fasheh mlog(0, "extent block %"MLFu64", before: record %d: " 1678*ccd979bdSMark Fasheh "(%u, %u, %"MLFu64"), next = %u\n", 1679*ccd979bdSMark Fasheh le64_to_cpu(eb->h_blkno), i, 1680*ccd979bdSMark Fasheh le32_to_cpu(el->l_recs[i].e_cpos), 1681*ccd979bdSMark Fasheh le32_to_cpu(el->l_recs[i].e_clusters), 1682*ccd979bdSMark Fasheh le64_to_cpu(el->l_recs[i].e_blkno), 1683*ccd979bdSMark Fasheh le16_to_cpu(el->l_next_free_rec)); 1684*ccd979bdSMark Fasheh 1685*ccd979bdSMark Fasheh BUG_ON(le32_to_cpu(el->l_recs[i].e_clusters) < clusters_to_del); 1686*ccd979bdSMark Fasheh le32_add_cpu(&el->l_recs[i].e_clusters, -clusters_to_del); 1687*ccd979bdSMark Fasheh 1688*ccd979bdSMark Fasheh next_eb = le64_to_cpu(el->l_recs[i].e_blkno); 1689*ccd979bdSMark Fasheh /* bottom-most block requires us to delete data.*/ 1690*ccd979bdSMark Fasheh if (!el->l_tree_depth) 1691*ccd979bdSMark Fasheh delete_blk = le64_to_cpu(el->l_recs[i].e_blkno) 1692*ccd979bdSMark Fasheh + ocfs2_clusters_to_blocks(osb->sb, 1693*ccd979bdSMark Fasheh le32_to_cpu(el->l_recs[i].e_clusters)); 1694*ccd979bdSMark Fasheh if (!el->l_recs[i].e_clusters) { 1695*ccd979bdSMark Fasheh el->l_recs[i].e_cpos = 0; 1696*ccd979bdSMark Fasheh el->l_recs[i].e_blkno = 0; 1697*ccd979bdSMark Fasheh BUG_ON(!el->l_next_free_rec); 1698*ccd979bdSMark Fasheh le16_add_cpu(&el->l_next_free_rec, -1); 1699*ccd979bdSMark Fasheh } 1700*ccd979bdSMark Fasheh mlog(0, "extent block %"MLFu64", after: record %d: " 1701*ccd979bdSMark Fasheh "(%u, %u, %"MLFu64"), next = %u\n", 1702*ccd979bdSMark Fasheh le64_to_cpu(eb->h_blkno), i, 1703*ccd979bdSMark Fasheh le32_to_cpu(el->l_recs[i].e_cpos), 1704*ccd979bdSMark Fasheh le32_to_cpu(el->l_recs[i].e_clusters), 1705*ccd979bdSMark Fasheh le64_to_cpu(el->l_recs[i].e_blkno), 1706*ccd979bdSMark Fasheh 
le16_to_cpu(el->l_next_free_rec)); 1707*ccd979bdSMark Fasheh 1708*ccd979bdSMark Fasheh status = ocfs2_journal_dirty(handle, eb_bh); 1709*ccd979bdSMark Fasheh if (status < 0) { 1710*ccd979bdSMark Fasheh mlog_errno(status); 1711*ccd979bdSMark Fasheh goto bail; 1712*ccd979bdSMark Fasheh } 1713*ccd979bdSMark Fasheh 1714*ccd979bdSMark Fasheh if (!el->l_next_free_rec) { 1715*ccd979bdSMark Fasheh mlog(0, "deleting this extent block.\n"); 1716*ccd979bdSMark Fasheh 1717*ccd979bdSMark Fasheh ocfs2_remove_from_cache(inode, eb_bh); 1718*ccd979bdSMark Fasheh 1719*ccd979bdSMark Fasheh BUG_ON(eb->h_suballoc_slot); 1720*ccd979bdSMark Fasheh BUG_ON(el->l_recs[0].e_clusters); 1721*ccd979bdSMark Fasheh BUG_ON(el->l_recs[0].e_cpos); 1722*ccd979bdSMark Fasheh BUG_ON(el->l_recs[0].e_blkno); 1723*ccd979bdSMark Fasheh status = ocfs2_free_extent_block(handle, 1724*ccd979bdSMark Fasheh tc->tc_ext_alloc_inode, 1725*ccd979bdSMark Fasheh tc->tc_ext_alloc_bh, 1726*ccd979bdSMark Fasheh eb); 1727*ccd979bdSMark Fasheh if (status < 0) { 1728*ccd979bdSMark Fasheh mlog_errno(status); 1729*ccd979bdSMark Fasheh goto bail; 1730*ccd979bdSMark Fasheh } 1731*ccd979bdSMark Fasheh } 1732*ccd979bdSMark Fasheh brelse(eb_bh); 1733*ccd979bdSMark Fasheh eb_bh = NULL; 1734*ccd979bdSMark Fasheh depth--; 1735*ccd979bdSMark Fasheh } 1736*ccd979bdSMark Fasheh 1737*ccd979bdSMark Fasheh BUG_ON(!delete_blk); 1738*ccd979bdSMark Fasheh status = ocfs2_truncate_log_append(osb, handle, delete_blk, 1739*ccd979bdSMark Fasheh clusters_to_del); 1740*ccd979bdSMark Fasheh if (status < 0) { 1741*ccd979bdSMark Fasheh mlog_errno(status); 1742*ccd979bdSMark Fasheh goto bail; 1743*ccd979bdSMark Fasheh } 1744*ccd979bdSMark Fasheh status = 0; 1745*ccd979bdSMark Fasheh bail: 1746*ccd979bdSMark Fasheh if (!status) 1747*ccd979bdSMark Fasheh ocfs2_extent_map_trunc(inode, le32_to_cpu(fe->i_clusters)); 1748*ccd979bdSMark Fasheh else 1749*ccd979bdSMark Fasheh ocfs2_extent_map_drop(inode, 0); 1750*ccd979bdSMark Fasheh mlog_exit(status); 
1751*ccd979bdSMark Fasheh return status; 1752*ccd979bdSMark Fasheh } 1753*ccd979bdSMark Fasheh 1754*ccd979bdSMark Fasheh /* 1755*ccd979bdSMark Fasheh * It is expected, that by the time you call this function, 1756*ccd979bdSMark Fasheh * inode->i_size and fe->i_size have been adjusted. 1757*ccd979bdSMark Fasheh * 1758*ccd979bdSMark Fasheh * WARNING: This will kfree the truncate context 1759*ccd979bdSMark Fasheh */ 1760*ccd979bdSMark Fasheh int ocfs2_commit_truncate(struct ocfs2_super *osb, 1761*ccd979bdSMark Fasheh struct inode *inode, 1762*ccd979bdSMark Fasheh struct buffer_head *fe_bh, 1763*ccd979bdSMark Fasheh struct ocfs2_truncate_context *tc) 1764*ccd979bdSMark Fasheh { 1765*ccd979bdSMark Fasheh int status, i, credits, tl_sem = 0; 1766*ccd979bdSMark Fasheh u32 clusters_to_del, target_i_clusters; 1767*ccd979bdSMark Fasheh u64 last_eb = 0; 1768*ccd979bdSMark Fasheh struct ocfs2_dinode *fe; 1769*ccd979bdSMark Fasheh struct ocfs2_extent_block *eb; 1770*ccd979bdSMark Fasheh struct ocfs2_extent_list *el; 1771*ccd979bdSMark Fasheh struct buffer_head *last_eb_bh; 1772*ccd979bdSMark Fasheh struct ocfs2_journal_handle *handle = NULL; 1773*ccd979bdSMark Fasheh struct inode *tl_inode = osb->osb_tl_inode; 1774*ccd979bdSMark Fasheh 1775*ccd979bdSMark Fasheh mlog_entry_void(); 1776*ccd979bdSMark Fasheh 1777*ccd979bdSMark Fasheh down_write(&OCFS2_I(inode)->ip_alloc_sem); 1778*ccd979bdSMark Fasheh 1779*ccd979bdSMark Fasheh target_i_clusters = ocfs2_clusters_for_bytes(osb->sb, 1780*ccd979bdSMark Fasheh i_size_read(inode)); 1781*ccd979bdSMark Fasheh 1782*ccd979bdSMark Fasheh last_eb_bh = tc->tc_last_eb_bh; 1783*ccd979bdSMark Fasheh tc->tc_last_eb_bh = NULL; 1784*ccd979bdSMark Fasheh 1785*ccd979bdSMark Fasheh fe = (struct ocfs2_dinode *) fe_bh->b_data; 1786*ccd979bdSMark Fasheh 1787*ccd979bdSMark Fasheh if (fe->id2.i_list.l_tree_depth) { 1788*ccd979bdSMark Fasheh eb = (struct ocfs2_extent_block *) last_eb_bh->b_data; 1789*ccd979bdSMark Fasheh el = &eb->h_list; 1790*ccd979bdSMark 
Fasheh } else 1791*ccd979bdSMark Fasheh el = &fe->id2.i_list; 1792*ccd979bdSMark Fasheh last_eb = le64_to_cpu(fe->i_last_eb_blk); 1793*ccd979bdSMark Fasheh start: 1794*ccd979bdSMark Fasheh mlog(0, "ocfs2_commit_truncate: fe->i_clusters = %u, " 1795*ccd979bdSMark Fasheh "last_eb = %"MLFu64", fe->i_last_eb_blk = %"MLFu64", " 1796*ccd979bdSMark Fasheh "fe->id2.i_list.l_tree_depth = %u last_eb_bh = %p\n", 1797*ccd979bdSMark Fasheh le32_to_cpu(fe->i_clusters), last_eb, 1798*ccd979bdSMark Fasheh le64_to_cpu(fe->i_last_eb_blk), 1799*ccd979bdSMark Fasheh le16_to_cpu(fe->id2.i_list.l_tree_depth), last_eb_bh); 1800*ccd979bdSMark Fasheh 1801*ccd979bdSMark Fasheh if (last_eb != le64_to_cpu(fe->i_last_eb_blk)) { 1802*ccd979bdSMark Fasheh mlog(0, "last_eb changed!\n"); 1803*ccd979bdSMark Fasheh BUG_ON(!fe->id2.i_list.l_tree_depth); 1804*ccd979bdSMark Fasheh last_eb = le64_to_cpu(fe->i_last_eb_blk); 1805*ccd979bdSMark Fasheh /* i_last_eb_blk may have changed, read it if 1806*ccd979bdSMark Fasheh * necessary. We don't have to worry about the 1807*ccd979bdSMark Fasheh * truncate to zero case here (where there becomes no 1808*ccd979bdSMark Fasheh * last_eb) because we never loop back after our work 1809*ccd979bdSMark Fasheh * is done. 
*/ 1810*ccd979bdSMark Fasheh if (last_eb_bh) { 1811*ccd979bdSMark Fasheh brelse(last_eb_bh); 1812*ccd979bdSMark Fasheh last_eb_bh = NULL; 1813*ccd979bdSMark Fasheh } 1814*ccd979bdSMark Fasheh 1815*ccd979bdSMark Fasheh status = ocfs2_read_block(osb, last_eb, 1816*ccd979bdSMark Fasheh &last_eb_bh, OCFS2_BH_CACHED, 1817*ccd979bdSMark Fasheh inode); 1818*ccd979bdSMark Fasheh if (status < 0) { 1819*ccd979bdSMark Fasheh mlog_errno(status); 1820*ccd979bdSMark Fasheh goto bail; 1821*ccd979bdSMark Fasheh } 1822*ccd979bdSMark Fasheh eb = (struct ocfs2_extent_block *) last_eb_bh->b_data; 1823*ccd979bdSMark Fasheh if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { 1824*ccd979bdSMark Fasheh OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); 1825*ccd979bdSMark Fasheh status = -EIO; 1826*ccd979bdSMark Fasheh goto bail; 1827*ccd979bdSMark Fasheh } 1828*ccd979bdSMark Fasheh el = &(eb->h_list); 1829*ccd979bdSMark Fasheh } 1830*ccd979bdSMark Fasheh 1831*ccd979bdSMark Fasheh /* by now, el will point to the extent list on the bottom most 1832*ccd979bdSMark Fasheh * portion of this tree. */ 1833*ccd979bdSMark Fasheh i = le16_to_cpu(el->l_next_free_rec) - 1; 1834*ccd979bdSMark Fasheh if (le32_to_cpu(el->l_recs[i].e_cpos) >= target_i_clusters) 1835*ccd979bdSMark Fasheh clusters_to_del = le32_to_cpu(el->l_recs[i].e_clusters); 1836*ccd979bdSMark Fasheh else 1837*ccd979bdSMark Fasheh clusters_to_del = (le32_to_cpu(el->l_recs[i].e_clusters) + 1838*ccd979bdSMark Fasheh le32_to_cpu(el->l_recs[i].e_cpos)) - 1839*ccd979bdSMark Fasheh target_i_clusters; 1840*ccd979bdSMark Fasheh 1841*ccd979bdSMark Fasheh mlog(0, "clusters_to_del = %u in this pass\n", clusters_to_del); 1842*ccd979bdSMark Fasheh 1843*ccd979bdSMark Fasheh down(&tl_inode->i_sem); 1844*ccd979bdSMark Fasheh tl_sem = 1; 1845*ccd979bdSMark Fasheh /* ocfs2_truncate_log_needs_flush guarantees us at least one 1846*ccd979bdSMark Fasheh * record is free for use. If there isn't any, we flush to get 1847*ccd979bdSMark Fasheh * an empty truncate log. 
*/ 1848*ccd979bdSMark Fasheh if (ocfs2_truncate_log_needs_flush(osb)) { 1849*ccd979bdSMark Fasheh status = __ocfs2_flush_truncate_log(osb); 1850*ccd979bdSMark Fasheh if (status < 0) { 1851*ccd979bdSMark Fasheh mlog_errno(status); 1852*ccd979bdSMark Fasheh goto bail; 1853*ccd979bdSMark Fasheh } 1854*ccd979bdSMark Fasheh } 1855*ccd979bdSMark Fasheh 1856*ccd979bdSMark Fasheh credits = ocfs2_calc_tree_trunc_credits(osb->sb, clusters_to_del, 1857*ccd979bdSMark Fasheh fe, el); 1858*ccd979bdSMark Fasheh handle = ocfs2_start_trans(osb, NULL, credits); 1859*ccd979bdSMark Fasheh if (IS_ERR(handle)) { 1860*ccd979bdSMark Fasheh status = PTR_ERR(handle); 1861*ccd979bdSMark Fasheh handle = NULL; 1862*ccd979bdSMark Fasheh mlog_errno(status); 1863*ccd979bdSMark Fasheh goto bail; 1864*ccd979bdSMark Fasheh } 1865*ccd979bdSMark Fasheh 1866*ccd979bdSMark Fasheh inode->i_ctime = inode->i_mtime = CURRENT_TIME; 1867*ccd979bdSMark Fasheh status = ocfs2_mark_inode_dirty(handle, inode, fe_bh); 1868*ccd979bdSMark Fasheh if (status < 0) 1869*ccd979bdSMark Fasheh mlog_errno(status); 1870*ccd979bdSMark Fasheh 1871*ccd979bdSMark Fasheh status = ocfs2_do_truncate(osb, clusters_to_del, inode, fe_bh, 1872*ccd979bdSMark Fasheh last_eb_bh, handle, tc); 1873*ccd979bdSMark Fasheh if (status < 0) { 1874*ccd979bdSMark Fasheh mlog_errno(status); 1875*ccd979bdSMark Fasheh goto bail; 1876*ccd979bdSMark Fasheh } 1877*ccd979bdSMark Fasheh 1878*ccd979bdSMark Fasheh up(&tl_inode->i_sem); 1879*ccd979bdSMark Fasheh tl_sem = 0; 1880*ccd979bdSMark Fasheh 1881*ccd979bdSMark Fasheh ocfs2_commit_trans(handle); 1882*ccd979bdSMark Fasheh handle = NULL; 1883*ccd979bdSMark Fasheh 1884*ccd979bdSMark Fasheh BUG_ON(le32_to_cpu(fe->i_clusters) < target_i_clusters); 1885*ccd979bdSMark Fasheh if (le32_to_cpu(fe->i_clusters) > target_i_clusters) 1886*ccd979bdSMark Fasheh goto start; 1887*ccd979bdSMark Fasheh bail: 1888*ccd979bdSMark Fasheh up_write(&OCFS2_I(inode)->ip_alloc_sem); 1889*ccd979bdSMark Fasheh 1890*ccd979bdSMark 
Fasheh ocfs2_schedule_truncate_log_flush(osb, 1); 1891*ccd979bdSMark Fasheh 1892*ccd979bdSMark Fasheh if (tl_sem) 1893*ccd979bdSMark Fasheh up(&tl_inode->i_sem); 1894*ccd979bdSMark Fasheh 1895*ccd979bdSMark Fasheh if (handle) 1896*ccd979bdSMark Fasheh ocfs2_commit_trans(handle); 1897*ccd979bdSMark Fasheh 1898*ccd979bdSMark Fasheh if (last_eb_bh) 1899*ccd979bdSMark Fasheh brelse(last_eb_bh); 1900*ccd979bdSMark Fasheh 1901*ccd979bdSMark Fasheh /* This will drop the ext_alloc cluster lock for us */ 1902*ccd979bdSMark Fasheh ocfs2_free_truncate_context(tc); 1903*ccd979bdSMark Fasheh 1904*ccd979bdSMark Fasheh mlog_exit(status); 1905*ccd979bdSMark Fasheh return status; 1906*ccd979bdSMark Fasheh } 1907*ccd979bdSMark Fasheh 1908*ccd979bdSMark Fasheh 1909*ccd979bdSMark Fasheh /* 1910*ccd979bdSMark Fasheh * Expects the inode to already be locked. This will figure out which 1911*ccd979bdSMark Fasheh * inodes need to be locked and will put them on the returned truncate 1912*ccd979bdSMark Fasheh * context. 
1913*ccd979bdSMark Fasheh */ 1914*ccd979bdSMark Fasheh int ocfs2_prepare_truncate(struct ocfs2_super *osb, 1915*ccd979bdSMark Fasheh struct inode *inode, 1916*ccd979bdSMark Fasheh struct buffer_head *fe_bh, 1917*ccd979bdSMark Fasheh struct ocfs2_truncate_context **tc) 1918*ccd979bdSMark Fasheh { 1919*ccd979bdSMark Fasheh int status, metadata_delete; 1920*ccd979bdSMark Fasheh unsigned int new_i_clusters; 1921*ccd979bdSMark Fasheh struct ocfs2_dinode *fe; 1922*ccd979bdSMark Fasheh struct ocfs2_extent_block *eb; 1923*ccd979bdSMark Fasheh struct ocfs2_extent_list *el; 1924*ccd979bdSMark Fasheh struct buffer_head *last_eb_bh = NULL; 1925*ccd979bdSMark Fasheh struct inode *ext_alloc_inode = NULL; 1926*ccd979bdSMark Fasheh struct buffer_head *ext_alloc_bh = NULL; 1927*ccd979bdSMark Fasheh 1928*ccd979bdSMark Fasheh mlog_entry_void(); 1929*ccd979bdSMark Fasheh 1930*ccd979bdSMark Fasheh *tc = NULL; 1931*ccd979bdSMark Fasheh 1932*ccd979bdSMark Fasheh new_i_clusters = ocfs2_clusters_for_bytes(osb->sb, 1933*ccd979bdSMark Fasheh i_size_read(inode)); 1934*ccd979bdSMark Fasheh fe = (struct ocfs2_dinode *) fe_bh->b_data; 1935*ccd979bdSMark Fasheh 1936*ccd979bdSMark Fasheh mlog(0, "fe->i_clusters = %u, new_i_clusters = %u, fe->i_size =" 1937*ccd979bdSMark Fasheh "%"MLFu64"\n", fe->i_clusters, new_i_clusters, fe->i_size); 1938*ccd979bdSMark Fasheh 1939*ccd979bdSMark Fasheh if (le32_to_cpu(fe->i_clusters) <= new_i_clusters) { 1940*ccd979bdSMark Fasheh ocfs2_error(inode->i_sb, "Dinode %"MLFu64" has cluster count " 1941*ccd979bdSMark Fasheh "%u and size %"MLFu64" whereas struct inode has " 1942*ccd979bdSMark Fasheh "cluster count %u and size %llu which caused an " 1943*ccd979bdSMark Fasheh "invalid truncate to %u clusters.", 1944*ccd979bdSMark Fasheh le64_to_cpu(fe->i_blkno), 1945*ccd979bdSMark Fasheh le32_to_cpu(fe->i_clusters), 1946*ccd979bdSMark Fasheh le64_to_cpu(fe->i_size), 1947*ccd979bdSMark Fasheh OCFS2_I(inode)->ip_clusters, i_size_read(inode), 1948*ccd979bdSMark Fasheh 
new_i_clusters); 1949*ccd979bdSMark Fasheh mlog_meta_lvb(ML_ERROR, &OCFS2_I(inode)->ip_meta_lockres); 1950*ccd979bdSMark Fasheh status = -EIO; 1951*ccd979bdSMark Fasheh goto bail; 1952*ccd979bdSMark Fasheh } 1953*ccd979bdSMark Fasheh 1954*ccd979bdSMark Fasheh *tc = kcalloc(1, sizeof(struct ocfs2_truncate_context), GFP_KERNEL); 1955*ccd979bdSMark Fasheh if (!(*tc)) { 1956*ccd979bdSMark Fasheh status = -ENOMEM; 1957*ccd979bdSMark Fasheh mlog_errno(status); 1958*ccd979bdSMark Fasheh goto bail; 1959*ccd979bdSMark Fasheh } 1960*ccd979bdSMark Fasheh 1961*ccd979bdSMark Fasheh metadata_delete = 0; 1962*ccd979bdSMark Fasheh if (fe->id2.i_list.l_tree_depth) { 1963*ccd979bdSMark Fasheh /* If we have a tree, then the truncate may result in 1964*ccd979bdSMark Fasheh * metadata deletes. Figure this out from the 1965*ccd979bdSMark Fasheh * rightmost leaf block.*/ 1966*ccd979bdSMark Fasheh status = ocfs2_read_block(osb, le64_to_cpu(fe->i_last_eb_blk), 1967*ccd979bdSMark Fasheh &last_eb_bh, OCFS2_BH_CACHED, inode); 1968*ccd979bdSMark Fasheh if (status < 0) { 1969*ccd979bdSMark Fasheh mlog_errno(status); 1970*ccd979bdSMark Fasheh goto bail; 1971*ccd979bdSMark Fasheh } 1972*ccd979bdSMark Fasheh eb = (struct ocfs2_extent_block *) last_eb_bh->b_data; 1973*ccd979bdSMark Fasheh if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { 1974*ccd979bdSMark Fasheh OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); 1975*ccd979bdSMark Fasheh 1976*ccd979bdSMark Fasheh brelse(last_eb_bh); 1977*ccd979bdSMark Fasheh status = -EIO; 1978*ccd979bdSMark Fasheh goto bail; 1979*ccd979bdSMark Fasheh } 1980*ccd979bdSMark Fasheh el = &(eb->h_list); 1981*ccd979bdSMark Fasheh if (le32_to_cpu(el->l_recs[0].e_cpos) >= new_i_clusters) 1982*ccd979bdSMark Fasheh metadata_delete = 1; 1983*ccd979bdSMark Fasheh } 1984*ccd979bdSMark Fasheh 1985*ccd979bdSMark Fasheh (*tc)->tc_last_eb_bh = last_eb_bh; 1986*ccd979bdSMark Fasheh 1987*ccd979bdSMark Fasheh if (metadata_delete) { 1988*ccd979bdSMark Fasheh mlog(0, "Will have to delete 
metadata for this trunc. " 1989*ccd979bdSMark Fasheh "locking allocator.\n"); 1990*ccd979bdSMark Fasheh ext_alloc_inode = ocfs2_get_system_file_inode(osb, EXTENT_ALLOC_SYSTEM_INODE, 0); 1991*ccd979bdSMark Fasheh if (!ext_alloc_inode) { 1992*ccd979bdSMark Fasheh status = -ENOMEM; 1993*ccd979bdSMark Fasheh mlog_errno(status); 1994*ccd979bdSMark Fasheh goto bail; 1995*ccd979bdSMark Fasheh } 1996*ccd979bdSMark Fasheh 1997*ccd979bdSMark Fasheh down(&ext_alloc_inode->i_sem); 1998*ccd979bdSMark Fasheh (*tc)->tc_ext_alloc_inode = ext_alloc_inode; 1999*ccd979bdSMark Fasheh 2000*ccd979bdSMark Fasheh status = ocfs2_meta_lock(ext_alloc_inode, 2001*ccd979bdSMark Fasheh NULL, 2002*ccd979bdSMark Fasheh &ext_alloc_bh, 2003*ccd979bdSMark Fasheh 1); 2004*ccd979bdSMark Fasheh if (status < 0) { 2005*ccd979bdSMark Fasheh mlog_errno(status); 2006*ccd979bdSMark Fasheh goto bail; 2007*ccd979bdSMark Fasheh } 2008*ccd979bdSMark Fasheh (*tc)->tc_ext_alloc_bh = ext_alloc_bh; 2009*ccd979bdSMark Fasheh (*tc)->tc_ext_alloc_locked = 1; 2010*ccd979bdSMark Fasheh } 2011*ccd979bdSMark Fasheh 2012*ccd979bdSMark Fasheh status = 0; 2013*ccd979bdSMark Fasheh bail: 2014*ccd979bdSMark Fasheh if (status < 0) { 2015*ccd979bdSMark Fasheh if (*tc) 2016*ccd979bdSMark Fasheh ocfs2_free_truncate_context(*tc); 2017*ccd979bdSMark Fasheh *tc = NULL; 2018*ccd979bdSMark Fasheh } 2019*ccd979bdSMark Fasheh mlog_exit_void(); 2020*ccd979bdSMark Fasheh return status; 2021*ccd979bdSMark Fasheh } 2022*ccd979bdSMark Fasheh 2023*ccd979bdSMark Fasheh static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc) 2024*ccd979bdSMark Fasheh { 2025*ccd979bdSMark Fasheh if (tc->tc_ext_alloc_inode) { 2026*ccd979bdSMark Fasheh if (tc->tc_ext_alloc_locked) 2027*ccd979bdSMark Fasheh ocfs2_meta_unlock(tc->tc_ext_alloc_inode, 1); 2028*ccd979bdSMark Fasheh 2029*ccd979bdSMark Fasheh up(&tc->tc_ext_alloc_inode->i_sem); 2030*ccd979bdSMark Fasheh iput(tc->tc_ext_alloc_inode); 2031*ccd979bdSMark Fasheh } 2032*ccd979bdSMark Fasheh 
2033*ccd979bdSMark Fasheh if (tc->tc_ext_alloc_bh) 2034*ccd979bdSMark Fasheh brelse(tc->tc_ext_alloc_bh); 2035*ccd979bdSMark Fasheh 2036*ccd979bdSMark Fasheh if (tc->tc_last_eb_bh) 2037*ccd979bdSMark Fasheh brelse(tc->tc_last_eb_bh); 2038*ccd979bdSMark Fasheh 2039*ccd979bdSMark Fasheh kfree(tc); 2040*ccd979bdSMark Fasheh } 2041