/* -*- mode: c; c-basic-offset: 8; -*-
 * vim: noexpandtab sw=8 ts=8 sts=0:
 *
 * alloc.c
 *
 * Extent allocs and frees
 *
 * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
24ccd979bdSMark Fasheh */ 25ccd979bdSMark Fasheh 26ccd979bdSMark Fasheh #include <linux/fs.h> 27ccd979bdSMark Fasheh #include <linux/types.h> 28ccd979bdSMark Fasheh #include <linux/slab.h> 29ccd979bdSMark Fasheh #include <linux/highmem.h> 30ccd979bdSMark Fasheh 31ccd979bdSMark Fasheh #define MLOG_MASK_PREFIX ML_DISK_ALLOC 32ccd979bdSMark Fasheh #include <cluster/masklog.h> 33ccd979bdSMark Fasheh 34ccd979bdSMark Fasheh #include "ocfs2.h" 35ccd979bdSMark Fasheh 36ccd979bdSMark Fasheh #include "alloc.h" 37ccd979bdSMark Fasheh #include "dlmglue.h" 38ccd979bdSMark Fasheh #include "extent_map.h" 39ccd979bdSMark Fasheh #include "inode.h" 40ccd979bdSMark Fasheh #include "journal.h" 41ccd979bdSMark Fasheh #include "localalloc.h" 42ccd979bdSMark Fasheh #include "suballoc.h" 43ccd979bdSMark Fasheh #include "sysfile.h" 44ccd979bdSMark Fasheh #include "file.h" 45ccd979bdSMark Fasheh #include "super.h" 46ccd979bdSMark Fasheh #include "uptodate.h" 47ccd979bdSMark Fasheh 48ccd979bdSMark Fasheh #include "buffer_head_io.h" 49ccd979bdSMark Fasheh 50ccd979bdSMark Fasheh static int ocfs2_extent_contig(struct inode *inode, 51ccd979bdSMark Fasheh struct ocfs2_extent_rec *ext, 52ccd979bdSMark Fasheh u64 blkno); 53ccd979bdSMark Fasheh 54ccd979bdSMark Fasheh static int ocfs2_create_new_meta_bhs(struct ocfs2_super *osb, 55ccd979bdSMark Fasheh struct ocfs2_journal_handle *handle, 56ccd979bdSMark Fasheh struct inode *inode, 57ccd979bdSMark Fasheh int wanted, 58ccd979bdSMark Fasheh struct ocfs2_alloc_context *meta_ac, 59ccd979bdSMark Fasheh struct buffer_head *bhs[]); 60ccd979bdSMark Fasheh 61ccd979bdSMark Fasheh static int ocfs2_add_branch(struct ocfs2_super *osb, 62ccd979bdSMark Fasheh struct ocfs2_journal_handle *handle, 63ccd979bdSMark Fasheh struct inode *inode, 64ccd979bdSMark Fasheh struct buffer_head *fe_bh, 65ccd979bdSMark Fasheh struct buffer_head *eb_bh, 66ccd979bdSMark Fasheh struct buffer_head *last_eb_bh, 67ccd979bdSMark Fasheh struct ocfs2_alloc_context *meta_ac); 68ccd979bdSMark 
Fasheh 69ccd979bdSMark Fasheh static int ocfs2_shift_tree_depth(struct ocfs2_super *osb, 70ccd979bdSMark Fasheh struct ocfs2_journal_handle *handle, 71ccd979bdSMark Fasheh struct inode *inode, 72ccd979bdSMark Fasheh struct buffer_head *fe_bh, 73ccd979bdSMark Fasheh struct ocfs2_alloc_context *meta_ac, 74ccd979bdSMark Fasheh struct buffer_head **ret_new_eb_bh); 75ccd979bdSMark Fasheh 76ccd979bdSMark Fasheh static int ocfs2_do_insert_extent(struct ocfs2_super *osb, 77ccd979bdSMark Fasheh struct ocfs2_journal_handle *handle, 78ccd979bdSMark Fasheh struct inode *inode, 79ccd979bdSMark Fasheh struct buffer_head *fe_bh, 80ccd979bdSMark Fasheh u64 blkno, 81ccd979bdSMark Fasheh u32 new_clusters); 82ccd979bdSMark Fasheh 83ccd979bdSMark Fasheh static int ocfs2_find_branch_target(struct ocfs2_super *osb, 84ccd979bdSMark Fasheh struct inode *inode, 85ccd979bdSMark Fasheh struct buffer_head *fe_bh, 86ccd979bdSMark Fasheh struct buffer_head **target_bh); 87ccd979bdSMark Fasheh 88ccd979bdSMark Fasheh static int ocfs2_find_new_last_ext_blk(struct ocfs2_super *osb, 89ccd979bdSMark Fasheh struct inode *inode, 90ccd979bdSMark Fasheh struct ocfs2_dinode *fe, 91ccd979bdSMark Fasheh unsigned int new_i_clusters, 92ccd979bdSMark Fasheh struct buffer_head *old_last_eb, 93ccd979bdSMark Fasheh struct buffer_head **new_last_eb); 94ccd979bdSMark Fasheh 95ccd979bdSMark Fasheh static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc); 96ccd979bdSMark Fasheh 97ccd979bdSMark Fasheh static int ocfs2_extent_contig(struct inode *inode, 98ccd979bdSMark Fasheh struct ocfs2_extent_rec *ext, 99ccd979bdSMark Fasheh u64 blkno) 100ccd979bdSMark Fasheh { 101ccd979bdSMark Fasheh return blkno == (le64_to_cpu(ext->e_blkno) + 102ccd979bdSMark Fasheh ocfs2_clusters_to_blocks(inode->i_sb, 103ccd979bdSMark Fasheh le32_to_cpu(ext->e_clusters))); 104ccd979bdSMark Fasheh } 105ccd979bdSMark Fasheh 106ccd979bdSMark Fasheh /* 107ccd979bdSMark Fasheh * How many free extents have we got before we need more 
meta data? 108ccd979bdSMark Fasheh */ 109ccd979bdSMark Fasheh int ocfs2_num_free_extents(struct ocfs2_super *osb, 110ccd979bdSMark Fasheh struct inode *inode, 111ccd979bdSMark Fasheh struct ocfs2_dinode *fe) 112ccd979bdSMark Fasheh { 113ccd979bdSMark Fasheh int retval; 114ccd979bdSMark Fasheh struct ocfs2_extent_list *el; 115ccd979bdSMark Fasheh struct ocfs2_extent_block *eb; 116ccd979bdSMark Fasheh struct buffer_head *eb_bh = NULL; 117ccd979bdSMark Fasheh 118ccd979bdSMark Fasheh mlog_entry_void(); 119ccd979bdSMark Fasheh 120ccd979bdSMark Fasheh if (!OCFS2_IS_VALID_DINODE(fe)) { 121ccd979bdSMark Fasheh OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe); 122ccd979bdSMark Fasheh retval = -EIO; 123ccd979bdSMark Fasheh goto bail; 124ccd979bdSMark Fasheh } 125ccd979bdSMark Fasheh 126ccd979bdSMark Fasheh if (fe->i_last_eb_blk) { 127ccd979bdSMark Fasheh retval = ocfs2_read_block(osb, le64_to_cpu(fe->i_last_eb_blk), 128ccd979bdSMark Fasheh &eb_bh, OCFS2_BH_CACHED, inode); 129ccd979bdSMark Fasheh if (retval < 0) { 130ccd979bdSMark Fasheh mlog_errno(retval); 131ccd979bdSMark Fasheh goto bail; 132ccd979bdSMark Fasheh } 133ccd979bdSMark Fasheh eb = (struct ocfs2_extent_block *) eb_bh->b_data; 134ccd979bdSMark Fasheh el = &eb->h_list; 135ccd979bdSMark Fasheh } else 136ccd979bdSMark Fasheh el = &fe->id2.i_list; 137ccd979bdSMark Fasheh 138ccd979bdSMark Fasheh BUG_ON(el->l_tree_depth != 0); 139ccd979bdSMark Fasheh 140ccd979bdSMark Fasheh retval = le16_to_cpu(el->l_count) - le16_to_cpu(el->l_next_free_rec); 141ccd979bdSMark Fasheh bail: 142ccd979bdSMark Fasheh if (eb_bh) 143ccd979bdSMark Fasheh brelse(eb_bh); 144ccd979bdSMark Fasheh 145ccd979bdSMark Fasheh mlog_exit(retval); 146ccd979bdSMark Fasheh return retval; 147ccd979bdSMark Fasheh } 148ccd979bdSMark Fasheh 149ccd979bdSMark Fasheh /* expects array to already be allocated 150ccd979bdSMark Fasheh * 151ccd979bdSMark Fasheh * sets h_signature, h_blkno, h_suballoc_bit, h_suballoc_slot, and 152ccd979bdSMark Fasheh * l_count for you 
153ccd979bdSMark Fasheh */ 154ccd979bdSMark Fasheh static int ocfs2_create_new_meta_bhs(struct ocfs2_super *osb, 155ccd979bdSMark Fasheh struct ocfs2_journal_handle *handle, 156ccd979bdSMark Fasheh struct inode *inode, 157ccd979bdSMark Fasheh int wanted, 158ccd979bdSMark Fasheh struct ocfs2_alloc_context *meta_ac, 159ccd979bdSMark Fasheh struct buffer_head *bhs[]) 160ccd979bdSMark Fasheh { 161ccd979bdSMark Fasheh int count, status, i; 162ccd979bdSMark Fasheh u16 suballoc_bit_start; 163ccd979bdSMark Fasheh u32 num_got; 164ccd979bdSMark Fasheh u64 first_blkno; 165ccd979bdSMark Fasheh struct ocfs2_extent_block *eb; 166ccd979bdSMark Fasheh 167ccd979bdSMark Fasheh mlog_entry_void(); 168ccd979bdSMark Fasheh 169ccd979bdSMark Fasheh count = 0; 170ccd979bdSMark Fasheh while (count < wanted) { 171ccd979bdSMark Fasheh status = ocfs2_claim_metadata(osb, 172ccd979bdSMark Fasheh handle, 173ccd979bdSMark Fasheh meta_ac, 174ccd979bdSMark Fasheh wanted - count, 175ccd979bdSMark Fasheh &suballoc_bit_start, 176ccd979bdSMark Fasheh &num_got, 177ccd979bdSMark Fasheh &first_blkno); 178ccd979bdSMark Fasheh if (status < 0) { 179ccd979bdSMark Fasheh mlog_errno(status); 180ccd979bdSMark Fasheh goto bail; 181ccd979bdSMark Fasheh } 182ccd979bdSMark Fasheh 183ccd979bdSMark Fasheh for(i = count; i < (num_got + count); i++) { 184ccd979bdSMark Fasheh bhs[i] = sb_getblk(osb->sb, first_blkno); 185ccd979bdSMark Fasheh if (bhs[i] == NULL) { 186ccd979bdSMark Fasheh status = -EIO; 187ccd979bdSMark Fasheh mlog_errno(status); 188ccd979bdSMark Fasheh goto bail; 189ccd979bdSMark Fasheh } 190ccd979bdSMark Fasheh ocfs2_set_new_buffer_uptodate(inode, bhs[i]); 191ccd979bdSMark Fasheh 192ccd979bdSMark Fasheh status = ocfs2_journal_access(handle, inode, bhs[i], 193ccd979bdSMark Fasheh OCFS2_JOURNAL_ACCESS_CREATE); 194ccd979bdSMark Fasheh if (status < 0) { 195ccd979bdSMark Fasheh mlog_errno(status); 196ccd979bdSMark Fasheh goto bail; 197ccd979bdSMark Fasheh } 198ccd979bdSMark Fasheh 199ccd979bdSMark Fasheh 
memset(bhs[i]->b_data, 0, osb->sb->s_blocksize); 200ccd979bdSMark Fasheh eb = (struct ocfs2_extent_block *) bhs[i]->b_data; 201ccd979bdSMark Fasheh /* Ok, setup the minimal stuff here. */ 202ccd979bdSMark Fasheh strcpy(eb->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE); 203ccd979bdSMark Fasheh eb->h_blkno = cpu_to_le64(first_blkno); 204ccd979bdSMark Fasheh eb->h_fs_generation = cpu_to_le32(osb->fs_generation); 205ccd979bdSMark Fasheh 206ccd979bdSMark Fasheh #ifndef OCFS2_USE_ALL_METADATA_SUBALLOCATORS 207ccd979bdSMark Fasheh /* we always use slot zero's suballocator */ 208ccd979bdSMark Fasheh eb->h_suballoc_slot = 0; 209ccd979bdSMark Fasheh #else 210ccd979bdSMark Fasheh eb->h_suballoc_slot = cpu_to_le16(osb->slot_num); 211ccd979bdSMark Fasheh #endif 212ccd979bdSMark Fasheh eb->h_suballoc_bit = cpu_to_le16(suballoc_bit_start); 213ccd979bdSMark Fasheh eb->h_list.l_count = 214ccd979bdSMark Fasheh cpu_to_le16(ocfs2_extent_recs_per_eb(osb->sb)); 215ccd979bdSMark Fasheh 216ccd979bdSMark Fasheh suballoc_bit_start++; 217ccd979bdSMark Fasheh first_blkno++; 218ccd979bdSMark Fasheh 219ccd979bdSMark Fasheh /* We'll also be dirtied by the caller, so 220ccd979bdSMark Fasheh * this isn't absolutely necessary. 
*/ 221ccd979bdSMark Fasheh status = ocfs2_journal_dirty(handle, bhs[i]); 222ccd979bdSMark Fasheh if (status < 0) { 223ccd979bdSMark Fasheh mlog_errno(status); 224ccd979bdSMark Fasheh goto bail; 225ccd979bdSMark Fasheh } 226ccd979bdSMark Fasheh } 227ccd979bdSMark Fasheh 228ccd979bdSMark Fasheh count += num_got; 229ccd979bdSMark Fasheh } 230ccd979bdSMark Fasheh 231ccd979bdSMark Fasheh status = 0; 232ccd979bdSMark Fasheh bail: 233ccd979bdSMark Fasheh if (status < 0) { 234ccd979bdSMark Fasheh for(i = 0; i < wanted; i++) { 235ccd979bdSMark Fasheh if (bhs[i]) 236ccd979bdSMark Fasheh brelse(bhs[i]); 237ccd979bdSMark Fasheh bhs[i] = NULL; 238ccd979bdSMark Fasheh } 239ccd979bdSMark Fasheh } 240ccd979bdSMark Fasheh mlog_exit(status); 241ccd979bdSMark Fasheh return status; 242ccd979bdSMark Fasheh } 243ccd979bdSMark Fasheh 244ccd979bdSMark Fasheh /* 245ccd979bdSMark Fasheh * Add an entire tree branch to our inode. eb_bh is the extent block 246ccd979bdSMark Fasheh * to start at, if we don't want to start the branch at the dinode 247ccd979bdSMark Fasheh * structure. 248ccd979bdSMark Fasheh * 249ccd979bdSMark Fasheh * last_eb_bh is required as we have to update it's next_leaf pointer 250ccd979bdSMark Fasheh * for the new last extent block. 251ccd979bdSMark Fasheh * 252ccd979bdSMark Fasheh * the new branch will be 'empty' in the sense that every block will 253ccd979bdSMark Fasheh * contain a single record with e_clusters == 0. 
254ccd979bdSMark Fasheh */ 255ccd979bdSMark Fasheh static int ocfs2_add_branch(struct ocfs2_super *osb, 256ccd979bdSMark Fasheh struct ocfs2_journal_handle *handle, 257ccd979bdSMark Fasheh struct inode *inode, 258ccd979bdSMark Fasheh struct buffer_head *fe_bh, 259ccd979bdSMark Fasheh struct buffer_head *eb_bh, 260ccd979bdSMark Fasheh struct buffer_head *last_eb_bh, 261ccd979bdSMark Fasheh struct ocfs2_alloc_context *meta_ac) 262ccd979bdSMark Fasheh { 263ccd979bdSMark Fasheh int status, new_blocks, i; 264ccd979bdSMark Fasheh u64 next_blkno, new_last_eb_blk; 265ccd979bdSMark Fasheh struct buffer_head *bh; 266ccd979bdSMark Fasheh struct buffer_head **new_eb_bhs = NULL; 267ccd979bdSMark Fasheh struct ocfs2_dinode *fe; 268ccd979bdSMark Fasheh struct ocfs2_extent_block *eb; 269ccd979bdSMark Fasheh struct ocfs2_extent_list *eb_el; 270ccd979bdSMark Fasheh struct ocfs2_extent_list *el; 271ccd979bdSMark Fasheh 272ccd979bdSMark Fasheh mlog_entry_void(); 273ccd979bdSMark Fasheh 274ccd979bdSMark Fasheh BUG_ON(!last_eb_bh); 275ccd979bdSMark Fasheh 276ccd979bdSMark Fasheh fe = (struct ocfs2_dinode *) fe_bh->b_data; 277ccd979bdSMark Fasheh 278ccd979bdSMark Fasheh if (eb_bh) { 279ccd979bdSMark Fasheh eb = (struct ocfs2_extent_block *) eb_bh->b_data; 280ccd979bdSMark Fasheh el = &eb->h_list; 281ccd979bdSMark Fasheh } else 282ccd979bdSMark Fasheh el = &fe->id2.i_list; 283ccd979bdSMark Fasheh 284ccd979bdSMark Fasheh /* we never add a branch to a leaf. 
*/ 285ccd979bdSMark Fasheh BUG_ON(!el->l_tree_depth); 286ccd979bdSMark Fasheh 287ccd979bdSMark Fasheh new_blocks = le16_to_cpu(el->l_tree_depth); 288ccd979bdSMark Fasheh 289ccd979bdSMark Fasheh /* allocate the number of new eb blocks we need */ 290ccd979bdSMark Fasheh new_eb_bhs = kcalloc(new_blocks, sizeof(struct buffer_head *), 291ccd979bdSMark Fasheh GFP_KERNEL); 292ccd979bdSMark Fasheh if (!new_eb_bhs) { 293ccd979bdSMark Fasheh status = -ENOMEM; 294ccd979bdSMark Fasheh mlog_errno(status); 295ccd979bdSMark Fasheh goto bail; 296ccd979bdSMark Fasheh } 297ccd979bdSMark Fasheh 298ccd979bdSMark Fasheh status = ocfs2_create_new_meta_bhs(osb, handle, inode, new_blocks, 299ccd979bdSMark Fasheh meta_ac, new_eb_bhs); 300ccd979bdSMark Fasheh if (status < 0) { 301ccd979bdSMark Fasheh mlog_errno(status); 302ccd979bdSMark Fasheh goto bail; 303ccd979bdSMark Fasheh } 304ccd979bdSMark Fasheh 305ccd979bdSMark Fasheh /* Note: new_eb_bhs[new_blocks - 1] is the guy which will be 306ccd979bdSMark Fasheh * linked with the rest of the tree. 307ccd979bdSMark Fasheh * conversly, new_eb_bhs[0] is the new bottommost leaf. 308ccd979bdSMark Fasheh * 309ccd979bdSMark Fasheh * when we leave the loop, new_last_eb_blk will point to the 310ccd979bdSMark Fasheh * newest leaf, and next_blkno will point to the topmost extent 311ccd979bdSMark Fasheh * block. 
*/ 312ccd979bdSMark Fasheh next_blkno = new_last_eb_blk = 0; 313ccd979bdSMark Fasheh for(i = 0; i < new_blocks; i++) { 314ccd979bdSMark Fasheh bh = new_eb_bhs[i]; 315ccd979bdSMark Fasheh eb = (struct ocfs2_extent_block *) bh->b_data; 316ccd979bdSMark Fasheh if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { 317ccd979bdSMark Fasheh OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); 318ccd979bdSMark Fasheh status = -EIO; 319ccd979bdSMark Fasheh goto bail; 320ccd979bdSMark Fasheh } 321ccd979bdSMark Fasheh eb_el = &eb->h_list; 322ccd979bdSMark Fasheh 323ccd979bdSMark Fasheh status = ocfs2_journal_access(handle, inode, bh, 324ccd979bdSMark Fasheh OCFS2_JOURNAL_ACCESS_CREATE); 325ccd979bdSMark Fasheh if (status < 0) { 326ccd979bdSMark Fasheh mlog_errno(status); 327ccd979bdSMark Fasheh goto bail; 328ccd979bdSMark Fasheh } 329ccd979bdSMark Fasheh 330ccd979bdSMark Fasheh eb->h_next_leaf_blk = 0; 331ccd979bdSMark Fasheh eb_el->l_tree_depth = cpu_to_le16(i); 332ccd979bdSMark Fasheh eb_el->l_next_free_rec = cpu_to_le16(1); 333ccd979bdSMark Fasheh eb_el->l_recs[0].e_cpos = fe->i_clusters; 334ccd979bdSMark Fasheh eb_el->l_recs[0].e_blkno = cpu_to_le64(next_blkno); 335ccd979bdSMark Fasheh eb_el->l_recs[0].e_clusters = cpu_to_le32(0); 336ccd979bdSMark Fasheh if (!eb_el->l_tree_depth) 337ccd979bdSMark Fasheh new_last_eb_blk = le64_to_cpu(eb->h_blkno); 338ccd979bdSMark Fasheh 339ccd979bdSMark Fasheh status = ocfs2_journal_dirty(handle, bh); 340ccd979bdSMark Fasheh if (status < 0) { 341ccd979bdSMark Fasheh mlog_errno(status); 342ccd979bdSMark Fasheh goto bail; 343ccd979bdSMark Fasheh } 344ccd979bdSMark Fasheh 345ccd979bdSMark Fasheh next_blkno = le64_to_cpu(eb->h_blkno); 346ccd979bdSMark Fasheh } 347ccd979bdSMark Fasheh 348ccd979bdSMark Fasheh /* This is a bit hairy. We want to update up to three blocks 349ccd979bdSMark Fasheh * here without leaving any of them in an inconsistent state 350ccd979bdSMark Fasheh * in case of error. 
We don't have to worry about 351ccd979bdSMark Fasheh * journal_dirty erroring as it won't unless we've aborted the 352ccd979bdSMark Fasheh * handle (in which case we would never be here) so reserving 353ccd979bdSMark Fasheh * the write with journal_access is all we need to do. */ 354ccd979bdSMark Fasheh status = ocfs2_journal_access(handle, inode, last_eb_bh, 355ccd979bdSMark Fasheh OCFS2_JOURNAL_ACCESS_WRITE); 356ccd979bdSMark Fasheh if (status < 0) { 357ccd979bdSMark Fasheh mlog_errno(status); 358ccd979bdSMark Fasheh goto bail; 359ccd979bdSMark Fasheh } 360ccd979bdSMark Fasheh status = ocfs2_journal_access(handle, inode, fe_bh, 361ccd979bdSMark Fasheh OCFS2_JOURNAL_ACCESS_WRITE); 362ccd979bdSMark Fasheh if (status < 0) { 363ccd979bdSMark Fasheh mlog_errno(status); 364ccd979bdSMark Fasheh goto bail; 365ccd979bdSMark Fasheh } 366ccd979bdSMark Fasheh if (eb_bh) { 367ccd979bdSMark Fasheh status = ocfs2_journal_access(handle, inode, eb_bh, 368ccd979bdSMark Fasheh OCFS2_JOURNAL_ACCESS_WRITE); 369ccd979bdSMark Fasheh if (status < 0) { 370ccd979bdSMark Fasheh mlog_errno(status); 371ccd979bdSMark Fasheh goto bail; 372ccd979bdSMark Fasheh } 373ccd979bdSMark Fasheh } 374ccd979bdSMark Fasheh 375ccd979bdSMark Fasheh /* Link the new branch into the rest of the tree (el will 376ccd979bdSMark Fasheh * either be on the fe, or the extent block passed in. */ 377ccd979bdSMark Fasheh i = le16_to_cpu(el->l_next_free_rec); 378ccd979bdSMark Fasheh el->l_recs[i].e_blkno = cpu_to_le64(next_blkno); 379ccd979bdSMark Fasheh el->l_recs[i].e_cpos = fe->i_clusters; 380ccd979bdSMark Fasheh el->l_recs[i].e_clusters = 0; 381ccd979bdSMark Fasheh le16_add_cpu(&el->l_next_free_rec, 1); 382ccd979bdSMark Fasheh 383ccd979bdSMark Fasheh /* fe needs a new last extent block pointer, as does the 384ccd979bdSMark Fasheh * next_leaf on the previously last-extent-block. 
*/ 385ccd979bdSMark Fasheh fe->i_last_eb_blk = cpu_to_le64(new_last_eb_blk); 386ccd979bdSMark Fasheh 387ccd979bdSMark Fasheh eb = (struct ocfs2_extent_block *) last_eb_bh->b_data; 388ccd979bdSMark Fasheh eb->h_next_leaf_blk = cpu_to_le64(new_last_eb_blk); 389ccd979bdSMark Fasheh 390ccd979bdSMark Fasheh status = ocfs2_journal_dirty(handle, last_eb_bh); 391ccd979bdSMark Fasheh if (status < 0) 392ccd979bdSMark Fasheh mlog_errno(status); 393ccd979bdSMark Fasheh status = ocfs2_journal_dirty(handle, fe_bh); 394ccd979bdSMark Fasheh if (status < 0) 395ccd979bdSMark Fasheh mlog_errno(status); 396ccd979bdSMark Fasheh if (eb_bh) { 397ccd979bdSMark Fasheh status = ocfs2_journal_dirty(handle, eb_bh); 398ccd979bdSMark Fasheh if (status < 0) 399ccd979bdSMark Fasheh mlog_errno(status); 400ccd979bdSMark Fasheh } 401ccd979bdSMark Fasheh 402ccd979bdSMark Fasheh status = 0; 403ccd979bdSMark Fasheh bail: 404ccd979bdSMark Fasheh if (new_eb_bhs) { 405ccd979bdSMark Fasheh for (i = 0; i < new_blocks; i++) 406ccd979bdSMark Fasheh if (new_eb_bhs[i]) 407ccd979bdSMark Fasheh brelse(new_eb_bhs[i]); 408ccd979bdSMark Fasheh kfree(new_eb_bhs); 409ccd979bdSMark Fasheh } 410ccd979bdSMark Fasheh 411ccd979bdSMark Fasheh mlog_exit(status); 412ccd979bdSMark Fasheh return status; 413ccd979bdSMark Fasheh } 414ccd979bdSMark Fasheh 415ccd979bdSMark Fasheh /* 416ccd979bdSMark Fasheh * adds another level to the allocation tree. 417ccd979bdSMark Fasheh * returns back the new extent block so you can add a branch to it 418ccd979bdSMark Fasheh * after this call. 
419ccd979bdSMark Fasheh */ 420ccd979bdSMark Fasheh static int ocfs2_shift_tree_depth(struct ocfs2_super *osb, 421ccd979bdSMark Fasheh struct ocfs2_journal_handle *handle, 422ccd979bdSMark Fasheh struct inode *inode, 423ccd979bdSMark Fasheh struct buffer_head *fe_bh, 424ccd979bdSMark Fasheh struct ocfs2_alloc_context *meta_ac, 425ccd979bdSMark Fasheh struct buffer_head **ret_new_eb_bh) 426ccd979bdSMark Fasheh { 427ccd979bdSMark Fasheh int status, i; 428ccd979bdSMark Fasheh struct buffer_head *new_eb_bh = NULL; 429ccd979bdSMark Fasheh struct ocfs2_dinode *fe; 430ccd979bdSMark Fasheh struct ocfs2_extent_block *eb; 431ccd979bdSMark Fasheh struct ocfs2_extent_list *fe_el; 432ccd979bdSMark Fasheh struct ocfs2_extent_list *eb_el; 433ccd979bdSMark Fasheh 434ccd979bdSMark Fasheh mlog_entry_void(); 435ccd979bdSMark Fasheh 436ccd979bdSMark Fasheh status = ocfs2_create_new_meta_bhs(osb, handle, inode, 1, meta_ac, 437ccd979bdSMark Fasheh &new_eb_bh); 438ccd979bdSMark Fasheh if (status < 0) { 439ccd979bdSMark Fasheh mlog_errno(status); 440ccd979bdSMark Fasheh goto bail; 441ccd979bdSMark Fasheh } 442ccd979bdSMark Fasheh 443ccd979bdSMark Fasheh eb = (struct ocfs2_extent_block *) new_eb_bh->b_data; 444ccd979bdSMark Fasheh if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { 445ccd979bdSMark Fasheh OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); 446ccd979bdSMark Fasheh status = -EIO; 447ccd979bdSMark Fasheh goto bail; 448ccd979bdSMark Fasheh } 449ccd979bdSMark Fasheh 450ccd979bdSMark Fasheh eb_el = &eb->h_list; 451ccd979bdSMark Fasheh fe = (struct ocfs2_dinode *) fe_bh->b_data; 452ccd979bdSMark Fasheh fe_el = &fe->id2.i_list; 453ccd979bdSMark Fasheh 454ccd979bdSMark Fasheh status = ocfs2_journal_access(handle, inode, new_eb_bh, 455ccd979bdSMark Fasheh OCFS2_JOURNAL_ACCESS_CREATE); 456ccd979bdSMark Fasheh if (status < 0) { 457ccd979bdSMark Fasheh mlog_errno(status); 458ccd979bdSMark Fasheh goto bail; 459ccd979bdSMark Fasheh } 460ccd979bdSMark Fasheh 461ccd979bdSMark Fasheh /* copy the fe data 
into the new extent block */ 462ccd979bdSMark Fasheh eb_el->l_tree_depth = fe_el->l_tree_depth; 463ccd979bdSMark Fasheh eb_el->l_next_free_rec = fe_el->l_next_free_rec; 464ccd979bdSMark Fasheh for(i = 0; i < le16_to_cpu(fe_el->l_next_free_rec); i++) { 465ccd979bdSMark Fasheh eb_el->l_recs[i].e_cpos = fe_el->l_recs[i].e_cpos; 466ccd979bdSMark Fasheh eb_el->l_recs[i].e_clusters = fe_el->l_recs[i].e_clusters; 467ccd979bdSMark Fasheh eb_el->l_recs[i].e_blkno = fe_el->l_recs[i].e_blkno; 468ccd979bdSMark Fasheh } 469ccd979bdSMark Fasheh 470ccd979bdSMark Fasheh status = ocfs2_journal_dirty(handle, new_eb_bh); 471ccd979bdSMark Fasheh if (status < 0) { 472ccd979bdSMark Fasheh mlog_errno(status); 473ccd979bdSMark Fasheh goto bail; 474ccd979bdSMark Fasheh } 475ccd979bdSMark Fasheh 476ccd979bdSMark Fasheh status = ocfs2_journal_access(handle, inode, fe_bh, 477ccd979bdSMark Fasheh OCFS2_JOURNAL_ACCESS_WRITE); 478ccd979bdSMark Fasheh if (status < 0) { 479ccd979bdSMark Fasheh mlog_errno(status); 480ccd979bdSMark Fasheh goto bail; 481ccd979bdSMark Fasheh } 482ccd979bdSMark Fasheh 483ccd979bdSMark Fasheh /* update fe now */ 484ccd979bdSMark Fasheh le16_add_cpu(&fe_el->l_tree_depth, 1); 485ccd979bdSMark Fasheh fe_el->l_recs[0].e_cpos = 0; 486ccd979bdSMark Fasheh fe_el->l_recs[0].e_blkno = eb->h_blkno; 487ccd979bdSMark Fasheh fe_el->l_recs[0].e_clusters = fe->i_clusters; 488ccd979bdSMark Fasheh for(i = 1; i < le16_to_cpu(fe_el->l_next_free_rec); i++) { 489ccd979bdSMark Fasheh fe_el->l_recs[i].e_cpos = 0; 490ccd979bdSMark Fasheh fe_el->l_recs[i].e_clusters = 0; 491ccd979bdSMark Fasheh fe_el->l_recs[i].e_blkno = 0; 492ccd979bdSMark Fasheh } 493ccd979bdSMark Fasheh fe_el->l_next_free_rec = cpu_to_le16(1); 494ccd979bdSMark Fasheh 495ccd979bdSMark Fasheh /* If this is our 1st tree depth shift, then last_eb_blk 496ccd979bdSMark Fasheh * becomes the allocated extent block */ 497ccd979bdSMark Fasheh if (fe_el->l_tree_depth == cpu_to_le16(1)) 498ccd979bdSMark Fasheh fe->i_last_eb_blk = 
eb->h_blkno; 499ccd979bdSMark Fasheh 500ccd979bdSMark Fasheh status = ocfs2_journal_dirty(handle, fe_bh); 501ccd979bdSMark Fasheh if (status < 0) { 502ccd979bdSMark Fasheh mlog_errno(status); 503ccd979bdSMark Fasheh goto bail; 504ccd979bdSMark Fasheh } 505ccd979bdSMark Fasheh 506ccd979bdSMark Fasheh *ret_new_eb_bh = new_eb_bh; 507ccd979bdSMark Fasheh new_eb_bh = NULL; 508ccd979bdSMark Fasheh status = 0; 509ccd979bdSMark Fasheh bail: 510ccd979bdSMark Fasheh if (new_eb_bh) 511ccd979bdSMark Fasheh brelse(new_eb_bh); 512ccd979bdSMark Fasheh 513ccd979bdSMark Fasheh mlog_exit(status); 514ccd979bdSMark Fasheh return status; 515ccd979bdSMark Fasheh } 516ccd979bdSMark Fasheh 517ccd979bdSMark Fasheh /* 518ccd979bdSMark Fasheh * Expects the tree to already have room in the rightmost leaf for the 519ccd979bdSMark Fasheh * extent. Updates all the extent blocks (and the dinode) on the way 520ccd979bdSMark Fasheh * down. 521ccd979bdSMark Fasheh */ 522ccd979bdSMark Fasheh static int ocfs2_do_insert_extent(struct ocfs2_super *osb, 523ccd979bdSMark Fasheh struct ocfs2_journal_handle *handle, 524ccd979bdSMark Fasheh struct inode *inode, 525ccd979bdSMark Fasheh struct buffer_head *fe_bh, 526ccd979bdSMark Fasheh u64 start_blk, 527ccd979bdSMark Fasheh u32 new_clusters) 528ccd979bdSMark Fasheh { 529ccd979bdSMark Fasheh int status, i, num_bhs = 0; 530ccd979bdSMark Fasheh u64 next_blkno; 531ccd979bdSMark Fasheh u16 next_free; 532ccd979bdSMark Fasheh struct buffer_head **eb_bhs = NULL; 533ccd979bdSMark Fasheh struct ocfs2_dinode *fe; 534ccd979bdSMark Fasheh struct ocfs2_extent_block *eb; 535ccd979bdSMark Fasheh struct ocfs2_extent_list *el; 536ccd979bdSMark Fasheh 537ccd979bdSMark Fasheh mlog_entry_void(); 538ccd979bdSMark Fasheh 539ccd979bdSMark Fasheh status = ocfs2_journal_access(handle, inode, fe_bh, 540ccd979bdSMark Fasheh OCFS2_JOURNAL_ACCESS_WRITE); 541ccd979bdSMark Fasheh if (status < 0) { 542ccd979bdSMark Fasheh mlog_errno(status); 543ccd979bdSMark Fasheh goto bail; 
544ccd979bdSMark Fasheh } 545ccd979bdSMark Fasheh 546ccd979bdSMark Fasheh fe = (struct ocfs2_dinode *) fe_bh->b_data; 547ccd979bdSMark Fasheh el = &fe->id2.i_list; 548ccd979bdSMark Fasheh if (el->l_tree_depth) { 549ccd979bdSMark Fasheh /* This is another operation where we want to be 550ccd979bdSMark Fasheh * careful about our tree updates. An error here means 551ccd979bdSMark Fasheh * none of the previous changes we made should roll 552ccd979bdSMark Fasheh * forward. As a result, we have to record the buffers 553ccd979bdSMark Fasheh * for this part of the tree in an array and reserve a 554ccd979bdSMark Fasheh * journal write to them before making any changes. */ 555ccd979bdSMark Fasheh num_bhs = le16_to_cpu(fe->id2.i_list.l_tree_depth); 556ccd979bdSMark Fasheh eb_bhs = kcalloc(num_bhs, sizeof(struct buffer_head *), 557ccd979bdSMark Fasheh GFP_KERNEL); 558ccd979bdSMark Fasheh if (!eb_bhs) { 559ccd979bdSMark Fasheh status = -ENOMEM; 560ccd979bdSMark Fasheh mlog_errno(status); 561ccd979bdSMark Fasheh goto bail; 562ccd979bdSMark Fasheh } 563ccd979bdSMark Fasheh 564ccd979bdSMark Fasheh i = 0; 565ccd979bdSMark Fasheh while(el->l_tree_depth) { 566ccd979bdSMark Fasheh next_free = le16_to_cpu(el->l_next_free_rec); 567ccd979bdSMark Fasheh if (next_free == 0) { 568ccd979bdSMark Fasheh ocfs2_error(inode->i_sb, 569b0697053SMark Fasheh "Dinode %llu has a bad extent list", 570b0697053SMark Fasheh (unsigned long long)OCFS2_I(inode)->ip_blkno); 571ccd979bdSMark Fasheh status = -EIO; 572ccd979bdSMark Fasheh goto bail; 573ccd979bdSMark Fasheh } 574ccd979bdSMark Fasheh next_blkno = le64_to_cpu(el->l_recs[next_free - 1].e_blkno); 575ccd979bdSMark Fasheh 576ccd979bdSMark Fasheh BUG_ON(i >= num_bhs); 577ccd979bdSMark Fasheh status = ocfs2_read_block(osb, next_blkno, &eb_bhs[i], 578ccd979bdSMark Fasheh OCFS2_BH_CACHED, inode); 579ccd979bdSMark Fasheh if (status < 0) { 580ccd979bdSMark Fasheh mlog_errno(status); 581ccd979bdSMark Fasheh goto bail; 582ccd979bdSMark Fasheh } 583ccd979bdSMark 
Fasheh eb = (struct ocfs2_extent_block *) eb_bhs[i]->b_data; 584ccd979bdSMark Fasheh if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { 585ccd979bdSMark Fasheh OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, 586ccd979bdSMark Fasheh eb); 587ccd979bdSMark Fasheh status = -EIO; 588ccd979bdSMark Fasheh goto bail; 589ccd979bdSMark Fasheh } 590ccd979bdSMark Fasheh 591ccd979bdSMark Fasheh status = ocfs2_journal_access(handle, inode, eb_bhs[i], 592ccd979bdSMark Fasheh OCFS2_JOURNAL_ACCESS_WRITE); 593ccd979bdSMark Fasheh if (status < 0) { 594ccd979bdSMark Fasheh mlog_errno(status); 595ccd979bdSMark Fasheh goto bail; 596ccd979bdSMark Fasheh } 597ccd979bdSMark Fasheh 598ccd979bdSMark Fasheh el = &eb->h_list; 599ccd979bdSMark Fasheh i++; 600ccd979bdSMark Fasheh /* When we leave this loop, eb_bhs[num_bhs - 1] will 601ccd979bdSMark Fasheh * hold the bottom-most leaf extent block. */ 602ccd979bdSMark Fasheh } 603ccd979bdSMark Fasheh BUG_ON(el->l_tree_depth); 604ccd979bdSMark Fasheh 605ccd979bdSMark Fasheh el = &fe->id2.i_list; 606ccd979bdSMark Fasheh /* If we have tree depth, then the fe update is 607ccd979bdSMark Fasheh * trivial, and we want to switch el out for the 608ccd979bdSMark Fasheh * bottom-most leaf in order to update it with the 609ccd979bdSMark Fasheh * actual extent data below. 
*/ 610ccd979bdSMark Fasheh next_free = le16_to_cpu(el->l_next_free_rec); 611ccd979bdSMark Fasheh if (next_free == 0) { 612ccd979bdSMark Fasheh ocfs2_error(inode->i_sb, 613b0697053SMark Fasheh "Dinode %llu has a bad extent list", 614b0697053SMark Fasheh (unsigned long long)OCFS2_I(inode)->ip_blkno); 615ccd979bdSMark Fasheh status = -EIO; 616ccd979bdSMark Fasheh goto bail; 617ccd979bdSMark Fasheh } 618ccd979bdSMark Fasheh le32_add_cpu(&el->l_recs[next_free - 1].e_clusters, 619ccd979bdSMark Fasheh new_clusters); 620ccd979bdSMark Fasheh /* (num_bhs - 1) to avoid the leaf */ 621ccd979bdSMark Fasheh for(i = 0; i < (num_bhs - 1); i++) { 622ccd979bdSMark Fasheh eb = (struct ocfs2_extent_block *) eb_bhs[i]->b_data; 623ccd979bdSMark Fasheh el = &eb->h_list; 624ccd979bdSMark Fasheh 625ccd979bdSMark Fasheh /* finally, make our actual change to the 626ccd979bdSMark Fasheh * intermediate extent blocks. */ 627ccd979bdSMark Fasheh next_free = le16_to_cpu(el->l_next_free_rec); 628ccd979bdSMark Fasheh le32_add_cpu(&el->l_recs[next_free - 1].e_clusters, 629ccd979bdSMark Fasheh new_clusters); 630ccd979bdSMark Fasheh 631ccd979bdSMark Fasheh status = ocfs2_journal_dirty(handle, eb_bhs[i]); 632ccd979bdSMark Fasheh if (status < 0) 633ccd979bdSMark Fasheh mlog_errno(status); 634ccd979bdSMark Fasheh } 635ccd979bdSMark Fasheh BUG_ON(i != (num_bhs - 1)); 636ccd979bdSMark Fasheh /* note that the leaf block wasn't touched in 637ccd979bdSMark Fasheh * the loop above */ 638ccd979bdSMark Fasheh eb = (struct ocfs2_extent_block *) eb_bhs[num_bhs - 1]->b_data; 639ccd979bdSMark Fasheh el = &eb->h_list; 640ccd979bdSMark Fasheh BUG_ON(el->l_tree_depth); 641ccd979bdSMark Fasheh } 642ccd979bdSMark Fasheh 643ccd979bdSMark Fasheh /* yay, we can finally add the actual extent now! 
*/ 644ccd979bdSMark Fasheh i = le16_to_cpu(el->l_next_free_rec) - 1; 645ccd979bdSMark Fasheh if (le16_to_cpu(el->l_next_free_rec) && 646ccd979bdSMark Fasheh ocfs2_extent_contig(inode, &el->l_recs[i], start_blk)) { 647ccd979bdSMark Fasheh le32_add_cpu(&el->l_recs[i].e_clusters, new_clusters); 648ccd979bdSMark Fasheh } else if (le16_to_cpu(el->l_next_free_rec) && 649ccd979bdSMark Fasheh (le32_to_cpu(el->l_recs[i].e_clusters) == 0)) { 650ccd979bdSMark Fasheh /* having an empty extent at eof is legal. */ 651ccd979bdSMark Fasheh if (el->l_recs[i].e_cpos != fe->i_clusters) { 652ccd979bdSMark Fasheh ocfs2_error(inode->i_sb, 653b0697053SMark Fasheh "Dinode %llu trailing extent is bad: " 654ccd979bdSMark Fasheh "cpos (%u) != number of clusters (%u)", 655b0697053SMark Fasheh (unsigned long long)OCFS2_I(inode)->ip_blkno, 656ccd979bdSMark Fasheh le32_to_cpu(el->l_recs[i].e_cpos), 657ccd979bdSMark Fasheh le32_to_cpu(fe->i_clusters)); 658ccd979bdSMark Fasheh status = -EIO; 659ccd979bdSMark Fasheh goto bail; 660ccd979bdSMark Fasheh } 661ccd979bdSMark Fasheh el->l_recs[i].e_blkno = cpu_to_le64(start_blk); 662ccd979bdSMark Fasheh el->l_recs[i].e_clusters = cpu_to_le32(new_clusters); 663ccd979bdSMark Fasheh } else { 664ccd979bdSMark Fasheh /* No contiguous record, or no empty record at eof, so 665ccd979bdSMark Fasheh * we add a new one. 
*/ 666ccd979bdSMark Fasheh 667ccd979bdSMark Fasheh BUG_ON(le16_to_cpu(el->l_next_free_rec) >= 668ccd979bdSMark Fasheh le16_to_cpu(el->l_count)); 669ccd979bdSMark Fasheh i = le16_to_cpu(el->l_next_free_rec); 670ccd979bdSMark Fasheh 671ccd979bdSMark Fasheh el->l_recs[i].e_blkno = cpu_to_le64(start_blk); 672ccd979bdSMark Fasheh el->l_recs[i].e_clusters = cpu_to_le32(new_clusters); 673ccd979bdSMark Fasheh el->l_recs[i].e_cpos = fe->i_clusters; 674ccd979bdSMark Fasheh le16_add_cpu(&el->l_next_free_rec, 1); 675ccd979bdSMark Fasheh } 676ccd979bdSMark Fasheh 677ccd979bdSMark Fasheh /* 678ccd979bdSMark Fasheh * extent_map errors are not fatal, so they are ignored outside 679ccd979bdSMark Fasheh * of flushing the thing. 680ccd979bdSMark Fasheh */ 681ccd979bdSMark Fasheh status = ocfs2_extent_map_append(inode, &el->l_recs[i], 682ccd979bdSMark Fasheh new_clusters); 683ccd979bdSMark Fasheh if (status) { 684ccd979bdSMark Fasheh mlog_errno(status); 685ccd979bdSMark Fasheh ocfs2_extent_map_drop(inode, le32_to_cpu(fe->i_clusters)); 686ccd979bdSMark Fasheh } 687ccd979bdSMark Fasheh 688ccd979bdSMark Fasheh status = ocfs2_journal_dirty(handle, fe_bh); 689ccd979bdSMark Fasheh if (status < 0) 690ccd979bdSMark Fasheh mlog_errno(status); 691ccd979bdSMark Fasheh if (fe->id2.i_list.l_tree_depth) { 692ccd979bdSMark Fasheh status = ocfs2_journal_dirty(handle, eb_bhs[num_bhs - 1]); 693ccd979bdSMark Fasheh if (status < 0) 694ccd979bdSMark Fasheh mlog_errno(status); 695ccd979bdSMark Fasheh } 696ccd979bdSMark Fasheh 697ccd979bdSMark Fasheh status = 0; 698ccd979bdSMark Fasheh bail: 699ccd979bdSMark Fasheh if (eb_bhs) { 700ccd979bdSMark Fasheh for (i = 0; i < num_bhs; i++) 701ccd979bdSMark Fasheh if (eb_bhs[i]) 702ccd979bdSMark Fasheh brelse(eb_bhs[i]); 703ccd979bdSMark Fasheh kfree(eb_bhs); 704ccd979bdSMark Fasheh } 705ccd979bdSMark Fasheh 706ccd979bdSMark Fasheh mlog_exit(status); 707ccd979bdSMark Fasheh return status; 708ccd979bdSMark Fasheh } 709ccd979bdSMark Fasheh 710ccd979bdSMark Fasheh /* 
711ccd979bdSMark Fasheh * Should only be called when there is no space left in any of the 712ccd979bdSMark Fasheh * leaf nodes. What we want to do is find the lowest tree depth 713ccd979bdSMark Fasheh * non-leaf extent block with room for new records. There are three 714ccd979bdSMark Fasheh * valid results of this search: 715ccd979bdSMark Fasheh * 716ccd979bdSMark Fasheh * 1) a lowest extent block is found, then we pass it back in 717ccd979bdSMark Fasheh * *lowest_eb_bh and return '0' 718ccd979bdSMark Fasheh * 719ccd979bdSMark Fasheh * 2) the search fails to find anything, but the dinode has room. We 720ccd979bdSMark Fasheh * pass NULL back in *lowest_eb_bh, but still return '0' 721ccd979bdSMark Fasheh * 722ccd979bdSMark Fasheh * 3) the search fails to find anything AND the dinode is full, in 723ccd979bdSMark Fasheh * which case we return > 0 724ccd979bdSMark Fasheh * 725ccd979bdSMark Fasheh * return status < 0 indicates an error. 726ccd979bdSMark Fasheh */ 727ccd979bdSMark Fasheh static int ocfs2_find_branch_target(struct ocfs2_super *osb, 728ccd979bdSMark Fasheh struct inode *inode, 729ccd979bdSMark Fasheh struct buffer_head *fe_bh, 730ccd979bdSMark Fasheh struct buffer_head **target_bh) 731ccd979bdSMark Fasheh { 732ccd979bdSMark Fasheh int status = 0, i; 733ccd979bdSMark Fasheh u64 blkno; 734ccd979bdSMark Fasheh struct ocfs2_dinode *fe; 735ccd979bdSMark Fasheh struct ocfs2_extent_block *eb; 736ccd979bdSMark Fasheh struct ocfs2_extent_list *el; 737ccd979bdSMark Fasheh struct buffer_head *bh = NULL; 738ccd979bdSMark Fasheh struct buffer_head *lowest_bh = NULL; 739ccd979bdSMark Fasheh 740ccd979bdSMark Fasheh mlog_entry_void(); 741ccd979bdSMark Fasheh 742ccd979bdSMark Fasheh *target_bh = NULL; 743ccd979bdSMark Fasheh 744ccd979bdSMark Fasheh fe = (struct ocfs2_dinode *) fe_bh->b_data; 745ccd979bdSMark Fasheh el = &fe->id2.i_list; 746ccd979bdSMark Fasheh 747ccd979bdSMark Fasheh while(le16_to_cpu(el->l_tree_depth) > 1) { 748ccd979bdSMark Fasheh if 
(le16_to_cpu(el->l_next_free_rec) == 0) { 749b0697053SMark Fasheh ocfs2_error(inode->i_sb, "Dinode %llu has empty " 750ccd979bdSMark Fasheh "extent list (next_free_rec == 0)", 751b0697053SMark Fasheh (unsigned long long)OCFS2_I(inode)->ip_blkno); 752ccd979bdSMark Fasheh status = -EIO; 753ccd979bdSMark Fasheh goto bail; 754ccd979bdSMark Fasheh } 755ccd979bdSMark Fasheh i = le16_to_cpu(el->l_next_free_rec) - 1; 756ccd979bdSMark Fasheh blkno = le64_to_cpu(el->l_recs[i].e_blkno); 757ccd979bdSMark Fasheh if (!blkno) { 758b0697053SMark Fasheh ocfs2_error(inode->i_sb, "Dinode %llu has extent " 759ccd979bdSMark Fasheh "list where extent # %d has no physical " 760ccd979bdSMark Fasheh "block start", 761b0697053SMark Fasheh (unsigned long long)OCFS2_I(inode)->ip_blkno, i); 762ccd979bdSMark Fasheh status = -EIO; 763ccd979bdSMark Fasheh goto bail; 764ccd979bdSMark Fasheh } 765ccd979bdSMark Fasheh 766ccd979bdSMark Fasheh if (bh) { 767ccd979bdSMark Fasheh brelse(bh); 768ccd979bdSMark Fasheh bh = NULL; 769ccd979bdSMark Fasheh } 770ccd979bdSMark Fasheh 771ccd979bdSMark Fasheh status = ocfs2_read_block(osb, blkno, &bh, OCFS2_BH_CACHED, 772ccd979bdSMark Fasheh inode); 773ccd979bdSMark Fasheh if (status < 0) { 774ccd979bdSMark Fasheh mlog_errno(status); 775ccd979bdSMark Fasheh goto bail; 776ccd979bdSMark Fasheh } 777ccd979bdSMark Fasheh 778ccd979bdSMark Fasheh eb = (struct ocfs2_extent_block *) bh->b_data; 779ccd979bdSMark Fasheh if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { 780ccd979bdSMark Fasheh OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); 781ccd979bdSMark Fasheh status = -EIO; 782ccd979bdSMark Fasheh goto bail; 783ccd979bdSMark Fasheh } 784ccd979bdSMark Fasheh el = &eb->h_list; 785ccd979bdSMark Fasheh 786ccd979bdSMark Fasheh if (le16_to_cpu(el->l_next_free_rec) < 787ccd979bdSMark Fasheh le16_to_cpu(el->l_count)) { 788ccd979bdSMark Fasheh if (lowest_bh) 789ccd979bdSMark Fasheh brelse(lowest_bh); 790ccd979bdSMark Fasheh lowest_bh = bh; 791ccd979bdSMark Fasheh get_bh(lowest_bh); 
792ccd979bdSMark Fasheh } 793ccd979bdSMark Fasheh } 794ccd979bdSMark Fasheh 795ccd979bdSMark Fasheh /* If we didn't find one and the fe doesn't have any room, 796ccd979bdSMark Fasheh * then return '1' */ 797ccd979bdSMark Fasheh if (!lowest_bh 798ccd979bdSMark Fasheh && (fe->id2.i_list.l_next_free_rec == fe->id2.i_list.l_count)) 799ccd979bdSMark Fasheh status = 1; 800ccd979bdSMark Fasheh 801ccd979bdSMark Fasheh *target_bh = lowest_bh; 802ccd979bdSMark Fasheh bail: 803ccd979bdSMark Fasheh if (bh) 804ccd979bdSMark Fasheh brelse(bh); 805ccd979bdSMark Fasheh 806ccd979bdSMark Fasheh mlog_exit(status); 807ccd979bdSMark Fasheh return status; 808ccd979bdSMark Fasheh } 809ccd979bdSMark Fasheh 810ccd979bdSMark Fasheh /* the caller needs to update fe->i_clusters */ 811ccd979bdSMark Fasheh int ocfs2_insert_extent(struct ocfs2_super *osb, 812ccd979bdSMark Fasheh struct ocfs2_journal_handle *handle, 813ccd979bdSMark Fasheh struct inode *inode, 814ccd979bdSMark Fasheh struct buffer_head *fe_bh, 815ccd979bdSMark Fasheh u64 start_blk, 816ccd979bdSMark Fasheh u32 new_clusters, 817ccd979bdSMark Fasheh struct ocfs2_alloc_context *meta_ac) 818ccd979bdSMark Fasheh { 819ccd979bdSMark Fasheh int status, i, shift; 820ccd979bdSMark Fasheh struct buffer_head *last_eb_bh = NULL; 821ccd979bdSMark Fasheh struct buffer_head *bh = NULL; 822ccd979bdSMark Fasheh struct ocfs2_dinode *fe; 823ccd979bdSMark Fasheh struct ocfs2_extent_block *eb; 824ccd979bdSMark Fasheh struct ocfs2_extent_list *el; 825ccd979bdSMark Fasheh 826ccd979bdSMark Fasheh mlog_entry_void(); 827ccd979bdSMark Fasheh 828b0697053SMark Fasheh mlog(0, "add %u clusters starting at block %llu to inode %llu\n", 829b0697053SMark Fasheh new_clusters, (unsigned long long)start_blk, 830b0697053SMark Fasheh (unsigned long long)OCFS2_I(inode)->ip_blkno); 831ccd979bdSMark Fasheh 832ccd979bdSMark Fasheh fe = (struct ocfs2_dinode *) fe_bh->b_data; 833ccd979bdSMark Fasheh el = &fe->id2.i_list; 834ccd979bdSMark Fasheh 835ccd979bdSMark Fasheh if 
(el->l_tree_depth) { 836ccd979bdSMark Fasheh /* jump to end of tree */ 837ccd979bdSMark Fasheh status = ocfs2_read_block(osb, le64_to_cpu(fe->i_last_eb_blk), 838ccd979bdSMark Fasheh &last_eb_bh, OCFS2_BH_CACHED, inode); 839ccd979bdSMark Fasheh if (status < 0) { 840ccd979bdSMark Fasheh mlog_exit(status); 841ccd979bdSMark Fasheh goto bail; 842ccd979bdSMark Fasheh } 843ccd979bdSMark Fasheh eb = (struct ocfs2_extent_block *) last_eb_bh->b_data; 844ccd979bdSMark Fasheh el = &eb->h_list; 845ccd979bdSMark Fasheh } 846ccd979bdSMark Fasheh 847ccd979bdSMark Fasheh /* Can we allocate without adding/shifting tree bits? */ 848ccd979bdSMark Fasheh i = le16_to_cpu(el->l_next_free_rec) - 1; 849ccd979bdSMark Fasheh if (le16_to_cpu(el->l_next_free_rec) == 0 850ccd979bdSMark Fasheh || (le16_to_cpu(el->l_next_free_rec) < le16_to_cpu(el->l_count)) 851ccd979bdSMark Fasheh || le32_to_cpu(el->l_recs[i].e_clusters) == 0 852ccd979bdSMark Fasheh || ocfs2_extent_contig(inode, &el->l_recs[i], start_blk)) 853ccd979bdSMark Fasheh goto out_add; 854ccd979bdSMark Fasheh 855ccd979bdSMark Fasheh mlog(0, "ocfs2_allocate_extent: couldn't do a simple add, traversing " 856ccd979bdSMark Fasheh "tree now.\n"); 857ccd979bdSMark Fasheh 858ccd979bdSMark Fasheh shift = ocfs2_find_branch_target(osb, inode, fe_bh, &bh); 859ccd979bdSMark Fasheh if (shift < 0) { 860ccd979bdSMark Fasheh status = shift; 861ccd979bdSMark Fasheh mlog_errno(status); 862ccd979bdSMark Fasheh goto bail; 863ccd979bdSMark Fasheh } 864ccd979bdSMark Fasheh 865ccd979bdSMark Fasheh /* We traveled all the way to the bottom of the allocation tree 866ccd979bdSMark Fasheh * and didn't find room for any more extents - we need to add 867ccd979bdSMark Fasheh * another tree level */ 868ccd979bdSMark Fasheh if (shift) { 869ccd979bdSMark Fasheh /* if we hit a leaf, we'd better be empty :) */ 870ccd979bdSMark Fasheh BUG_ON(le16_to_cpu(el->l_next_free_rec) != 871ccd979bdSMark Fasheh le16_to_cpu(el->l_count)); 872ccd979bdSMark Fasheh BUG_ON(bh); 
873ccd979bdSMark Fasheh mlog(0, "ocfs2_allocate_extent: need to shift tree depth " 874ccd979bdSMark Fasheh "(current = %u)\n", 875ccd979bdSMark Fasheh le16_to_cpu(fe->id2.i_list.l_tree_depth)); 876ccd979bdSMark Fasheh 877ccd979bdSMark Fasheh /* ocfs2_shift_tree_depth will return us a buffer with 878ccd979bdSMark Fasheh * the new extent block (so we can pass that to 879ccd979bdSMark Fasheh * ocfs2_add_branch). */ 880ccd979bdSMark Fasheh status = ocfs2_shift_tree_depth(osb, handle, inode, fe_bh, 881ccd979bdSMark Fasheh meta_ac, &bh); 882ccd979bdSMark Fasheh if (status < 0) { 883ccd979bdSMark Fasheh mlog_errno(status); 884ccd979bdSMark Fasheh goto bail; 885ccd979bdSMark Fasheh } 886ccd979bdSMark Fasheh /* Special case: we have room now if we shifted from 887ccd979bdSMark Fasheh * tree_depth 0 */ 888ccd979bdSMark Fasheh if (fe->id2.i_list.l_tree_depth == cpu_to_le16(1)) 889ccd979bdSMark Fasheh goto out_add; 890ccd979bdSMark Fasheh } 891ccd979bdSMark Fasheh 892ccd979bdSMark Fasheh /* call ocfs2_add_branch to add the final part of the tree with 893ccd979bdSMark Fasheh * the new data. */ 894ccd979bdSMark Fasheh mlog(0, "ocfs2_allocate_extent: add branch. bh = %p\n", bh); 895ccd979bdSMark Fasheh status = ocfs2_add_branch(osb, handle, inode, fe_bh, bh, last_eb_bh, 896ccd979bdSMark Fasheh meta_ac); 897ccd979bdSMark Fasheh if (status < 0) { 898ccd979bdSMark Fasheh mlog_errno(status); 899ccd979bdSMark Fasheh goto bail; 900ccd979bdSMark Fasheh } 901ccd979bdSMark Fasheh 902ccd979bdSMark Fasheh out_add: 903ccd979bdSMark Fasheh /* Finally, we can add clusters. 
*/ 904ccd979bdSMark Fasheh status = ocfs2_do_insert_extent(osb, handle, inode, fe_bh, 905ccd979bdSMark Fasheh start_blk, new_clusters); 906ccd979bdSMark Fasheh if (status < 0) 907ccd979bdSMark Fasheh mlog_errno(status); 908ccd979bdSMark Fasheh 909ccd979bdSMark Fasheh bail: 910ccd979bdSMark Fasheh if (bh) 911ccd979bdSMark Fasheh brelse(bh); 912ccd979bdSMark Fasheh 913ccd979bdSMark Fasheh if (last_eb_bh) 914ccd979bdSMark Fasheh brelse(last_eb_bh); 915ccd979bdSMark Fasheh 916ccd979bdSMark Fasheh mlog_exit(status); 917ccd979bdSMark Fasheh return status; 918ccd979bdSMark Fasheh } 919ccd979bdSMark Fasheh 920ccd979bdSMark Fasheh static inline int ocfs2_truncate_log_needs_flush(struct ocfs2_super *osb) 921ccd979bdSMark Fasheh { 922ccd979bdSMark Fasheh struct buffer_head *tl_bh = osb->osb_tl_bh; 923ccd979bdSMark Fasheh struct ocfs2_dinode *di; 924ccd979bdSMark Fasheh struct ocfs2_truncate_log *tl; 925ccd979bdSMark Fasheh 926ccd979bdSMark Fasheh di = (struct ocfs2_dinode *) tl_bh->b_data; 927ccd979bdSMark Fasheh tl = &di->id2.i_dealloc; 928ccd979bdSMark Fasheh 929ccd979bdSMark Fasheh mlog_bug_on_msg(le16_to_cpu(tl->tl_used) > le16_to_cpu(tl->tl_count), 930ccd979bdSMark Fasheh "slot %d, invalid truncate log parameters: used = " 931ccd979bdSMark Fasheh "%u, count = %u\n", osb->slot_num, 932ccd979bdSMark Fasheh le16_to_cpu(tl->tl_used), le16_to_cpu(tl->tl_count)); 933ccd979bdSMark Fasheh return le16_to_cpu(tl->tl_used) == le16_to_cpu(tl->tl_count); 934ccd979bdSMark Fasheh } 935ccd979bdSMark Fasheh 936ccd979bdSMark Fasheh static int ocfs2_truncate_log_can_coalesce(struct ocfs2_truncate_log *tl, 937ccd979bdSMark Fasheh unsigned int new_start) 938ccd979bdSMark Fasheh { 939ccd979bdSMark Fasheh unsigned int tail_index; 940ccd979bdSMark Fasheh unsigned int current_tail; 941ccd979bdSMark Fasheh 942ccd979bdSMark Fasheh /* No records, nothing to coalesce */ 943ccd979bdSMark Fasheh if (!le16_to_cpu(tl->tl_used)) 944ccd979bdSMark Fasheh return 0; 945ccd979bdSMark Fasheh 946ccd979bdSMark 
Fasheh tail_index = le16_to_cpu(tl->tl_used) - 1; 947ccd979bdSMark Fasheh current_tail = le32_to_cpu(tl->tl_recs[tail_index].t_start); 948ccd979bdSMark Fasheh current_tail += le32_to_cpu(tl->tl_recs[tail_index].t_clusters); 949ccd979bdSMark Fasheh 950ccd979bdSMark Fasheh return current_tail == new_start; 951ccd979bdSMark Fasheh } 952ccd979bdSMark Fasheh 953ccd979bdSMark Fasheh static int ocfs2_truncate_log_append(struct ocfs2_super *osb, 954ccd979bdSMark Fasheh struct ocfs2_journal_handle *handle, 955ccd979bdSMark Fasheh u64 start_blk, 956ccd979bdSMark Fasheh unsigned int num_clusters) 957ccd979bdSMark Fasheh { 958ccd979bdSMark Fasheh int status, index; 959ccd979bdSMark Fasheh unsigned int start_cluster, tl_count; 960ccd979bdSMark Fasheh struct inode *tl_inode = osb->osb_tl_inode; 961ccd979bdSMark Fasheh struct buffer_head *tl_bh = osb->osb_tl_bh; 962ccd979bdSMark Fasheh struct ocfs2_dinode *di; 963ccd979bdSMark Fasheh struct ocfs2_truncate_log *tl; 964ccd979bdSMark Fasheh 965b0697053SMark Fasheh mlog_entry("start_blk = %llu, num_clusters = %u\n", 966b0697053SMark Fasheh (unsigned long long)start_blk, num_clusters); 967ccd979bdSMark Fasheh 9681b1dcc1bSJes Sorensen BUG_ON(mutex_trylock(&tl_inode->i_mutex)); 969ccd979bdSMark Fasheh 970ccd979bdSMark Fasheh start_cluster = ocfs2_blocks_to_clusters(osb->sb, start_blk); 971ccd979bdSMark Fasheh 972ccd979bdSMark Fasheh di = (struct ocfs2_dinode *) tl_bh->b_data; 973ccd979bdSMark Fasheh tl = &di->id2.i_dealloc; 974ccd979bdSMark Fasheh if (!OCFS2_IS_VALID_DINODE(di)) { 975ccd979bdSMark Fasheh OCFS2_RO_ON_INVALID_DINODE(osb->sb, di); 976ccd979bdSMark Fasheh status = -EIO; 977ccd979bdSMark Fasheh goto bail; 978ccd979bdSMark Fasheh } 979ccd979bdSMark Fasheh 980ccd979bdSMark Fasheh tl_count = le16_to_cpu(tl->tl_count); 981ccd979bdSMark Fasheh mlog_bug_on_msg(tl_count > ocfs2_truncate_recs_per_inode(osb->sb) || 982ccd979bdSMark Fasheh tl_count == 0, 983b0697053SMark Fasheh "Truncate record count on #%llu invalid " 
984b0697053SMark Fasheh "wanted %u, actual %u\n", 985b0697053SMark Fasheh (unsigned long long)OCFS2_I(tl_inode)->ip_blkno, 986ccd979bdSMark Fasheh ocfs2_truncate_recs_per_inode(osb->sb), 987ccd979bdSMark Fasheh le16_to_cpu(tl->tl_count)); 988ccd979bdSMark Fasheh 989ccd979bdSMark Fasheh /* Caller should have known to flush before calling us. */ 990ccd979bdSMark Fasheh index = le16_to_cpu(tl->tl_used); 991ccd979bdSMark Fasheh if (index >= tl_count) { 992ccd979bdSMark Fasheh status = -ENOSPC; 993ccd979bdSMark Fasheh mlog_errno(status); 994ccd979bdSMark Fasheh goto bail; 995ccd979bdSMark Fasheh } 996ccd979bdSMark Fasheh 997ccd979bdSMark Fasheh status = ocfs2_journal_access(handle, tl_inode, tl_bh, 998ccd979bdSMark Fasheh OCFS2_JOURNAL_ACCESS_WRITE); 999ccd979bdSMark Fasheh if (status < 0) { 1000ccd979bdSMark Fasheh mlog_errno(status); 1001ccd979bdSMark Fasheh goto bail; 1002ccd979bdSMark Fasheh } 1003ccd979bdSMark Fasheh 1004ccd979bdSMark Fasheh mlog(0, "Log truncate of %u clusters starting at cluster %u to " 1005b0697053SMark Fasheh "%llu (index = %d)\n", num_clusters, start_cluster, 1006b0697053SMark Fasheh (unsigned long long)OCFS2_I(tl_inode)->ip_blkno, index); 1007ccd979bdSMark Fasheh 1008ccd979bdSMark Fasheh if (ocfs2_truncate_log_can_coalesce(tl, start_cluster)) { 1009ccd979bdSMark Fasheh /* 1010ccd979bdSMark Fasheh * Move index back to the record we are coalescing with. 
1011ccd979bdSMark Fasheh * ocfs2_truncate_log_can_coalesce() guarantees nonzero 1012ccd979bdSMark Fasheh */ 1013ccd979bdSMark Fasheh index--; 1014ccd979bdSMark Fasheh 1015ccd979bdSMark Fasheh num_clusters += le32_to_cpu(tl->tl_recs[index].t_clusters); 1016ccd979bdSMark Fasheh mlog(0, "Coalesce with index %u (start = %u, clusters = %u)\n", 1017ccd979bdSMark Fasheh index, le32_to_cpu(tl->tl_recs[index].t_start), 1018ccd979bdSMark Fasheh num_clusters); 1019ccd979bdSMark Fasheh } else { 1020ccd979bdSMark Fasheh tl->tl_recs[index].t_start = cpu_to_le32(start_cluster); 1021ccd979bdSMark Fasheh tl->tl_used = cpu_to_le16(index + 1); 1022ccd979bdSMark Fasheh } 1023ccd979bdSMark Fasheh tl->tl_recs[index].t_clusters = cpu_to_le32(num_clusters); 1024ccd979bdSMark Fasheh 1025ccd979bdSMark Fasheh status = ocfs2_journal_dirty(handle, tl_bh); 1026ccd979bdSMark Fasheh if (status < 0) { 1027ccd979bdSMark Fasheh mlog_errno(status); 1028ccd979bdSMark Fasheh goto bail; 1029ccd979bdSMark Fasheh } 1030ccd979bdSMark Fasheh 1031ccd979bdSMark Fasheh bail: 1032ccd979bdSMark Fasheh mlog_exit(status); 1033ccd979bdSMark Fasheh return status; 1034ccd979bdSMark Fasheh } 1035ccd979bdSMark Fasheh 1036ccd979bdSMark Fasheh static int ocfs2_replay_truncate_records(struct ocfs2_super *osb, 1037ccd979bdSMark Fasheh struct ocfs2_journal_handle *handle, 1038ccd979bdSMark Fasheh struct inode *data_alloc_inode, 1039ccd979bdSMark Fasheh struct buffer_head *data_alloc_bh) 1040ccd979bdSMark Fasheh { 1041ccd979bdSMark Fasheh int status = 0; 1042ccd979bdSMark Fasheh int i; 1043ccd979bdSMark Fasheh unsigned int num_clusters; 1044ccd979bdSMark Fasheh u64 start_blk; 1045ccd979bdSMark Fasheh struct ocfs2_truncate_rec rec; 1046ccd979bdSMark Fasheh struct ocfs2_dinode *di; 1047ccd979bdSMark Fasheh struct ocfs2_truncate_log *tl; 1048ccd979bdSMark Fasheh struct inode *tl_inode = osb->osb_tl_inode; 1049ccd979bdSMark Fasheh struct buffer_head *tl_bh = osb->osb_tl_bh; 1050ccd979bdSMark Fasheh 1051ccd979bdSMark Fasheh 
mlog_entry_void(); 1052ccd979bdSMark Fasheh 1053ccd979bdSMark Fasheh di = (struct ocfs2_dinode *) tl_bh->b_data; 1054ccd979bdSMark Fasheh tl = &di->id2.i_dealloc; 1055ccd979bdSMark Fasheh i = le16_to_cpu(tl->tl_used) - 1; 1056ccd979bdSMark Fasheh while (i >= 0) { 1057ccd979bdSMark Fasheh /* Caller has given us at least enough credits to 1058ccd979bdSMark Fasheh * update the truncate log dinode */ 1059ccd979bdSMark Fasheh status = ocfs2_journal_access(handle, tl_inode, tl_bh, 1060ccd979bdSMark Fasheh OCFS2_JOURNAL_ACCESS_WRITE); 1061ccd979bdSMark Fasheh if (status < 0) { 1062ccd979bdSMark Fasheh mlog_errno(status); 1063ccd979bdSMark Fasheh goto bail; 1064ccd979bdSMark Fasheh } 1065ccd979bdSMark Fasheh 1066ccd979bdSMark Fasheh tl->tl_used = cpu_to_le16(i); 1067ccd979bdSMark Fasheh 1068ccd979bdSMark Fasheh status = ocfs2_journal_dirty(handle, tl_bh); 1069ccd979bdSMark Fasheh if (status < 0) { 1070ccd979bdSMark Fasheh mlog_errno(status); 1071ccd979bdSMark Fasheh goto bail; 1072ccd979bdSMark Fasheh } 1073ccd979bdSMark Fasheh 1074ccd979bdSMark Fasheh /* TODO: Perhaps we can calculate the bulk of the 1075ccd979bdSMark Fasheh * credits up front rather than extending like 1076ccd979bdSMark Fasheh * this. */ 10771fc58146SMark Fasheh status = ocfs2_extend_trans(handle->k_handle, 1078ccd979bdSMark Fasheh OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC); 1079ccd979bdSMark Fasheh if (status < 0) { 1080ccd979bdSMark Fasheh mlog_errno(status); 1081ccd979bdSMark Fasheh goto bail; 1082ccd979bdSMark Fasheh } 1083ccd979bdSMark Fasheh 1084ccd979bdSMark Fasheh rec = tl->tl_recs[i]; 1085ccd979bdSMark Fasheh start_blk = ocfs2_clusters_to_blocks(data_alloc_inode->i_sb, 1086ccd979bdSMark Fasheh le32_to_cpu(rec.t_start)); 1087ccd979bdSMark Fasheh num_clusters = le32_to_cpu(rec.t_clusters); 1088ccd979bdSMark Fasheh 1089ccd979bdSMark Fasheh /* if start_blk is not set, we ignore the record as 1090ccd979bdSMark Fasheh * invalid. 
*/ 1091ccd979bdSMark Fasheh if (start_blk) { 1092ccd979bdSMark Fasheh mlog(0, "free record %d, start = %u, clusters = %u\n", 1093ccd979bdSMark Fasheh i, le32_to_cpu(rec.t_start), num_clusters); 1094ccd979bdSMark Fasheh 1095ccd979bdSMark Fasheh status = ocfs2_free_clusters(handle, data_alloc_inode, 1096ccd979bdSMark Fasheh data_alloc_bh, start_blk, 1097ccd979bdSMark Fasheh num_clusters); 1098ccd979bdSMark Fasheh if (status < 0) { 1099ccd979bdSMark Fasheh mlog_errno(status); 1100ccd979bdSMark Fasheh goto bail; 1101ccd979bdSMark Fasheh } 1102ccd979bdSMark Fasheh } 1103ccd979bdSMark Fasheh i--; 1104ccd979bdSMark Fasheh } 1105ccd979bdSMark Fasheh 1106ccd979bdSMark Fasheh bail: 1107ccd979bdSMark Fasheh mlog_exit(status); 1108ccd979bdSMark Fasheh return status; 1109ccd979bdSMark Fasheh } 1110ccd979bdSMark Fasheh 11111b1dcc1bSJes Sorensen /* Expects you to already be holding tl_inode->i_mutex */ 1112ccd979bdSMark Fasheh static int __ocfs2_flush_truncate_log(struct ocfs2_super *osb) 1113ccd979bdSMark Fasheh { 1114ccd979bdSMark Fasheh int status; 1115ccd979bdSMark Fasheh unsigned int num_to_flush; 1116e08dc8b9SMark Fasheh struct ocfs2_journal_handle *handle; 1117ccd979bdSMark Fasheh struct inode *tl_inode = osb->osb_tl_inode; 1118ccd979bdSMark Fasheh struct inode *data_alloc_inode = NULL; 1119ccd979bdSMark Fasheh struct buffer_head *tl_bh = osb->osb_tl_bh; 1120ccd979bdSMark Fasheh struct buffer_head *data_alloc_bh = NULL; 1121ccd979bdSMark Fasheh struct ocfs2_dinode *di; 1122ccd979bdSMark Fasheh struct ocfs2_truncate_log *tl; 1123ccd979bdSMark Fasheh 1124ccd979bdSMark Fasheh mlog_entry_void(); 1125ccd979bdSMark Fasheh 11261b1dcc1bSJes Sorensen BUG_ON(mutex_trylock(&tl_inode->i_mutex)); 1127ccd979bdSMark Fasheh 1128ccd979bdSMark Fasheh di = (struct ocfs2_dinode *) tl_bh->b_data; 1129ccd979bdSMark Fasheh tl = &di->id2.i_dealloc; 1130ccd979bdSMark Fasheh if (!OCFS2_IS_VALID_DINODE(di)) { 1131ccd979bdSMark Fasheh OCFS2_RO_ON_INVALID_DINODE(osb->sb, di); 1132ccd979bdSMark Fasheh 
status = -EIO; 1133e08dc8b9SMark Fasheh goto out; 1134ccd979bdSMark Fasheh } 1135ccd979bdSMark Fasheh 1136ccd979bdSMark Fasheh num_to_flush = le16_to_cpu(tl->tl_used); 1137b0697053SMark Fasheh mlog(0, "Flush %u records from truncate log #%llu\n", 1138b0697053SMark Fasheh num_to_flush, (unsigned long long)OCFS2_I(tl_inode)->ip_blkno); 1139ccd979bdSMark Fasheh if (!num_to_flush) { 1140ccd979bdSMark Fasheh status = 0; 1141e08dc8b9SMark Fasheh goto out; 1142ccd979bdSMark Fasheh } 1143ccd979bdSMark Fasheh 1144ccd979bdSMark Fasheh data_alloc_inode = ocfs2_get_system_file_inode(osb, 1145ccd979bdSMark Fasheh GLOBAL_BITMAP_SYSTEM_INODE, 1146ccd979bdSMark Fasheh OCFS2_INVALID_SLOT); 1147ccd979bdSMark Fasheh if (!data_alloc_inode) { 1148ccd979bdSMark Fasheh status = -EINVAL; 1149ccd979bdSMark Fasheh mlog(ML_ERROR, "Could not get bitmap inode!\n"); 1150e08dc8b9SMark Fasheh goto out; 1151ccd979bdSMark Fasheh } 1152ccd979bdSMark Fasheh 1153e08dc8b9SMark Fasheh mutex_lock(&data_alloc_inode->i_mutex); 1154e08dc8b9SMark Fasheh 11554bcec184SMark Fasheh status = ocfs2_meta_lock(data_alloc_inode, &data_alloc_bh, 1); 1156ccd979bdSMark Fasheh if (status < 0) { 1157ccd979bdSMark Fasheh mlog_errno(status); 1158e08dc8b9SMark Fasheh goto out_mutex; 1159ccd979bdSMark Fasheh } 1160ccd979bdSMark Fasheh 1161e08dc8b9SMark Fasheh handle = ocfs2_start_trans(osb, NULL, OCFS2_TRUNCATE_LOG_UPDATE); 1162ccd979bdSMark Fasheh if (IS_ERR(handle)) { 1163ccd979bdSMark Fasheh status = PTR_ERR(handle); 1164ccd979bdSMark Fasheh mlog_errno(status); 1165e08dc8b9SMark Fasheh goto out_unlock; 1166ccd979bdSMark Fasheh } 1167ccd979bdSMark Fasheh 1168ccd979bdSMark Fasheh status = ocfs2_replay_truncate_records(osb, handle, data_alloc_inode, 1169ccd979bdSMark Fasheh data_alloc_bh); 1170e08dc8b9SMark Fasheh if (status < 0) 1171ccd979bdSMark Fasheh mlog_errno(status); 1172ccd979bdSMark Fasheh 1173*02dc1af4SMark Fasheh ocfs2_commit_trans(osb, handle); 1174ccd979bdSMark Fasheh 1175e08dc8b9SMark Fasheh out_unlock: 
1176e08dc8b9SMark Fasheh brelse(data_alloc_bh); 1177e08dc8b9SMark Fasheh ocfs2_meta_unlock(data_alloc_inode, 1); 1178e08dc8b9SMark Fasheh 1179e08dc8b9SMark Fasheh out_mutex: 1180e08dc8b9SMark Fasheh mutex_unlock(&data_alloc_inode->i_mutex); 1181ccd979bdSMark Fasheh iput(data_alloc_inode); 1182ccd979bdSMark Fasheh 1183e08dc8b9SMark Fasheh out: 1184ccd979bdSMark Fasheh mlog_exit(status); 1185ccd979bdSMark Fasheh return status; 1186ccd979bdSMark Fasheh } 1187ccd979bdSMark Fasheh 1188ccd979bdSMark Fasheh int ocfs2_flush_truncate_log(struct ocfs2_super *osb) 1189ccd979bdSMark Fasheh { 1190ccd979bdSMark Fasheh int status; 1191ccd979bdSMark Fasheh struct inode *tl_inode = osb->osb_tl_inode; 1192ccd979bdSMark Fasheh 11931b1dcc1bSJes Sorensen mutex_lock(&tl_inode->i_mutex); 1194ccd979bdSMark Fasheh status = __ocfs2_flush_truncate_log(osb); 11951b1dcc1bSJes Sorensen mutex_unlock(&tl_inode->i_mutex); 1196ccd979bdSMark Fasheh 1197ccd979bdSMark Fasheh return status; 1198ccd979bdSMark Fasheh } 1199ccd979bdSMark Fasheh 1200ccd979bdSMark Fasheh static void ocfs2_truncate_log_worker(void *data) 1201ccd979bdSMark Fasheh { 1202ccd979bdSMark Fasheh int status; 1203ccd979bdSMark Fasheh struct ocfs2_super *osb = data; 1204ccd979bdSMark Fasheh 1205ccd979bdSMark Fasheh mlog_entry_void(); 1206ccd979bdSMark Fasheh 1207ccd979bdSMark Fasheh status = ocfs2_flush_truncate_log(osb); 1208ccd979bdSMark Fasheh if (status < 0) 1209ccd979bdSMark Fasheh mlog_errno(status); 1210ccd979bdSMark Fasheh 1211ccd979bdSMark Fasheh mlog_exit(status); 1212ccd979bdSMark Fasheh } 1213ccd979bdSMark Fasheh 1214ccd979bdSMark Fasheh #define OCFS2_TRUNCATE_LOG_FLUSH_INTERVAL (2 * HZ) 1215ccd979bdSMark Fasheh void ocfs2_schedule_truncate_log_flush(struct ocfs2_super *osb, 1216ccd979bdSMark Fasheh int cancel) 1217ccd979bdSMark Fasheh { 1218ccd979bdSMark Fasheh if (osb->osb_tl_inode) { 1219ccd979bdSMark Fasheh /* We want to push off log flushes while truncates are 1220ccd979bdSMark Fasheh * still running. 
*/ 1221ccd979bdSMark Fasheh if (cancel) 1222ccd979bdSMark Fasheh cancel_delayed_work(&osb->osb_truncate_log_wq); 1223ccd979bdSMark Fasheh 1224ccd979bdSMark Fasheh queue_delayed_work(ocfs2_wq, &osb->osb_truncate_log_wq, 1225ccd979bdSMark Fasheh OCFS2_TRUNCATE_LOG_FLUSH_INTERVAL); 1226ccd979bdSMark Fasheh } 1227ccd979bdSMark Fasheh } 1228ccd979bdSMark Fasheh 1229ccd979bdSMark Fasheh static int ocfs2_get_truncate_log_info(struct ocfs2_super *osb, 1230ccd979bdSMark Fasheh int slot_num, 1231ccd979bdSMark Fasheh struct inode **tl_inode, 1232ccd979bdSMark Fasheh struct buffer_head **tl_bh) 1233ccd979bdSMark Fasheh { 1234ccd979bdSMark Fasheh int status; 1235ccd979bdSMark Fasheh struct inode *inode = NULL; 1236ccd979bdSMark Fasheh struct buffer_head *bh = NULL; 1237ccd979bdSMark Fasheh 1238ccd979bdSMark Fasheh inode = ocfs2_get_system_file_inode(osb, 1239ccd979bdSMark Fasheh TRUNCATE_LOG_SYSTEM_INODE, 1240ccd979bdSMark Fasheh slot_num); 1241ccd979bdSMark Fasheh if (!inode) { 1242ccd979bdSMark Fasheh status = -EINVAL; 1243ccd979bdSMark Fasheh mlog(ML_ERROR, "Could not get load truncate log inode!\n"); 1244ccd979bdSMark Fasheh goto bail; 1245ccd979bdSMark Fasheh } 1246ccd979bdSMark Fasheh 1247ccd979bdSMark Fasheh status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &bh, 1248ccd979bdSMark Fasheh OCFS2_BH_CACHED, inode); 1249ccd979bdSMark Fasheh if (status < 0) { 1250ccd979bdSMark Fasheh iput(inode); 1251ccd979bdSMark Fasheh mlog_errno(status); 1252ccd979bdSMark Fasheh goto bail; 1253ccd979bdSMark Fasheh } 1254ccd979bdSMark Fasheh 1255ccd979bdSMark Fasheh *tl_inode = inode; 1256ccd979bdSMark Fasheh *tl_bh = bh; 1257ccd979bdSMark Fasheh bail: 1258ccd979bdSMark Fasheh mlog_exit(status); 1259ccd979bdSMark Fasheh return status; 1260ccd979bdSMark Fasheh } 1261ccd979bdSMark Fasheh 1262ccd979bdSMark Fasheh /* called during the 1st stage of node recovery. we stamp a clean 1263ccd979bdSMark Fasheh * truncate log and pass back a copy for processing later. 
if the 1264ccd979bdSMark Fasheh * truncate log does not require processing, a *tl_copy is set to 1265ccd979bdSMark Fasheh * NULL. */ 1266ccd979bdSMark Fasheh int ocfs2_begin_truncate_log_recovery(struct ocfs2_super *osb, 1267ccd979bdSMark Fasheh int slot_num, 1268ccd979bdSMark Fasheh struct ocfs2_dinode **tl_copy) 1269ccd979bdSMark Fasheh { 1270ccd979bdSMark Fasheh int status; 1271ccd979bdSMark Fasheh struct inode *tl_inode = NULL; 1272ccd979bdSMark Fasheh struct buffer_head *tl_bh = NULL; 1273ccd979bdSMark Fasheh struct ocfs2_dinode *di; 1274ccd979bdSMark Fasheh struct ocfs2_truncate_log *tl; 1275ccd979bdSMark Fasheh 1276ccd979bdSMark Fasheh *tl_copy = NULL; 1277ccd979bdSMark Fasheh 1278ccd979bdSMark Fasheh mlog(0, "recover truncate log from slot %d\n", slot_num); 1279ccd979bdSMark Fasheh 1280ccd979bdSMark Fasheh status = ocfs2_get_truncate_log_info(osb, slot_num, &tl_inode, &tl_bh); 1281ccd979bdSMark Fasheh if (status < 0) { 1282ccd979bdSMark Fasheh mlog_errno(status); 1283ccd979bdSMark Fasheh goto bail; 1284ccd979bdSMark Fasheh } 1285ccd979bdSMark Fasheh 1286ccd979bdSMark Fasheh di = (struct ocfs2_dinode *) tl_bh->b_data; 1287ccd979bdSMark Fasheh tl = &di->id2.i_dealloc; 1288ccd979bdSMark Fasheh if (!OCFS2_IS_VALID_DINODE(di)) { 1289ccd979bdSMark Fasheh OCFS2_RO_ON_INVALID_DINODE(tl_inode->i_sb, di); 1290ccd979bdSMark Fasheh status = -EIO; 1291ccd979bdSMark Fasheh goto bail; 1292ccd979bdSMark Fasheh } 1293ccd979bdSMark Fasheh 1294ccd979bdSMark Fasheh if (le16_to_cpu(tl->tl_used)) { 1295ccd979bdSMark Fasheh mlog(0, "We'll have %u logs to recover\n", 1296ccd979bdSMark Fasheh le16_to_cpu(tl->tl_used)); 1297ccd979bdSMark Fasheh 1298ccd979bdSMark Fasheh *tl_copy = kmalloc(tl_bh->b_size, GFP_KERNEL); 1299ccd979bdSMark Fasheh if (!(*tl_copy)) { 1300ccd979bdSMark Fasheh status = -ENOMEM; 1301ccd979bdSMark Fasheh mlog_errno(status); 1302ccd979bdSMark Fasheh goto bail; 1303ccd979bdSMark Fasheh } 1304ccd979bdSMark Fasheh 1305ccd979bdSMark Fasheh /* Assuming the write-out 
below goes well, this copy 1306ccd979bdSMark Fasheh * will be passed back to recovery for processing. */ 1307ccd979bdSMark Fasheh memcpy(*tl_copy, tl_bh->b_data, tl_bh->b_size); 1308ccd979bdSMark Fasheh 1309ccd979bdSMark Fasheh /* All we need to do to clear the truncate log is set 1310ccd979bdSMark Fasheh * tl_used. */ 1311ccd979bdSMark Fasheh tl->tl_used = 0; 1312ccd979bdSMark Fasheh 1313ccd979bdSMark Fasheh status = ocfs2_write_block(osb, tl_bh, tl_inode); 1314ccd979bdSMark Fasheh if (status < 0) { 1315ccd979bdSMark Fasheh mlog_errno(status); 1316ccd979bdSMark Fasheh goto bail; 1317ccd979bdSMark Fasheh } 1318ccd979bdSMark Fasheh } 1319ccd979bdSMark Fasheh 1320ccd979bdSMark Fasheh bail: 1321ccd979bdSMark Fasheh if (tl_inode) 1322ccd979bdSMark Fasheh iput(tl_inode); 1323ccd979bdSMark Fasheh if (tl_bh) 1324ccd979bdSMark Fasheh brelse(tl_bh); 1325ccd979bdSMark Fasheh 1326ccd979bdSMark Fasheh if (status < 0 && (*tl_copy)) { 1327ccd979bdSMark Fasheh kfree(*tl_copy); 1328ccd979bdSMark Fasheh *tl_copy = NULL; 1329ccd979bdSMark Fasheh } 1330ccd979bdSMark Fasheh 1331ccd979bdSMark Fasheh mlog_exit(status); 1332ccd979bdSMark Fasheh return status; 1333ccd979bdSMark Fasheh } 1334ccd979bdSMark Fasheh 1335ccd979bdSMark Fasheh int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb, 1336ccd979bdSMark Fasheh struct ocfs2_dinode *tl_copy) 1337ccd979bdSMark Fasheh { 1338ccd979bdSMark Fasheh int status = 0; 1339ccd979bdSMark Fasheh int i; 1340ccd979bdSMark Fasheh unsigned int clusters, num_recs, start_cluster; 1341ccd979bdSMark Fasheh u64 start_blk; 1342ccd979bdSMark Fasheh struct ocfs2_journal_handle *handle; 1343ccd979bdSMark Fasheh struct inode *tl_inode = osb->osb_tl_inode; 1344ccd979bdSMark Fasheh struct ocfs2_truncate_log *tl; 1345ccd979bdSMark Fasheh 1346ccd979bdSMark Fasheh mlog_entry_void(); 1347ccd979bdSMark Fasheh 1348ccd979bdSMark Fasheh if (OCFS2_I(tl_inode)->ip_blkno == le64_to_cpu(tl_copy->i_blkno)) { 1349ccd979bdSMark Fasheh mlog(ML_ERROR, "Asked to recover 
my own truncate log!\n"); 1350ccd979bdSMark Fasheh return -EINVAL; 1351ccd979bdSMark Fasheh } 1352ccd979bdSMark Fasheh 1353ccd979bdSMark Fasheh tl = &tl_copy->id2.i_dealloc; 1354ccd979bdSMark Fasheh num_recs = le16_to_cpu(tl->tl_used); 1355b0697053SMark Fasheh mlog(0, "cleanup %u records from %llu\n", num_recs, 1356b0697053SMark Fasheh (unsigned long long)tl_copy->i_blkno); 1357ccd979bdSMark Fasheh 13581b1dcc1bSJes Sorensen mutex_lock(&tl_inode->i_mutex); 1359ccd979bdSMark Fasheh for(i = 0; i < num_recs; i++) { 1360ccd979bdSMark Fasheh if (ocfs2_truncate_log_needs_flush(osb)) { 1361ccd979bdSMark Fasheh status = __ocfs2_flush_truncate_log(osb); 1362ccd979bdSMark Fasheh if (status < 0) { 1363ccd979bdSMark Fasheh mlog_errno(status); 1364ccd979bdSMark Fasheh goto bail_up; 1365ccd979bdSMark Fasheh } 1366ccd979bdSMark Fasheh } 1367ccd979bdSMark Fasheh 1368ccd979bdSMark Fasheh handle = ocfs2_start_trans(osb, NULL, 1369ccd979bdSMark Fasheh OCFS2_TRUNCATE_LOG_UPDATE); 1370ccd979bdSMark Fasheh if (IS_ERR(handle)) { 1371ccd979bdSMark Fasheh status = PTR_ERR(handle); 1372ccd979bdSMark Fasheh mlog_errno(status); 1373ccd979bdSMark Fasheh goto bail_up; 1374ccd979bdSMark Fasheh } 1375ccd979bdSMark Fasheh 1376ccd979bdSMark Fasheh clusters = le32_to_cpu(tl->tl_recs[i].t_clusters); 1377ccd979bdSMark Fasheh start_cluster = le32_to_cpu(tl->tl_recs[i].t_start); 1378ccd979bdSMark Fasheh start_blk = ocfs2_clusters_to_blocks(osb->sb, start_cluster); 1379ccd979bdSMark Fasheh 1380ccd979bdSMark Fasheh status = ocfs2_truncate_log_append(osb, handle, 1381ccd979bdSMark Fasheh start_blk, clusters); 1382*02dc1af4SMark Fasheh ocfs2_commit_trans(osb, handle); 1383ccd979bdSMark Fasheh if (status < 0) { 1384ccd979bdSMark Fasheh mlog_errno(status); 1385ccd979bdSMark Fasheh goto bail_up; 1386ccd979bdSMark Fasheh } 1387ccd979bdSMark Fasheh } 1388ccd979bdSMark Fasheh 1389ccd979bdSMark Fasheh bail_up: 13901b1dcc1bSJes Sorensen mutex_unlock(&tl_inode->i_mutex); 1391ccd979bdSMark Fasheh 1392ccd979bdSMark 
Fasheh mlog_exit(status); 1393ccd979bdSMark Fasheh return status; 1394ccd979bdSMark Fasheh } 1395ccd979bdSMark Fasheh 1396ccd979bdSMark Fasheh void ocfs2_truncate_log_shutdown(struct ocfs2_super *osb) 1397ccd979bdSMark Fasheh { 1398ccd979bdSMark Fasheh int status; 1399ccd979bdSMark Fasheh struct inode *tl_inode = osb->osb_tl_inode; 1400ccd979bdSMark Fasheh 1401ccd979bdSMark Fasheh mlog_entry_void(); 1402ccd979bdSMark Fasheh 1403ccd979bdSMark Fasheh if (tl_inode) { 1404ccd979bdSMark Fasheh cancel_delayed_work(&osb->osb_truncate_log_wq); 1405ccd979bdSMark Fasheh flush_workqueue(ocfs2_wq); 1406ccd979bdSMark Fasheh 1407ccd979bdSMark Fasheh status = ocfs2_flush_truncate_log(osb); 1408ccd979bdSMark Fasheh if (status < 0) 1409ccd979bdSMark Fasheh mlog_errno(status); 1410ccd979bdSMark Fasheh 1411ccd979bdSMark Fasheh brelse(osb->osb_tl_bh); 1412ccd979bdSMark Fasheh iput(osb->osb_tl_inode); 1413ccd979bdSMark Fasheh } 1414ccd979bdSMark Fasheh 1415ccd979bdSMark Fasheh mlog_exit_void(); 1416ccd979bdSMark Fasheh } 1417ccd979bdSMark Fasheh 1418ccd979bdSMark Fasheh int ocfs2_truncate_log_init(struct ocfs2_super *osb) 1419ccd979bdSMark Fasheh { 1420ccd979bdSMark Fasheh int status; 1421ccd979bdSMark Fasheh struct inode *tl_inode = NULL; 1422ccd979bdSMark Fasheh struct buffer_head *tl_bh = NULL; 1423ccd979bdSMark Fasheh 1424ccd979bdSMark Fasheh mlog_entry_void(); 1425ccd979bdSMark Fasheh 1426ccd979bdSMark Fasheh status = ocfs2_get_truncate_log_info(osb, 1427ccd979bdSMark Fasheh osb->slot_num, 1428ccd979bdSMark Fasheh &tl_inode, 1429ccd979bdSMark Fasheh &tl_bh); 1430ccd979bdSMark Fasheh if (status < 0) 1431ccd979bdSMark Fasheh mlog_errno(status); 1432ccd979bdSMark Fasheh 1433ccd979bdSMark Fasheh /* ocfs2_truncate_log_shutdown keys on the existence of 1434ccd979bdSMark Fasheh * osb->osb_tl_inode so we don't set any of the osb variables 1435ccd979bdSMark Fasheh * until we're sure all is well. 
*/ 1436ccd979bdSMark Fasheh INIT_WORK(&osb->osb_truncate_log_wq, ocfs2_truncate_log_worker, osb); 1437ccd979bdSMark Fasheh osb->osb_tl_bh = tl_bh; 1438ccd979bdSMark Fasheh osb->osb_tl_inode = tl_inode; 1439ccd979bdSMark Fasheh 1440ccd979bdSMark Fasheh mlog_exit(status); 1441ccd979bdSMark Fasheh return status; 1442ccd979bdSMark Fasheh } 1443ccd979bdSMark Fasheh 1444ccd979bdSMark Fasheh /* This function will figure out whether the currently last extent 1445ccd979bdSMark Fasheh * block will be deleted, and if it will, what the new last extent 1446ccd979bdSMark Fasheh * block will be so we can update his h_next_leaf_blk field, as well 1447ccd979bdSMark Fasheh * as the dinodes i_last_eb_blk */ 1448ccd979bdSMark Fasheh static int ocfs2_find_new_last_ext_blk(struct ocfs2_super *osb, 1449ccd979bdSMark Fasheh struct inode *inode, 1450ccd979bdSMark Fasheh struct ocfs2_dinode *fe, 1451ccd979bdSMark Fasheh u32 new_i_clusters, 1452ccd979bdSMark Fasheh struct buffer_head *old_last_eb, 1453ccd979bdSMark Fasheh struct buffer_head **new_last_eb) 1454ccd979bdSMark Fasheh { 1455ccd979bdSMark Fasheh int i, status = 0; 1456ccd979bdSMark Fasheh u64 block = 0; 1457ccd979bdSMark Fasheh struct ocfs2_extent_block *eb; 1458ccd979bdSMark Fasheh struct ocfs2_extent_list *el; 1459ccd979bdSMark Fasheh struct buffer_head *bh = NULL; 1460ccd979bdSMark Fasheh 1461ccd979bdSMark Fasheh *new_last_eb = NULL; 1462ccd979bdSMark Fasheh 1463ccd979bdSMark Fasheh if (!OCFS2_IS_VALID_DINODE(fe)) { 1464ccd979bdSMark Fasheh OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe); 1465ccd979bdSMark Fasheh status = -EIO; 1466ccd979bdSMark Fasheh goto bail; 1467ccd979bdSMark Fasheh } 1468ccd979bdSMark Fasheh 1469ccd979bdSMark Fasheh /* we have no tree, so of course, no last_eb. 
*/ 1470ccd979bdSMark Fasheh if (!fe->id2.i_list.l_tree_depth) 1471ccd979bdSMark Fasheh goto bail; 1472ccd979bdSMark Fasheh 1473ccd979bdSMark Fasheh /* trunc to zero special case - this makes tree_depth = 0 1474ccd979bdSMark Fasheh * regardless of what it is. */ 1475ccd979bdSMark Fasheh if (!new_i_clusters) 1476ccd979bdSMark Fasheh goto bail; 1477ccd979bdSMark Fasheh 1478ccd979bdSMark Fasheh eb = (struct ocfs2_extent_block *) old_last_eb->b_data; 1479ccd979bdSMark Fasheh el = &(eb->h_list); 1480ccd979bdSMark Fasheh BUG_ON(!el->l_next_free_rec); 1481ccd979bdSMark Fasheh 1482ccd979bdSMark Fasheh /* Make sure that this guy will actually be empty after we 1483ccd979bdSMark Fasheh * clear away the data. */ 1484ccd979bdSMark Fasheh if (le32_to_cpu(el->l_recs[0].e_cpos) < new_i_clusters) 1485ccd979bdSMark Fasheh goto bail; 1486ccd979bdSMark Fasheh 1487ccd979bdSMark Fasheh /* Ok, at this point, we know that last_eb will definitely 1488ccd979bdSMark Fasheh * change, so lets traverse the tree and find the second to 1489ccd979bdSMark Fasheh * last extent block. 
*/ 1490ccd979bdSMark Fasheh el = &(fe->id2.i_list); 1491ccd979bdSMark Fasheh /* go down the tree, */ 1492ccd979bdSMark Fasheh do { 1493ccd979bdSMark Fasheh for(i = (le16_to_cpu(el->l_next_free_rec) - 1); i >= 0; i--) { 1494ccd979bdSMark Fasheh if (le32_to_cpu(el->l_recs[i].e_cpos) < 1495ccd979bdSMark Fasheh new_i_clusters) { 1496ccd979bdSMark Fasheh block = le64_to_cpu(el->l_recs[i].e_blkno); 1497ccd979bdSMark Fasheh break; 1498ccd979bdSMark Fasheh } 1499ccd979bdSMark Fasheh } 1500ccd979bdSMark Fasheh BUG_ON(i < 0); 1501ccd979bdSMark Fasheh 1502ccd979bdSMark Fasheh if (bh) { 1503ccd979bdSMark Fasheh brelse(bh); 1504ccd979bdSMark Fasheh bh = NULL; 1505ccd979bdSMark Fasheh } 1506ccd979bdSMark Fasheh 1507ccd979bdSMark Fasheh status = ocfs2_read_block(osb, block, &bh, OCFS2_BH_CACHED, 1508ccd979bdSMark Fasheh inode); 1509ccd979bdSMark Fasheh if (status < 0) { 1510ccd979bdSMark Fasheh mlog_errno(status); 1511ccd979bdSMark Fasheh goto bail; 1512ccd979bdSMark Fasheh } 1513ccd979bdSMark Fasheh eb = (struct ocfs2_extent_block *) bh->b_data; 1514ccd979bdSMark Fasheh el = &eb->h_list; 1515ccd979bdSMark Fasheh if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { 1516ccd979bdSMark Fasheh OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); 1517ccd979bdSMark Fasheh status = -EIO; 1518ccd979bdSMark Fasheh goto bail; 1519ccd979bdSMark Fasheh } 1520ccd979bdSMark Fasheh } while (el->l_tree_depth); 1521ccd979bdSMark Fasheh 1522ccd979bdSMark Fasheh *new_last_eb = bh; 1523ccd979bdSMark Fasheh get_bh(*new_last_eb); 1524b0697053SMark Fasheh mlog(0, "returning block %llu\n", 1525b0697053SMark Fasheh (unsigned long long)le64_to_cpu(eb->h_blkno)); 1526ccd979bdSMark Fasheh bail: 1527ccd979bdSMark Fasheh if (bh) 1528ccd979bdSMark Fasheh brelse(bh); 1529ccd979bdSMark Fasheh 1530ccd979bdSMark Fasheh return status; 1531ccd979bdSMark Fasheh } 1532ccd979bdSMark Fasheh 1533ccd979bdSMark Fasheh static int ocfs2_do_truncate(struct ocfs2_super *osb, 1534ccd979bdSMark Fasheh unsigned int clusters_to_del, 
1535ccd979bdSMark Fasheh struct inode *inode, 1536ccd979bdSMark Fasheh struct buffer_head *fe_bh, 1537ccd979bdSMark Fasheh struct buffer_head *old_last_eb_bh, 1538ccd979bdSMark Fasheh struct ocfs2_journal_handle *handle, 1539ccd979bdSMark Fasheh struct ocfs2_truncate_context *tc) 1540ccd979bdSMark Fasheh { 1541ccd979bdSMark Fasheh int status, i, depth; 1542ccd979bdSMark Fasheh struct ocfs2_dinode *fe; 1543ccd979bdSMark Fasheh struct ocfs2_extent_block *eb; 1544ccd979bdSMark Fasheh struct ocfs2_extent_block *last_eb = NULL; 1545ccd979bdSMark Fasheh struct ocfs2_extent_list *el; 1546ccd979bdSMark Fasheh struct buffer_head *eb_bh = NULL; 1547ccd979bdSMark Fasheh struct buffer_head *last_eb_bh = NULL; 1548ccd979bdSMark Fasheh u64 next_eb = 0; 1549ccd979bdSMark Fasheh u64 delete_blk = 0; 1550ccd979bdSMark Fasheh 1551ccd979bdSMark Fasheh fe = (struct ocfs2_dinode *) fe_bh->b_data; 1552ccd979bdSMark Fasheh 1553ccd979bdSMark Fasheh status = ocfs2_find_new_last_ext_blk(osb, 1554ccd979bdSMark Fasheh inode, 1555ccd979bdSMark Fasheh fe, 1556ccd979bdSMark Fasheh le32_to_cpu(fe->i_clusters) - 1557ccd979bdSMark Fasheh clusters_to_del, 1558ccd979bdSMark Fasheh old_last_eb_bh, 1559ccd979bdSMark Fasheh &last_eb_bh); 1560ccd979bdSMark Fasheh if (status < 0) { 1561ccd979bdSMark Fasheh mlog_errno(status); 1562ccd979bdSMark Fasheh goto bail; 1563ccd979bdSMark Fasheh } 1564ccd979bdSMark Fasheh if (last_eb_bh) 1565ccd979bdSMark Fasheh last_eb = (struct ocfs2_extent_block *) last_eb_bh->b_data; 1566ccd979bdSMark Fasheh 1567ccd979bdSMark Fasheh status = ocfs2_journal_access(handle, inode, fe_bh, 1568ccd979bdSMark Fasheh OCFS2_JOURNAL_ACCESS_WRITE); 1569ccd979bdSMark Fasheh if (status < 0) { 1570ccd979bdSMark Fasheh mlog_errno(status); 1571ccd979bdSMark Fasheh goto bail; 1572ccd979bdSMark Fasheh } 1573ccd979bdSMark Fasheh el = &(fe->id2.i_list); 1574ccd979bdSMark Fasheh 1575ccd979bdSMark Fasheh spin_lock(&OCFS2_I(inode)->ip_lock); 1576ccd979bdSMark Fasheh OCFS2_I(inode)->ip_clusters = 
le32_to_cpu(fe->i_clusters) - 1577ccd979bdSMark Fasheh clusters_to_del; 1578ccd979bdSMark Fasheh spin_unlock(&OCFS2_I(inode)->ip_lock); 1579ccd979bdSMark Fasheh le32_add_cpu(&fe->i_clusters, -clusters_to_del); 1580ccd979bdSMark Fasheh fe->i_mtime = cpu_to_le64(CURRENT_TIME.tv_sec); 1581ccd979bdSMark Fasheh fe->i_mtime_nsec = cpu_to_le32(CURRENT_TIME.tv_nsec); 1582ccd979bdSMark Fasheh 1583ccd979bdSMark Fasheh i = le16_to_cpu(el->l_next_free_rec) - 1; 1584ccd979bdSMark Fasheh 1585ccd979bdSMark Fasheh BUG_ON(le32_to_cpu(el->l_recs[i].e_clusters) < clusters_to_del); 1586ccd979bdSMark Fasheh le32_add_cpu(&el->l_recs[i].e_clusters, -clusters_to_del); 1587ccd979bdSMark Fasheh /* tree depth zero, we can just delete the clusters, otherwise 1588ccd979bdSMark Fasheh * we need to record the offset of the next level extent block 1589ccd979bdSMark Fasheh * as we may overwrite it. */ 1590ccd979bdSMark Fasheh if (!el->l_tree_depth) 1591ccd979bdSMark Fasheh delete_blk = le64_to_cpu(el->l_recs[i].e_blkno) 1592ccd979bdSMark Fasheh + ocfs2_clusters_to_blocks(osb->sb, 1593ccd979bdSMark Fasheh le32_to_cpu(el->l_recs[i].e_clusters)); 1594ccd979bdSMark Fasheh else 1595ccd979bdSMark Fasheh next_eb = le64_to_cpu(el->l_recs[i].e_blkno); 1596ccd979bdSMark Fasheh 1597ccd979bdSMark Fasheh if (!el->l_recs[i].e_clusters) { 1598ccd979bdSMark Fasheh /* if we deleted the whole extent record, then clear 1599ccd979bdSMark Fasheh * out the other fields and update the extent 1600ccd979bdSMark Fasheh * list. 
For depth > 0 trees, we've already recorded 1601ccd979bdSMark Fasheh * the extent block in 'next_eb' */ 1602ccd979bdSMark Fasheh el->l_recs[i].e_cpos = 0; 1603ccd979bdSMark Fasheh el->l_recs[i].e_blkno = 0; 1604ccd979bdSMark Fasheh BUG_ON(!el->l_next_free_rec); 1605ccd979bdSMark Fasheh le16_add_cpu(&el->l_next_free_rec, -1); 1606ccd979bdSMark Fasheh } 1607ccd979bdSMark Fasheh 1608ccd979bdSMark Fasheh depth = le16_to_cpu(el->l_tree_depth); 1609ccd979bdSMark Fasheh if (!fe->i_clusters) { 1610ccd979bdSMark Fasheh /* trunc to zero is a special case. */ 1611ccd979bdSMark Fasheh el->l_tree_depth = 0; 1612ccd979bdSMark Fasheh fe->i_last_eb_blk = 0; 1613ccd979bdSMark Fasheh } else if (last_eb) 1614ccd979bdSMark Fasheh fe->i_last_eb_blk = last_eb->h_blkno; 1615ccd979bdSMark Fasheh 1616ccd979bdSMark Fasheh status = ocfs2_journal_dirty(handle, fe_bh); 1617ccd979bdSMark Fasheh if (status < 0) { 1618ccd979bdSMark Fasheh mlog_errno(status); 1619ccd979bdSMark Fasheh goto bail; 1620ccd979bdSMark Fasheh } 1621ccd979bdSMark Fasheh 1622ccd979bdSMark Fasheh if (last_eb) { 1623ccd979bdSMark Fasheh /* If there will be a new last extent block, then by 1624ccd979bdSMark Fasheh * definition, there cannot be any leaves to the right of 1625ccd979bdSMark Fasheh * him. */ 1626ccd979bdSMark Fasheh status = ocfs2_journal_access(handle, inode, last_eb_bh, 1627ccd979bdSMark Fasheh OCFS2_JOURNAL_ACCESS_WRITE); 1628ccd979bdSMark Fasheh if (status < 0) { 1629ccd979bdSMark Fasheh mlog_errno(status); 1630ccd979bdSMark Fasheh goto bail; 1631ccd979bdSMark Fasheh } 1632ccd979bdSMark Fasheh last_eb->h_next_leaf_blk = 0; 1633ccd979bdSMark Fasheh status = ocfs2_journal_dirty(handle, last_eb_bh); 1634ccd979bdSMark Fasheh if (status < 0) { 1635ccd979bdSMark Fasheh mlog_errno(status); 1636ccd979bdSMark Fasheh goto bail; 1637ccd979bdSMark Fasheh } 1638ccd979bdSMark Fasheh } 1639ccd979bdSMark Fasheh 1640ccd979bdSMark Fasheh /* if our tree depth > 0, update all the tree blocks below us. 
*/ 1641ccd979bdSMark Fasheh while (depth) { 1642b0697053SMark Fasheh mlog(0, "traveling tree (depth = %d, next_eb = %llu)\n", 1643b0697053SMark Fasheh depth, (unsigned long long)next_eb); 1644ccd979bdSMark Fasheh status = ocfs2_read_block(osb, next_eb, &eb_bh, 1645ccd979bdSMark Fasheh OCFS2_BH_CACHED, inode); 1646ccd979bdSMark Fasheh if (status < 0) { 1647ccd979bdSMark Fasheh mlog_errno(status); 1648ccd979bdSMark Fasheh goto bail; 1649ccd979bdSMark Fasheh } 1650ccd979bdSMark Fasheh eb = (struct ocfs2_extent_block *)eb_bh->b_data; 1651ccd979bdSMark Fasheh if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { 1652ccd979bdSMark Fasheh OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); 1653ccd979bdSMark Fasheh status = -EIO; 1654ccd979bdSMark Fasheh goto bail; 1655ccd979bdSMark Fasheh } 1656ccd979bdSMark Fasheh el = &(eb->h_list); 1657ccd979bdSMark Fasheh 1658ccd979bdSMark Fasheh status = ocfs2_journal_access(handle, inode, eb_bh, 1659ccd979bdSMark Fasheh OCFS2_JOURNAL_ACCESS_WRITE); 1660ccd979bdSMark Fasheh if (status < 0) { 1661ccd979bdSMark Fasheh mlog_errno(status); 1662ccd979bdSMark Fasheh goto bail; 1663ccd979bdSMark Fasheh } 1664ccd979bdSMark Fasheh 1665ccd979bdSMark Fasheh BUG_ON(le16_to_cpu(el->l_next_free_rec) == 0); 1666ccd979bdSMark Fasheh BUG_ON(depth != (le16_to_cpu(el->l_tree_depth) + 1)); 1667ccd979bdSMark Fasheh 1668ccd979bdSMark Fasheh i = le16_to_cpu(el->l_next_free_rec) - 1; 1669ccd979bdSMark Fasheh 1670b0697053SMark Fasheh mlog(0, "extent block %llu, before: record %d: " 1671b0697053SMark Fasheh "(%u, %u, %llu), next = %u\n", 1672b0697053SMark Fasheh (unsigned long long)le64_to_cpu(eb->h_blkno), i, 1673ccd979bdSMark Fasheh le32_to_cpu(el->l_recs[i].e_cpos), 1674ccd979bdSMark Fasheh le32_to_cpu(el->l_recs[i].e_clusters), 1675b0697053SMark Fasheh (unsigned long long)le64_to_cpu(el->l_recs[i].e_blkno), 1676ccd979bdSMark Fasheh le16_to_cpu(el->l_next_free_rec)); 1677ccd979bdSMark Fasheh 1678ccd979bdSMark Fasheh BUG_ON(le32_to_cpu(el->l_recs[i].e_clusters) < 
clusters_to_del); 1679ccd979bdSMark Fasheh le32_add_cpu(&el->l_recs[i].e_clusters, -clusters_to_del); 1680ccd979bdSMark Fasheh 1681ccd979bdSMark Fasheh next_eb = le64_to_cpu(el->l_recs[i].e_blkno); 1682ccd979bdSMark Fasheh /* bottom-most block requires us to delete data.*/ 1683ccd979bdSMark Fasheh if (!el->l_tree_depth) 1684ccd979bdSMark Fasheh delete_blk = le64_to_cpu(el->l_recs[i].e_blkno) 1685ccd979bdSMark Fasheh + ocfs2_clusters_to_blocks(osb->sb, 1686ccd979bdSMark Fasheh le32_to_cpu(el->l_recs[i].e_clusters)); 1687ccd979bdSMark Fasheh if (!el->l_recs[i].e_clusters) { 1688ccd979bdSMark Fasheh el->l_recs[i].e_cpos = 0; 1689ccd979bdSMark Fasheh el->l_recs[i].e_blkno = 0; 1690ccd979bdSMark Fasheh BUG_ON(!el->l_next_free_rec); 1691ccd979bdSMark Fasheh le16_add_cpu(&el->l_next_free_rec, -1); 1692ccd979bdSMark Fasheh } 1693b0697053SMark Fasheh mlog(0, "extent block %llu, after: record %d: " 1694b0697053SMark Fasheh "(%u, %u, %llu), next = %u\n", 1695b0697053SMark Fasheh (unsigned long long)le64_to_cpu(eb->h_blkno), i, 1696ccd979bdSMark Fasheh le32_to_cpu(el->l_recs[i].e_cpos), 1697ccd979bdSMark Fasheh le32_to_cpu(el->l_recs[i].e_clusters), 1698b0697053SMark Fasheh (unsigned long long)le64_to_cpu(el->l_recs[i].e_blkno), 1699ccd979bdSMark Fasheh le16_to_cpu(el->l_next_free_rec)); 1700ccd979bdSMark Fasheh 1701ccd979bdSMark Fasheh status = ocfs2_journal_dirty(handle, eb_bh); 1702ccd979bdSMark Fasheh if (status < 0) { 1703ccd979bdSMark Fasheh mlog_errno(status); 1704ccd979bdSMark Fasheh goto bail; 1705ccd979bdSMark Fasheh } 1706ccd979bdSMark Fasheh 1707ccd979bdSMark Fasheh if (!el->l_next_free_rec) { 1708ccd979bdSMark Fasheh mlog(0, "deleting this extent block.\n"); 1709ccd979bdSMark Fasheh 1710ccd979bdSMark Fasheh ocfs2_remove_from_cache(inode, eb_bh); 1711ccd979bdSMark Fasheh 1712ccd979bdSMark Fasheh BUG_ON(el->l_recs[0].e_clusters); 1713ccd979bdSMark Fasheh BUG_ON(el->l_recs[0].e_cpos); 1714ccd979bdSMark Fasheh BUG_ON(el->l_recs[0].e_blkno); 1715eb35746cSMark Fasheh if 
(eb->h_suballoc_slot == 0) { 1716eb35746cSMark Fasheh /* 1717eb35746cSMark Fasheh * This code only understands how to 1718eb35746cSMark Fasheh * lock the suballocator in slot 0, 1719eb35746cSMark Fasheh * which is fine because allocation is 1720eb35746cSMark Fasheh * only ever done out of that 1721eb35746cSMark Fasheh * suballocator too. A future version 1722eb35746cSMark Fasheh * might change that however, so avoid 1723eb35746cSMark Fasheh * a free if we don't know how to 1724eb35746cSMark Fasheh * handle it. This way an fs incompat 1725eb35746cSMark Fasheh * bit will not be necessary. 1726eb35746cSMark Fasheh */ 1727ccd979bdSMark Fasheh status = ocfs2_free_extent_block(handle, 1728ccd979bdSMark Fasheh tc->tc_ext_alloc_inode, 1729ccd979bdSMark Fasheh tc->tc_ext_alloc_bh, 1730ccd979bdSMark Fasheh eb); 1731ccd979bdSMark Fasheh if (status < 0) { 1732ccd979bdSMark Fasheh mlog_errno(status); 1733ccd979bdSMark Fasheh goto bail; 1734ccd979bdSMark Fasheh } 1735ccd979bdSMark Fasheh } 1736eb35746cSMark Fasheh } 1737ccd979bdSMark Fasheh brelse(eb_bh); 1738ccd979bdSMark Fasheh eb_bh = NULL; 1739ccd979bdSMark Fasheh depth--; 1740ccd979bdSMark Fasheh } 1741ccd979bdSMark Fasheh 1742ccd979bdSMark Fasheh BUG_ON(!delete_blk); 1743ccd979bdSMark Fasheh status = ocfs2_truncate_log_append(osb, handle, delete_blk, 1744ccd979bdSMark Fasheh clusters_to_del); 1745ccd979bdSMark Fasheh if (status < 0) { 1746ccd979bdSMark Fasheh mlog_errno(status); 1747ccd979bdSMark Fasheh goto bail; 1748ccd979bdSMark Fasheh } 1749ccd979bdSMark Fasheh status = 0; 1750ccd979bdSMark Fasheh bail: 1751ccd979bdSMark Fasheh if (!status) 1752ccd979bdSMark Fasheh ocfs2_extent_map_trunc(inode, le32_to_cpu(fe->i_clusters)); 1753ccd979bdSMark Fasheh else 1754ccd979bdSMark Fasheh ocfs2_extent_map_drop(inode, 0); 1755ccd979bdSMark Fasheh mlog_exit(status); 1756ccd979bdSMark Fasheh return status; 1757ccd979bdSMark Fasheh } 1758ccd979bdSMark Fasheh 1759ccd979bdSMark Fasheh /* 1760ccd979bdSMark Fasheh * It is expected, that 
by the time you call this function, 1761ccd979bdSMark Fasheh * inode->i_size and fe->i_size have been adjusted. 1762ccd979bdSMark Fasheh * 1763ccd979bdSMark Fasheh * WARNING: This will kfree the truncate context 1764ccd979bdSMark Fasheh */ 1765ccd979bdSMark Fasheh int ocfs2_commit_truncate(struct ocfs2_super *osb, 1766ccd979bdSMark Fasheh struct inode *inode, 1767ccd979bdSMark Fasheh struct buffer_head *fe_bh, 1768ccd979bdSMark Fasheh struct ocfs2_truncate_context *tc) 1769ccd979bdSMark Fasheh { 1770ccd979bdSMark Fasheh int status, i, credits, tl_sem = 0; 1771ccd979bdSMark Fasheh u32 clusters_to_del, target_i_clusters; 1772ccd979bdSMark Fasheh u64 last_eb = 0; 1773ccd979bdSMark Fasheh struct ocfs2_dinode *fe; 1774ccd979bdSMark Fasheh struct ocfs2_extent_block *eb; 1775ccd979bdSMark Fasheh struct ocfs2_extent_list *el; 1776ccd979bdSMark Fasheh struct buffer_head *last_eb_bh; 1777ccd979bdSMark Fasheh struct ocfs2_journal_handle *handle = NULL; 1778ccd979bdSMark Fasheh struct inode *tl_inode = osb->osb_tl_inode; 1779ccd979bdSMark Fasheh 1780ccd979bdSMark Fasheh mlog_entry_void(); 1781ccd979bdSMark Fasheh 1782ccd979bdSMark Fasheh down_write(&OCFS2_I(inode)->ip_alloc_sem); 1783ccd979bdSMark Fasheh 1784ccd979bdSMark Fasheh target_i_clusters = ocfs2_clusters_for_bytes(osb->sb, 1785ccd979bdSMark Fasheh i_size_read(inode)); 1786ccd979bdSMark Fasheh 1787ccd979bdSMark Fasheh last_eb_bh = tc->tc_last_eb_bh; 1788ccd979bdSMark Fasheh tc->tc_last_eb_bh = NULL; 1789ccd979bdSMark Fasheh 1790ccd979bdSMark Fasheh fe = (struct ocfs2_dinode *) fe_bh->b_data; 1791ccd979bdSMark Fasheh 1792ccd979bdSMark Fasheh if (fe->id2.i_list.l_tree_depth) { 1793ccd979bdSMark Fasheh eb = (struct ocfs2_extent_block *) last_eb_bh->b_data; 1794ccd979bdSMark Fasheh el = &eb->h_list; 1795ccd979bdSMark Fasheh } else 1796ccd979bdSMark Fasheh el = &fe->id2.i_list; 1797ccd979bdSMark Fasheh last_eb = le64_to_cpu(fe->i_last_eb_blk); 1798ccd979bdSMark Fasheh start: 1799ccd979bdSMark Fasheh mlog(0, 
"ocfs2_commit_truncate: fe->i_clusters = %u, " 1800b0697053SMark Fasheh "last_eb = %llu, fe->i_last_eb_blk = %llu, " 1801ccd979bdSMark Fasheh "fe->id2.i_list.l_tree_depth = %u last_eb_bh = %p\n", 1802b0697053SMark Fasheh le32_to_cpu(fe->i_clusters), (unsigned long long)last_eb, 1803b0697053SMark Fasheh (unsigned long long)le64_to_cpu(fe->i_last_eb_blk), 1804ccd979bdSMark Fasheh le16_to_cpu(fe->id2.i_list.l_tree_depth), last_eb_bh); 1805ccd979bdSMark Fasheh 1806ccd979bdSMark Fasheh if (last_eb != le64_to_cpu(fe->i_last_eb_blk)) { 1807ccd979bdSMark Fasheh mlog(0, "last_eb changed!\n"); 1808ccd979bdSMark Fasheh BUG_ON(!fe->id2.i_list.l_tree_depth); 1809ccd979bdSMark Fasheh last_eb = le64_to_cpu(fe->i_last_eb_blk); 1810ccd979bdSMark Fasheh /* i_last_eb_blk may have changed, read it if 1811ccd979bdSMark Fasheh * necessary. We don't have to worry about the 1812ccd979bdSMark Fasheh * truncate to zero case here (where there becomes no 1813ccd979bdSMark Fasheh * last_eb) because we never loop back after our work 1814ccd979bdSMark Fasheh * is done. 
*/ 1815ccd979bdSMark Fasheh if (last_eb_bh) { 1816ccd979bdSMark Fasheh brelse(last_eb_bh); 1817ccd979bdSMark Fasheh last_eb_bh = NULL; 1818ccd979bdSMark Fasheh } 1819ccd979bdSMark Fasheh 1820ccd979bdSMark Fasheh status = ocfs2_read_block(osb, last_eb, 1821ccd979bdSMark Fasheh &last_eb_bh, OCFS2_BH_CACHED, 1822ccd979bdSMark Fasheh inode); 1823ccd979bdSMark Fasheh if (status < 0) { 1824ccd979bdSMark Fasheh mlog_errno(status); 1825ccd979bdSMark Fasheh goto bail; 1826ccd979bdSMark Fasheh } 1827ccd979bdSMark Fasheh eb = (struct ocfs2_extent_block *) last_eb_bh->b_data; 1828ccd979bdSMark Fasheh if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { 1829ccd979bdSMark Fasheh OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); 1830ccd979bdSMark Fasheh status = -EIO; 1831ccd979bdSMark Fasheh goto bail; 1832ccd979bdSMark Fasheh } 1833ccd979bdSMark Fasheh el = &(eb->h_list); 1834ccd979bdSMark Fasheh } 1835ccd979bdSMark Fasheh 1836ccd979bdSMark Fasheh /* by now, el will point to the extent list on the bottom most 1837ccd979bdSMark Fasheh * portion of this tree. */ 1838ccd979bdSMark Fasheh i = le16_to_cpu(el->l_next_free_rec) - 1; 1839ccd979bdSMark Fasheh if (le32_to_cpu(el->l_recs[i].e_cpos) >= target_i_clusters) 1840ccd979bdSMark Fasheh clusters_to_del = le32_to_cpu(el->l_recs[i].e_clusters); 1841ccd979bdSMark Fasheh else 1842ccd979bdSMark Fasheh clusters_to_del = (le32_to_cpu(el->l_recs[i].e_clusters) + 1843ccd979bdSMark Fasheh le32_to_cpu(el->l_recs[i].e_cpos)) - 1844ccd979bdSMark Fasheh target_i_clusters; 1845ccd979bdSMark Fasheh 1846ccd979bdSMark Fasheh mlog(0, "clusters_to_del = %u in this pass\n", clusters_to_del); 1847ccd979bdSMark Fasheh 18481b1dcc1bSJes Sorensen mutex_lock(&tl_inode->i_mutex); 1849ccd979bdSMark Fasheh tl_sem = 1; 1850ccd979bdSMark Fasheh /* ocfs2_truncate_log_needs_flush guarantees us at least one 1851ccd979bdSMark Fasheh * record is free for use. If there isn't any, we flush to get 1852ccd979bdSMark Fasheh * an empty truncate log. 
*/ 1853ccd979bdSMark Fasheh if (ocfs2_truncate_log_needs_flush(osb)) { 1854ccd979bdSMark Fasheh status = __ocfs2_flush_truncate_log(osb); 1855ccd979bdSMark Fasheh if (status < 0) { 1856ccd979bdSMark Fasheh mlog_errno(status); 1857ccd979bdSMark Fasheh goto bail; 1858ccd979bdSMark Fasheh } 1859ccd979bdSMark Fasheh } 1860ccd979bdSMark Fasheh 1861ccd979bdSMark Fasheh credits = ocfs2_calc_tree_trunc_credits(osb->sb, clusters_to_del, 1862ccd979bdSMark Fasheh fe, el); 1863ccd979bdSMark Fasheh handle = ocfs2_start_trans(osb, NULL, credits); 1864ccd979bdSMark Fasheh if (IS_ERR(handle)) { 1865ccd979bdSMark Fasheh status = PTR_ERR(handle); 1866ccd979bdSMark Fasheh handle = NULL; 1867ccd979bdSMark Fasheh mlog_errno(status); 1868ccd979bdSMark Fasheh goto bail; 1869ccd979bdSMark Fasheh } 1870ccd979bdSMark Fasheh 1871ccd979bdSMark Fasheh inode->i_ctime = inode->i_mtime = CURRENT_TIME; 1872ccd979bdSMark Fasheh status = ocfs2_mark_inode_dirty(handle, inode, fe_bh); 1873ccd979bdSMark Fasheh if (status < 0) 1874ccd979bdSMark Fasheh mlog_errno(status); 1875ccd979bdSMark Fasheh 1876ccd979bdSMark Fasheh status = ocfs2_do_truncate(osb, clusters_to_del, inode, fe_bh, 1877ccd979bdSMark Fasheh last_eb_bh, handle, tc); 1878ccd979bdSMark Fasheh if (status < 0) { 1879ccd979bdSMark Fasheh mlog_errno(status); 1880ccd979bdSMark Fasheh goto bail; 1881ccd979bdSMark Fasheh } 1882ccd979bdSMark Fasheh 18831b1dcc1bSJes Sorensen mutex_unlock(&tl_inode->i_mutex); 1884ccd979bdSMark Fasheh tl_sem = 0; 1885ccd979bdSMark Fasheh 1886*02dc1af4SMark Fasheh ocfs2_commit_trans(osb, handle); 1887ccd979bdSMark Fasheh handle = NULL; 1888ccd979bdSMark Fasheh 1889ccd979bdSMark Fasheh BUG_ON(le32_to_cpu(fe->i_clusters) < target_i_clusters); 1890ccd979bdSMark Fasheh if (le32_to_cpu(fe->i_clusters) > target_i_clusters) 1891ccd979bdSMark Fasheh goto start; 1892ccd979bdSMark Fasheh bail: 1893ccd979bdSMark Fasheh up_write(&OCFS2_I(inode)->ip_alloc_sem); 1894ccd979bdSMark Fasheh 1895ccd979bdSMark Fasheh 
ocfs2_schedule_truncate_log_flush(osb, 1); 1896ccd979bdSMark Fasheh 1897ccd979bdSMark Fasheh if (tl_sem) 18981b1dcc1bSJes Sorensen mutex_unlock(&tl_inode->i_mutex); 1899ccd979bdSMark Fasheh 1900ccd979bdSMark Fasheh if (handle) 1901*02dc1af4SMark Fasheh ocfs2_commit_trans(osb, handle); 1902ccd979bdSMark Fasheh 1903ccd979bdSMark Fasheh if (last_eb_bh) 1904ccd979bdSMark Fasheh brelse(last_eb_bh); 1905ccd979bdSMark Fasheh 1906ccd979bdSMark Fasheh /* This will drop the ext_alloc cluster lock for us */ 1907ccd979bdSMark Fasheh ocfs2_free_truncate_context(tc); 1908ccd979bdSMark Fasheh 1909ccd979bdSMark Fasheh mlog_exit(status); 1910ccd979bdSMark Fasheh return status; 1911ccd979bdSMark Fasheh } 1912ccd979bdSMark Fasheh 1913ccd979bdSMark Fasheh 1914ccd979bdSMark Fasheh /* 1915ccd979bdSMark Fasheh * Expects the inode to already be locked. This will figure out which 1916ccd979bdSMark Fasheh * inodes need to be locked and will put them on the returned truncate 1917ccd979bdSMark Fasheh * context. 1918ccd979bdSMark Fasheh */ 1919ccd979bdSMark Fasheh int ocfs2_prepare_truncate(struct ocfs2_super *osb, 1920ccd979bdSMark Fasheh struct inode *inode, 1921ccd979bdSMark Fasheh struct buffer_head *fe_bh, 1922ccd979bdSMark Fasheh struct ocfs2_truncate_context **tc) 1923ccd979bdSMark Fasheh { 1924ccd979bdSMark Fasheh int status, metadata_delete; 1925ccd979bdSMark Fasheh unsigned int new_i_clusters; 1926ccd979bdSMark Fasheh struct ocfs2_dinode *fe; 1927ccd979bdSMark Fasheh struct ocfs2_extent_block *eb; 1928ccd979bdSMark Fasheh struct ocfs2_extent_list *el; 1929ccd979bdSMark Fasheh struct buffer_head *last_eb_bh = NULL; 1930ccd979bdSMark Fasheh struct inode *ext_alloc_inode = NULL; 1931ccd979bdSMark Fasheh struct buffer_head *ext_alloc_bh = NULL; 1932ccd979bdSMark Fasheh 1933ccd979bdSMark Fasheh mlog_entry_void(); 1934ccd979bdSMark Fasheh 1935ccd979bdSMark Fasheh *tc = NULL; 1936ccd979bdSMark Fasheh 1937ccd979bdSMark Fasheh new_i_clusters = ocfs2_clusters_for_bytes(osb->sb, 
1938ccd979bdSMark Fasheh i_size_read(inode)); 1939ccd979bdSMark Fasheh fe = (struct ocfs2_dinode *) fe_bh->b_data; 1940ccd979bdSMark Fasheh 1941ccd979bdSMark Fasheh mlog(0, "fe->i_clusters = %u, new_i_clusters = %u, fe->i_size =" 1942b0697053SMark Fasheh "%llu\n", fe->i_clusters, new_i_clusters, 1943b0697053SMark Fasheh (unsigned long long)fe->i_size); 1944ccd979bdSMark Fasheh 1945ccd979bdSMark Fasheh if (le32_to_cpu(fe->i_clusters) <= new_i_clusters) { 1946b0697053SMark Fasheh ocfs2_error(inode->i_sb, "Dinode %llu has cluster count " 1947b0697053SMark Fasheh "%u and size %llu whereas struct inode has " 1948ccd979bdSMark Fasheh "cluster count %u and size %llu which caused an " 1949ccd979bdSMark Fasheh "invalid truncate to %u clusters.", 1950b0697053SMark Fasheh (unsigned long long)le64_to_cpu(fe->i_blkno), 1951ccd979bdSMark Fasheh le32_to_cpu(fe->i_clusters), 1952b0697053SMark Fasheh (unsigned long long)le64_to_cpu(fe->i_size), 1953ccd979bdSMark Fasheh OCFS2_I(inode)->ip_clusters, i_size_read(inode), 1954ccd979bdSMark Fasheh new_i_clusters); 1955ccd979bdSMark Fasheh mlog_meta_lvb(ML_ERROR, &OCFS2_I(inode)->ip_meta_lockres); 1956ccd979bdSMark Fasheh status = -EIO; 1957ccd979bdSMark Fasheh goto bail; 1958ccd979bdSMark Fasheh } 1959ccd979bdSMark Fasheh 1960ccd979bdSMark Fasheh *tc = kcalloc(1, sizeof(struct ocfs2_truncate_context), GFP_KERNEL); 1961ccd979bdSMark Fasheh if (!(*tc)) { 1962ccd979bdSMark Fasheh status = -ENOMEM; 1963ccd979bdSMark Fasheh mlog_errno(status); 1964ccd979bdSMark Fasheh goto bail; 1965ccd979bdSMark Fasheh } 1966ccd979bdSMark Fasheh 1967ccd979bdSMark Fasheh metadata_delete = 0; 1968ccd979bdSMark Fasheh if (fe->id2.i_list.l_tree_depth) { 1969ccd979bdSMark Fasheh /* If we have a tree, then the truncate may result in 1970ccd979bdSMark Fasheh * metadata deletes. 
Figure this out from the 1971ccd979bdSMark Fasheh * rightmost leaf block.*/ 1972ccd979bdSMark Fasheh status = ocfs2_read_block(osb, le64_to_cpu(fe->i_last_eb_blk), 1973ccd979bdSMark Fasheh &last_eb_bh, OCFS2_BH_CACHED, inode); 1974ccd979bdSMark Fasheh if (status < 0) { 1975ccd979bdSMark Fasheh mlog_errno(status); 1976ccd979bdSMark Fasheh goto bail; 1977ccd979bdSMark Fasheh } 1978ccd979bdSMark Fasheh eb = (struct ocfs2_extent_block *) last_eb_bh->b_data; 1979ccd979bdSMark Fasheh if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { 1980ccd979bdSMark Fasheh OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); 1981ccd979bdSMark Fasheh 1982ccd979bdSMark Fasheh brelse(last_eb_bh); 1983ccd979bdSMark Fasheh status = -EIO; 1984ccd979bdSMark Fasheh goto bail; 1985ccd979bdSMark Fasheh } 1986ccd979bdSMark Fasheh el = &(eb->h_list); 1987ccd979bdSMark Fasheh if (le32_to_cpu(el->l_recs[0].e_cpos) >= new_i_clusters) 1988ccd979bdSMark Fasheh metadata_delete = 1; 1989ccd979bdSMark Fasheh } 1990ccd979bdSMark Fasheh 1991ccd979bdSMark Fasheh (*tc)->tc_last_eb_bh = last_eb_bh; 1992ccd979bdSMark Fasheh 1993ccd979bdSMark Fasheh if (metadata_delete) { 1994ccd979bdSMark Fasheh mlog(0, "Will have to delete metadata for this trunc. 
" 1995ccd979bdSMark Fasheh "locking allocator.\n"); 1996ccd979bdSMark Fasheh ext_alloc_inode = ocfs2_get_system_file_inode(osb, EXTENT_ALLOC_SYSTEM_INODE, 0); 1997ccd979bdSMark Fasheh if (!ext_alloc_inode) { 1998ccd979bdSMark Fasheh status = -ENOMEM; 1999ccd979bdSMark Fasheh mlog_errno(status); 2000ccd979bdSMark Fasheh goto bail; 2001ccd979bdSMark Fasheh } 2002ccd979bdSMark Fasheh 20031b1dcc1bSJes Sorensen mutex_lock(&ext_alloc_inode->i_mutex); 2004ccd979bdSMark Fasheh (*tc)->tc_ext_alloc_inode = ext_alloc_inode; 2005ccd979bdSMark Fasheh 20064bcec184SMark Fasheh status = ocfs2_meta_lock(ext_alloc_inode, &ext_alloc_bh, 1); 2007ccd979bdSMark Fasheh if (status < 0) { 2008ccd979bdSMark Fasheh mlog_errno(status); 2009ccd979bdSMark Fasheh goto bail; 2010ccd979bdSMark Fasheh } 2011ccd979bdSMark Fasheh (*tc)->tc_ext_alloc_bh = ext_alloc_bh; 2012ccd979bdSMark Fasheh (*tc)->tc_ext_alloc_locked = 1; 2013ccd979bdSMark Fasheh } 2014ccd979bdSMark Fasheh 2015ccd979bdSMark Fasheh status = 0; 2016ccd979bdSMark Fasheh bail: 2017ccd979bdSMark Fasheh if (status < 0) { 2018ccd979bdSMark Fasheh if (*tc) 2019ccd979bdSMark Fasheh ocfs2_free_truncate_context(*tc); 2020ccd979bdSMark Fasheh *tc = NULL; 2021ccd979bdSMark Fasheh } 2022ccd979bdSMark Fasheh mlog_exit_void(); 2023ccd979bdSMark Fasheh return status; 2024ccd979bdSMark Fasheh } 2025ccd979bdSMark Fasheh 
/*
 * ocfs2_free_truncate_context() - release everything a truncate context holds.
 * @tc: context to tear down; NULL is accepted and ignored.
 *
 * If an extent allocator inode is attached to the context, its meta lock is
 * dropped (only when tc_ext_alloc_locked is set), its i_mutex is unlocked and
 * the inode reference is put. The tc_ext_alloc_bh and tc_last_eb_bh buffer
 * heads are then released and @tc itself is freed, so @tc must not be touched
 * after this returns.
 */
static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc)
{
	struct inode *alloc_inode;

	/* Behave like kfree()/brelse(): freeing nothing is not an error. */
	if (!tc)
		return;

	alloc_inode = tc->tc_ext_alloc_inode;
	if (alloc_inode) {
		/*
		 * Release in the reverse order of acquisition: the meta lock
		 * is taken after i_mutex, so it is dropped first. The flag
		 * check covers a context whose inode was attached (and
		 * i_mutex taken) but whose meta lock attempt failed.
		 */
		if (tc->tc_ext_alloc_locked)
			ocfs2_meta_unlock(alloc_inode, 1);

		mutex_unlock(&alloc_inode->i_mutex);
		iput(alloc_inode);
	}

	/* brelse() ignores NULL, so unset buffer heads need no guard. */
	brelse(tc->tc_ext_alloc_bh);
	brelse(tc->tc_last_eb_bh);

	kfree(tc);
}