1ccd979bdSMark Fasheh /* -*- mode: c; c-basic-offset: 8; -*- 2ccd979bdSMark Fasheh * vim: noexpandtab sw=8 ts=8 sts=0: 3ccd979bdSMark Fasheh * 4ccd979bdSMark Fasheh * alloc.c 5ccd979bdSMark Fasheh * 6ccd979bdSMark Fasheh * Extent allocs and frees 7ccd979bdSMark Fasheh * 8ccd979bdSMark Fasheh * Copyright (C) 2002, 2004 Oracle. All rights reserved. 9ccd979bdSMark Fasheh * 10ccd979bdSMark Fasheh * This program is free software; you can redistribute it and/or 11ccd979bdSMark Fasheh * modify it under the terms of the GNU General Public 12ccd979bdSMark Fasheh * License as published by the Free Software Foundation; either 13ccd979bdSMark Fasheh * version 2 of the License, or (at your option) any later version. 14ccd979bdSMark Fasheh * 15ccd979bdSMark Fasheh * This program is distributed in the hope that it will be useful, 16ccd979bdSMark Fasheh * but WITHOUT ANY WARRANTY; without even the implied warranty of 17ccd979bdSMark Fasheh * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18ccd979bdSMark Fasheh * General Public License for more details. 19ccd979bdSMark Fasheh * 20ccd979bdSMark Fasheh * You should have received a copy of the GNU General Public 21ccd979bdSMark Fasheh * License along with this program; if not, write to the 22ccd979bdSMark Fasheh * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 23ccd979bdSMark Fasheh * Boston, MA 021110-1307, USA. 24ccd979bdSMark Fasheh */ 25ccd979bdSMark Fasheh 26ccd979bdSMark Fasheh #include <linux/fs.h> 27ccd979bdSMark Fasheh #include <linux/types.h> 28ccd979bdSMark Fasheh #include <linux/slab.h> 29ccd979bdSMark Fasheh #include <linux/highmem.h> 3060b11392SMark Fasheh #include <linux/swap.h> 31ccd979bdSMark Fasheh 32ccd979bdSMark Fasheh #define MLOG_MASK_PREFIX ML_DISK_ALLOC 33ccd979bdSMark Fasheh #include <cluster/masklog.h> 34ccd979bdSMark Fasheh 35ccd979bdSMark Fasheh #include "ocfs2.h" 36ccd979bdSMark Fasheh 37ccd979bdSMark Fasheh #include "alloc.h" 3860b11392SMark Fasheh #include "aops.h" 39ccd979bdSMark Fasheh #include "dlmglue.h" 40ccd979bdSMark Fasheh #include "extent_map.h" 41ccd979bdSMark Fasheh #include "inode.h" 42ccd979bdSMark Fasheh #include "journal.h" 43ccd979bdSMark Fasheh #include "localalloc.h" 44ccd979bdSMark Fasheh #include "suballoc.h" 45ccd979bdSMark Fasheh #include "sysfile.h" 46ccd979bdSMark Fasheh #include "file.h" 47ccd979bdSMark Fasheh #include "super.h" 48ccd979bdSMark Fasheh #include "uptodate.h" 49ccd979bdSMark Fasheh 50ccd979bdSMark Fasheh #include "buffer_head_io.h" 51ccd979bdSMark Fasheh 52ccd979bdSMark Fasheh static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc); 53ccd979bdSMark Fasheh 54dcd0538fSMark Fasheh /* 55dcd0538fSMark Fasheh * Structures which describe a path through a btree, and functions to 56dcd0538fSMark Fasheh * manipulate them. 57dcd0538fSMark Fasheh * 58dcd0538fSMark Fasheh * The idea here is to be as generic as possible with the tree 59dcd0538fSMark Fasheh * manipulation code. 60dcd0538fSMark Fasheh */ 61dcd0538fSMark Fasheh struct ocfs2_path_item { 62dcd0538fSMark Fasheh struct buffer_head *bh; 63dcd0538fSMark Fasheh struct ocfs2_extent_list *el; 64dcd0538fSMark Fasheh }; 65dcd0538fSMark Fasheh 66dcd0538fSMark Fasheh #define OCFS2_MAX_PATH_DEPTH 5 67dcd0538fSMark Fasheh 68dcd0538fSMark Fasheh struct ocfs2_path { 69dcd0538fSMark Fasheh int p_tree_depth; 70dcd0538fSMark Fasheh struct ocfs2_path_item p_node[OCFS2_MAX_PATH_DEPTH]; 71dcd0538fSMark Fasheh }; 72dcd0538fSMark Fasheh 73dcd0538fSMark Fasheh #define path_root_bh(_path) ((_path)->p_node[0].bh) 74dcd0538fSMark Fasheh #define path_root_el(_path) ((_path)->p_node[0].el) 75dcd0538fSMark Fasheh #define path_leaf_bh(_path) ((_path)->p_node[(_path)->p_tree_depth].bh) 76dcd0538fSMark Fasheh #define path_leaf_el(_path) ((_path)->p_node[(_path)->p_tree_depth].el) 77dcd0538fSMark Fasheh #define path_num_items(_path) ((_path)->p_tree_depth + 1) 78dcd0538fSMark Fasheh 79dcd0538fSMark Fasheh /* 80dcd0538fSMark Fasheh * Reset the actual path elements so that we can re-use the structure 81dcd0538fSMark Fasheh * to build another path. Generally, this involves freeing the buffer 82dcd0538fSMark Fasheh * heads. 83dcd0538fSMark Fasheh */ 84dcd0538fSMark Fasheh static void ocfs2_reinit_path(struct ocfs2_path *path, int keep_root) 85dcd0538fSMark Fasheh { 86dcd0538fSMark Fasheh int i, start = 0, depth = 0; 87dcd0538fSMark Fasheh struct ocfs2_path_item *node; 88dcd0538fSMark Fasheh 89dcd0538fSMark Fasheh if (keep_root) 90dcd0538fSMark Fasheh start = 1; 91dcd0538fSMark Fasheh 92dcd0538fSMark Fasheh for(i = start; i < path_num_items(path); i++) { 93dcd0538fSMark Fasheh node = &path->p_node[i]; 94dcd0538fSMark Fasheh 95dcd0538fSMark Fasheh brelse(node->bh); 96dcd0538fSMark Fasheh node->bh = NULL; 97dcd0538fSMark Fasheh node->el = NULL; 98dcd0538fSMark Fasheh } 99dcd0538fSMark Fasheh 100dcd0538fSMark Fasheh /* 101dcd0538fSMark Fasheh * Tree depth may change during truncate, or insert. If we're 102dcd0538fSMark Fasheh * keeping the root extent list, then make sure that our path 103dcd0538fSMark Fasheh * structure reflects the proper depth. 104dcd0538fSMark Fasheh */ 105dcd0538fSMark Fasheh if (keep_root) 106dcd0538fSMark Fasheh depth = le16_to_cpu(path_root_el(path)->l_tree_depth); 107dcd0538fSMark Fasheh 108dcd0538fSMark Fasheh path->p_tree_depth = depth; 109dcd0538fSMark Fasheh } 110dcd0538fSMark Fasheh 111dcd0538fSMark Fasheh static void ocfs2_free_path(struct ocfs2_path *path) 112dcd0538fSMark Fasheh { 113dcd0538fSMark Fasheh if (path) { 114dcd0538fSMark Fasheh ocfs2_reinit_path(path, 0); 115dcd0538fSMark Fasheh kfree(path); 116dcd0538fSMark Fasheh } 117dcd0538fSMark Fasheh } 118dcd0538fSMark Fasheh 119dcd0538fSMark Fasheh /* 120dcd0538fSMark Fasheh * Make the *dest path the same as src and re-initialize src path to 121dcd0538fSMark Fasheh * have a root only. 122dcd0538fSMark Fasheh */ 123dcd0538fSMark Fasheh static void ocfs2_mv_path(struct ocfs2_path *dest, struct ocfs2_path *src) 124dcd0538fSMark Fasheh { 125dcd0538fSMark Fasheh int i; 126dcd0538fSMark Fasheh 127dcd0538fSMark Fasheh BUG_ON(path_root_bh(dest) != path_root_bh(src)); 128dcd0538fSMark Fasheh 129dcd0538fSMark Fasheh for(i = 1; i < OCFS2_MAX_PATH_DEPTH; i++) { 130dcd0538fSMark Fasheh brelse(dest->p_node[i].bh); 131dcd0538fSMark Fasheh 132dcd0538fSMark Fasheh dest->p_node[i].bh = src->p_node[i].bh; 133dcd0538fSMark Fasheh dest->p_node[i].el = src->p_node[i].el; 134dcd0538fSMark Fasheh 135dcd0538fSMark Fasheh src->p_node[i].bh = NULL; 136dcd0538fSMark Fasheh src->p_node[i].el = NULL; 137dcd0538fSMark Fasheh } 138dcd0538fSMark Fasheh } 139dcd0538fSMark Fasheh 140dcd0538fSMark Fasheh /* 141dcd0538fSMark Fasheh * Insert an extent block at given index. 142dcd0538fSMark Fasheh * 143dcd0538fSMark Fasheh * This will not take an additional reference on eb_bh. 144dcd0538fSMark Fasheh */ 145dcd0538fSMark Fasheh static inline void ocfs2_path_insert_eb(struct ocfs2_path *path, int index, 146dcd0538fSMark Fasheh struct buffer_head *eb_bh) 147dcd0538fSMark Fasheh { 148dcd0538fSMark Fasheh struct ocfs2_extent_block *eb = (struct ocfs2_extent_block *)eb_bh->b_data; 149dcd0538fSMark Fasheh 150dcd0538fSMark Fasheh /* 151dcd0538fSMark Fasheh * Right now, no root bh is an extent block, so this helps 152dcd0538fSMark Fasheh * catch code errors with dinode trees. The assertion can be 153dcd0538fSMark Fasheh * safely removed if we ever need to insert extent block 154dcd0538fSMark Fasheh * structures at the root. 155dcd0538fSMark Fasheh */ 156dcd0538fSMark Fasheh BUG_ON(index == 0); 157dcd0538fSMark Fasheh 158dcd0538fSMark Fasheh path->p_node[index].bh = eb_bh; 159dcd0538fSMark Fasheh path->p_node[index].el = &eb->h_list; 160dcd0538fSMark Fasheh } 161dcd0538fSMark Fasheh 162dcd0538fSMark Fasheh static struct ocfs2_path *ocfs2_new_path(struct buffer_head *root_bh, 163dcd0538fSMark Fasheh struct ocfs2_extent_list *root_el) 164dcd0538fSMark Fasheh { 165dcd0538fSMark Fasheh struct ocfs2_path *path; 166dcd0538fSMark Fasheh 167dcd0538fSMark Fasheh BUG_ON(le16_to_cpu(root_el->l_tree_depth) >= OCFS2_MAX_PATH_DEPTH); 168dcd0538fSMark Fasheh 169dcd0538fSMark Fasheh path = kzalloc(sizeof(*path), GFP_NOFS); 170dcd0538fSMark Fasheh if (path) { 171dcd0538fSMark Fasheh path->p_tree_depth = le16_to_cpu(root_el->l_tree_depth); 172dcd0538fSMark Fasheh get_bh(root_bh); 173dcd0538fSMark Fasheh path_root_bh(path) = root_bh; 174dcd0538fSMark Fasheh path_root_el(path) = root_el; 175dcd0538fSMark Fasheh } 176dcd0538fSMark Fasheh 177dcd0538fSMark Fasheh return path; 178dcd0538fSMark Fasheh } 179dcd0538fSMark Fasheh 180dcd0538fSMark Fasheh /* 181dcd0538fSMark Fasheh * Allocate and initialize a new path based on a disk inode tree. 182dcd0538fSMark Fasheh */ 183dcd0538fSMark Fasheh static struct ocfs2_path *ocfs2_new_inode_path(struct buffer_head *di_bh) 184dcd0538fSMark Fasheh { 185dcd0538fSMark Fasheh struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 186dcd0538fSMark Fasheh struct ocfs2_extent_list *el = &di->id2.i_list; 187dcd0538fSMark Fasheh 188dcd0538fSMark Fasheh return ocfs2_new_path(di_bh, el); 189dcd0538fSMark Fasheh } 190dcd0538fSMark Fasheh 191dcd0538fSMark Fasheh /* 192dcd0538fSMark Fasheh * Convenience function to journal all components in a path. 193dcd0538fSMark Fasheh */ 194dcd0538fSMark Fasheh static int ocfs2_journal_access_path(struct inode *inode, handle_t *handle, 195dcd0538fSMark Fasheh struct ocfs2_path *path) 196dcd0538fSMark Fasheh { 197dcd0538fSMark Fasheh int i, ret = 0; 198dcd0538fSMark Fasheh 199dcd0538fSMark Fasheh if (!path) 200dcd0538fSMark Fasheh goto out; 201dcd0538fSMark Fasheh 202dcd0538fSMark Fasheh for(i = 0; i < path_num_items(path); i++) { 203dcd0538fSMark Fasheh ret = ocfs2_journal_access(handle, inode, path->p_node[i].bh, 204dcd0538fSMark Fasheh OCFS2_JOURNAL_ACCESS_WRITE); 205dcd0538fSMark Fasheh if (ret < 0) { 206dcd0538fSMark Fasheh mlog_errno(ret); 207dcd0538fSMark Fasheh goto out; 208dcd0538fSMark Fasheh } 209dcd0538fSMark Fasheh } 210dcd0538fSMark Fasheh 211dcd0538fSMark Fasheh out: 212dcd0538fSMark Fasheh return ret; 213dcd0538fSMark Fasheh } 214dcd0538fSMark Fasheh 215dcd0538fSMark Fasheh enum ocfs2_contig_type { 216dcd0538fSMark Fasheh CONTIG_NONE = 0, 217dcd0538fSMark Fasheh CONTIG_LEFT, 218dcd0538fSMark Fasheh CONTIG_RIGHT 219dcd0538fSMark Fasheh }; 220dcd0538fSMark Fasheh 221dcd0538fSMark Fasheh static int ocfs2_block_extent_contig(struct super_block *sb, 222ccd979bdSMark Fasheh struct ocfs2_extent_rec *ext, 223ccd979bdSMark Fasheh u64 blkno) 224ccd979bdSMark Fasheh { 225ccd979bdSMark Fasheh return blkno == (le64_to_cpu(ext->e_blkno) + 226dcd0538fSMark Fasheh ocfs2_clusters_to_blocks(sb, 227ccd979bdSMark Fasheh le32_to_cpu(ext->e_clusters))); 228ccd979bdSMark Fasheh } 229ccd979bdSMark Fasheh 230dcd0538fSMark Fasheh static int ocfs2_extents_adjacent(struct ocfs2_extent_rec *left, 231dcd0538fSMark Fasheh struct ocfs2_extent_rec *right) 232dcd0538fSMark Fasheh { 233dcd0538fSMark Fasheh return (le32_to_cpu(left->e_cpos) + le32_to_cpu(left->e_clusters) == 234dcd0538fSMark Fasheh le32_to_cpu(right->e_cpos)); 235dcd0538fSMark Fasheh } 236dcd0538fSMark Fasheh 237dcd0538fSMark Fasheh static enum ocfs2_contig_type 238dcd0538fSMark Fasheh ocfs2_extent_contig(struct inode *inode, 239dcd0538fSMark Fasheh struct ocfs2_extent_rec *ext, 240dcd0538fSMark Fasheh struct ocfs2_extent_rec *insert_rec) 241dcd0538fSMark Fasheh { 242dcd0538fSMark Fasheh u64 blkno = le64_to_cpu(insert_rec->e_blkno); 243dcd0538fSMark Fasheh 244dcd0538fSMark Fasheh if (ocfs2_extents_adjacent(ext, insert_rec) && 245dcd0538fSMark Fasheh ocfs2_block_extent_contig(inode->i_sb, ext, blkno)) 246dcd0538fSMark Fasheh return CONTIG_RIGHT; 247dcd0538fSMark Fasheh 248dcd0538fSMark Fasheh blkno = le64_to_cpu(ext->e_blkno); 249dcd0538fSMark Fasheh if (ocfs2_extents_adjacent(insert_rec, ext) && 250dcd0538fSMark Fasheh ocfs2_block_extent_contig(inode->i_sb, insert_rec, blkno)) 251dcd0538fSMark Fasheh return CONTIG_LEFT; 252dcd0538fSMark Fasheh 253dcd0538fSMark Fasheh return CONTIG_NONE; 254dcd0538fSMark Fasheh } 255dcd0538fSMark Fasheh 256dcd0538fSMark Fasheh /* 257dcd0538fSMark Fasheh * NOTE: We can have pretty much any combination of contiguousness and 258dcd0538fSMark Fasheh * appending. 259dcd0538fSMark Fasheh * 260dcd0538fSMark Fasheh * The usefulness of APPEND_TAIL is more in that it lets us know that 261dcd0538fSMark Fasheh * we'll have to update the path to that leaf. 262dcd0538fSMark Fasheh */ 263dcd0538fSMark Fasheh enum ocfs2_append_type { 264dcd0538fSMark Fasheh APPEND_NONE = 0, 265dcd0538fSMark Fasheh APPEND_TAIL, 266dcd0538fSMark Fasheh }; 267dcd0538fSMark Fasheh 268dcd0538fSMark Fasheh struct ocfs2_insert_type { 269dcd0538fSMark Fasheh enum ocfs2_append_type ins_appending; 270dcd0538fSMark Fasheh enum ocfs2_contig_type ins_contig; 271dcd0538fSMark Fasheh int ins_contig_index; 272dcd0538fSMark Fasheh int ins_free_records; 273dcd0538fSMark Fasheh int ins_tree_depth; 274dcd0538fSMark Fasheh }; 275dcd0538fSMark Fasheh 276ccd979bdSMark Fasheh /* 277ccd979bdSMark Fasheh * How many free extents have we got before we need more meta data? 278ccd979bdSMark Fasheh */ 279ccd979bdSMark Fasheh int ocfs2_num_free_extents(struct ocfs2_super *osb, 280ccd979bdSMark Fasheh struct inode *inode, 281ccd979bdSMark Fasheh struct ocfs2_dinode *fe) 282ccd979bdSMark Fasheh { 283ccd979bdSMark Fasheh int retval; 284ccd979bdSMark Fasheh struct ocfs2_extent_list *el; 285ccd979bdSMark Fasheh struct ocfs2_extent_block *eb; 286ccd979bdSMark Fasheh struct buffer_head *eb_bh = NULL; 287ccd979bdSMark Fasheh 288ccd979bdSMark Fasheh mlog_entry_void(); 289ccd979bdSMark Fasheh 290ccd979bdSMark Fasheh if (!OCFS2_IS_VALID_DINODE(fe)) { 291ccd979bdSMark Fasheh OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe); 292ccd979bdSMark Fasheh retval = -EIO; 293ccd979bdSMark Fasheh goto bail; 294ccd979bdSMark Fasheh } 295ccd979bdSMark Fasheh 296ccd979bdSMark Fasheh if (fe->i_last_eb_blk) { 297ccd979bdSMark Fasheh retval = ocfs2_read_block(osb, le64_to_cpu(fe->i_last_eb_blk), 298ccd979bdSMark Fasheh &eb_bh, OCFS2_BH_CACHED, inode); 299ccd979bdSMark Fasheh if (retval < 0) { 300ccd979bdSMark Fasheh mlog_errno(retval); 301ccd979bdSMark Fasheh goto bail; 302ccd979bdSMark Fasheh } 303ccd979bdSMark Fasheh eb = (struct ocfs2_extent_block *) eb_bh->b_data; 304ccd979bdSMark Fasheh el = &eb->h_list; 305ccd979bdSMark Fasheh } else 306ccd979bdSMark Fasheh el = &fe->id2.i_list; 307ccd979bdSMark Fasheh 308ccd979bdSMark Fasheh BUG_ON(el->l_tree_depth != 0); 309ccd979bdSMark Fasheh 310ccd979bdSMark Fasheh retval = le16_to_cpu(el->l_count) - le16_to_cpu(el->l_next_free_rec); 311ccd979bdSMark Fasheh bail: 312ccd979bdSMark Fasheh if (eb_bh) 313ccd979bdSMark Fasheh brelse(eb_bh); 314ccd979bdSMark Fasheh 315ccd979bdSMark Fasheh mlog_exit(retval); 316ccd979bdSMark Fasheh return retval; 317ccd979bdSMark Fasheh } 318ccd979bdSMark Fasheh 319ccd979bdSMark Fasheh /* expects array to already be allocated 320ccd979bdSMark Fasheh * 321ccd979bdSMark Fasheh * sets h_signature, h_blkno, h_suballoc_bit, h_suballoc_slot, and 322ccd979bdSMark Fasheh * l_count for you 323ccd979bdSMark Fasheh */ 324ccd979bdSMark Fasheh static int ocfs2_create_new_meta_bhs(struct ocfs2_super *osb, 3251fabe148SMark Fasheh handle_t *handle, 326ccd979bdSMark Fasheh struct inode *inode, 327ccd979bdSMark Fasheh int wanted, 328ccd979bdSMark Fasheh struct ocfs2_alloc_context *meta_ac, 329ccd979bdSMark Fasheh struct buffer_head *bhs[]) 330ccd979bdSMark Fasheh { 331ccd979bdSMark Fasheh int count, status, i; 332ccd979bdSMark Fasheh u16 suballoc_bit_start; 333ccd979bdSMark Fasheh u32 num_got; 334ccd979bdSMark Fasheh u64 first_blkno; 335ccd979bdSMark Fasheh struct ocfs2_extent_block *eb; 336ccd979bdSMark Fasheh 337ccd979bdSMark Fasheh mlog_entry_void(); 338ccd979bdSMark Fasheh 339ccd979bdSMark Fasheh count = 0; 340ccd979bdSMark Fasheh while (count < wanted) { 341ccd979bdSMark Fasheh status = ocfs2_claim_metadata(osb, 342ccd979bdSMark Fasheh handle, 343ccd979bdSMark Fasheh meta_ac, 344ccd979bdSMark Fasheh wanted - count, 345ccd979bdSMark Fasheh &suballoc_bit_start, 346ccd979bdSMark Fasheh &num_got, 347ccd979bdSMark Fasheh &first_blkno); 348ccd979bdSMark Fasheh if (status < 0) { 349ccd979bdSMark Fasheh mlog_errno(status); 350ccd979bdSMark Fasheh goto bail; 351ccd979bdSMark Fasheh } 352ccd979bdSMark Fasheh 353ccd979bdSMark Fasheh for(i = count; i < (num_got + count); i++) { 354ccd979bdSMark Fasheh bhs[i] = sb_getblk(osb->sb, first_blkno); 355ccd979bdSMark Fasheh if (bhs[i] == NULL) { 356ccd979bdSMark Fasheh status = -EIO; 357ccd979bdSMark Fasheh mlog_errno(status); 358ccd979bdSMark Fasheh goto bail; 359ccd979bdSMark Fasheh } 360ccd979bdSMark Fasheh ocfs2_set_new_buffer_uptodate(inode, bhs[i]); 361ccd979bdSMark Fasheh 362ccd979bdSMark Fasheh status = ocfs2_journal_access(handle, inode, bhs[i], 363ccd979bdSMark Fasheh OCFS2_JOURNAL_ACCESS_CREATE); 364ccd979bdSMark Fasheh if (status < 0) { 365ccd979bdSMark Fasheh mlog_errno(status); 366ccd979bdSMark Fasheh goto bail; 367ccd979bdSMark Fasheh } 368ccd979bdSMark Fasheh 369ccd979bdSMark Fasheh memset(bhs[i]->b_data, 0, osb->sb->s_blocksize); 370ccd979bdSMark Fasheh eb = (struct ocfs2_extent_block *) bhs[i]->b_data; 371ccd979bdSMark Fasheh /* Ok, setup the minimal stuff here. */ 372ccd979bdSMark Fasheh strcpy(eb->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE); 373ccd979bdSMark Fasheh eb->h_blkno = cpu_to_le64(first_blkno); 374ccd979bdSMark Fasheh eb->h_fs_generation = cpu_to_le32(osb->fs_generation); 375ccd979bdSMark Fasheh 376ccd979bdSMark Fasheh #ifndef OCFS2_USE_ALL_METADATA_SUBALLOCATORS 377ccd979bdSMark Fasheh /* we always use slot zero's suballocator */ 378ccd979bdSMark Fasheh eb->h_suballoc_slot = 0; 379ccd979bdSMark Fasheh #else 380ccd979bdSMark Fasheh eb->h_suballoc_slot = cpu_to_le16(osb->slot_num); 381ccd979bdSMark Fasheh #endif 382ccd979bdSMark Fasheh eb->h_suballoc_bit = cpu_to_le16(suballoc_bit_start); 383ccd979bdSMark Fasheh eb->h_list.l_count = 384ccd979bdSMark Fasheh cpu_to_le16(ocfs2_extent_recs_per_eb(osb->sb)); 385ccd979bdSMark Fasheh 386ccd979bdSMark Fasheh suballoc_bit_start++; 387ccd979bdSMark Fasheh first_blkno++; 388ccd979bdSMark Fasheh 389ccd979bdSMark Fasheh /* We'll also be dirtied by the caller, so 390ccd979bdSMark Fasheh * this isn't absolutely necessary. */ 391ccd979bdSMark Fasheh status = ocfs2_journal_dirty(handle, bhs[i]); 392ccd979bdSMark Fasheh if (status < 0) { 393ccd979bdSMark Fasheh mlog_errno(status); 394ccd979bdSMark Fasheh goto bail; 395ccd979bdSMark Fasheh } 396ccd979bdSMark Fasheh } 397ccd979bdSMark Fasheh 398ccd979bdSMark Fasheh count += num_got; 399ccd979bdSMark Fasheh } 400ccd979bdSMark Fasheh 401ccd979bdSMark Fasheh status = 0; 402ccd979bdSMark Fasheh bail: 403ccd979bdSMark Fasheh if (status < 0) { 404ccd979bdSMark Fasheh for(i = 0; i < wanted; i++) { 405ccd979bdSMark Fasheh if (bhs[i]) 406ccd979bdSMark Fasheh brelse(bhs[i]); 407ccd979bdSMark Fasheh bhs[i] = NULL; 408ccd979bdSMark Fasheh } 409ccd979bdSMark Fasheh } 410ccd979bdSMark Fasheh mlog_exit(status); 411ccd979bdSMark Fasheh return status; 412ccd979bdSMark Fasheh } 413ccd979bdSMark Fasheh 414ccd979bdSMark Fasheh /* 415dcd0538fSMark Fasheh * Helper function for ocfs2_add_branch() and ocfs2_shift_tree_depth(). 416dcd0538fSMark Fasheh * 417dcd0538fSMark Fasheh * Returns the sum of the rightmost extent rec logical offset and 418dcd0538fSMark Fasheh * cluster count. 419dcd0538fSMark Fasheh * 420dcd0538fSMark Fasheh * ocfs2_add_branch() uses this to determine what logical cluster 421dcd0538fSMark Fasheh * value should be populated into the leftmost new branch records. 422dcd0538fSMark Fasheh * 423dcd0538fSMark Fasheh * ocfs2_shift_tree_depth() uses this to determine the # clusters 424dcd0538fSMark Fasheh * value for the new topmost tree record. 425dcd0538fSMark Fasheh */ 426dcd0538fSMark Fasheh static inline u32 ocfs2_sum_rightmost_rec(struct ocfs2_extent_list *el) 427dcd0538fSMark Fasheh { 428dcd0538fSMark Fasheh int i; 429dcd0538fSMark Fasheh 430dcd0538fSMark Fasheh i = le16_to_cpu(el->l_next_free_rec) - 1; 431dcd0538fSMark Fasheh 432dcd0538fSMark Fasheh return le32_to_cpu(el->l_recs[i].e_cpos) + 433dcd0538fSMark Fasheh le32_to_cpu(el->l_recs[i].e_clusters); 434dcd0538fSMark Fasheh } 435dcd0538fSMark Fasheh 436dcd0538fSMark Fasheh /* 437ccd979bdSMark Fasheh * Add an entire tree branch to our inode. eb_bh is the extent block 438ccd979bdSMark Fasheh * to start at, if we don't want to start the branch at the dinode 439ccd979bdSMark Fasheh * structure. 440ccd979bdSMark Fasheh * 441ccd979bdSMark Fasheh * last_eb_bh is required as we have to update it's next_leaf pointer 442ccd979bdSMark Fasheh * for the new last extent block. 443ccd979bdSMark Fasheh * 444ccd979bdSMark Fasheh * the new branch will be 'empty' in the sense that every block will 445ccd979bdSMark Fasheh * contain a single record with e_clusters == 0. 446ccd979bdSMark Fasheh */ 447ccd979bdSMark Fasheh static int ocfs2_add_branch(struct ocfs2_super *osb, 4481fabe148SMark Fasheh handle_t *handle, 449ccd979bdSMark Fasheh struct inode *inode, 450ccd979bdSMark Fasheh struct buffer_head *fe_bh, 451ccd979bdSMark Fasheh struct buffer_head *eb_bh, 452ccd979bdSMark Fasheh struct buffer_head *last_eb_bh, 453ccd979bdSMark Fasheh struct ocfs2_alloc_context *meta_ac) 454ccd979bdSMark Fasheh { 455ccd979bdSMark Fasheh int status, new_blocks, i; 456ccd979bdSMark Fasheh u64 next_blkno, new_last_eb_blk; 457ccd979bdSMark Fasheh struct buffer_head *bh; 458ccd979bdSMark Fasheh struct buffer_head **new_eb_bhs = NULL; 459ccd979bdSMark Fasheh struct ocfs2_dinode *fe; 460ccd979bdSMark Fasheh struct ocfs2_extent_block *eb; 461ccd979bdSMark Fasheh struct ocfs2_extent_list *eb_el; 462ccd979bdSMark Fasheh struct ocfs2_extent_list *el; 463dcd0538fSMark Fasheh u32 new_cpos; 464ccd979bdSMark Fasheh 465ccd979bdSMark Fasheh mlog_entry_void(); 466ccd979bdSMark Fasheh 467ccd979bdSMark Fasheh BUG_ON(!last_eb_bh); 468ccd979bdSMark Fasheh 469ccd979bdSMark Fasheh fe = (struct ocfs2_dinode *) fe_bh->b_data; 470ccd979bdSMark Fasheh 471ccd979bdSMark Fasheh if (eb_bh) { 472ccd979bdSMark Fasheh eb = (struct ocfs2_extent_block *) eb_bh->b_data; 473ccd979bdSMark Fasheh el = &eb->h_list; 474ccd979bdSMark Fasheh } else 475ccd979bdSMark Fasheh el = &fe->id2.i_list; 476ccd979bdSMark Fasheh 477ccd979bdSMark Fasheh /* we never add a branch to a leaf. */ 478ccd979bdSMark Fasheh BUG_ON(!el->l_tree_depth); 479ccd979bdSMark Fasheh 480ccd979bdSMark Fasheh new_blocks = le16_to_cpu(el->l_tree_depth); 481ccd979bdSMark Fasheh 482ccd979bdSMark Fasheh /* allocate the number of new eb blocks we need */ 483ccd979bdSMark Fasheh new_eb_bhs = kcalloc(new_blocks, sizeof(struct buffer_head *), 484ccd979bdSMark Fasheh GFP_KERNEL); 485ccd979bdSMark Fasheh if (!new_eb_bhs) { 486ccd979bdSMark Fasheh status = -ENOMEM; 487ccd979bdSMark Fasheh mlog_errno(status); 488ccd979bdSMark Fasheh goto bail; 489ccd979bdSMark Fasheh } 490ccd979bdSMark Fasheh 491ccd979bdSMark Fasheh status = ocfs2_create_new_meta_bhs(osb, handle, inode, new_blocks, 492ccd979bdSMark Fasheh meta_ac, new_eb_bhs); 493ccd979bdSMark Fasheh if (status < 0) { 494ccd979bdSMark Fasheh mlog_errno(status); 495ccd979bdSMark Fasheh goto bail; 496ccd979bdSMark Fasheh } 497ccd979bdSMark Fasheh 498dcd0538fSMark Fasheh eb = (struct ocfs2_extent_block *)last_eb_bh->b_data; 499dcd0538fSMark Fasheh new_cpos = ocfs2_sum_rightmost_rec(&eb->h_list); 500dcd0538fSMark Fasheh 501ccd979bdSMark Fasheh /* Note: new_eb_bhs[new_blocks - 1] is the guy which will be 502ccd979bdSMark Fasheh * linked with the rest of the tree. 503ccd979bdSMark Fasheh * conversly, new_eb_bhs[0] is the new bottommost leaf. 504ccd979bdSMark Fasheh * 505ccd979bdSMark Fasheh * when we leave the loop, new_last_eb_blk will point to the 506ccd979bdSMark Fasheh * newest leaf, and next_blkno will point to the topmost extent 507ccd979bdSMark Fasheh * block. */ 508ccd979bdSMark Fasheh next_blkno = new_last_eb_blk = 0; 509ccd979bdSMark Fasheh for(i = 0; i < new_blocks; i++) { 510ccd979bdSMark Fasheh bh = new_eb_bhs[i]; 511ccd979bdSMark Fasheh eb = (struct ocfs2_extent_block *) bh->b_data; 512ccd979bdSMark Fasheh if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { 513ccd979bdSMark Fasheh OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); 514ccd979bdSMark Fasheh status = -EIO; 515ccd979bdSMark Fasheh goto bail; 516ccd979bdSMark Fasheh } 517ccd979bdSMark Fasheh eb_el = &eb->h_list; 518ccd979bdSMark Fasheh 519ccd979bdSMark Fasheh status = ocfs2_journal_access(handle, inode, bh, 520ccd979bdSMark Fasheh OCFS2_JOURNAL_ACCESS_CREATE); 521ccd979bdSMark Fasheh if (status < 0) { 522ccd979bdSMark Fasheh mlog_errno(status); 523ccd979bdSMark Fasheh goto bail; 524ccd979bdSMark Fasheh } 525ccd979bdSMark Fasheh 526ccd979bdSMark Fasheh eb->h_next_leaf_blk = 0; 527ccd979bdSMark Fasheh eb_el->l_tree_depth = cpu_to_le16(i); 528ccd979bdSMark Fasheh eb_el->l_next_free_rec = cpu_to_le16(1); 529dcd0538fSMark Fasheh /* 530dcd0538fSMark Fasheh * This actually counts as an empty extent as 531dcd0538fSMark Fasheh * c_clusters == 0 532dcd0538fSMark Fasheh */ 533dcd0538fSMark Fasheh eb_el->l_recs[0].e_cpos = cpu_to_le32(new_cpos); 534ccd979bdSMark Fasheh eb_el->l_recs[0].e_blkno = cpu_to_le64(next_blkno); 535ccd979bdSMark Fasheh eb_el->l_recs[0].e_clusters = cpu_to_le32(0); 536ccd979bdSMark Fasheh if (!eb_el->l_tree_depth) 537ccd979bdSMark Fasheh new_last_eb_blk = le64_to_cpu(eb->h_blkno); 538ccd979bdSMark Fasheh 539ccd979bdSMark Fasheh status = ocfs2_journal_dirty(handle, bh); 540ccd979bdSMark Fasheh if (status < 0) { 541ccd979bdSMark Fasheh mlog_errno(status); 542ccd979bdSMark Fasheh goto bail; 543ccd979bdSMark Fasheh } 544ccd979bdSMark Fasheh 545ccd979bdSMark Fasheh next_blkno = le64_to_cpu(eb->h_blkno); 546ccd979bdSMark Fasheh } 547ccd979bdSMark Fasheh 548ccd979bdSMark Fasheh /* This is a bit hairy. We want to update up to three blocks 549ccd979bdSMark Fasheh * here without leaving any of them in an inconsistent state 550ccd979bdSMark Fasheh * in case of error. We don't have to worry about 551ccd979bdSMark Fasheh * journal_dirty erroring as it won't unless we've aborted the 552ccd979bdSMark Fasheh * handle (in which case we would never be here) so reserving 553ccd979bdSMark Fasheh * the write with journal_access is all we need to do. */ 554ccd979bdSMark Fasheh status = ocfs2_journal_access(handle, inode, last_eb_bh, 555ccd979bdSMark Fasheh OCFS2_JOURNAL_ACCESS_WRITE); 556ccd979bdSMark Fasheh if (status < 0) { 557ccd979bdSMark Fasheh mlog_errno(status); 558ccd979bdSMark Fasheh goto bail; 559ccd979bdSMark Fasheh } 560ccd979bdSMark Fasheh status = ocfs2_journal_access(handle, inode, fe_bh, 561ccd979bdSMark Fasheh OCFS2_JOURNAL_ACCESS_WRITE); 562ccd979bdSMark Fasheh if (status < 0) { 563ccd979bdSMark Fasheh mlog_errno(status); 564ccd979bdSMark Fasheh goto bail; 565ccd979bdSMark Fasheh } 566ccd979bdSMark Fasheh if (eb_bh) { 567ccd979bdSMark Fasheh status = ocfs2_journal_access(handle, inode, eb_bh, 568ccd979bdSMark Fasheh OCFS2_JOURNAL_ACCESS_WRITE); 569ccd979bdSMark Fasheh if (status < 0) { 570ccd979bdSMark Fasheh mlog_errno(status); 571ccd979bdSMark Fasheh goto bail; 572ccd979bdSMark Fasheh } 573ccd979bdSMark Fasheh } 574ccd979bdSMark Fasheh 575ccd979bdSMark Fasheh /* Link the new branch into the rest of the tree (el will 576ccd979bdSMark Fasheh * either be on the fe, or the extent block passed in. */ 577ccd979bdSMark Fasheh i = le16_to_cpu(el->l_next_free_rec); 578ccd979bdSMark Fasheh el->l_recs[i].e_blkno = cpu_to_le64(next_blkno); 579dcd0538fSMark Fasheh el->l_recs[i].e_cpos = cpu_to_le32(new_cpos); 580ccd979bdSMark Fasheh el->l_recs[i].e_clusters = 0; 581ccd979bdSMark Fasheh le16_add_cpu(&el->l_next_free_rec, 1); 582ccd979bdSMark Fasheh 583ccd979bdSMark Fasheh /* fe needs a new last extent block pointer, as does the 584ccd979bdSMark Fasheh * next_leaf on the previously last-extent-block. */ 585ccd979bdSMark Fasheh fe->i_last_eb_blk = cpu_to_le64(new_last_eb_blk); 586ccd979bdSMark Fasheh 587ccd979bdSMark Fasheh eb = (struct ocfs2_extent_block *) last_eb_bh->b_data; 588ccd979bdSMark Fasheh eb->h_next_leaf_blk = cpu_to_le64(new_last_eb_blk); 589ccd979bdSMark Fasheh 590ccd979bdSMark Fasheh status = ocfs2_journal_dirty(handle, last_eb_bh); 591ccd979bdSMark Fasheh if (status < 0) 592ccd979bdSMark Fasheh mlog_errno(status); 593ccd979bdSMark Fasheh status = ocfs2_journal_dirty(handle, fe_bh); 594ccd979bdSMark Fasheh if (status < 0) 595ccd979bdSMark Fasheh mlog_errno(status); 596ccd979bdSMark Fasheh if (eb_bh) { 597ccd979bdSMark Fasheh status = ocfs2_journal_dirty(handle, eb_bh); 598ccd979bdSMark Fasheh if (status < 0) 599ccd979bdSMark Fasheh mlog_errno(status); 600ccd979bdSMark Fasheh } 601ccd979bdSMark Fasheh 602ccd979bdSMark Fasheh status = 0; 603ccd979bdSMark Fasheh bail: 604ccd979bdSMark Fasheh if (new_eb_bhs) { 605ccd979bdSMark Fasheh for (i = 0; i < new_blocks; i++) 606ccd979bdSMark Fasheh if (new_eb_bhs[i]) 607ccd979bdSMark Fasheh brelse(new_eb_bhs[i]); 608ccd979bdSMark Fasheh kfree(new_eb_bhs); 609ccd979bdSMark Fasheh } 610ccd979bdSMark Fasheh 611ccd979bdSMark Fasheh mlog_exit(status); 612ccd979bdSMark Fasheh return status; 613ccd979bdSMark Fasheh } 614ccd979bdSMark Fasheh 615ccd979bdSMark Fasheh /* 616ccd979bdSMark Fasheh * adds another level to the allocation tree. 617ccd979bdSMark Fasheh * returns back the new extent block so you can add a branch to it 618ccd979bdSMark Fasheh * after this call. 619ccd979bdSMark Fasheh */ 620ccd979bdSMark Fasheh static int ocfs2_shift_tree_depth(struct ocfs2_super *osb, 6211fabe148SMark Fasheh handle_t *handle, 622ccd979bdSMark Fasheh struct inode *inode, 623ccd979bdSMark Fasheh struct buffer_head *fe_bh, 624ccd979bdSMark Fasheh struct ocfs2_alloc_context *meta_ac, 625ccd979bdSMark Fasheh struct buffer_head **ret_new_eb_bh) 626ccd979bdSMark Fasheh { 627ccd979bdSMark Fasheh int status, i; 628dcd0538fSMark Fasheh u32 new_clusters; 629ccd979bdSMark Fasheh struct buffer_head *new_eb_bh = NULL; 630ccd979bdSMark Fasheh struct ocfs2_dinode *fe; 631ccd979bdSMark Fasheh struct ocfs2_extent_block *eb; 632ccd979bdSMark Fasheh struct ocfs2_extent_list *fe_el; 633ccd979bdSMark Fasheh struct ocfs2_extent_list *eb_el; 634ccd979bdSMark Fasheh 635ccd979bdSMark Fasheh mlog_entry_void(); 636ccd979bdSMark Fasheh 637ccd979bdSMark Fasheh status = ocfs2_create_new_meta_bhs(osb, handle, inode, 1, meta_ac, 638ccd979bdSMark Fasheh &new_eb_bh); 639ccd979bdSMark Fasheh if (status < 0) { 640ccd979bdSMark Fasheh mlog_errno(status); 641ccd979bdSMark Fasheh goto bail; 642ccd979bdSMark Fasheh } 643ccd979bdSMark Fasheh 644ccd979bdSMark Fasheh eb = (struct ocfs2_extent_block *) new_eb_bh->b_data; 645ccd979bdSMark Fasheh if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { 646ccd979bdSMark Fasheh OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); 647ccd979bdSMark Fasheh status = -EIO; 648ccd979bdSMark Fasheh goto bail; 649ccd979bdSMark Fasheh } 650ccd979bdSMark Fasheh 651ccd979bdSMark Fasheh eb_el = &eb->h_list; 652ccd979bdSMark Fasheh fe = (struct ocfs2_dinode *) fe_bh->b_data; 653ccd979bdSMark Fasheh fe_el = &fe->id2.i_list; 654ccd979bdSMark Fasheh 655ccd979bdSMark Fasheh status = ocfs2_journal_access(handle, inode, new_eb_bh, 656ccd979bdSMark Fasheh OCFS2_JOURNAL_ACCESS_CREATE); 657ccd979bdSMark Fasheh if (status < 0) { 658ccd979bdSMark Fasheh mlog_errno(status); 659ccd979bdSMark Fasheh goto bail; 660ccd979bdSMark Fasheh } 661ccd979bdSMark Fasheh 662ccd979bdSMark Fasheh /* copy the fe data into the new extent block */ 663ccd979bdSMark Fasheh eb_el->l_tree_depth = fe_el->l_tree_depth; 664ccd979bdSMark Fasheh eb_el->l_next_free_rec = fe_el->l_next_free_rec; 665ccd979bdSMark Fasheh for(i = 0; i < le16_to_cpu(fe_el->l_next_free_rec); i++) { 666ccd979bdSMark Fasheh eb_el->l_recs[i].e_cpos = fe_el->l_recs[i].e_cpos; 667ccd979bdSMark Fasheh eb_el->l_recs[i].e_clusters = fe_el->l_recs[i].e_clusters; 668ccd979bdSMark Fasheh eb_el->l_recs[i].e_blkno = fe_el->l_recs[i].e_blkno; 669ccd979bdSMark Fasheh } 670ccd979bdSMark Fasheh 671ccd979bdSMark Fasheh status = ocfs2_journal_dirty(handle, new_eb_bh); 672ccd979bdSMark Fasheh if (status < 0) { 673ccd979bdSMark Fasheh mlog_errno(status); 674ccd979bdSMark Fasheh goto bail; 675ccd979bdSMark Fasheh } 676ccd979bdSMark Fasheh 677ccd979bdSMark Fasheh status = ocfs2_journal_access(handle, inode, fe_bh, 678ccd979bdSMark Fasheh OCFS2_JOURNAL_ACCESS_WRITE); 679ccd979bdSMark Fasheh if (status < 0) { 680ccd979bdSMark Fasheh mlog_errno(status); 681ccd979bdSMark Fasheh goto bail; 682ccd979bdSMark Fasheh } 683ccd979bdSMark Fasheh 684dcd0538fSMark Fasheh new_clusters = ocfs2_sum_rightmost_rec(eb_el); 685dcd0538fSMark Fasheh 686ccd979bdSMark Fasheh /* update fe now */ 687ccd979bdSMark Fasheh le16_add_cpu(&fe_el->l_tree_depth, 1); 688ccd979bdSMark Fasheh fe_el->l_recs[0].e_cpos = 0; 689ccd979bdSMark Fasheh fe_el->l_recs[0].e_blkno = eb->h_blkno; 690dcd0538fSMark Fasheh fe_el->l_recs[0].e_clusters = cpu_to_le32(new_clusters); 691ccd979bdSMark Fasheh for(i = 1; i < le16_to_cpu(fe_el->l_next_free_rec); i++) { 692ccd979bdSMark Fasheh fe_el->l_recs[i].e_cpos = 0; 693ccd979bdSMark Fasheh fe_el->l_recs[i].e_clusters = 0; 694ccd979bdSMark Fasheh fe_el->l_recs[i].e_blkno = 0; 695ccd979bdSMark Fasheh } 696ccd979bdSMark Fasheh fe_el->l_next_free_rec = cpu_to_le16(1); 697ccd979bdSMark Fasheh 698ccd979bdSMark Fasheh /* If this is our 1st tree depth shift, then last_eb_blk 699ccd979bdSMark Fasheh * becomes the allocated extent block */ 700ccd979bdSMark Fasheh if (fe_el->l_tree_depth == cpu_to_le16(1)) 701ccd979bdSMark Fasheh fe->i_last_eb_blk = eb->h_blkno; 702ccd979bdSMark Fasheh 703ccd979bdSMark Fasheh status = ocfs2_journal_dirty(handle, fe_bh); 704ccd979bdSMark Fasheh if (status < 0) { 705ccd979bdSMark Fasheh mlog_errno(status); 706ccd979bdSMark Fasheh goto bail; 707ccd979bdSMark Fasheh } 708ccd979bdSMark Fasheh 709ccd979bdSMark Fasheh *ret_new_eb_bh = new_eb_bh; 710ccd979bdSMark Fasheh new_eb_bh = NULL; 711ccd979bdSMark Fasheh status = 0; 712ccd979bdSMark Fasheh bail: 713ccd979bdSMark Fasheh if (new_eb_bh) 714ccd979bdSMark Fasheh brelse(new_eb_bh); 715ccd979bdSMark Fasheh 716ccd979bdSMark Fasheh mlog_exit(status); 717ccd979bdSMark Fasheh return status; 718ccd979bdSMark Fasheh } 719ccd979bdSMark Fasheh 720ccd979bdSMark Fasheh /* 721ccd979bdSMark Fasheh * Should only be called when there is no space left in any of the 722ccd979bdSMark Fasheh * leaf nodes. What we want to do is find the lowest tree depth 723ccd979bdSMark Fasheh * non-leaf extent block with room for new records. There are three 724ccd979bdSMark Fasheh * valid results of this search: 725ccd979bdSMark Fasheh * 726ccd979bdSMark Fasheh * 1) a lowest extent block is found, then we pass it back in 727ccd979bdSMark Fasheh * *lowest_eb_bh and return '0' 728ccd979bdSMark Fasheh * 729ccd979bdSMark Fasheh * 2) the search fails to find anything, but the dinode has room. We 730ccd979bdSMark Fasheh * pass NULL back in *lowest_eb_bh, but still return '0' 731ccd979bdSMark Fasheh * 732ccd979bdSMark Fasheh * 3) the search fails to find anything AND the dinode is full, in 733ccd979bdSMark Fasheh * which case we return > 0 734ccd979bdSMark Fasheh * 735ccd979bdSMark Fasheh * return status < 0 indicates an error. 736ccd979bdSMark Fasheh */ 737ccd979bdSMark Fasheh static int ocfs2_find_branch_target(struct ocfs2_super *osb, 738ccd979bdSMark Fasheh struct inode *inode, 739ccd979bdSMark Fasheh struct buffer_head *fe_bh, 740ccd979bdSMark Fasheh struct buffer_head **target_bh) 741ccd979bdSMark Fasheh { 742ccd979bdSMark Fasheh int status = 0, i; 743ccd979bdSMark Fasheh u64 blkno; 744ccd979bdSMark Fasheh struct ocfs2_dinode *fe; 745ccd979bdSMark Fasheh struct ocfs2_extent_block *eb; 746ccd979bdSMark Fasheh struct ocfs2_extent_list *el; 747ccd979bdSMark Fasheh struct buffer_head *bh = NULL; 748ccd979bdSMark Fasheh struct buffer_head *lowest_bh = NULL; 749ccd979bdSMark Fasheh 750ccd979bdSMark Fasheh mlog_entry_void(); 751ccd979bdSMark Fasheh 752ccd979bdSMark Fasheh *target_bh = NULL; 753ccd979bdSMark Fasheh 754ccd979bdSMark Fasheh fe = (struct ocfs2_dinode *) fe_bh->b_data; 755ccd979bdSMark Fasheh el = &fe->id2.i_list; 756ccd979bdSMark Fasheh 757ccd979bdSMark Fasheh while(le16_to_cpu(el->l_tree_depth) > 1) { 758ccd979bdSMark Fasheh if (le16_to_cpu(el->l_next_free_rec) == 0) { 759b0697053SMark Fasheh ocfs2_error(inode->i_sb, "Dinode %llu has empty " 760ccd979bdSMark Fasheh "extent list (next_free_rec == 0)", 761b0697053SMark Fasheh (unsigned long long)OCFS2_I(inode)->ip_blkno); 762ccd979bdSMark Fasheh status = -EIO; 763ccd979bdSMark Fasheh goto bail; 764ccd979bdSMark Fasheh } 765ccd979bdSMark Fasheh i = le16_to_cpu(el->l_next_free_rec) - 1; 766ccd979bdSMark Fasheh blkno = le64_to_cpu(el->l_recs[i].e_blkno); 767ccd979bdSMark Fasheh if (!blkno) { 768b0697053SMark Fasheh ocfs2_error(inode->i_sb, "Dinode %llu has extent " 769ccd979bdSMark Fasheh "list where extent # %d has no physical " 770ccd979bdSMark Fasheh "block start", 771b0697053SMark Fasheh (unsigned long long)OCFS2_I(inode)->ip_blkno, i); 772ccd979bdSMark Fasheh status = -EIO; 773ccd979bdSMark Fasheh goto bail; 774ccd979bdSMark Fasheh } 775ccd979bdSMark Fasheh 776ccd979bdSMark Fasheh if (bh) { 777ccd979bdSMark Fasheh brelse(bh); 778ccd979bdSMark Fasheh bh = NULL; 779ccd979bdSMark Fasheh } 780ccd979bdSMark Fasheh 781ccd979bdSMark Fasheh status = ocfs2_read_block(osb, blkno, &bh, OCFS2_BH_CACHED, 782ccd979bdSMark Fasheh inode); 783ccd979bdSMark Fasheh if (status < 0) { 784ccd979bdSMark Fasheh mlog_errno(status); 785ccd979bdSMark Fasheh goto bail; 786ccd979bdSMark Fasheh } 787ccd979bdSMark Fasheh 788ccd979bdSMark Fasheh eb = (struct ocfs2_extent_block *) bh->b_data; 789ccd979bdSMark Fasheh if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { 790ccd979bdSMark Fasheh OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); 791ccd979bdSMark Fasheh status = -EIO; 792ccd979bdSMark Fasheh goto bail; 793ccd979bdSMark Fasheh } 794ccd979bdSMark Fasheh el = &eb->h_list; 795ccd979bdSMark Fasheh 796ccd979bdSMark Fasheh if (le16_to_cpu(el->l_next_free_rec) < 797ccd979bdSMark Fasheh le16_to_cpu(el->l_count)) { 798ccd979bdSMark Fasheh if (lowest_bh) 799ccd979bdSMark Fasheh brelse(lowest_bh); 800ccd979bdSMark Fasheh lowest_bh = bh; 801ccd979bdSMark Fasheh get_bh(lowest_bh); 802ccd979bdSMark Fasheh } 803ccd979bdSMark Fasheh } 804ccd979bdSMark Fasheh 805ccd979bdSMark Fasheh /* If we didn't find one and the fe doesn't have any room, 806ccd979bdSMark Fasheh * then return '1' */ 807ccd979bdSMark Fasheh if (!lowest_bh 808ccd979bdSMark Fasheh && (fe->id2.i_list.l_next_free_rec == fe->id2.i_list.l_count)) 809ccd979bdSMark Fasheh status = 1; 810ccd979bdSMark Fasheh 811ccd979bdSMark Fasheh *target_bh = lowest_bh; 812ccd979bdSMark Fasheh bail: 813ccd979bdSMark Fasheh if (bh) 814ccd979bdSMark Fasheh brelse(bh); 815ccd979bdSMark Fasheh 816ccd979bdSMark Fasheh mlog_exit(status); 817ccd979bdSMark Fasheh return status; 818ccd979bdSMark Fasheh } 819ccd979bdSMark Fasheh 820dcd0538fSMark Fasheh static inline int ocfs2_is_empty_extent(struct ocfs2_extent_rec *rec) 821dcd0538fSMark Fasheh { 822dcd0538fSMark Fasheh return !rec->e_clusters; 823dcd0538fSMark Fasheh } 824dcd0538fSMark Fasheh 825dcd0538fSMark Fasheh /* 826dcd0538fSMark Fasheh * This function will discard the rightmost extent record. 827dcd0538fSMark Fasheh */ 828dcd0538fSMark Fasheh static void ocfs2_shift_records_right(struct ocfs2_extent_list *el) 829dcd0538fSMark Fasheh { 830dcd0538fSMark Fasheh int next_free = le16_to_cpu(el->l_next_free_rec); 831dcd0538fSMark Fasheh int count = le16_to_cpu(el->l_count); 832dcd0538fSMark Fasheh unsigned int num_bytes; 833dcd0538fSMark Fasheh 834dcd0538fSMark Fasheh BUG_ON(!next_free); 835dcd0538fSMark Fasheh /* This will cause us to go off the end of our extent list. */ 836dcd0538fSMark Fasheh BUG_ON(next_free >= count); 837dcd0538fSMark Fasheh 838dcd0538fSMark Fasheh num_bytes = sizeof(struct ocfs2_extent_rec) * next_free; 839dcd0538fSMark Fasheh 840dcd0538fSMark Fasheh memmove(&el->l_recs[1], &el->l_recs[0], num_bytes); 841dcd0538fSMark Fasheh } 842dcd0538fSMark Fasheh 843dcd0538fSMark Fasheh static void ocfs2_rotate_leaf(struct ocfs2_extent_list *el, 844dcd0538fSMark Fasheh struct ocfs2_extent_rec *insert_rec) 845dcd0538fSMark Fasheh { 846dcd0538fSMark Fasheh int i, insert_index, next_free, has_empty, num_bytes; 847dcd0538fSMark Fasheh u32 insert_cpos = le32_to_cpu(insert_rec->e_cpos); 848dcd0538fSMark Fasheh struct ocfs2_extent_rec *rec; 849dcd0538fSMark Fasheh 850dcd0538fSMark Fasheh next_free = le16_to_cpu(el->l_next_free_rec); 851dcd0538fSMark Fasheh has_empty = ocfs2_is_empty_extent(&el->l_recs[0]); 852dcd0538fSMark Fasheh 853dcd0538fSMark Fasheh BUG_ON(!next_free); 854dcd0538fSMark Fasheh 855dcd0538fSMark Fasheh /* The tree code before us didn't allow enough room in the leaf. */ 856dcd0538fSMark Fasheh if (el->l_next_free_rec == el->l_count && !has_empty) 857dcd0538fSMark Fasheh BUG(); 858dcd0538fSMark Fasheh 859dcd0538fSMark Fasheh /* 860dcd0538fSMark Fasheh * The easiest way to approach this is to just remove the 861dcd0538fSMark Fasheh * empty extent and temporarily decrement next_free. 862dcd0538fSMark Fasheh */ 863dcd0538fSMark Fasheh if (has_empty) { 864dcd0538fSMark Fasheh /* 865dcd0538fSMark Fasheh * If next_free was 1 (only an empty extent), this 866dcd0538fSMark Fasheh * loop won't execute, which is fine. We still want 867dcd0538fSMark Fasheh * the decrement above to happen. 868dcd0538fSMark Fasheh */ 869dcd0538fSMark Fasheh for(i = 0; i < (next_free - 1); i++) 870dcd0538fSMark Fasheh el->l_recs[i] = el->l_recs[i+1]; 871dcd0538fSMark Fasheh 872dcd0538fSMark Fasheh next_free--; 873dcd0538fSMark Fasheh } 874dcd0538fSMark Fasheh 875dcd0538fSMark Fasheh /* 876dcd0538fSMark Fasheh * Figure out what the new record index should be. 877dcd0538fSMark Fasheh */ 878dcd0538fSMark Fasheh for(i = 0; i < next_free; i++) { 879dcd0538fSMark Fasheh rec = &el->l_recs[i]; 880dcd0538fSMark Fasheh 881dcd0538fSMark Fasheh if (insert_cpos < le32_to_cpu(rec->e_cpos)) 882dcd0538fSMark Fasheh break; 883dcd0538fSMark Fasheh } 884dcd0538fSMark Fasheh insert_index = i; 885dcd0538fSMark Fasheh 886dcd0538fSMark Fasheh mlog(0, "ins %u: index %d, has_empty %d, next_free %d, count %d\n", 887dcd0538fSMark Fasheh insert_cpos, insert_index, has_empty, next_free, le16_to_cpu(el->l_count)); 888dcd0538fSMark Fasheh 889dcd0538fSMark Fasheh BUG_ON(insert_index < 0); 890dcd0538fSMark Fasheh BUG_ON(insert_index >= le16_to_cpu(el->l_count)); 891dcd0538fSMark Fasheh BUG_ON(insert_index > next_free); 892dcd0538fSMark Fasheh 893dcd0538fSMark Fasheh /* 894dcd0538fSMark Fasheh * No need to memmove if we're just adding to the tail. 895dcd0538fSMark Fasheh */ 896dcd0538fSMark Fasheh if (insert_index != next_free) { 897dcd0538fSMark Fasheh BUG_ON(next_free >= le16_to_cpu(el->l_count)); 898dcd0538fSMark Fasheh 899dcd0538fSMark Fasheh num_bytes = next_free - insert_index; 900dcd0538fSMark Fasheh num_bytes *= sizeof(struct ocfs2_extent_rec); 901dcd0538fSMark Fasheh memmove(&el->l_recs[insert_index + 1], 902dcd0538fSMark Fasheh &el->l_recs[insert_index], 903dcd0538fSMark Fasheh num_bytes); 904dcd0538fSMark Fasheh } 905dcd0538fSMark Fasheh 906dcd0538fSMark Fasheh /* 907dcd0538fSMark Fasheh * Either we had an empty extent, and need to re-increment or 908dcd0538fSMark Fasheh * there was no empty extent on a non full rightmost leaf node, 909dcd0538fSMark Fasheh * in which case we still need to increment. 910dcd0538fSMark Fasheh */ 911dcd0538fSMark Fasheh next_free++; 912dcd0538fSMark Fasheh el->l_next_free_rec = cpu_to_le16(next_free); 913dcd0538fSMark Fasheh /* 914dcd0538fSMark Fasheh * Make sure none of the math above just messed up our tree. 915dcd0538fSMark Fasheh */ 916dcd0538fSMark Fasheh BUG_ON(le16_to_cpu(el->l_next_free_rec) > le16_to_cpu(el->l_count)); 917dcd0538fSMark Fasheh 918dcd0538fSMark Fasheh el->l_recs[insert_index] = *insert_rec; 919dcd0538fSMark Fasheh 920dcd0538fSMark Fasheh } 921dcd0538fSMark Fasheh 922dcd0538fSMark Fasheh /* 923dcd0538fSMark Fasheh * Create an empty extent record . 924dcd0538fSMark Fasheh * 925dcd0538fSMark Fasheh * l_next_free_rec may be updated. 926dcd0538fSMark Fasheh * 927dcd0538fSMark Fasheh * If an empty extent already exists do nothing. 928dcd0538fSMark Fasheh */ 929dcd0538fSMark Fasheh static void ocfs2_create_empty_extent(struct ocfs2_extent_list *el) 930dcd0538fSMark Fasheh { 931dcd0538fSMark Fasheh int next_free = le16_to_cpu(el->l_next_free_rec); 932dcd0538fSMark Fasheh 933dcd0538fSMark Fasheh if (next_free == 0) 934dcd0538fSMark Fasheh goto set_and_inc; 935dcd0538fSMark Fasheh 936dcd0538fSMark Fasheh if (ocfs2_is_empty_extent(&el->l_recs[0])) 937dcd0538fSMark Fasheh return; 938dcd0538fSMark Fasheh 939dcd0538fSMark Fasheh mlog_bug_on_msg(el->l_count == el->l_next_free_rec, 940dcd0538fSMark Fasheh "Asked to create an empty extent in a full list:\n" 941dcd0538fSMark Fasheh "count = %u, tree depth = %u", 942dcd0538fSMark Fasheh le16_to_cpu(el->l_count), 943dcd0538fSMark Fasheh le16_to_cpu(el->l_tree_depth)); 944dcd0538fSMark Fasheh 945dcd0538fSMark Fasheh ocfs2_shift_records_right(el); 946dcd0538fSMark Fasheh 947dcd0538fSMark Fasheh set_and_inc: 948dcd0538fSMark Fasheh le16_add_cpu(&el->l_next_free_rec, 1); 949dcd0538fSMark Fasheh memset(&el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec)); 950dcd0538fSMark Fasheh } 951dcd0538fSMark Fasheh 952dcd0538fSMark Fasheh /* 953dcd0538fSMark Fasheh * For a rotation which involves two leaf nodes, the "root node" is 954dcd0538fSMark Fasheh * the lowest level tree node which contains a path to both leafs. This 955dcd0538fSMark Fasheh * resulting set of information can be used to form a complete "subtree" 956dcd0538fSMark Fasheh * 957dcd0538fSMark Fasheh * This function is passed two full paths from the dinode down to a 958dcd0538fSMark Fasheh * pair of adjacent leaves. It's task is to figure out which path 959dcd0538fSMark Fasheh * index contains the subtree root - this can be the root index itself 960dcd0538fSMark Fasheh * in a worst-case rotation. 961dcd0538fSMark Fasheh * 962dcd0538fSMark Fasheh * The array index of the subtree root is passed back. 963dcd0538fSMark Fasheh */ 964dcd0538fSMark Fasheh static int ocfs2_find_subtree_root(struct inode *inode, 965dcd0538fSMark Fasheh struct ocfs2_path *left, 966dcd0538fSMark Fasheh struct ocfs2_path *right) 967dcd0538fSMark Fasheh { 968dcd0538fSMark Fasheh int i = 0; 969dcd0538fSMark Fasheh 970dcd0538fSMark Fasheh /* 971dcd0538fSMark Fasheh * Check that the caller passed in two paths from the same tree. 972dcd0538fSMark Fasheh */ 973dcd0538fSMark Fasheh BUG_ON(path_root_bh(left) != path_root_bh(right)); 974dcd0538fSMark Fasheh 975dcd0538fSMark Fasheh do { 976dcd0538fSMark Fasheh i++; 977dcd0538fSMark Fasheh 978dcd0538fSMark Fasheh /* 979dcd0538fSMark Fasheh * The caller didn't pass two adjacent paths. 980dcd0538fSMark Fasheh */ 981dcd0538fSMark Fasheh mlog_bug_on_msg(i > left->p_tree_depth, 982dcd0538fSMark Fasheh "Inode %lu, left depth %u, right depth %u\n" 983dcd0538fSMark Fasheh "left leaf blk %llu, right leaf blk %llu\n", 984dcd0538fSMark Fasheh inode->i_ino, left->p_tree_depth, 985dcd0538fSMark Fasheh right->p_tree_depth, 986dcd0538fSMark Fasheh (unsigned long long)path_leaf_bh(left)->b_blocknr, 987dcd0538fSMark Fasheh (unsigned long long)path_leaf_bh(right)->b_blocknr); 988dcd0538fSMark Fasheh } while (left->p_node[i].bh->b_blocknr == 989dcd0538fSMark Fasheh right->p_node[i].bh->b_blocknr); 990dcd0538fSMark Fasheh 991dcd0538fSMark Fasheh return i - 1; 992dcd0538fSMark Fasheh } 993dcd0538fSMark Fasheh 994dcd0538fSMark Fasheh typedef void (path_insert_t)(void *, struct buffer_head *); 995dcd0538fSMark Fasheh 996dcd0538fSMark Fasheh /* 997dcd0538fSMark Fasheh * Traverse a btree path in search of cpos, starting at root_el. 998dcd0538fSMark Fasheh * 999dcd0538fSMark Fasheh * This code can be called with a cpos larger than the tree, in which 1000dcd0538fSMark Fasheh * case it will return the rightmost path. 1001dcd0538fSMark Fasheh */ 1002dcd0538fSMark Fasheh static int __ocfs2_find_path(struct inode *inode, 1003dcd0538fSMark Fasheh struct ocfs2_extent_list *root_el, u32 cpos, 1004dcd0538fSMark Fasheh path_insert_t *func, void *data) 1005dcd0538fSMark Fasheh { 1006dcd0538fSMark Fasheh int i, ret = 0; 1007dcd0538fSMark Fasheh u32 range; 1008dcd0538fSMark Fasheh u64 blkno; 1009dcd0538fSMark Fasheh struct buffer_head *bh = NULL; 1010dcd0538fSMark Fasheh struct ocfs2_extent_block *eb; 1011dcd0538fSMark Fasheh struct ocfs2_extent_list *el; 1012dcd0538fSMark Fasheh struct ocfs2_extent_rec *rec; 1013dcd0538fSMark Fasheh struct ocfs2_inode_info *oi = OCFS2_I(inode); 1014dcd0538fSMark Fasheh 1015dcd0538fSMark Fasheh el = root_el; 1016dcd0538fSMark Fasheh while (el->l_tree_depth) { 1017dcd0538fSMark Fasheh if (le16_to_cpu(el->l_next_free_rec) == 0) { 1018dcd0538fSMark Fasheh ocfs2_error(inode->i_sb, 1019dcd0538fSMark Fasheh "Inode %llu has empty extent list at " 1020dcd0538fSMark Fasheh "depth %u\n", 1021dcd0538fSMark Fasheh (unsigned long long)oi->ip_blkno, 1022dcd0538fSMark Fasheh le16_to_cpu(el->l_tree_depth)); 1023dcd0538fSMark Fasheh ret = -EROFS; 1024dcd0538fSMark Fasheh goto out; 1025dcd0538fSMark Fasheh 1026dcd0538fSMark Fasheh } 1027dcd0538fSMark Fasheh 1028dcd0538fSMark Fasheh for(i = 0; i < le16_to_cpu(el->l_next_free_rec) - 1; i++) { 1029dcd0538fSMark Fasheh rec = &el->l_recs[i]; 1030dcd0538fSMark Fasheh 1031dcd0538fSMark Fasheh /* 1032dcd0538fSMark Fasheh * In the case that cpos is off the allocation 1033dcd0538fSMark Fasheh * tree, this should just wind up returning the 1034dcd0538fSMark Fasheh * rightmost record. 1035dcd0538fSMark Fasheh */ 1036dcd0538fSMark Fasheh range = le32_to_cpu(rec->e_cpos) + 1037dcd0538fSMark Fasheh le32_to_cpu(rec->e_clusters); 1038dcd0538fSMark Fasheh if (cpos >= le32_to_cpu(rec->e_cpos) && cpos < range) 1039dcd0538fSMark Fasheh break; 1040dcd0538fSMark Fasheh } 1041dcd0538fSMark Fasheh 1042dcd0538fSMark Fasheh blkno = le64_to_cpu(el->l_recs[i].e_blkno); 1043dcd0538fSMark Fasheh if (blkno == 0) { 1044dcd0538fSMark Fasheh ocfs2_error(inode->i_sb, 1045dcd0538fSMark Fasheh "Inode %llu has bad blkno in extent list " 1046dcd0538fSMark Fasheh "at depth %u (index %d)\n", 1047dcd0538fSMark Fasheh (unsigned long long)oi->ip_blkno, 1048dcd0538fSMark Fasheh le16_to_cpu(el->l_tree_depth), i); 1049dcd0538fSMark Fasheh ret = -EROFS; 1050dcd0538fSMark Fasheh goto out; 1051dcd0538fSMark Fasheh } 1052dcd0538fSMark Fasheh 1053dcd0538fSMark Fasheh brelse(bh); 1054dcd0538fSMark Fasheh bh = NULL; 1055dcd0538fSMark Fasheh ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), blkno, 1056dcd0538fSMark Fasheh &bh, OCFS2_BH_CACHED, inode); 1057dcd0538fSMark Fasheh if (ret) { 1058dcd0538fSMark Fasheh mlog_errno(ret); 1059dcd0538fSMark Fasheh goto out; 1060dcd0538fSMark Fasheh } 1061dcd0538fSMark Fasheh 1062dcd0538fSMark Fasheh eb = (struct ocfs2_extent_block *) bh->b_data; 1063dcd0538fSMark Fasheh el = &eb->h_list; 1064dcd0538fSMark Fasheh if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { 1065dcd0538fSMark Fasheh OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); 1066dcd0538fSMark Fasheh ret = -EIO; 1067dcd0538fSMark Fasheh goto out; 1068dcd0538fSMark Fasheh } 1069dcd0538fSMark Fasheh 1070dcd0538fSMark Fasheh if (le16_to_cpu(el->l_next_free_rec) > 1071dcd0538fSMark Fasheh le16_to_cpu(el->l_count)) { 1072dcd0538fSMark Fasheh ocfs2_error(inode->i_sb, 1073dcd0538fSMark Fasheh "Inode %llu has bad count in extent list " 1074dcd0538fSMark Fasheh "at block %llu (next free=%u, count=%u)\n", 1075dcd0538fSMark Fasheh (unsigned long long)oi->ip_blkno, 1076dcd0538fSMark Fasheh (unsigned long long)bh->b_blocknr, 1077dcd0538fSMark Fasheh le16_to_cpu(el->l_next_free_rec), 1078dcd0538fSMark Fasheh le16_to_cpu(el->l_count)); 1079dcd0538fSMark Fasheh ret = -EROFS; 1080dcd0538fSMark Fasheh goto out; 1081dcd0538fSMark Fasheh } 1082dcd0538fSMark Fasheh 1083dcd0538fSMark Fasheh if (func) 1084dcd0538fSMark Fasheh func(data, bh); 1085dcd0538fSMark Fasheh } 1086dcd0538fSMark Fasheh 1087dcd0538fSMark Fasheh out: 1088dcd0538fSMark Fasheh /* 1089dcd0538fSMark Fasheh * Catch any trailing bh that the loop didn't handle. 1090dcd0538fSMark Fasheh */ 1091dcd0538fSMark Fasheh brelse(bh); 1092dcd0538fSMark Fasheh 1093dcd0538fSMark Fasheh return ret; 1094dcd0538fSMark Fasheh } 1095dcd0538fSMark Fasheh 1096dcd0538fSMark Fasheh /* 1097dcd0538fSMark Fasheh * Given an initialized path (that is, it has a valid root extent 1098dcd0538fSMark Fasheh * list), this function will traverse the btree in search of the path 1099dcd0538fSMark Fasheh * which would contain cpos. 1100dcd0538fSMark Fasheh * 1101dcd0538fSMark Fasheh * The path traveled is recorded in the path structure. 1102dcd0538fSMark Fasheh * 1103dcd0538fSMark Fasheh * Note that this will not do any comparisons on leaf node extent 1104dcd0538fSMark Fasheh * records, so it will work fine in the case that we just added a tree 1105dcd0538fSMark Fasheh * branch. 1106dcd0538fSMark Fasheh */ 1107dcd0538fSMark Fasheh struct find_path_data { 1108dcd0538fSMark Fasheh int index; 1109dcd0538fSMark Fasheh struct ocfs2_path *path; 1110dcd0538fSMark Fasheh }; 1111dcd0538fSMark Fasheh static void find_path_ins(void *data, struct buffer_head *bh) 1112dcd0538fSMark Fasheh { 1113dcd0538fSMark Fasheh struct find_path_data *fp = data; 1114dcd0538fSMark Fasheh 1115dcd0538fSMark Fasheh get_bh(bh); 1116dcd0538fSMark Fasheh ocfs2_path_insert_eb(fp->path, fp->index, bh); 1117dcd0538fSMark Fasheh fp->index++; 1118dcd0538fSMark Fasheh } 1119dcd0538fSMark Fasheh static int ocfs2_find_path(struct inode *inode, struct ocfs2_path *path, 1120dcd0538fSMark Fasheh u32 cpos) 1121dcd0538fSMark Fasheh { 1122dcd0538fSMark Fasheh struct find_path_data data; 1123dcd0538fSMark Fasheh 1124dcd0538fSMark Fasheh data.index = 1; 1125dcd0538fSMark Fasheh data.path = path; 1126dcd0538fSMark Fasheh return __ocfs2_find_path(inode, path_root_el(path), cpos, 1127dcd0538fSMark Fasheh find_path_ins, &data); 1128dcd0538fSMark Fasheh } 1129dcd0538fSMark Fasheh 1130dcd0538fSMark Fasheh static void find_leaf_ins(void *data, struct buffer_head *bh) 1131dcd0538fSMark Fasheh { 1132dcd0538fSMark Fasheh struct ocfs2_extent_block *eb =(struct ocfs2_extent_block *)bh->b_data; 1133dcd0538fSMark Fasheh struct ocfs2_extent_list *el = &eb->h_list; 1134dcd0538fSMark Fasheh struct buffer_head **ret = data; 1135dcd0538fSMark Fasheh 1136dcd0538fSMark Fasheh /* We want to retain only the leaf block. */ 1137dcd0538fSMark Fasheh if (le16_to_cpu(el->l_tree_depth) == 0) { 1138dcd0538fSMark Fasheh get_bh(bh); 1139dcd0538fSMark Fasheh *ret = bh; 1140dcd0538fSMark Fasheh } 1141dcd0538fSMark Fasheh } 1142dcd0538fSMark Fasheh /* 1143dcd0538fSMark Fasheh * Find the leaf block in the tree which would contain cpos. No 1144dcd0538fSMark Fasheh * checking of the actual leaf is done. 1145dcd0538fSMark Fasheh * 1146dcd0538fSMark Fasheh * Some paths want to call this instead of allocating a path structure 1147dcd0538fSMark Fasheh * and calling ocfs2_find_path(). 1148dcd0538fSMark Fasheh * 1149dcd0538fSMark Fasheh * This function doesn't handle non btree extent lists. 1150dcd0538fSMark Fasheh */ 1151363041a5SMark Fasheh int ocfs2_find_leaf(struct inode *inode, struct ocfs2_extent_list *root_el, 1152363041a5SMark Fasheh u32 cpos, struct buffer_head **leaf_bh) 1153dcd0538fSMark Fasheh { 1154dcd0538fSMark Fasheh int ret; 1155dcd0538fSMark Fasheh struct buffer_head *bh = NULL; 1156dcd0538fSMark Fasheh 1157dcd0538fSMark Fasheh ret = __ocfs2_find_path(inode, root_el, cpos, find_leaf_ins, &bh); 1158dcd0538fSMark Fasheh if (ret) { 1159dcd0538fSMark Fasheh mlog_errno(ret); 1160dcd0538fSMark Fasheh goto out; 1161dcd0538fSMark Fasheh } 1162dcd0538fSMark Fasheh 1163dcd0538fSMark Fasheh *leaf_bh = bh; 1164dcd0538fSMark Fasheh out: 1165dcd0538fSMark Fasheh return ret; 1166dcd0538fSMark Fasheh } 1167dcd0538fSMark Fasheh 1168dcd0538fSMark Fasheh /* 1169dcd0538fSMark Fasheh * Adjust the adjacent records (left_rec, right_rec) involved in a rotation. 1170dcd0538fSMark Fasheh * 1171dcd0538fSMark Fasheh * Basically, we've moved stuff around at the bottom of the tree and 1172dcd0538fSMark Fasheh * we need to fix up the extent records above the changes to reflect 1173dcd0538fSMark Fasheh * the new changes. 1174dcd0538fSMark Fasheh * 1175dcd0538fSMark Fasheh * left_rec: the record on the left. 1176dcd0538fSMark Fasheh * left_child_el: is the child list pointed to by left_rec 1177dcd0538fSMark Fasheh * right_rec: the record to the right of left_rec 1178dcd0538fSMark Fasheh * right_child_el: is the child list pointed to by right_rec 1179dcd0538fSMark Fasheh * 1180dcd0538fSMark Fasheh * By definition, this only works on interior nodes. 1181dcd0538fSMark Fasheh */ 1182dcd0538fSMark Fasheh static void ocfs2_adjust_adjacent_records(struct ocfs2_extent_rec *left_rec, 1183dcd0538fSMark Fasheh struct ocfs2_extent_list *left_child_el, 1184dcd0538fSMark Fasheh struct ocfs2_extent_rec *right_rec, 1185dcd0538fSMark Fasheh struct ocfs2_extent_list *right_child_el) 1186dcd0538fSMark Fasheh { 1187dcd0538fSMark Fasheh u32 left_clusters, right_end; 1188dcd0538fSMark Fasheh 1189dcd0538fSMark Fasheh /* 1190dcd0538fSMark Fasheh * Interior nodes never have holes. Their cpos is the cpos of 1191dcd0538fSMark Fasheh * the leftmost record in their child list. Their cluster 1192dcd0538fSMark Fasheh * count covers the full theoretical range of their child list 1193dcd0538fSMark Fasheh * - the range between their cpos and the cpos of the record 1194dcd0538fSMark Fasheh * immediately to their right. 1195dcd0538fSMark Fasheh */ 1196dcd0538fSMark Fasheh left_clusters = le32_to_cpu(right_child_el->l_recs[0].e_cpos); 1197dcd0538fSMark Fasheh left_clusters -= le32_to_cpu(left_rec->e_cpos); 1198dcd0538fSMark Fasheh left_rec->e_clusters = cpu_to_le32(left_clusters); 1199dcd0538fSMark Fasheh 1200dcd0538fSMark Fasheh /* 1201dcd0538fSMark Fasheh * Calculate the rightmost cluster count boundary before 1202dcd0538fSMark Fasheh * moving cpos - we will need to adjust e_clusters after 1203dcd0538fSMark Fasheh * updating e_cpos to keep the same highest cluster count. 1204dcd0538fSMark Fasheh */ 1205dcd0538fSMark Fasheh right_end = le32_to_cpu(right_rec->e_cpos); 1206dcd0538fSMark Fasheh right_end += le32_to_cpu(right_rec->e_clusters); 1207dcd0538fSMark Fasheh 1208dcd0538fSMark Fasheh right_rec->e_cpos = left_rec->e_cpos; 1209dcd0538fSMark Fasheh le32_add_cpu(&right_rec->e_cpos, left_clusters); 1210dcd0538fSMark Fasheh 1211dcd0538fSMark Fasheh right_end -= le32_to_cpu(right_rec->e_cpos); 1212dcd0538fSMark Fasheh right_rec->e_clusters = cpu_to_le32(right_end); 1213dcd0538fSMark Fasheh } 1214dcd0538fSMark Fasheh 1215dcd0538fSMark Fasheh /* 1216dcd0538fSMark Fasheh * Adjust the adjacent root node records involved in a 1217dcd0538fSMark Fasheh * rotation. left_el_blkno is passed in as a key so that we can easily 1218dcd0538fSMark Fasheh * find it's index in the root list. 1219dcd0538fSMark Fasheh */ 1220dcd0538fSMark Fasheh static void ocfs2_adjust_root_records(struct ocfs2_extent_list *root_el, 1221dcd0538fSMark Fasheh struct ocfs2_extent_list *left_el, 1222dcd0538fSMark Fasheh struct ocfs2_extent_list *right_el, 1223dcd0538fSMark Fasheh u64 left_el_blkno) 1224dcd0538fSMark Fasheh { 1225dcd0538fSMark Fasheh int i; 1226dcd0538fSMark Fasheh 1227dcd0538fSMark Fasheh BUG_ON(le16_to_cpu(root_el->l_tree_depth) <= 1228dcd0538fSMark Fasheh le16_to_cpu(left_el->l_tree_depth)); 1229dcd0538fSMark Fasheh 1230dcd0538fSMark Fasheh for(i = 0; i < le16_to_cpu(root_el->l_next_free_rec) - 1; i++) { 1231dcd0538fSMark Fasheh if (le64_to_cpu(root_el->l_recs[i].e_blkno) == left_el_blkno) 1232dcd0538fSMark Fasheh break; 1233dcd0538fSMark Fasheh } 1234dcd0538fSMark Fasheh 1235dcd0538fSMark Fasheh /* 1236dcd0538fSMark Fasheh * The path walking code should have never returned a root and 1237dcd0538fSMark Fasheh * two paths which are not adjacent. 1238dcd0538fSMark Fasheh */ 1239dcd0538fSMark Fasheh BUG_ON(i >= (le16_to_cpu(root_el->l_next_free_rec) - 1)); 1240dcd0538fSMark Fasheh 1241dcd0538fSMark Fasheh ocfs2_adjust_adjacent_records(&root_el->l_recs[i], left_el, 1242dcd0538fSMark Fasheh &root_el->l_recs[i + 1], right_el); 1243dcd0538fSMark Fasheh } 1244dcd0538fSMark Fasheh 1245dcd0538fSMark Fasheh /* 1246dcd0538fSMark Fasheh * We've changed a leaf block (in right_path) and need to reflect that 1247dcd0538fSMark Fasheh * change back up the subtree. 1248dcd0538fSMark Fasheh * 1249dcd0538fSMark Fasheh * This happens in multiple places: 1250dcd0538fSMark Fasheh * - When we've moved an extent record from the left path leaf to the right 1251dcd0538fSMark Fasheh * path leaf to make room for an empty extent in the left path leaf. 1252dcd0538fSMark Fasheh * - When our insert into the right path leaf is at the leftmost edge 1253dcd0538fSMark Fasheh * and requires an update of the path immediately to it's left. This 1254dcd0538fSMark Fasheh * can occur at the end of some types of rotation and appending inserts. 1255dcd0538fSMark Fasheh */ 1256dcd0538fSMark Fasheh static void ocfs2_complete_edge_insert(struct inode *inode, handle_t *handle, 1257dcd0538fSMark Fasheh struct ocfs2_path *left_path, 1258dcd0538fSMark Fasheh struct ocfs2_path *right_path, 1259dcd0538fSMark Fasheh int subtree_index) 1260dcd0538fSMark Fasheh { 1261dcd0538fSMark Fasheh int ret, i, idx; 1262dcd0538fSMark Fasheh struct ocfs2_extent_list *el, *left_el, *right_el; 1263dcd0538fSMark Fasheh struct ocfs2_extent_rec *left_rec, *right_rec; 1264dcd0538fSMark Fasheh struct buffer_head *root_bh = left_path->p_node[subtree_index].bh; 1265dcd0538fSMark Fasheh 1266dcd0538fSMark Fasheh /* 1267dcd0538fSMark Fasheh * Update the counts and position values within all the 1268dcd0538fSMark Fasheh * interior nodes to reflect the leaf rotation we just did. 1269dcd0538fSMark Fasheh * 1270dcd0538fSMark Fasheh * The root node is handled below the loop. 1271dcd0538fSMark Fasheh * 1272dcd0538fSMark Fasheh * We begin the loop with right_el and left_el pointing to the 1273dcd0538fSMark Fasheh * leaf lists and work our way up. 1274dcd0538fSMark Fasheh * 1275dcd0538fSMark Fasheh * NOTE: within this loop, left_el and right_el always refer 1276dcd0538fSMark Fasheh * to the *child* lists. 1277dcd0538fSMark Fasheh */ 1278dcd0538fSMark Fasheh left_el = path_leaf_el(left_path); 1279dcd0538fSMark Fasheh right_el = path_leaf_el(right_path); 1280dcd0538fSMark Fasheh for(i = left_path->p_tree_depth - 1; i > subtree_index; i--) { 1281dcd0538fSMark Fasheh mlog(0, "Adjust records at index %u\n", i); 1282dcd0538fSMark Fasheh 1283dcd0538fSMark Fasheh /* 1284dcd0538fSMark Fasheh * One nice property of knowing that all of these 1285dcd0538fSMark Fasheh * nodes are below the root is that we only deal with 1286dcd0538fSMark Fasheh * the leftmost right node record and the rightmost 1287dcd0538fSMark Fasheh * left node record. 1288dcd0538fSMark Fasheh */ 1289dcd0538fSMark Fasheh el = left_path->p_node[i].el; 1290dcd0538fSMark Fasheh idx = le16_to_cpu(left_el->l_next_free_rec) - 1; 1291dcd0538fSMark Fasheh left_rec = &el->l_recs[idx]; 1292dcd0538fSMark Fasheh 1293dcd0538fSMark Fasheh el = right_path->p_node[i].el; 1294dcd0538fSMark Fasheh right_rec = &el->l_recs[0]; 1295dcd0538fSMark Fasheh 1296dcd0538fSMark Fasheh ocfs2_adjust_adjacent_records(left_rec, left_el, right_rec, 1297dcd0538fSMark Fasheh right_el); 1298dcd0538fSMark Fasheh 1299dcd0538fSMark Fasheh ret = ocfs2_journal_dirty(handle, left_path->p_node[i].bh); 1300dcd0538fSMark Fasheh if (ret) 1301dcd0538fSMark Fasheh mlog_errno(ret); 1302dcd0538fSMark Fasheh 1303dcd0538fSMark Fasheh ret = ocfs2_journal_dirty(handle, right_path->p_node[i].bh); 1304dcd0538fSMark Fasheh if (ret) 1305dcd0538fSMark Fasheh mlog_errno(ret); 1306dcd0538fSMark Fasheh 1307dcd0538fSMark Fasheh /* 1308dcd0538fSMark Fasheh * Setup our list pointers now so that the current 1309dcd0538fSMark Fasheh * parents become children in the next iteration. 1310dcd0538fSMark Fasheh */ 1311dcd0538fSMark Fasheh left_el = left_path->p_node[i].el; 1312dcd0538fSMark Fasheh right_el = right_path->p_node[i].el; 1313dcd0538fSMark Fasheh } 1314dcd0538fSMark Fasheh 1315dcd0538fSMark Fasheh /* 1316dcd0538fSMark Fasheh * At the root node, adjust the two adjacent records which 1317dcd0538fSMark Fasheh * begin our path to the leaves. 1318dcd0538fSMark Fasheh */ 1319dcd0538fSMark Fasheh 1320dcd0538fSMark Fasheh el = left_path->p_node[subtree_index].el; 1321dcd0538fSMark Fasheh left_el = left_path->p_node[subtree_index + 1].el; 1322dcd0538fSMark Fasheh right_el = right_path->p_node[subtree_index + 1].el; 1323dcd0538fSMark Fasheh 1324dcd0538fSMark Fasheh ocfs2_adjust_root_records(el, left_el, right_el, 1325dcd0538fSMark Fasheh left_path->p_node[subtree_index + 1].bh->b_blocknr); 1326dcd0538fSMark Fasheh 1327dcd0538fSMark Fasheh root_bh = left_path->p_node[subtree_index].bh; 1328dcd0538fSMark Fasheh 1329dcd0538fSMark Fasheh ret = ocfs2_journal_dirty(handle, root_bh); 1330dcd0538fSMark Fasheh if (ret) 1331dcd0538fSMark Fasheh mlog_errno(ret); 1332dcd0538fSMark Fasheh } 1333dcd0538fSMark Fasheh 1334dcd0538fSMark Fasheh static int ocfs2_rotate_subtree_right(struct inode *inode, 1335dcd0538fSMark Fasheh handle_t *handle, 1336dcd0538fSMark Fasheh struct ocfs2_path *left_path, 1337dcd0538fSMark Fasheh struct ocfs2_path *right_path, 1338dcd0538fSMark Fasheh int subtree_index) 1339dcd0538fSMark Fasheh { 1340dcd0538fSMark Fasheh int ret, i; 1341dcd0538fSMark Fasheh struct buffer_head *right_leaf_bh; 1342dcd0538fSMark Fasheh struct buffer_head *left_leaf_bh = NULL; 1343dcd0538fSMark Fasheh struct buffer_head *root_bh; 1344dcd0538fSMark Fasheh struct ocfs2_extent_list *right_el, *left_el; 1345dcd0538fSMark Fasheh struct ocfs2_extent_rec move_rec; 1346dcd0538fSMark Fasheh 1347dcd0538fSMark Fasheh left_leaf_bh = path_leaf_bh(left_path); 1348dcd0538fSMark Fasheh left_el = path_leaf_el(left_path); 1349dcd0538fSMark Fasheh 1350dcd0538fSMark Fasheh if (left_el->l_next_free_rec != left_el->l_count) { 1351dcd0538fSMark Fasheh ocfs2_error(inode->i_sb, 1352dcd0538fSMark Fasheh "Inode %llu has non-full interior leaf node %llu" 1353dcd0538fSMark Fasheh "(next free = %u)", 1354dcd0538fSMark Fasheh (unsigned long long)OCFS2_I(inode)->ip_blkno, 1355dcd0538fSMark Fasheh (unsigned long long)left_leaf_bh->b_blocknr, 1356dcd0538fSMark Fasheh le16_to_cpu(left_el->l_next_free_rec)); 1357dcd0538fSMark Fasheh return -EROFS; 1358dcd0538fSMark Fasheh } 1359dcd0538fSMark Fasheh 1360dcd0538fSMark Fasheh /* 1361dcd0538fSMark Fasheh * This extent block may already have an empty record, so we 1362dcd0538fSMark Fasheh * return early if so. 1363dcd0538fSMark Fasheh */ 1364dcd0538fSMark Fasheh if (ocfs2_is_empty_extent(&left_el->l_recs[0])) 1365dcd0538fSMark Fasheh return 0; 1366dcd0538fSMark Fasheh 1367dcd0538fSMark Fasheh root_bh = left_path->p_node[subtree_index].bh; 1368dcd0538fSMark Fasheh BUG_ON(root_bh != right_path->p_node[subtree_index].bh); 1369dcd0538fSMark Fasheh 1370dcd0538fSMark Fasheh ret = ocfs2_journal_access(handle, inode, root_bh, 1371dcd0538fSMark Fasheh OCFS2_JOURNAL_ACCESS_WRITE); 1372dcd0538fSMark Fasheh if (ret) { 1373dcd0538fSMark Fasheh mlog_errno(ret); 1374dcd0538fSMark Fasheh goto out; 1375dcd0538fSMark Fasheh } 1376dcd0538fSMark Fasheh 1377dcd0538fSMark Fasheh for(i = subtree_index + 1; i < path_num_items(right_path); i++) { 1378dcd0538fSMark Fasheh ret = ocfs2_journal_access(handle, inode, 1379dcd0538fSMark Fasheh right_path->p_node[i].bh, 1380dcd0538fSMark Fasheh OCFS2_JOURNAL_ACCESS_WRITE); 1381dcd0538fSMark Fasheh if (ret) { 1382dcd0538fSMark Fasheh mlog_errno(ret); 1383dcd0538fSMark Fasheh goto out; 1384dcd0538fSMark Fasheh } 1385dcd0538fSMark Fasheh 1386dcd0538fSMark Fasheh ret = ocfs2_journal_access(handle, inode, 1387dcd0538fSMark Fasheh left_path->p_node[i].bh, 1388dcd0538fSMark Fasheh OCFS2_JOURNAL_ACCESS_WRITE); 1389dcd0538fSMark Fasheh if (ret) { 1390dcd0538fSMark Fasheh mlog_errno(ret); 1391dcd0538fSMark Fasheh goto out; 1392dcd0538fSMark Fasheh } 1393dcd0538fSMark Fasheh } 1394dcd0538fSMark Fasheh 1395dcd0538fSMark Fasheh right_leaf_bh = path_leaf_bh(right_path); 1396dcd0538fSMark Fasheh right_el = path_leaf_el(right_path); 1397dcd0538fSMark Fasheh 1398dcd0538fSMark Fasheh /* This is a code error, not a disk corruption. */ 1399dcd0538fSMark Fasheh mlog_bug_on_msg(!right_el->l_next_free_rec, "Inode %llu: Rotate fails " 1400dcd0538fSMark Fasheh "because rightmost leaf block %llu is empty\n", 1401dcd0538fSMark Fasheh (unsigned long long)OCFS2_I(inode)->ip_blkno, 1402dcd0538fSMark Fasheh (unsigned long long)right_leaf_bh->b_blocknr); 1403dcd0538fSMark Fasheh 1404dcd0538fSMark Fasheh ocfs2_create_empty_extent(right_el); 1405dcd0538fSMark Fasheh 1406dcd0538fSMark Fasheh ret = ocfs2_journal_dirty(handle, right_leaf_bh); 1407dcd0538fSMark Fasheh if (ret) { 1408dcd0538fSMark Fasheh mlog_errno(ret); 1409dcd0538fSMark Fasheh goto out; 1410dcd0538fSMark Fasheh } 1411dcd0538fSMark Fasheh 1412dcd0538fSMark Fasheh /* Do the copy now. */ 1413dcd0538fSMark Fasheh i = le16_to_cpu(left_el->l_next_free_rec) - 1; 1414dcd0538fSMark Fasheh move_rec = left_el->l_recs[i]; 1415dcd0538fSMark Fasheh right_el->l_recs[0] = move_rec; 1416dcd0538fSMark Fasheh 1417dcd0538fSMark Fasheh /* 1418dcd0538fSMark Fasheh * Clear out the record we just copied and shift everything 1419dcd0538fSMark Fasheh * over, leaving an empty extent in the left leaf. 1420dcd0538fSMark Fasheh * 1421dcd0538fSMark Fasheh * We temporarily subtract from next_free_rec so that the 1422dcd0538fSMark Fasheh * shift will lose the tail record (which is now defunct). 1423dcd0538fSMark Fasheh */ 1424dcd0538fSMark Fasheh le16_add_cpu(&left_el->l_next_free_rec, -1); 1425dcd0538fSMark Fasheh ocfs2_shift_records_right(left_el); 1426dcd0538fSMark Fasheh memset(&left_el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec)); 1427dcd0538fSMark Fasheh le16_add_cpu(&left_el->l_next_free_rec, 1); 1428dcd0538fSMark Fasheh 1429dcd0538fSMark Fasheh ret = ocfs2_journal_dirty(handle, left_leaf_bh); 1430dcd0538fSMark Fasheh if (ret) { 1431dcd0538fSMark Fasheh mlog_errno(ret); 1432dcd0538fSMark Fasheh goto out; 1433dcd0538fSMark Fasheh } 1434dcd0538fSMark Fasheh 1435dcd0538fSMark Fasheh ocfs2_complete_edge_insert(inode, handle, left_path, right_path, 1436dcd0538fSMark Fasheh subtree_index); 1437dcd0538fSMark Fasheh 1438dcd0538fSMark Fasheh out: 1439dcd0538fSMark Fasheh return ret; 1440dcd0538fSMark Fasheh } 1441dcd0538fSMark Fasheh 1442dcd0538fSMark Fasheh /* 1443dcd0538fSMark Fasheh * Given a full path, determine what cpos value would return us a path 1444dcd0538fSMark Fasheh * containing the leaf immediately to the left of the current one. 1445dcd0538fSMark Fasheh * 1446dcd0538fSMark Fasheh * Will return zero if the path passed in is already the leftmost path. 1447dcd0538fSMark Fasheh */ 1448dcd0538fSMark Fasheh static int ocfs2_find_cpos_for_left_leaf(struct super_block *sb, 1449dcd0538fSMark Fasheh struct ocfs2_path *path, u32 *cpos) 1450dcd0538fSMark Fasheh { 1451dcd0538fSMark Fasheh int i, j, ret = 0; 1452dcd0538fSMark Fasheh u64 blkno; 1453dcd0538fSMark Fasheh struct ocfs2_extent_list *el; 1454dcd0538fSMark Fasheh 1455dcd0538fSMark Fasheh *cpos = 0; 1456dcd0538fSMark Fasheh 1457dcd0538fSMark Fasheh blkno = path_leaf_bh(path)->b_blocknr; 1458dcd0538fSMark Fasheh 1459dcd0538fSMark Fasheh /* Start at the tree node just above the leaf and work our way up. */ 1460dcd0538fSMark Fasheh i = path->p_tree_depth - 1; 1461dcd0538fSMark Fasheh while (i >= 0) { 1462dcd0538fSMark Fasheh el = path->p_node[i].el; 1463dcd0538fSMark Fasheh 1464dcd0538fSMark Fasheh /* 1465dcd0538fSMark Fasheh * Find the extent record just before the one in our 1466dcd0538fSMark Fasheh * path. 1467dcd0538fSMark Fasheh */ 1468dcd0538fSMark Fasheh for(j = 0; j < le16_to_cpu(el->l_next_free_rec); j++) { 1469dcd0538fSMark Fasheh if (le64_to_cpu(el->l_recs[j].e_blkno) == blkno) { 1470dcd0538fSMark Fasheh if (j == 0) { 1471dcd0538fSMark Fasheh if (i == 0) { 1472dcd0538fSMark Fasheh /* 1473dcd0538fSMark Fasheh * We've determined that the 1474dcd0538fSMark Fasheh * path specified is already 1475dcd0538fSMark Fasheh * the leftmost one - return a 1476dcd0538fSMark Fasheh * cpos of zero. 1477dcd0538fSMark Fasheh */ 1478dcd0538fSMark Fasheh goto out; 1479dcd0538fSMark Fasheh } 1480dcd0538fSMark Fasheh /* 1481dcd0538fSMark Fasheh * The leftmost record points to our 1482dcd0538fSMark Fasheh * leaf - we need to travel up the 1483dcd0538fSMark Fasheh * tree one level. 1484dcd0538fSMark Fasheh */ 1485dcd0538fSMark Fasheh goto next_node; 1486dcd0538fSMark Fasheh } 1487dcd0538fSMark Fasheh 1488dcd0538fSMark Fasheh *cpos = le32_to_cpu(el->l_recs[j - 1].e_cpos); 1489dcd0538fSMark Fasheh *cpos = *cpos + le32_to_cpu(el->l_recs[j - 1].e_clusters) - 1; 1490dcd0538fSMark Fasheh goto out; 1491dcd0538fSMark Fasheh } 1492dcd0538fSMark Fasheh } 1493dcd0538fSMark Fasheh 1494dcd0538fSMark Fasheh /* 1495dcd0538fSMark Fasheh * If we got here, we never found a valid node where 1496dcd0538fSMark Fasheh * the tree indicated one should be. 1497dcd0538fSMark Fasheh */ 1498dcd0538fSMark Fasheh ocfs2_error(sb, 1499dcd0538fSMark Fasheh "Invalid extent tree at extent block %llu\n", 1500dcd0538fSMark Fasheh (unsigned long long)blkno); 1501dcd0538fSMark Fasheh ret = -EROFS; 1502dcd0538fSMark Fasheh goto out; 1503dcd0538fSMark Fasheh 1504dcd0538fSMark Fasheh next_node: 1505dcd0538fSMark Fasheh blkno = path->p_node[i].bh->b_blocknr; 1506dcd0538fSMark Fasheh i--; 1507dcd0538fSMark Fasheh } 1508dcd0538fSMark Fasheh 1509dcd0538fSMark Fasheh out: 1510dcd0538fSMark Fasheh return ret; 1511dcd0538fSMark Fasheh } 1512dcd0538fSMark Fasheh 1513dcd0538fSMark Fasheh static int ocfs2_extend_rotate_transaction(handle_t *handle, int subtree_depth, 1514dcd0538fSMark Fasheh struct ocfs2_path *path) 1515dcd0538fSMark Fasheh { 1516dcd0538fSMark Fasheh int credits = (path->p_tree_depth - subtree_depth) * 2 + 1; 1517dcd0538fSMark Fasheh 1518dcd0538fSMark Fasheh if (handle->h_buffer_credits < credits) 1519dcd0538fSMark Fasheh return ocfs2_extend_trans(handle, credits); 1520dcd0538fSMark Fasheh 1521dcd0538fSMark Fasheh return 0; 1522dcd0538fSMark Fasheh } 1523dcd0538fSMark Fasheh 1524dcd0538fSMark Fasheh /* 1525dcd0538fSMark Fasheh * Trap the case where we're inserting into the theoretical range past 1526dcd0538fSMark Fasheh * the _actual_ left leaf range. Otherwise, we'll rotate a record 1527dcd0538fSMark Fasheh * whose cpos is less than ours into the right leaf. 1528dcd0538fSMark Fasheh * 1529dcd0538fSMark Fasheh * It's only necessary to look at the rightmost record of the left 1530dcd0538fSMark Fasheh * leaf because the logic that calls us should ensure that the 1531dcd0538fSMark Fasheh * theoretical ranges in the path components above the leaves are 1532dcd0538fSMark Fasheh * correct. 1533dcd0538fSMark Fasheh */ 1534dcd0538fSMark Fasheh static int ocfs2_rotate_requires_path_adjustment(struct ocfs2_path *left_path, 1535dcd0538fSMark Fasheh u32 insert_cpos) 1536dcd0538fSMark Fasheh { 1537dcd0538fSMark Fasheh struct ocfs2_extent_list *left_el; 1538dcd0538fSMark Fasheh struct ocfs2_extent_rec *rec; 1539dcd0538fSMark Fasheh int next_free; 1540dcd0538fSMark Fasheh 1541dcd0538fSMark Fasheh left_el = path_leaf_el(left_path); 1542dcd0538fSMark Fasheh next_free = le16_to_cpu(left_el->l_next_free_rec); 1543dcd0538fSMark Fasheh rec = &left_el->l_recs[next_free - 1]; 1544dcd0538fSMark Fasheh 1545dcd0538fSMark Fasheh if (insert_cpos > le32_to_cpu(rec->e_cpos)) 1546dcd0538fSMark Fasheh return 1; 1547dcd0538fSMark Fasheh return 0; 1548dcd0538fSMark Fasheh } 1549dcd0538fSMark Fasheh 1550dcd0538fSMark Fasheh /* 1551dcd0538fSMark Fasheh * Rotate all the records in a btree right one record, starting at insert_cpos. 1552dcd0538fSMark Fasheh * 1553dcd0538fSMark Fasheh * The path to the rightmost leaf should be passed in. 1554dcd0538fSMark Fasheh * 1555dcd0538fSMark Fasheh * The array is assumed to be large enough to hold an entire path (tree depth). 1556dcd0538fSMark Fasheh * 1557dcd0538fSMark Fasheh * Upon succesful return from this function: 1558dcd0538fSMark Fasheh * 1559dcd0538fSMark Fasheh * - The 'right_path' array will contain a path to the leaf block 1560dcd0538fSMark Fasheh * whose range contains e_cpos. 1561dcd0538fSMark Fasheh * - That leaf block will have a single empty extent in list index 0. 1562dcd0538fSMark Fasheh * - In the case that the rotation requires a post-insert update, 1563dcd0538fSMark Fasheh * *ret_left_path will contain a valid path which can be passed to 1564dcd0538fSMark Fasheh * ocfs2_insert_path(). 1565dcd0538fSMark Fasheh */ 1566dcd0538fSMark Fasheh static int ocfs2_rotate_tree_right(struct inode *inode, 1567dcd0538fSMark Fasheh handle_t *handle, 1568dcd0538fSMark Fasheh u32 insert_cpos, 1569dcd0538fSMark Fasheh struct ocfs2_path *right_path, 1570dcd0538fSMark Fasheh struct ocfs2_path **ret_left_path) 1571dcd0538fSMark Fasheh { 1572dcd0538fSMark Fasheh int ret, start; 1573dcd0538fSMark Fasheh u32 cpos; 1574dcd0538fSMark Fasheh struct ocfs2_path *left_path = NULL; 1575dcd0538fSMark Fasheh 1576dcd0538fSMark Fasheh *ret_left_path = NULL; 1577dcd0538fSMark Fasheh 1578dcd0538fSMark Fasheh left_path = ocfs2_new_path(path_root_bh(right_path), 1579dcd0538fSMark Fasheh path_root_el(right_path)); 1580dcd0538fSMark Fasheh if (!left_path) { 1581dcd0538fSMark Fasheh ret = -ENOMEM; 1582dcd0538fSMark Fasheh mlog_errno(ret); 1583dcd0538fSMark Fasheh goto out; 1584dcd0538fSMark Fasheh } 1585dcd0538fSMark Fasheh 1586dcd0538fSMark Fasheh ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, right_path, &cpos); 1587dcd0538fSMark Fasheh if (ret) { 1588dcd0538fSMark Fasheh mlog_errno(ret); 1589dcd0538fSMark Fasheh goto out; 1590dcd0538fSMark Fasheh } 1591dcd0538fSMark Fasheh 1592dcd0538fSMark Fasheh mlog(0, "Insert: %u, first left path cpos: %u\n", insert_cpos, cpos); 1593dcd0538fSMark Fasheh 1594dcd0538fSMark Fasheh /* 1595dcd0538fSMark Fasheh * What we want to do here is: 1596dcd0538fSMark Fasheh * 1597dcd0538fSMark Fasheh * 1) Start with the rightmost path. 1598dcd0538fSMark Fasheh * 1599dcd0538fSMark Fasheh * 2) Determine a path to the leaf block directly to the left 1600dcd0538fSMark Fasheh * of that leaf. 1601dcd0538fSMark Fasheh * 1602dcd0538fSMark Fasheh * 3) Determine the 'subtree root' - the lowest level tree node 1603dcd0538fSMark Fasheh * which contains a path to both leaves. 1604dcd0538fSMark Fasheh * 1605dcd0538fSMark Fasheh * 4) Rotate the subtree. 1606dcd0538fSMark Fasheh * 1607dcd0538fSMark Fasheh * 5) Find the next subtree by considering the left path to be 1608dcd0538fSMark Fasheh * the new right path. 1609dcd0538fSMark Fasheh * 1610dcd0538fSMark Fasheh * The check at the top of this while loop also accepts 1611dcd0538fSMark Fasheh * insert_cpos == cpos because cpos is only a _theoretical_ 1612dcd0538fSMark Fasheh * value to get us the left path - insert_cpos might very well 1613dcd0538fSMark Fasheh * be filling that hole. 1614dcd0538fSMark Fasheh * 1615dcd0538fSMark Fasheh * Stop at a cpos of '0' because we either started at the 1616dcd0538fSMark Fasheh * leftmost branch (i.e., a tree with one branch and a 1617dcd0538fSMark Fasheh * rotation inside of it), or we've gone as far as we can in 1618dcd0538fSMark Fasheh * rotating subtrees. 1619dcd0538fSMark Fasheh */ 1620dcd0538fSMark Fasheh while (cpos && insert_cpos <= cpos) { 1621dcd0538fSMark Fasheh mlog(0, "Rotating a tree: ins. cpos: %u, left path cpos: %u\n", 1622dcd0538fSMark Fasheh insert_cpos, cpos); 1623dcd0538fSMark Fasheh 1624dcd0538fSMark Fasheh ret = ocfs2_find_path(inode, left_path, cpos); 1625dcd0538fSMark Fasheh if (ret) { 1626dcd0538fSMark Fasheh mlog_errno(ret); 1627dcd0538fSMark Fasheh goto out; 1628dcd0538fSMark Fasheh } 1629dcd0538fSMark Fasheh 1630dcd0538fSMark Fasheh mlog_bug_on_msg(path_leaf_bh(left_path) == 1631dcd0538fSMark Fasheh path_leaf_bh(right_path), 1632dcd0538fSMark Fasheh "Inode %lu: error during insert of %u " 1633dcd0538fSMark Fasheh "(left path cpos %u) results in two identical " 1634dcd0538fSMark Fasheh "paths ending at %llu\n", 1635dcd0538fSMark Fasheh inode->i_ino, insert_cpos, cpos, 1636dcd0538fSMark Fasheh (unsigned long long) 1637dcd0538fSMark Fasheh path_leaf_bh(left_path)->b_blocknr); 1638dcd0538fSMark Fasheh 1639dcd0538fSMark Fasheh if (ocfs2_rotate_requires_path_adjustment(left_path, 1640dcd0538fSMark Fasheh insert_cpos)) { 1641dcd0538fSMark Fasheh mlog(0, "Path adjustment required\n"); 1642dcd0538fSMark Fasheh 1643dcd0538fSMark Fasheh /* 1644dcd0538fSMark Fasheh * We've rotated the tree as much as we 1645dcd0538fSMark Fasheh * should. The rest is up to 1646dcd0538fSMark Fasheh * ocfs2_insert_path() to complete, after the 1647dcd0538fSMark Fasheh * record insertion. We indicate this 1648dcd0538fSMark Fasheh * situation by returning the left path. 1649dcd0538fSMark Fasheh * 1650dcd0538fSMark Fasheh * The reason we don't adjust the records here 1651dcd0538fSMark Fasheh * before the record insert is that an error 1652dcd0538fSMark Fasheh * later might break the rule where a parent 1653dcd0538fSMark Fasheh * record e_cpos will reflect the actual 1654dcd0538fSMark Fasheh * e_cpos of the 1st nonempty record of the 1655dcd0538fSMark Fasheh * child list. 1656dcd0538fSMark Fasheh */ 1657dcd0538fSMark Fasheh *ret_left_path = left_path; 1658dcd0538fSMark Fasheh goto out_ret_path; 1659dcd0538fSMark Fasheh } 1660dcd0538fSMark Fasheh 1661dcd0538fSMark Fasheh start = ocfs2_find_subtree_root(inode, left_path, right_path); 1662dcd0538fSMark Fasheh 1663dcd0538fSMark Fasheh mlog(0, "Subtree root at index %d (blk %llu, depth %d)\n", 1664dcd0538fSMark Fasheh start, 1665dcd0538fSMark Fasheh (unsigned long long) right_path->p_node[start].bh->b_blocknr, 1666dcd0538fSMark Fasheh right_path->p_tree_depth); 1667dcd0538fSMark Fasheh 1668dcd0538fSMark Fasheh ret = ocfs2_extend_rotate_transaction(handle, start, 1669dcd0538fSMark Fasheh right_path); 1670dcd0538fSMark Fasheh if (ret) { 1671dcd0538fSMark Fasheh mlog_errno(ret); 1672dcd0538fSMark Fasheh goto out; 1673dcd0538fSMark Fasheh } 1674dcd0538fSMark Fasheh 1675dcd0538fSMark Fasheh ret = ocfs2_rotate_subtree_right(inode, handle, left_path, 1676dcd0538fSMark Fasheh right_path, start); 1677dcd0538fSMark Fasheh if (ret) { 1678dcd0538fSMark Fasheh mlog_errno(ret); 1679dcd0538fSMark Fasheh goto out; 1680dcd0538fSMark Fasheh } 1681dcd0538fSMark Fasheh 1682dcd0538fSMark Fasheh /* 1683dcd0538fSMark Fasheh * There is no need to re-read the next right path 1684dcd0538fSMark Fasheh * as we know that it'll be our current left 1685dcd0538fSMark Fasheh * path. Optimize by copying values instead. 1686dcd0538fSMark Fasheh */ 1687dcd0538fSMark Fasheh ocfs2_mv_path(right_path, left_path); 1688dcd0538fSMark Fasheh 1689dcd0538fSMark Fasheh ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, right_path, 1690dcd0538fSMark Fasheh &cpos); 1691dcd0538fSMark Fasheh if (ret) { 1692dcd0538fSMark Fasheh mlog_errno(ret); 1693dcd0538fSMark Fasheh goto out; 1694dcd0538fSMark Fasheh } 1695dcd0538fSMark Fasheh } 1696dcd0538fSMark Fasheh 1697dcd0538fSMark Fasheh out: 1698dcd0538fSMark Fasheh ocfs2_free_path(left_path); 1699dcd0538fSMark Fasheh 1700dcd0538fSMark Fasheh out_ret_path: 1701dcd0538fSMark Fasheh return ret; 1702dcd0538fSMark Fasheh } 1703dcd0538fSMark Fasheh 1704dcd0538fSMark Fasheh /* 1705dcd0538fSMark Fasheh * Do the final bits of extent record insertion at the target leaf 1706dcd0538fSMark Fasheh * list. If this leaf is part of an allocation tree, it is assumed 1707dcd0538fSMark Fasheh * that the tree above has been prepared. 1708dcd0538fSMark Fasheh */ 1709dcd0538fSMark Fasheh static void ocfs2_insert_at_leaf(struct ocfs2_extent_rec *insert_rec, 1710dcd0538fSMark Fasheh struct ocfs2_extent_list *el, 1711dcd0538fSMark Fasheh struct ocfs2_insert_type *insert, 1712dcd0538fSMark Fasheh struct inode *inode) 1713dcd0538fSMark Fasheh { 1714dcd0538fSMark Fasheh int i = insert->ins_contig_index; 1715dcd0538fSMark Fasheh unsigned int range; 1716dcd0538fSMark Fasheh struct ocfs2_extent_rec *rec; 1717dcd0538fSMark Fasheh 1718dcd0538fSMark Fasheh BUG_ON(el->l_tree_depth); 1719dcd0538fSMark Fasheh 1720dcd0538fSMark Fasheh /* 1721dcd0538fSMark Fasheh * Contiguous insert - either left or right. 1722dcd0538fSMark Fasheh */ 1723dcd0538fSMark Fasheh if (insert->ins_contig != CONTIG_NONE) { 1724dcd0538fSMark Fasheh rec = &el->l_recs[i]; 1725dcd0538fSMark Fasheh if (insert->ins_contig == CONTIG_LEFT) { 1726dcd0538fSMark Fasheh rec->e_blkno = insert_rec->e_blkno; 1727dcd0538fSMark Fasheh rec->e_cpos = insert_rec->e_cpos; 1728dcd0538fSMark Fasheh } 1729dcd0538fSMark Fasheh le32_add_cpu(&rec->e_clusters, 1730dcd0538fSMark Fasheh le32_to_cpu(insert_rec->e_clusters)); 1731dcd0538fSMark Fasheh return; 1732dcd0538fSMark Fasheh } 1733dcd0538fSMark Fasheh 1734dcd0538fSMark Fasheh /* 1735dcd0538fSMark Fasheh * Handle insert into an empty leaf. 1736dcd0538fSMark Fasheh */ 1737dcd0538fSMark Fasheh if (le16_to_cpu(el->l_next_free_rec) == 0 || 1738dcd0538fSMark Fasheh ((le16_to_cpu(el->l_next_free_rec) == 1) && 1739dcd0538fSMark Fasheh ocfs2_is_empty_extent(&el->l_recs[0]))) { 1740dcd0538fSMark Fasheh el->l_recs[0] = *insert_rec; 1741dcd0538fSMark Fasheh el->l_next_free_rec = cpu_to_le16(1); 1742dcd0538fSMark Fasheh return; 1743dcd0538fSMark Fasheh } 1744dcd0538fSMark Fasheh 1745dcd0538fSMark Fasheh /* 1746dcd0538fSMark Fasheh * Appending insert. 1747dcd0538fSMark Fasheh */ 1748dcd0538fSMark Fasheh if (insert->ins_appending == APPEND_TAIL) { 1749dcd0538fSMark Fasheh i = le16_to_cpu(el->l_next_free_rec) - 1; 1750dcd0538fSMark Fasheh rec = &el->l_recs[i]; 1751dcd0538fSMark Fasheh range = le32_to_cpu(rec->e_cpos) + le32_to_cpu(rec->e_clusters); 1752dcd0538fSMark Fasheh BUG_ON(le32_to_cpu(insert_rec->e_cpos) < range); 1753dcd0538fSMark Fasheh 1754dcd0538fSMark Fasheh mlog_bug_on_msg(le16_to_cpu(el->l_next_free_rec) >= 1755dcd0538fSMark Fasheh le16_to_cpu(el->l_count), 1756dcd0538fSMark Fasheh "inode %lu, depth %u, count %u, next free %u, " 1757dcd0538fSMark Fasheh "rec.cpos %u, rec.clusters %u, " 1758dcd0538fSMark Fasheh "insert.cpos %u, insert.clusters %u\n", 1759dcd0538fSMark Fasheh inode->i_ino, 1760dcd0538fSMark Fasheh le16_to_cpu(el->l_tree_depth), 1761dcd0538fSMark Fasheh le16_to_cpu(el->l_count), 1762dcd0538fSMark Fasheh le16_to_cpu(el->l_next_free_rec), 1763dcd0538fSMark Fasheh le32_to_cpu(el->l_recs[i].e_cpos), 1764dcd0538fSMark Fasheh le32_to_cpu(el->l_recs[i].e_clusters), 1765dcd0538fSMark Fasheh le32_to_cpu(insert_rec->e_cpos), 1766dcd0538fSMark Fasheh le32_to_cpu(insert_rec->e_clusters)); 1767dcd0538fSMark Fasheh i++; 1768dcd0538fSMark Fasheh el->l_recs[i] = *insert_rec; 1769dcd0538fSMark Fasheh le16_add_cpu(&el->l_next_free_rec, 1); 1770dcd0538fSMark Fasheh return; 1771dcd0538fSMark Fasheh } 1772dcd0538fSMark Fasheh 1773dcd0538fSMark Fasheh /* 1774dcd0538fSMark Fasheh * Ok, we have to rotate. 1775dcd0538fSMark Fasheh * 1776dcd0538fSMark Fasheh * At this point, it is safe to assume that inserting into an 1777dcd0538fSMark Fasheh * empty leaf and appending to a leaf have both been handled 1778dcd0538fSMark Fasheh * above. 1779dcd0538fSMark Fasheh * 1780dcd0538fSMark Fasheh * This leaf needs to have space, either by the empty 1st 1781dcd0538fSMark Fasheh * extent record, or by virtue of an l_next_rec < l_count. 1782dcd0538fSMark Fasheh */ 1783dcd0538fSMark Fasheh ocfs2_rotate_leaf(el, insert_rec); 1784dcd0538fSMark Fasheh } 1785dcd0538fSMark Fasheh 1786dcd0538fSMark Fasheh static inline void ocfs2_update_dinode_clusters(struct inode *inode, 1787dcd0538fSMark Fasheh struct ocfs2_dinode *di, 1788dcd0538fSMark Fasheh u32 clusters) 1789dcd0538fSMark Fasheh { 1790dcd0538fSMark Fasheh le32_add_cpu(&di->i_clusters, clusters); 1791dcd0538fSMark Fasheh spin_lock(&OCFS2_I(inode)->ip_lock); 1792dcd0538fSMark Fasheh OCFS2_I(inode)->ip_clusters = le32_to_cpu(di->i_clusters); 1793dcd0538fSMark Fasheh spin_unlock(&OCFS2_I(inode)->ip_lock); 1794dcd0538fSMark Fasheh } 1795dcd0538fSMark Fasheh 1796dcd0538fSMark Fasheh static int ocfs2_append_rec_to_path(struct inode *inode, handle_t *handle, 1797dcd0538fSMark Fasheh struct ocfs2_extent_rec *insert_rec, 1798dcd0538fSMark Fasheh struct ocfs2_path *right_path, 1799dcd0538fSMark Fasheh struct ocfs2_path **ret_left_path) 1800dcd0538fSMark Fasheh { 1801dcd0538fSMark Fasheh int ret, i, next_free; 1802dcd0538fSMark Fasheh struct buffer_head *bh; 1803dcd0538fSMark Fasheh struct ocfs2_extent_list *el; 1804dcd0538fSMark Fasheh struct ocfs2_path *left_path = NULL; 1805dcd0538fSMark Fasheh 1806dcd0538fSMark Fasheh *ret_left_path = NULL; 1807dcd0538fSMark Fasheh 1808dcd0538fSMark Fasheh /* 1809dcd0538fSMark Fasheh * If our appending insert is at the leftmost edge of a leaf, 1810dcd0538fSMark Fasheh * then we might need to update the rightmost records of the 1811dcd0538fSMark Fasheh * neighboring path. 1812dcd0538fSMark Fasheh */ 1813dcd0538fSMark Fasheh el = path_leaf_el(right_path); 1814dcd0538fSMark Fasheh next_free = le16_to_cpu(el->l_next_free_rec); 1815dcd0538fSMark Fasheh if (next_free == 0 || 1816dcd0538fSMark Fasheh (next_free == 1 && ocfs2_is_empty_extent(&el->l_recs[0]))) { 1817dcd0538fSMark Fasheh u32 left_cpos; 1818dcd0538fSMark Fasheh 1819dcd0538fSMark Fasheh ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, right_path, 1820dcd0538fSMark Fasheh &left_cpos); 1821dcd0538fSMark Fasheh if (ret) { 1822dcd0538fSMark Fasheh mlog_errno(ret); 1823dcd0538fSMark Fasheh goto out; 1824dcd0538fSMark Fasheh } 1825dcd0538fSMark Fasheh 1826dcd0538fSMark Fasheh mlog(0, "Append may need a left path update. cpos: %u, " 1827dcd0538fSMark Fasheh "left_cpos: %u\n", le32_to_cpu(insert_rec->e_cpos), 1828dcd0538fSMark Fasheh left_cpos); 1829dcd0538fSMark Fasheh 1830dcd0538fSMark Fasheh /* 1831dcd0538fSMark Fasheh * No need to worry if the append is already in the 1832dcd0538fSMark Fasheh * leftmost leaf. 1833dcd0538fSMark Fasheh */ 1834dcd0538fSMark Fasheh if (left_cpos) { 1835dcd0538fSMark Fasheh left_path = ocfs2_new_path(path_root_bh(right_path), 1836dcd0538fSMark Fasheh path_root_el(right_path)); 1837dcd0538fSMark Fasheh if (!left_path) { 1838dcd0538fSMark Fasheh ret = -ENOMEM; 1839dcd0538fSMark Fasheh mlog_errno(ret); 1840dcd0538fSMark Fasheh goto out; 1841dcd0538fSMark Fasheh } 1842dcd0538fSMark Fasheh 1843dcd0538fSMark Fasheh ret = ocfs2_find_path(inode, left_path, left_cpos); 1844dcd0538fSMark Fasheh if (ret) { 1845dcd0538fSMark Fasheh mlog_errno(ret); 1846dcd0538fSMark Fasheh goto out; 1847dcd0538fSMark Fasheh } 1848dcd0538fSMark Fasheh 1849dcd0538fSMark Fasheh /* 1850dcd0538fSMark Fasheh * ocfs2_insert_path() will pass the left_path to the 1851dcd0538fSMark Fasheh * journal for us. 1852dcd0538fSMark Fasheh */ 1853dcd0538fSMark Fasheh } 1854dcd0538fSMark Fasheh } 1855dcd0538fSMark Fasheh 1856dcd0538fSMark Fasheh ret = ocfs2_journal_access_path(inode, handle, right_path); 1857dcd0538fSMark Fasheh if (ret) { 1858dcd0538fSMark Fasheh mlog_errno(ret); 1859dcd0538fSMark Fasheh goto out; 1860dcd0538fSMark Fasheh } 1861dcd0538fSMark Fasheh 1862dcd0538fSMark Fasheh el = path_root_el(right_path); 1863dcd0538fSMark Fasheh bh = path_root_bh(right_path); 1864dcd0538fSMark Fasheh i = 0; 1865dcd0538fSMark Fasheh while (1) { 1866dcd0538fSMark Fasheh next_free = le16_to_cpu(el->l_next_free_rec); 1867dcd0538fSMark Fasheh if (next_free == 0) { 1868dcd0538fSMark Fasheh ocfs2_error(inode->i_sb, 1869dcd0538fSMark Fasheh "Dinode %llu has a bad extent list", 1870dcd0538fSMark Fasheh (unsigned long long)OCFS2_I(inode)->ip_blkno); 1871dcd0538fSMark Fasheh ret = -EIO; 1872dcd0538fSMark Fasheh goto out; 1873dcd0538fSMark Fasheh } 1874dcd0538fSMark Fasheh 1875dcd0538fSMark Fasheh el->l_recs[next_free - 1].e_clusters = insert_rec->e_cpos; 1876dcd0538fSMark Fasheh le32_add_cpu(&el->l_recs[next_free - 1].e_clusters, 1877dcd0538fSMark Fasheh le32_to_cpu(insert_rec->e_clusters)); 1878dcd0538fSMark Fasheh le32_add_cpu(&el->l_recs[next_free - 1].e_clusters, 1879dcd0538fSMark Fasheh -le32_to_cpu(el->l_recs[next_free - 1].e_cpos)); 1880dcd0538fSMark Fasheh 1881dcd0538fSMark Fasheh ret = ocfs2_journal_dirty(handle, bh); 1882dcd0538fSMark Fasheh if (ret) 1883dcd0538fSMark Fasheh mlog_errno(ret); 1884dcd0538fSMark Fasheh 1885dcd0538fSMark Fasheh if (++i >= right_path->p_tree_depth) 1886dcd0538fSMark Fasheh break; 1887dcd0538fSMark Fasheh 1888dcd0538fSMark Fasheh bh = right_path->p_node[i].bh; 1889dcd0538fSMark Fasheh el = right_path->p_node[i].el; 1890dcd0538fSMark Fasheh } 1891dcd0538fSMark Fasheh 1892dcd0538fSMark Fasheh *ret_left_path = left_path; 1893dcd0538fSMark Fasheh ret = 0; 1894dcd0538fSMark Fasheh out: 1895dcd0538fSMark Fasheh if (ret != 0) 1896dcd0538fSMark Fasheh ocfs2_free_path(left_path); 1897dcd0538fSMark Fasheh 1898dcd0538fSMark Fasheh return ret; 1899dcd0538fSMark Fasheh } 1900dcd0538fSMark Fasheh 1901dcd0538fSMark Fasheh /* 1902dcd0538fSMark Fasheh * This function only does inserts on an allocation b-tree. For dinode 1903dcd0538fSMark Fasheh * lists, ocfs2_insert_at_leaf() is called directly. 1904dcd0538fSMark Fasheh * 1905dcd0538fSMark Fasheh * right_path is the path we want to do the actual insert 1906dcd0538fSMark Fasheh * in. left_path should only be passed in if we need to update that 1907dcd0538fSMark Fasheh * portion of the tree after an edge insert. 1908dcd0538fSMark Fasheh */ 1909dcd0538fSMark Fasheh static int ocfs2_insert_path(struct inode *inode, 1910dcd0538fSMark Fasheh handle_t *handle, 1911dcd0538fSMark Fasheh struct ocfs2_path *left_path, 1912dcd0538fSMark Fasheh struct ocfs2_path *right_path, 1913dcd0538fSMark Fasheh struct ocfs2_extent_rec *insert_rec, 1914dcd0538fSMark Fasheh struct ocfs2_insert_type *insert) 1915dcd0538fSMark Fasheh { 1916dcd0538fSMark Fasheh int ret, subtree_index; 1917dcd0538fSMark Fasheh struct buffer_head *leaf_bh = path_leaf_bh(right_path); 1918dcd0538fSMark Fasheh struct ocfs2_extent_list *el; 1919dcd0538fSMark Fasheh 1920dcd0538fSMark Fasheh /* 1921dcd0538fSMark Fasheh * Pass both paths to the journal. The majority of inserts 1922dcd0538fSMark Fasheh * will be touching all components anyway. 1923dcd0538fSMark Fasheh */ 1924dcd0538fSMark Fasheh ret = ocfs2_journal_access_path(inode, handle, right_path); 1925dcd0538fSMark Fasheh if (ret < 0) { 1926dcd0538fSMark Fasheh mlog_errno(ret); 1927dcd0538fSMark Fasheh goto out; 1928dcd0538fSMark Fasheh } 1929dcd0538fSMark Fasheh 1930dcd0538fSMark Fasheh if (left_path) { 1931dcd0538fSMark Fasheh int credits = handle->h_buffer_credits; 1932dcd0538fSMark Fasheh 1933dcd0538fSMark Fasheh /* 1934dcd0538fSMark Fasheh * There's a chance that left_path got passed back to 1935dcd0538fSMark Fasheh * us without being accounted for in the 1936dcd0538fSMark Fasheh * journal. Extend our transaction here to be sure we 1937dcd0538fSMark Fasheh * can change those blocks. 1938dcd0538fSMark Fasheh */ 1939dcd0538fSMark Fasheh credits += left_path->p_tree_depth; 1940dcd0538fSMark Fasheh 1941dcd0538fSMark Fasheh ret = ocfs2_extend_trans(handle, credits); 1942dcd0538fSMark Fasheh if (ret < 0) { 1943dcd0538fSMark Fasheh mlog_errno(ret); 1944dcd0538fSMark Fasheh goto out; 1945dcd0538fSMark Fasheh } 1946dcd0538fSMark Fasheh 1947dcd0538fSMark Fasheh ret = ocfs2_journal_access_path(inode, handle, left_path); 1948dcd0538fSMark Fasheh if (ret < 0) { 1949dcd0538fSMark Fasheh mlog_errno(ret); 1950dcd0538fSMark Fasheh goto out; 1951dcd0538fSMark Fasheh } 1952dcd0538fSMark Fasheh } 1953dcd0538fSMark Fasheh 1954dcd0538fSMark Fasheh el = path_leaf_el(right_path); 1955dcd0538fSMark Fasheh 1956dcd0538fSMark Fasheh ocfs2_insert_at_leaf(insert_rec, el, insert, inode); 1957dcd0538fSMark Fasheh ret = ocfs2_journal_dirty(handle, leaf_bh); 1958dcd0538fSMark Fasheh if (ret) 1959dcd0538fSMark Fasheh mlog_errno(ret); 1960dcd0538fSMark Fasheh 1961dcd0538fSMark Fasheh if (left_path) { 1962dcd0538fSMark Fasheh /* 1963dcd0538fSMark Fasheh * The rotate code has indicated that we need to fix 1964dcd0538fSMark Fasheh * up portions of the tree after the insert. 1965dcd0538fSMark Fasheh * 1966dcd0538fSMark Fasheh * XXX: Should we extend the transaction here? 1967dcd0538fSMark Fasheh */ 1968dcd0538fSMark Fasheh subtree_index = ocfs2_find_subtree_root(inode, left_path, 1969dcd0538fSMark Fasheh right_path); 1970dcd0538fSMark Fasheh ocfs2_complete_edge_insert(inode, handle, left_path, 1971dcd0538fSMark Fasheh right_path, subtree_index); 1972dcd0538fSMark Fasheh } 1973dcd0538fSMark Fasheh 1974dcd0538fSMark Fasheh ret = 0; 1975dcd0538fSMark Fasheh out: 1976dcd0538fSMark Fasheh return ret; 1977dcd0538fSMark Fasheh } 1978dcd0538fSMark Fasheh 1979dcd0538fSMark Fasheh static int ocfs2_do_insert_extent(struct inode *inode, 1980dcd0538fSMark Fasheh handle_t *handle, 1981dcd0538fSMark Fasheh struct buffer_head *di_bh, 1982dcd0538fSMark Fasheh struct ocfs2_extent_rec *insert_rec, 1983dcd0538fSMark Fasheh struct ocfs2_insert_type *type) 1984dcd0538fSMark Fasheh { 1985dcd0538fSMark Fasheh int ret, rotate = 0; 1986dcd0538fSMark Fasheh u32 cpos; 1987dcd0538fSMark Fasheh struct ocfs2_path *right_path = NULL; 1988dcd0538fSMark Fasheh struct ocfs2_path *left_path = NULL; 1989dcd0538fSMark Fasheh struct ocfs2_dinode *di; 1990dcd0538fSMark Fasheh struct ocfs2_extent_list *el; 1991dcd0538fSMark Fasheh 1992dcd0538fSMark Fasheh di = (struct ocfs2_dinode *) di_bh->b_data; 1993dcd0538fSMark Fasheh el = &di->id2.i_list; 1994dcd0538fSMark Fasheh 1995dcd0538fSMark Fasheh ret = ocfs2_journal_access(handle, inode, di_bh, 1996dcd0538fSMark Fasheh OCFS2_JOURNAL_ACCESS_WRITE); 1997dcd0538fSMark Fasheh if (ret) { 1998dcd0538fSMark Fasheh mlog_errno(ret); 1999dcd0538fSMark Fasheh goto out; 2000dcd0538fSMark Fasheh } 2001dcd0538fSMark Fasheh 2002dcd0538fSMark Fasheh if (le16_to_cpu(el->l_tree_depth) == 0) { 2003dcd0538fSMark Fasheh ocfs2_insert_at_leaf(insert_rec, el, type, inode); 2004dcd0538fSMark Fasheh goto out_update_clusters; 2005dcd0538fSMark Fasheh } 2006dcd0538fSMark Fasheh 2007dcd0538fSMark Fasheh right_path = ocfs2_new_inode_path(di_bh); 2008dcd0538fSMark Fasheh if (!right_path) { 2009dcd0538fSMark Fasheh ret = -ENOMEM; 2010dcd0538fSMark Fasheh mlog_errno(ret); 2011dcd0538fSMark Fasheh goto out; 2012dcd0538fSMark Fasheh } 2013dcd0538fSMark Fasheh 2014dcd0538fSMark Fasheh /* 2015dcd0538fSMark Fasheh * Determine the path to start with. Rotations need the 2016dcd0538fSMark Fasheh * rightmost path, everything else can go directly to the 2017dcd0538fSMark Fasheh * target leaf. 2018dcd0538fSMark Fasheh */ 2019dcd0538fSMark Fasheh cpos = le32_to_cpu(insert_rec->e_cpos); 2020dcd0538fSMark Fasheh if (type->ins_appending == APPEND_NONE && 2021dcd0538fSMark Fasheh type->ins_contig == CONTIG_NONE) { 2022dcd0538fSMark Fasheh rotate = 1; 2023dcd0538fSMark Fasheh cpos = UINT_MAX; 2024dcd0538fSMark Fasheh } 2025dcd0538fSMark Fasheh 2026dcd0538fSMark Fasheh ret = ocfs2_find_path(inode, right_path, cpos); 2027dcd0538fSMark Fasheh if (ret) { 2028dcd0538fSMark Fasheh mlog_errno(ret); 2029dcd0538fSMark Fasheh goto out; 2030dcd0538fSMark Fasheh } 2031dcd0538fSMark Fasheh 2032dcd0538fSMark Fasheh /* 2033dcd0538fSMark Fasheh * Rotations and appends need special treatment - they modify 2034dcd0538fSMark Fasheh * parts of the tree's above them. 2035dcd0538fSMark Fasheh * 2036dcd0538fSMark Fasheh * Both might pass back a path immediate to the left of the 2037dcd0538fSMark Fasheh * one being inserted to. This will be cause 2038dcd0538fSMark Fasheh * ocfs2_insert_path() to modify the rightmost records of 2039dcd0538fSMark Fasheh * left_path to account for an edge insert. 2040dcd0538fSMark Fasheh * 2041dcd0538fSMark Fasheh * XXX: When modifying this code, keep in mind that an insert 2042dcd0538fSMark Fasheh * can wind up skipping both of these two special cases... 2043dcd0538fSMark Fasheh */ 2044dcd0538fSMark Fasheh if (rotate) { 2045dcd0538fSMark Fasheh ret = ocfs2_rotate_tree_right(inode, handle, 2046dcd0538fSMark Fasheh le32_to_cpu(insert_rec->e_cpos), 2047dcd0538fSMark Fasheh right_path, &left_path); 2048dcd0538fSMark Fasheh if (ret) { 2049dcd0538fSMark Fasheh mlog_errno(ret); 2050dcd0538fSMark Fasheh goto out; 2051dcd0538fSMark Fasheh } 2052dcd0538fSMark Fasheh } else if (type->ins_appending == APPEND_TAIL 2053dcd0538fSMark Fasheh && type->ins_contig != CONTIG_LEFT) { 2054dcd0538fSMark Fasheh ret = ocfs2_append_rec_to_path(inode, handle, insert_rec, 2055dcd0538fSMark Fasheh right_path, &left_path); 2056dcd0538fSMark Fasheh if (ret) { 2057dcd0538fSMark Fasheh mlog_errno(ret); 2058dcd0538fSMark Fasheh goto out; 2059dcd0538fSMark Fasheh } 2060dcd0538fSMark Fasheh } 2061dcd0538fSMark Fasheh 2062dcd0538fSMark Fasheh ret = ocfs2_insert_path(inode, handle, left_path, right_path, 2063dcd0538fSMark Fasheh insert_rec, type); 2064dcd0538fSMark Fasheh if (ret) { 2065dcd0538fSMark Fasheh mlog_errno(ret); 2066dcd0538fSMark Fasheh goto out; 2067dcd0538fSMark Fasheh } 2068dcd0538fSMark Fasheh 2069dcd0538fSMark Fasheh out_update_clusters: 2070dcd0538fSMark Fasheh ocfs2_update_dinode_clusters(inode, di, 2071dcd0538fSMark Fasheh le32_to_cpu(insert_rec->e_clusters)); 2072dcd0538fSMark Fasheh 2073dcd0538fSMark Fasheh ret = ocfs2_journal_dirty(handle, di_bh); 2074dcd0538fSMark Fasheh if (ret) 2075dcd0538fSMark Fasheh mlog_errno(ret); 2076dcd0538fSMark Fasheh 2077dcd0538fSMark Fasheh out: 2078dcd0538fSMark Fasheh ocfs2_free_path(left_path); 2079dcd0538fSMark Fasheh ocfs2_free_path(right_path); 2080dcd0538fSMark Fasheh 2081dcd0538fSMark Fasheh return ret; 2082dcd0538fSMark Fasheh } 2083dcd0538fSMark Fasheh 2084dcd0538fSMark Fasheh static void ocfs2_figure_contig_type(struct inode *inode, 2085dcd0538fSMark Fasheh struct ocfs2_insert_type *insert, 2086dcd0538fSMark Fasheh struct ocfs2_extent_list *el, 2087dcd0538fSMark Fasheh struct ocfs2_extent_rec *insert_rec) 2088dcd0538fSMark Fasheh { 2089dcd0538fSMark Fasheh int i; 2090dcd0538fSMark Fasheh enum ocfs2_contig_type contig_type = CONTIG_NONE; 2091dcd0538fSMark Fasheh 2092dcd0538fSMark Fasheh for(i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) { 2093dcd0538fSMark Fasheh contig_type = ocfs2_extent_contig(inode, &el->l_recs[i], 2094dcd0538fSMark Fasheh insert_rec); 2095dcd0538fSMark Fasheh if (contig_type != CONTIG_NONE) { 2096dcd0538fSMark Fasheh insert->ins_contig_index = i; 2097dcd0538fSMark Fasheh break; 2098dcd0538fSMark Fasheh } 2099dcd0538fSMark Fasheh } 2100dcd0538fSMark Fasheh insert->ins_contig = contig_type; 2101dcd0538fSMark Fasheh } 2102dcd0538fSMark Fasheh 2103dcd0538fSMark Fasheh /* 2104dcd0538fSMark Fasheh * This should only be called against the righmost leaf extent list. 2105dcd0538fSMark Fasheh * 2106dcd0538fSMark Fasheh * ocfs2_figure_appending_type() will figure out whether we'll have to 2107dcd0538fSMark Fasheh * insert at the tail of the rightmost leaf. 2108dcd0538fSMark Fasheh * 2109dcd0538fSMark Fasheh * This should also work against the dinode list for tree's with 0 2110dcd0538fSMark Fasheh * depth. If we consider the dinode list to be the rightmost leaf node 2111dcd0538fSMark Fasheh * then the logic here makes sense. 2112dcd0538fSMark Fasheh */ 2113dcd0538fSMark Fasheh static void ocfs2_figure_appending_type(struct ocfs2_insert_type *insert, 2114dcd0538fSMark Fasheh struct ocfs2_extent_list *el, 2115dcd0538fSMark Fasheh struct ocfs2_extent_rec *insert_rec) 2116dcd0538fSMark Fasheh { 2117dcd0538fSMark Fasheh int i; 2118dcd0538fSMark Fasheh u32 cpos = le32_to_cpu(insert_rec->e_cpos); 2119dcd0538fSMark Fasheh struct ocfs2_extent_rec *rec; 2120dcd0538fSMark Fasheh 2121dcd0538fSMark Fasheh insert->ins_appending = APPEND_NONE; 2122dcd0538fSMark Fasheh 2123dcd0538fSMark Fasheh BUG_ON(el->l_tree_depth); 2124dcd0538fSMark Fasheh 2125dcd0538fSMark Fasheh if (!el->l_next_free_rec) 2126dcd0538fSMark Fasheh goto set_tail_append; 2127dcd0538fSMark Fasheh 2128dcd0538fSMark Fasheh if (ocfs2_is_empty_extent(&el->l_recs[0])) { 2129dcd0538fSMark Fasheh /* Were all records empty? */ 2130dcd0538fSMark Fasheh if (le16_to_cpu(el->l_next_free_rec) == 1) 2131dcd0538fSMark Fasheh goto set_tail_append; 2132dcd0538fSMark Fasheh } 2133dcd0538fSMark Fasheh 2134dcd0538fSMark Fasheh i = le16_to_cpu(el->l_next_free_rec) - 1; 2135dcd0538fSMark Fasheh rec = &el->l_recs[i]; 2136dcd0538fSMark Fasheh 2137dcd0538fSMark Fasheh if (cpos >= (le32_to_cpu(rec->e_cpos) + le32_to_cpu(rec->e_clusters))) 2138dcd0538fSMark Fasheh goto set_tail_append; 2139dcd0538fSMark Fasheh 2140dcd0538fSMark Fasheh return; 2141dcd0538fSMark Fasheh 2142dcd0538fSMark Fasheh set_tail_append: 2143dcd0538fSMark Fasheh insert->ins_appending = APPEND_TAIL; 2144dcd0538fSMark Fasheh } 2145dcd0538fSMark Fasheh 2146dcd0538fSMark Fasheh /* 2147dcd0538fSMark Fasheh * Helper function called at the begining of an insert. 2148dcd0538fSMark Fasheh * 2149dcd0538fSMark Fasheh * This computes a few things that are commonly used in the process of 2150dcd0538fSMark Fasheh * inserting into the btree: 2151dcd0538fSMark Fasheh * - Whether the new extent is contiguous with an existing one. 2152dcd0538fSMark Fasheh * - The current tree depth. 2153dcd0538fSMark Fasheh * - Whether the insert is an appending one. 2154dcd0538fSMark Fasheh * - The total # of free records in the tree. 2155dcd0538fSMark Fasheh * 2156dcd0538fSMark Fasheh * All of the information is stored on the ocfs2_insert_type 2157dcd0538fSMark Fasheh * structure. 2158dcd0538fSMark Fasheh */ 2159dcd0538fSMark Fasheh static int ocfs2_figure_insert_type(struct inode *inode, 2160dcd0538fSMark Fasheh struct buffer_head *di_bh, 2161dcd0538fSMark Fasheh struct buffer_head **last_eb_bh, 2162dcd0538fSMark Fasheh struct ocfs2_extent_rec *insert_rec, 2163dcd0538fSMark Fasheh struct ocfs2_insert_type *insert) 2164dcd0538fSMark Fasheh { 2165dcd0538fSMark Fasheh int ret; 2166dcd0538fSMark Fasheh struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 2167dcd0538fSMark Fasheh struct ocfs2_extent_block *eb; 2168dcd0538fSMark Fasheh struct ocfs2_extent_list *el; 2169dcd0538fSMark Fasheh struct ocfs2_path *path = NULL; 2170dcd0538fSMark Fasheh struct buffer_head *bh = NULL; 2171dcd0538fSMark Fasheh 2172dcd0538fSMark Fasheh el = &di->id2.i_list; 2173dcd0538fSMark Fasheh insert->ins_tree_depth = le16_to_cpu(el->l_tree_depth); 2174dcd0538fSMark Fasheh 2175dcd0538fSMark Fasheh if (el->l_tree_depth) { 2176dcd0538fSMark Fasheh /* 2177dcd0538fSMark Fasheh * If we have tree depth, we read in the 2178dcd0538fSMark Fasheh * rightmost extent block ahead of time as 2179dcd0538fSMark Fasheh * ocfs2_figure_insert_type() and ocfs2_add_branch() 2180dcd0538fSMark Fasheh * may want it later. 2181dcd0538fSMark Fasheh */ 2182dcd0538fSMark Fasheh ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), 2183dcd0538fSMark Fasheh le64_to_cpu(di->i_last_eb_blk), &bh, 2184dcd0538fSMark Fasheh OCFS2_BH_CACHED, inode); 2185dcd0538fSMark Fasheh if (ret) { 2186dcd0538fSMark Fasheh mlog_exit(ret); 2187dcd0538fSMark Fasheh goto out; 2188dcd0538fSMark Fasheh } 2189dcd0538fSMark Fasheh eb = (struct ocfs2_extent_block *) bh->b_data; 2190dcd0538fSMark Fasheh el = &eb->h_list; 2191dcd0538fSMark Fasheh } 2192dcd0538fSMark Fasheh 2193dcd0538fSMark Fasheh /* 2194dcd0538fSMark Fasheh * Unless we have a contiguous insert, we'll need to know if 2195dcd0538fSMark Fasheh * there is room left in our allocation tree for another 2196dcd0538fSMark Fasheh * extent record. 2197dcd0538fSMark Fasheh * 2198dcd0538fSMark Fasheh * XXX: This test is simplistic, we can search for empty 2199dcd0538fSMark Fasheh * extent records too. 2200dcd0538fSMark Fasheh */ 2201dcd0538fSMark Fasheh insert->ins_free_records = le16_to_cpu(el->l_count) - 2202dcd0538fSMark Fasheh le16_to_cpu(el->l_next_free_rec); 2203dcd0538fSMark Fasheh 2204dcd0538fSMark Fasheh if (!insert->ins_tree_depth) { 2205dcd0538fSMark Fasheh ocfs2_figure_contig_type(inode, insert, el, insert_rec); 2206dcd0538fSMark Fasheh ocfs2_figure_appending_type(insert, el, insert_rec); 2207dcd0538fSMark Fasheh return 0; 2208dcd0538fSMark Fasheh } 2209dcd0538fSMark Fasheh 2210dcd0538fSMark Fasheh path = ocfs2_new_inode_path(di_bh); 2211dcd0538fSMark Fasheh if (!path) { 2212dcd0538fSMark Fasheh ret = -ENOMEM; 2213dcd0538fSMark Fasheh mlog_errno(ret); 2214dcd0538fSMark Fasheh goto out; 2215dcd0538fSMark Fasheh } 2216dcd0538fSMark Fasheh 2217dcd0538fSMark Fasheh /* 2218dcd0538fSMark Fasheh * In the case that we're inserting past what the tree 2219dcd0538fSMark Fasheh * currently accounts for, ocfs2_find_path() will return for 2220dcd0538fSMark Fasheh * us the rightmost tree path. This is accounted for below in 2221dcd0538fSMark Fasheh * the appending code. 2222dcd0538fSMark Fasheh */ 2223dcd0538fSMark Fasheh ret = ocfs2_find_path(inode, path, le32_to_cpu(insert_rec->e_cpos)); 2224dcd0538fSMark Fasheh if (ret) { 2225dcd0538fSMark Fasheh mlog_errno(ret); 2226dcd0538fSMark Fasheh goto out; 2227dcd0538fSMark Fasheh } 2228dcd0538fSMark Fasheh 2229dcd0538fSMark Fasheh el = path_leaf_el(path); 2230dcd0538fSMark Fasheh 2231dcd0538fSMark Fasheh /* 2232dcd0538fSMark Fasheh * Now that we have the path, there's two things we want to determine: 2233dcd0538fSMark Fasheh * 1) Contiguousness (also set contig_index if this is so) 2234dcd0538fSMark Fasheh * 2235dcd0538fSMark Fasheh * 2) Are we doing an append? We can trivially break this up 2236dcd0538fSMark Fasheh * into two types of appends: simple record append, or a 2237dcd0538fSMark Fasheh * rotate inside the tail leaf. 2238dcd0538fSMark Fasheh */ 2239dcd0538fSMark Fasheh ocfs2_figure_contig_type(inode, insert, el, insert_rec); 2240dcd0538fSMark Fasheh 2241dcd0538fSMark Fasheh /* 2242dcd0538fSMark Fasheh * The insert code isn't quite ready to deal with all cases of 2243dcd0538fSMark Fasheh * left contiguousness. Specifically, if it's an insert into 2244dcd0538fSMark Fasheh * the 1st record in a leaf, it will require the adjustment of 2245dcd0538fSMark Fasheh * e_clusters on the last record of the path directly to it's 2246dcd0538fSMark Fasheh * left. For now, just catch that case and fool the layers 2247dcd0538fSMark Fasheh * above us. This works just fine for tree_depth == 0, which 2248dcd0538fSMark Fasheh * is why we allow that above. 2249dcd0538fSMark Fasheh */ 2250dcd0538fSMark Fasheh if (insert->ins_contig == CONTIG_LEFT && 2251dcd0538fSMark Fasheh insert->ins_contig_index == 0) 2252dcd0538fSMark Fasheh insert->ins_contig = CONTIG_NONE; 2253dcd0538fSMark Fasheh 2254dcd0538fSMark Fasheh /* 2255dcd0538fSMark Fasheh * Ok, so we can simply compare against last_eb to figure out 2256dcd0538fSMark Fasheh * whether the path doesn't exist. This will only happen in 2257dcd0538fSMark Fasheh * the case that we're doing a tail append, so maybe we can 2258dcd0538fSMark Fasheh * take advantage of that information somehow. 2259dcd0538fSMark Fasheh */ 2260dcd0538fSMark Fasheh if (le64_to_cpu(di->i_last_eb_blk) == path_leaf_bh(path)->b_blocknr) { 2261dcd0538fSMark Fasheh /* 2262dcd0538fSMark Fasheh * Ok, ocfs2_find_path() returned us the rightmost 2263dcd0538fSMark Fasheh * tree path. This might be an appending insert. There are 2264dcd0538fSMark Fasheh * two cases: 2265dcd0538fSMark Fasheh * 1) We're doing a true append at the tail: 2266dcd0538fSMark Fasheh * -This might even be off the end of the leaf 2267dcd0538fSMark Fasheh * 2) We're "appending" by rotating in the tail 2268dcd0538fSMark Fasheh */ 2269dcd0538fSMark Fasheh ocfs2_figure_appending_type(insert, el, insert_rec); 2270dcd0538fSMark Fasheh } 2271dcd0538fSMark Fasheh 2272dcd0538fSMark Fasheh out: 2273dcd0538fSMark Fasheh ocfs2_free_path(path); 2274dcd0538fSMark Fasheh 2275dcd0538fSMark Fasheh if (ret == 0) 2276dcd0538fSMark Fasheh *last_eb_bh = bh; 2277dcd0538fSMark Fasheh else 2278dcd0538fSMark Fasheh brelse(bh); 2279dcd0538fSMark Fasheh return ret; 2280dcd0538fSMark Fasheh } 2281dcd0538fSMark Fasheh 2282dcd0538fSMark Fasheh /* 2283dcd0538fSMark Fasheh * Insert an extent into an inode btree. 2284dcd0538fSMark Fasheh * 2285dcd0538fSMark Fasheh * The caller needs to update fe->i_clusters 2286dcd0538fSMark Fasheh */ 2287ccd979bdSMark Fasheh int ocfs2_insert_extent(struct ocfs2_super *osb, 22881fabe148SMark Fasheh handle_t *handle, 2289ccd979bdSMark Fasheh struct inode *inode, 2290ccd979bdSMark Fasheh struct buffer_head *fe_bh, 2291dcd0538fSMark Fasheh u32 cpos, 2292ccd979bdSMark Fasheh u64 start_blk, 2293ccd979bdSMark Fasheh u32 new_clusters, 2294ccd979bdSMark Fasheh struct ocfs2_alloc_context *meta_ac) 2295ccd979bdSMark Fasheh { 2296dcd0538fSMark Fasheh int status, shift; 2297ccd979bdSMark Fasheh struct buffer_head *last_eb_bh = NULL; 2298ccd979bdSMark Fasheh struct buffer_head *bh = NULL; 2299dcd0538fSMark Fasheh struct ocfs2_insert_type insert = {0, }; 2300dcd0538fSMark Fasheh struct ocfs2_extent_rec rec; 2301ccd979bdSMark Fasheh 2302dcd0538fSMark Fasheh mlog(0, "add %u clusters at position %u to inode %llu\n", 2303dcd0538fSMark Fasheh new_clusters, cpos, (unsigned long long)OCFS2_I(inode)->ip_blkno); 2304ccd979bdSMark Fasheh 2305dcd0538fSMark Fasheh mlog_bug_on_msg(!ocfs2_sparse_alloc(osb) && 2306dcd0538fSMark Fasheh (OCFS2_I(inode)->ip_clusters != cpos), 2307dcd0538fSMark Fasheh "Device %s, asking for sparse allocation: inode %llu, " 2308dcd0538fSMark Fasheh "cpos %u, clusters %u\n", 2309dcd0538fSMark Fasheh osb->dev_str, 2310dcd0538fSMark Fasheh (unsigned long long)OCFS2_I(inode)->ip_blkno, cpos, 2311dcd0538fSMark Fasheh OCFS2_I(inode)->ip_clusters); 2312ccd979bdSMark Fasheh 2313dcd0538fSMark Fasheh rec.e_cpos = cpu_to_le32(cpos); 2314dcd0538fSMark Fasheh rec.e_blkno = cpu_to_le64(start_blk); 2315dcd0538fSMark Fasheh rec.e_clusters = cpu_to_le32(new_clusters); 2316ccd979bdSMark Fasheh 2317dcd0538fSMark Fasheh status = ocfs2_figure_insert_type(inode, fe_bh, &last_eb_bh, &rec, 2318dcd0538fSMark Fasheh &insert); 2319ccd979bdSMark Fasheh if (status < 0) { 2320dcd0538fSMark Fasheh mlog_errno(status); 2321ccd979bdSMark Fasheh goto bail; 2322ccd979bdSMark Fasheh } 2323ccd979bdSMark Fasheh 2324dcd0538fSMark Fasheh mlog(0, "Insert.appending: %u, Insert.Contig: %u, " 2325dcd0538fSMark Fasheh "Insert.contig_index: %d, Insert.free_records: %d, " 2326dcd0538fSMark Fasheh "Insert.tree_depth: %d\n", 2327dcd0538fSMark Fasheh insert.ins_appending, insert.ins_contig, insert.ins_contig_index, 2328dcd0538fSMark Fasheh insert.ins_free_records, insert.ins_tree_depth); 2329dcd0538fSMark Fasheh 2330dcd0538fSMark Fasheh /* 2331dcd0538fSMark Fasheh * Avoid growing the tree unless we're out of records and the 2332dcd0538fSMark Fasheh * insert type requres one. 2333dcd0538fSMark Fasheh */ 2334dcd0538fSMark Fasheh if (insert.ins_contig != CONTIG_NONE || insert.ins_free_records) 2335ccd979bdSMark Fasheh goto out_add; 2336ccd979bdSMark Fasheh 2337ccd979bdSMark Fasheh shift = ocfs2_find_branch_target(osb, inode, fe_bh, &bh); 2338ccd979bdSMark Fasheh if (shift < 0) { 2339ccd979bdSMark Fasheh status = shift; 2340ccd979bdSMark Fasheh mlog_errno(status); 2341ccd979bdSMark Fasheh goto bail; 2342ccd979bdSMark Fasheh } 2343ccd979bdSMark Fasheh 2344ccd979bdSMark Fasheh /* We traveled all the way to the bottom of the allocation tree 2345ccd979bdSMark Fasheh * and didn't find room for any more extents - we need to add 2346ccd979bdSMark Fasheh * another tree level */ 2347ccd979bdSMark Fasheh if (shift) { 2348ccd979bdSMark Fasheh BUG_ON(bh); 2349dcd0538fSMark Fasheh mlog(0, "need to shift tree depth " 2350dcd0538fSMark Fasheh "(current = %d)\n", insert.ins_tree_depth); 2351ccd979bdSMark Fasheh 2352ccd979bdSMark Fasheh /* ocfs2_shift_tree_depth will return us a buffer with 2353ccd979bdSMark Fasheh * the new extent block (so we can pass that to 2354ccd979bdSMark Fasheh * ocfs2_add_branch). */ 2355ccd979bdSMark Fasheh status = ocfs2_shift_tree_depth(osb, handle, inode, fe_bh, 2356ccd979bdSMark Fasheh meta_ac, &bh); 2357ccd979bdSMark Fasheh if (status < 0) { 2358ccd979bdSMark Fasheh mlog_errno(status); 2359ccd979bdSMark Fasheh goto bail; 2360ccd979bdSMark Fasheh } 2361dcd0538fSMark Fasheh insert.ins_tree_depth++; 2362ccd979bdSMark Fasheh /* Special case: we have room now if we shifted from 2363ccd979bdSMark Fasheh * tree_depth 0 */ 2364dcd0538fSMark Fasheh if (insert.ins_tree_depth == 1) 2365ccd979bdSMark Fasheh goto out_add; 2366ccd979bdSMark Fasheh } 2367ccd979bdSMark Fasheh 2368ccd979bdSMark Fasheh /* call ocfs2_add_branch to add the final part of the tree with 2369ccd979bdSMark Fasheh * the new data. */ 2370dcd0538fSMark Fasheh mlog(0, "add branch. bh = %p\n", bh); 2371ccd979bdSMark Fasheh status = ocfs2_add_branch(osb, handle, inode, fe_bh, bh, last_eb_bh, 2372ccd979bdSMark Fasheh meta_ac); 2373ccd979bdSMark Fasheh if (status < 0) { 2374ccd979bdSMark Fasheh mlog_errno(status); 2375ccd979bdSMark Fasheh goto bail; 2376ccd979bdSMark Fasheh } 2377ccd979bdSMark Fasheh 2378ccd979bdSMark Fasheh out_add: 2379dcd0538fSMark Fasheh /* Finally, we can add clusters. This might rotate the tree for us. */ 2380dcd0538fSMark Fasheh status = ocfs2_do_insert_extent(inode, handle, fe_bh, &rec, &insert); 2381ccd979bdSMark Fasheh if (status < 0) 2382ccd979bdSMark Fasheh mlog_errno(status); 2383ccd979bdSMark Fasheh 2384ccd979bdSMark Fasheh bail: 2385ccd979bdSMark Fasheh if (bh) 2386ccd979bdSMark Fasheh brelse(bh); 2387ccd979bdSMark Fasheh 2388ccd979bdSMark Fasheh if (last_eb_bh) 2389ccd979bdSMark Fasheh brelse(last_eb_bh); 2390ccd979bdSMark Fasheh 2391ccd979bdSMark Fasheh mlog_exit(status); 2392ccd979bdSMark Fasheh return status; 2393ccd979bdSMark Fasheh } 2394ccd979bdSMark Fasheh 2395ccd979bdSMark Fasheh static inline int ocfs2_truncate_log_needs_flush(struct ocfs2_super *osb) 2396ccd979bdSMark Fasheh { 2397ccd979bdSMark Fasheh struct buffer_head *tl_bh = osb->osb_tl_bh; 2398ccd979bdSMark Fasheh struct ocfs2_dinode *di; 2399ccd979bdSMark Fasheh struct ocfs2_truncate_log *tl; 2400ccd979bdSMark Fasheh 2401ccd979bdSMark Fasheh di = (struct ocfs2_dinode *) tl_bh->b_data; 2402ccd979bdSMark Fasheh tl = &di->id2.i_dealloc; 2403ccd979bdSMark Fasheh 2404ccd979bdSMark Fasheh mlog_bug_on_msg(le16_to_cpu(tl->tl_used) > le16_to_cpu(tl->tl_count), 2405ccd979bdSMark Fasheh "slot %d, invalid truncate log parameters: used = " 2406ccd979bdSMark Fasheh "%u, count = %u\n", osb->slot_num, 2407ccd979bdSMark Fasheh le16_to_cpu(tl->tl_used), le16_to_cpu(tl->tl_count)); 2408ccd979bdSMark Fasheh return le16_to_cpu(tl->tl_used) == le16_to_cpu(tl->tl_count); 2409ccd979bdSMark Fasheh } 2410ccd979bdSMark Fasheh 2411ccd979bdSMark Fasheh static int ocfs2_truncate_log_can_coalesce(struct ocfs2_truncate_log *tl, 2412ccd979bdSMark Fasheh unsigned int new_start) 2413ccd979bdSMark Fasheh { 2414ccd979bdSMark Fasheh unsigned int tail_index; 2415ccd979bdSMark Fasheh unsigned int current_tail; 2416ccd979bdSMark Fasheh 2417ccd979bdSMark Fasheh /* No records, nothing to coalesce */ 2418ccd979bdSMark Fasheh if (!le16_to_cpu(tl->tl_used)) 2419ccd979bdSMark Fasheh return 0; 2420ccd979bdSMark Fasheh 2421ccd979bdSMark Fasheh tail_index = le16_to_cpu(tl->tl_used) - 1; 2422ccd979bdSMark Fasheh current_tail = le32_to_cpu(tl->tl_recs[tail_index].t_start); 2423ccd979bdSMark Fasheh current_tail += le32_to_cpu(tl->tl_recs[tail_index].t_clusters); 2424ccd979bdSMark Fasheh 2425ccd979bdSMark Fasheh return current_tail == new_start; 2426ccd979bdSMark Fasheh } 2427ccd979bdSMark Fasheh 2428ccd979bdSMark Fasheh static int ocfs2_truncate_log_append(struct ocfs2_super *osb, 24291fabe148SMark Fasheh handle_t *handle, 2430ccd979bdSMark Fasheh u64 start_blk, 2431ccd979bdSMark Fasheh unsigned int num_clusters) 2432ccd979bdSMark Fasheh { 2433ccd979bdSMark Fasheh int status, index; 2434ccd979bdSMark Fasheh unsigned int start_cluster, tl_count; 2435ccd979bdSMark Fasheh struct inode *tl_inode = osb->osb_tl_inode; 2436ccd979bdSMark Fasheh struct buffer_head *tl_bh = osb->osb_tl_bh; 2437ccd979bdSMark Fasheh struct ocfs2_dinode *di; 2438ccd979bdSMark Fasheh struct ocfs2_truncate_log *tl; 2439ccd979bdSMark Fasheh 2440b0697053SMark Fasheh mlog_entry("start_blk = %llu, num_clusters = %u\n", 2441b0697053SMark Fasheh (unsigned long long)start_blk, num_clusters); 2442ccd979bdSMark Fasheh 24431b1dcc1bSJes Sorensen BUG_ON(mutex_trylock(&tl_inode->i_mutex)); 2444ccd979bdSMark Fasheh 2445ccd979bdSMark Fasheh start_cluster = ocfs2_blocks_to_clusters(osb->sb, start_blk); 2446ccd979bdSMark Fasheh 2447ccd979bdSMark Fasheh di = (struct ocfs2_dinode *) tl_bh->b_data; 2448ccd979bdSMark Fasheh tl = &di->id2.i_dealloc; 2449ccd979bdSMark Fasheh if (!OCFS2_IS_VALID_DINODE(di)) { 2450ccd979bdSMark Fasheh OCFS2_RO_ON_INVALID_DINODE(osb->sb, di); 2451ccd979bdSMark Fasheh status = -EIO; 2452ccd979bdSMark Fasheh goto bail; 2453ccd979bdSMark Fasheh } 2454ccd979bdSMark Fasheh 2455ccd979bdSMark Fasheh tl_count = le16_to_cpu(tl->tl_count); 2456ccd979bdSMark Fasheh mlog_bug_on_msg(tl_count > ocfs2_truncate_recs_per_inode(osb->sb) || 2457ccd979bdSMark Fasheh tl_count == 0, 2458b0697053SMark Fasheh "Truncate record count on #%llu invalid " 2459b0697053SMark Fasheh "wanted %u, actual %u\n", 2460b0697053SMark Fasheh (unsigned long long)OCFS2_I(tl_inode)->ip_blkno, 2461ccd979bdSMark Fasheh ocfs2_truncate_recs_per_inode(osb->sb), 2462ccd979bdSMark Fasheh le16_to_cpu(tl->tl_count)); 2463ccd979bdSMark Fasheh 2464ccd979bdSMark Fasheh /* Caller should have known to flush before calling us. */ 2465ccd979bdSMark Fasheh index = le16_to_cpu(tl->tl_used); 2466ccd979bdSMark Fasheh if (index >= tl_count) { 2467ccd979bdSMark Fasheh status = -ENOSPC; 2468ccd979bdSMark Fasheh mlog_errno(status); 2469ccd979bdSMark Fasheh goto bail; 2470ccd979bdSMark Fasheh } 2471ccd979bdSMark Fasheh 2472ccd979bdSMark Fasheh status = ocfs2_journal_access(handle, tl_inode, tl_bh, 2473ccd979bdSMark Fasheh OCFS2_JOURNAL_ACCESS_WRITE); 2474ccd979bdSMark Fasheh if (status < 0) { 2475ccd979bdSMark Fasheh mlog_errno(status); 2476ccd979bdSMark Fasheh goto bail; 2477ccd979bdSMark Fasheh } 2478ccd979bdSMark Fasheh 2479ccd979bdSMark Fasheh mlog(0, "Log truncate of %u clusters starting at cluster %u to " 2480b0697053SMark Fasheh "%llu (index = %d)\n", num_clusters, start_cluster, 2481b0697053SMark Fasheh (unsigned long long)OCFS2_I(tl_inode)->ip_blkno, index); 2482ccd979bdSMark Fasheh 2483ccd979bdSMark Fasheh if (ocfs2_truncate_log_can_coalesce(tl, start_cluster)) { 2484ccd979bdSMark Fasheh /* 2485ccd979bdSMark Fasheh * Move index back to the record we are coalescing with. 2486ccd979bdSMark Fasheh * ocfs2_truncate_log_can_coalesce() guarantees nonzero 2487ccd979bdSMark Fasheh */ 2488ccd979bdSMark Fasheh index--; 2489ccd979bdSMark Fasheh 2490ccd979bdSMark Fasheh num_clusters += le32_to_cpu(tl->tl_recs[index].t_clusters); 2491ccd979bdSMark Fasheh mlog(0, "Coalesce with index %u (start = %u, clusters = %u)\n", 2492ccd979bdSMark Fasheh index, le32_to_cpu(tl->tl_recs[index].t_start), 2493ccd979bdSMark Fasheh num_clusters); 2494ccd979bdSMark Fasheh } else { 2495ccd979bdSMark Fasheh tl->tl_recs[index].t_start = cpu_to_le32(start_cluster); 2496ccd979bdSMark Fasheh tl->tl_used = cpu_to_le16(index + 1); 2497ccd979bdSMark Fasheh } 2498ccd979bdSMark Fasheh tl->tl_recs[index].t_clusters = cpu_to_le32(num_clusters); 2499ccd979bdSMark Fasheh 2500ccd979bdSMark Fasheh status = ocfs2_journal_dirty(handle, tl_bh); 2501ccd979bdSMark Fasheh if (status < 0) { 2502ccd979bdSMark Fasheh mlog_errno(status); 2503ccd979bdSMark Fasheh goto bail; 2504ccd979bdSMark Fasheh } 2505ccd979bdSMark Fasheh 2506ccd979bdSMark Fasheh bail: 2507ccd979bdSMark Fasheh mlog_exit(status); 2508ccd979bdSMark Fasheh return status; 2509ccd979bdSMark Fasheh } 2510ccd979bdSMark Fasheh 2511ccd979bdSMark Fasheh static int ocfs2_replay_truncate_records(struct ocfs2_super *osb, 25121fabe148SMark Fasheh handle_t *handle, 2513ccd979bdSMark Fasheh struct inode *data_alloc_inode, 2514ccd979bdSMark Fasheh struct buffer_head *data_alloc_bh) 2515ccd979bdSMark Fasheh { 2516ccd979bdSMark Fasheh int status = 0; 2517ccd979bdSMark Fasheh int i; 2518ccd979bdSMark Fasheh unsigned int num_clusters; 2519ccd979bdSMark Fasheh u64 start_blk; 2520ccd979bdSMark Fasheh struct ocfs2_truncate_rec rec; 2521ccd979bdSMark Fasheh struct ocfs2_dinode *di; 2522ccd979bdSMark Fasheh struct ocfs2_truncate_log *tl; 2523ccd979bdSMark Fasheh struct inode *tl_inode = osb->osb_tl_inode; 2524ccd979bdSMark Fasheh struct buffer_head *tl_bh = osb->osb_tl_bh; 2525ccd979bdSMark Fasheh 2526ccd979bdSMark Fasheh mlog_entry_void(); 2527ccd979bdSMark Fasheh 2528ccd979bdSMark Fasheh di = (struct ocfs2_dinode *) tl_bh->b_data; 2529ccd979bdSMark Fasheh tl = &di->id2.i_dealloc; 2530ccd979bdSMark Fasheh i = le16_to_cpu(tl->tl_used) - 1; 2531ccd979bdSMark Fasheh while (i >= 0) { 2532ccd979bdSMark Fasheh /* Caller has given us at least enough credits to 2533ccd979bdSMark Fasheh * update the truncate log dinode */ 2534ccd979bdSMark Fasheh status = ocfs2_journal_access(handle, tl_inode, tl_bh, 2535ccd979bdSMark Fasheh OCFS2_JOURNAL_ACCESS_WRITE); 2536ccd979bdSMark Fasheh if (status < 0) { 2537ccd979bdSMark Fasheh mlog_errno(status); 2538ccd979bdSMark Fasheh goto bail; 2539ccd979bdSMark Fasheh } 2540ccd979bdSMark Fasheh 2541ccd979bdSMark Fasheh tl->tl_used = cpu_to_le16(i); 2542ccd979bdSMark Fasheh 2543ccd979bdSMark Fasheh status = ocfs2_journal_dirty(handle, tl_bh); 2544ccd979bdSMark Fasheh if (status < 0) { 2545ccd979bdSMark Fasheh mlog_errno(status); 2546ccd979bdSMark Fasheh goto bail; 2547ccd979bdSMark Fasheh } 2548ccd979bdSMark Fasheh 2549ccd979bdSMark Fasheh /* TODO: Perhaps we can calculate the bulk of the 2550ccd979bdSMark Fasheh * credits up front rather than extending like 2551ccd979bdSMark Fasheh * this. */ 2552ccd979bdSMark Fasheh status = ocfs2_extend_trans(handle, 2553ccd979bdSMark Fasheh OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC); 2554ccd979bdSMark Fasheh if (status < 0) { 2555ccd979bdSMark Fasheh mlog_errno(status); 2556ccd979bdSMark Fasheh goto bail; 2557ccd979bdSMark Fasheh } 2558ccd979bdSMark Fasheh 2559ccd979bdSMark Fasheh rec = tl->tl_recs[i]; 2560ccd979bdSMark Fasheh start_blk = ocfs2_clusters_to_blocks(data_alloc_inode->i_sb, 2561ccd979bdSMark Fasheh le32_to_cpu(rec.t_start)); 2562ccd979bdSMark Fasheh num_clusters = le32_to_cpu(rec.t_clusters); 2563ccd979bdSMark Fasheh 2564ccd979bdSMark Fasheh /* if start_blk is not set, we ignore the record as 2565ccd979bdSMark Fasheh * invalid. */ 2566ccd979bdSMark Fasheh if (start_blk) { 2567ccd979bdSMark Fasheh mlog(0, "free record %d, start = %u, clusters = %u\n", 2568ccd979bdSMark Fasheh i, le32_to_cpu(rec.t_start), num_clusters); 2569ccd979bdSMark Fasheh 2570ccd979bdSMark Fasheh status = ocfs2_free_clusters(handle, data_alloc_inode, 2571ccd979bdSMark Fasheh data_alloc_bh, start_blk, 2572ccd979bdSMark Fasheh num_clusters); 2573ccd979bdSMark Fasheh if (status < 0) { 2574ccd979bdSMark Fasheh mlog_errno(status); 2575ccd979bdSMark Fasheh goto bail; 2576ccd979bdSMark Fasheh } 2577ccd979bdSMark Fasheh } 2578ccd979bdSMark Fasheh i--; 2579ccd979bdSMark Fasheh } 2580ccd979bdSMark Fasheh 2581ccd979bdSMark Fasheh bail: 2582ccd979bdSMark Fasheh mlog_exit(status); 2583ccd979bdSMark Fasheh return status; 2584ccd979bdSMark Fasheh } 2585ccd979bdSMark Fasheh 25861b1dcc1bSJes Sorensen /* Expects you to already be holding tl_inode->i_mutex */ 2587ccd979bdSMark Fasheh static int __ocfs2_flush_truncate_log(struct ocfs2_super *osb) 2588ccd979bdSMark Fasheh { 2589ccd979bdSMark Fasheh int status; 2590ccd979bdSMark Fasheh unsigned int num_to_flush; 25911fabe148SMark Fasheh handle_t *handle; 2592ccd979bdSMark Fasheh struct inode *tl_inode = osb->osb_tl_inode; 2593ccd979bdSMark Fasheh struct inode *data_alloc_inode = NULL; 2594ccd979bdSMark Fasheh struct buffer_head *tl_bh = osb->osb_tl_bh; 2595ccd979bdSMark Fasheh struct buffer_head *data_alloc_bh = NULL; 2596ccd979bdSMark Fasheh struct ocfs2_dinode *di; 2597ccd979bdSMark Fasheh struct ocfs2_truncate_log *tl; 2598ccd979bdSMark Fasheh 2599ccd979bdSMark Fasheh mlog_entry_void(); 2600ccd979bdSMark Fasheh 26011b1dcc1bSJes Sorensen BUG_ON(mutex_trylock(&tl_inode->i_mutex)); 2602ccd979bdSMark Fasheh 2603ccd979bdSMark Fasheh di = (struct ocfs2_dinode *) tl_bh->b_data; 2604ccd979bdSMark Fasheh tl = &di->id2.i_dealloc; 2605ccd979bdSMark Fasheh if (!OCFS2_IS_VALID_DINODE(di)) { 2606ccd979bdSMark Fasheh OCFS2_RO_ON_INVALID_DINODE(osb->sb, di); 2607ccd979bdSMark Fasheh status = -EIO; 2608e08dc8b9SMark Fasheh goto out; 2609ccd979bdSMark Fasheh } 2610ccd979bdSMark Fasheh 2611ccd979bdSMark Fasheh num_to_flush = le16_to_cpu(tl->tl_used); 2612b0697053SMark Fasheh mlog(0, "Flush %u records from truncate log #%llu\n", 2613b0697053SMark Fasheh num_to_flush, (unsigned long long)OCFS2_I(tl_inode)->ip_blkno); 2614ccd979bdSMark Fasheh if (!num_to_flush) { 2615ccd979bdSMark Fasheh status = 0; 2616e08dc8b9SMark Fasheh goto out; 2617ccd979bdSMark Fasheh } 2618ccd979bdSMark Fasheh 2619ccd979bdSMark Fasheh data_alloc_inode = ocfs2_get_system_file_inode(osb, 2620ccd979bdSMark Fasheh GLOBAL_BITMAP_SYSTEM_INODE, 2621ccd979bdSMark Fasheh OCFS2_INVALID_SLOT); 2622ccd979bdSMark Fasheh if (!data_alloc_inode) { 2623ccd979bdSMark Fasheh status = -EINVAL; 2624ccd979bdSMark Fasheh mlog(ML_ERROR, "Could not get bitmap inode!\n"); 2625e08dc8b9SMark Fasheh goto out; 2626ccd979bdSMark Fasheh } 2627ccd979bdSMark Fasheh 2628e08dc8b9SMark Fasheh mutex_lock(&data_alloc_inode->i_mutex); 2629e08dc8b9SMark Fasheh 26304bcec184SMark Fasheh status = ocfs2_meta_lock(data_alloc_inode, &data_alloc_bh, 1); 2631ccd979bdSMark Fasheh if (status < 0) { 2632ccd979bdSMark Fasheh mlog_errno(status); 2633e08dc8b9SMark Fasheh goto out_mutex; 2634ccd979bdSMark Fasheh } 2635ccd979bdSMark Fasheh 263665eff9ccSMark Fasheh handle = ocfs2_start_trans(osb, OCFS2_TRUNCATE_LOG_UPDATE); 2637ccd979bdSMark Fasheh if (IS_ERR(handle)) { 2638ccd979bdSMark Fasheh status = PTR_ERR(handle); 2639ccd979bdSMark Fasheh mlog_errno(status); 2640e08dc8b9SMark Fasheh goto out_unlock; 2641ccd979bdSMark Fasheh } 2642ccd979bdSMark Fasheh 2643ccd979bdSMark Fasheh status = ocfs2_replay_truncate_records(osb, handle, data_alloc_inode, 2644ccd979bdSMark Fasheh data_alloc_bh); 2645e08dc8b9SMark Fasheh if (status < 0) 2646ccd979bdSMark Fasheh mlog_errno(status); 2647ccd979bdSMark Fasheh 264802dc1af4SMark Fasheh ocfs2_commit_trans(osb, handle); 2649ccd979bdSMark Fasheh 2650e08dc8b9SMark Fasheh out_unlock: 2651e08dc8b9SMark Fasheh brelse(data_alloc_bh); 2652e08dc8b9SMark Fasheh ocfs2_meta_unlock(data_alloc_inode, 1); 2653e08dc8b9SMark Fasheh 2654e08dc8b9SMark Fasheh out_mutex: 2655e08dc8b9SMark Fasheh mutex_unlock(&data_alloc_inode->i_mutex); 2656ccd979bdSMark Fasheh iput(data_alloc_inode); 2657ccd979bdSMark Fasheh 2658e08dc8b9SMark Fasheh out: 2659ccd979bdSMark Fasheh mlog_exit(status); 2660ccd979bdSMark Fasheh return status; 2661ccd979bdSMark Fasheh } 2662ccd979bdSMark Fasheh 2663ccd979bdSMark Fasheh int ocfs2_flush_truncate_log(struct ocfs2_super *osb) 2664ccd979bdSMark Fasheh { 2665ccd979bdSMark Fasheh int status; 2666ccd979bdSMark Fasheh struct inode *tl_inode = osb->osb_tl_inode; 2667ccd979bdSMark Fasheh 26681b1dcc1bSJes Sorensen mutex_lock(&tl_inode->i_mutex); 2669ccd979bdSMark Fasheh status = __ocfs2_flush_truncate_log(osb); 26701b1dcc1bSJes Sorensen mutex_unlock(&tl_inode->i_mutex); 2671ccd979bdSMark Fasheh 2672ccd979bdSMark Fasheh return status; 2673ccd979bdSMark Fasheh } 2674ccd979bdSMark Fasheh 2675c4028958SDavid Howells static void ocfs2_truncate_log_worker(struct work_struct *work) 2676ccd979bdSMark Fasheh { 2677ccd979bdSMark Fasheh int status; 2678c4028958SDavid Howells struct ocfs2_super *osb = 2679c4028958SDavid Howells container_of(work, struct ocfs2_super, 2680c4028958SDavid Howells osb_truncate_log_wq.work); 2681ccd979bdSMark Fasheh 2682ccd979bdSMark Fasheh mlog_entry_void(); 2683ccd979bdSMark Fasheh 2684ccd979bdSMark Fasheh status = ocfs2_flush_truncate_log(osb); 2685ccd979bdSMark Fasheh if (status < 0) 2686ccd979bdSMark Fasheh mlog_errno(status); 2687ccd979bdSMark Fasheh 2688ccd979bdSMark Fasheh mlog_exit(status); 2689ccd979bdSMark Fasheh } 2690ccd979bdSMark Fasheh 2691ccd979bdSMark Fasheh #define OCFS2_TRUNCATE_LOG_FLUSH_INTERVAL (2 * HZ) 2692ccd979bdSMark Fasheh void ocfs2_schedule_truncate_log_flush(struct ocfs2_super *osb, 2693ccd979bdSMark Fasheh int cancel) 2694ccd979bdSMark Fasheh { 2695ccd979bdSMark Fasheh if (osb->osb_tl_inode) { 2696ccd979bdSMark Fasheh /* We want to push off log flushes while truncates are 2697ccd979bdSMark Fasheh * still running. */ 2698ccd979bdSMark Fasheh if (cancel) 2699ccd979bdSMark Fasheh cancel_delayed_work(&osb->osb_truncate_log_wq); 2700ccd979bdSMark Fasheh 2701ccd979bdSMark Fasheh queue_delayed_work(ocfs2_wq, &osb->osb_truncate_log_wq, 2702ccd979bdSMark Fasheh OCFS2_TRUNCATE_LOG_FLUSH_INTERVAL); 2703ccd979bdSMark Fasheh } 2704ccd979bdSMark Fasheh } 2705ccd979bdSMark Fasheh 2706ccd979bdSMark Fasheh static int ocfs2_get_truncate_log_info(struct ocfs2_super *osb, 2707ccd979bdSMark Fasheh int slot_num, 2708ccd979bdSMark Fasheh struct inode **tl_inode, 2709ccd979bdSMark Fasheh struct buffer_head **tl_bh) 2710ccd979bdSMark Fasheh { 2711ccd979bdSMark Fasheh int status; 2712ccd979bdSMark Fasheh struct inode *inode = NULL; 2713ccd979bdSMark Fasheh struct buffer_head *bh = NULL; 2714ccd979bdSMark Fasheh 2715ccd979bdSMark Fasheh inode = ocfs2_get_system_file_inode(osb, 2716ccd979bdSMark Fasheh TRUNCATE_LOG_SYSTEM_INODE, 2717ccd979bdSMark Fasheh slot_num); 2718ccd979bdSMark Fasheh if (!inode) { 2719ccd979bdSMark Fasheh status = -EINVAL; 2720ccd979bdSMark Fasheh mlog(ML_ERROR, "Could not get load truncate log inode!\n"); 2721ccd979bdSMark Fasheh goto bail; 2722ccd979bdSMark Fasheh } 2723ccd979bdSMark Fasheh 2724ccd979bdSMark Fasheh status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &bh, 2725ccd979bdSMark Fasheh OCFS2_BH_CACHED, inode); 2726ccd979bdSMark Fasheh if (status < 0) { 2727ccd979bdSMark Fasheh iput(inode); 2728ccd979bdSMark Fasheh mlog_errno(status); 2729ccd979bdSMark Fasheh goto bail; 2730ccd979bdSMark Fasheh } 2731ccd979bdSMark Fasheh 2732ccd979bdSMark Fasheh *tl_inode = inode; 2733ccd979bdSMark Fasheh *tl_bh = bh; 2734ccd979bdSMark Fasheh bail: 2735ccd979bdSMark Fasheh mlog_exit(status); 2736ccd979bdSMark Fasheh return status; 2737ccd979bdSMark Fasheh } 2738ccd979bdSMark Fasheh 2739ccd979bdSMark Fasheh /* called during the 1st stage of node recovery. we stamp a clean 2740ccd979bdSMark Fasheh * truncate log and pass back a copy for processing later. if the 2741ccd979bdSMark Fasheh * truncate log does not require processing, a *tl_copy is set to 2742ccd979bdSMark Fasheh * NULL. */ 2743ccd979bdSMark Fasheh int ocfs2_begin_truncate_log_recovery(struct ocfs2_super *osb, 2744ccd979bdSMark Fasheh int slot_num, 2745ccd979bdSMark Fasheh struct ocfs2_dinode **tl_copy) 2746ccd979bdSMark Fasheh { 2747ccd979bdSMark Fasheh int status; 2748ccd979bdSMark Fasheh struct inode *tl_inode = NULL; 2749ccd979bdSMark Fasheh struct buffer_head *tl_bh = NULL; 2750ccd979bdSMark Fasheh struct ocfs2_dinode *di; 2751ccd979bdSMark Fasheh struct ocfs2_truncate_log *tl; 2752ccd979bdSMark Fasheh 2753ccd979bdSMark Fasheh *tl_copy = NULL; 2754ccd979bdSMark Fasheh 2755ccd979bdSMark Fasheh mlog(0, "recover truncate log from slot %d\n", slot_num); 2756ccd979bdSMark Fasheh 2757ccd979bdSMark Fasheh status = ocfs2_get_truncate_log_info(osb, slot_num, &tl_inode, &tl_bh); 2758ccd979bdSMark Fasheh if (status < 0) { 2759ccd979bdSMark Fasheh mlog_errno(status); 2760ccd979bdSMark Fasheh goto bail; 2761ccd979bdSMark Fasheh } 2762ccd979bdSMark Fasheh 2763ccd979bdSMark Fasheh di = (struct ocfs2_dinode *) tl_bh->b_data; 2764ccd979bdSMark Fasheh tl = &di->id2.i_dealloc; 2765ccd979bdSMark Fasheh if (!OCFS2_IS_VALID_DINODE(di)) { 2766ccd979bdSMark Fasheh OCFS2_RO_ON_INVALID_DINODE(tl_inode->i_sb, di); 2767ccd979bdSMark Fasheh status = -EIO; 2768ccd979bdSMark Fasheh goto bail; 2769ccd979bdSMark Fasheh } 2770ccd979bdSMark Fasheh 2771ccd979bdSMark Fasheh if (le16_to_cpu(tl->tl_used)) { 2772ccd979bdSMark Fasheh mlog(0, "We'll have %u logs to recover\n", 2773ccd979bdSMark Fasheh le16_to_cpu(tl->tl_used)); 2774ccd979bdSMark Fasheh 2775ccd979bdSMark Fasheh *tl_copy = kmalloc(tl_bh->b_size, GFP_KERNEL); 2776ccd979bdSMark Fasheh if (!(*tl_copy)) { 2777ccd979bdSMark Fasheh status = -ENOMEM; 2778ccd979bdSMark Fasheh mlog_errno(status); 2779ccd979bdSMark Fasheh goto bail; 2780ccd979bdSMark Fasheh } 2781ccd979bdSMark Fasheh 2782ccd979bdSMark Fasheh /* Assuming the write-out below goes well, this copy 2783ccd979bdSMark Fasheh * will be passed back to recovery for processing. */ 2784ccd979bdSMark Fasheh memcpy(*tl_copy, tl_bh->b_data, tl_bh->b_size); 2785ccd979bdSMark Fasheh 2786ccd979bdSMark Fasheh /* All we need to do to clear the truncate log is set 2787ccd979bdSMark Fasheh * tl_used. */ 2788ccd979bdSMark Fasheh tl->tl_used = 0; 2789ccd979bdSMark Fasheh 2790ccd979bdSMark Fasheh status = ocfs2_write_block(osb, tl_bh, tl_inode); 2791ccd979bdSMark Fasheh if (status < 0) { 2792ccd979bdSMark Fasheh mlog_errno(status); 2793ccd979bdSMark Fasheh goto bail; 2794ccd979bdSMark Fasheh } 2795ccd979bdSMark Fasheh } 2796ccd979bdSMark Fasheh 2797ccd979bdSMark Fasheh bail: 2798ccd979bdSMark Fasheh if (tl_inode) 2799ccd979bdSMark Fasheh iput(tl_inode); 2800ccd979bdSMark Fasheh if (tl_bh) 2801ccd979bdSMark Fasheh brelse(tl_bh); 2802ccd979bdSMark Fasheh 2803ccd979bdSMark Fasheh if (status < 0 && (*tl_copy)) { 2804ccd979bdSMark Fasheh kfree(*tl_copy); 2805ccd979bdSMark Fasheh *tl_copy = NULL; 2806ccd979bdSMark Fasheh } 2807ccd979bdSMark Fasheh 2808ccd979bdSMark Fasheh mlog_exit(status); 2809ccd979bdSMark Fasheh return status; 2810ccd979bdSMark Fasheh } 2811ccd979bdSMark Fasheh 2812ccd979bdSMark Fasheh int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb, 2813ccd979bdSMark Fasheh struct ocfs2_dinode *tl_copy) 2814ccd979bdSMark Fasheh { 2815ccd979bdSMark Fasheh int status = 0; 2816ccd979bdSMark Fasheh int i; 2817ccd979bdSMark Fasheh unsigned int clusters, num_recs, start_cluster; 2818ccd979bdSMark Fasheh u64 start_blk; 28191fabe148SMark Fasheh handle_t *handle; 2820ccd979bdSMark Fasheh struct inode *tl_inode = osb->osb_tl_inode; 2821ccd979bdSMark Fasheh struct ocfs2_truncate_log *tl; 2822ccd979bdSMark Fasheh 2823ccd979bdSMark Fasheh mlog_entry_void(); 2824ccd979bdSMark Fasheh 2825ccd979bdSMark Fasheh if (OCFS2_I(tl_inode)->ip_blkno == le64_to_cpu(tl_copy->i_blkno)) { 2826ccd979bdSMark Fasheh mlog(ML_ERROR, "Asked to recover my own truncate log!\n"); 2827ccd979bdSMark Fasheh return -EINVAL; 2828ccd979bdSMark Fasheh } 2829ccd979bdSMark Fasheh 2830ccd979bdSMark Fasheh tl = &tl_copy->id2.i_dealloc; 2831ccd979bdSMark Fasheh num_recs = le16_to_cpu(tl->tl_used); 2832b0697053SMark Fasheh mlog(0, "cleanup %u records from %llu\n", num_recs, 2833b0697053SMark Fasheh (unsigned long long)tl_copy->i_blkno); 2834ccd979bdSMark Fasheh 28351b1dcc1bSJes Sorensen mutex_lock(&tl_inode->i_mutex); 2836ccd979bdSMark Fasheh for(i = 0; i < num_recs; i++) { 2837ccd979bdSMark Fasheh if (ocfs2_truncate_log_needs_flush(osb)) { 2838ccd979bdSMark Fasheh status = __ocfs2_flush_truncate_log(osb); 2839ccd979bdSMark Fasheh if (status < 0) { 2840ccd979bdSMark Fasheh mlog_errno(status); 2841ccd979bdSMark Fasheh goto bail_up; 2842ccd979bdSMark Fasheh } 2843ccd979bdSMark Fasheh } 2844ccd979bdSMark Fasheh 284565eff9ccSMark Fasheh handle = ocfs2_start_trans(osb, OCFS2_TRUNCATE_LOG_UPDATE); 2846ccd979bdSMark Fasheh if (IS_ERR(handle)) { 2847ccd979bdSMark Fasheh status = PTR_ERR(handle); 2848ccd979bdSMark Fasheh mlog_errno(status); 2849ccd979bdSMark Fasheh goto bail_up; 2850ccd979bdSMark Fasheh } 2851ccd979bdSMark Fasheh 2852ccd979bdSMark Fasheh clusters = le32_to_cpu(tl->tl_recs[i].t_clusters); 2853ccd979bdSMark Fasheh start_cluster = le32_to_cpu(tl->tl_recs[i].t_start); 2854ccd979bdSMark Fasheh start_blk = ocfs2_clusters_to_blocks(osb->sb, start_cluster); 2855ccd979bdSMark Fasheh 2856ccd979bdSMark Fasheh status = ocfs2_truncate_log_append(osb, handle, 2857ccd979bdSMark Fasheh start_blk, clusters); 285802dc1af4SMark Fasheh ocfs2_commit_trans(osb, handle); 2859ccd979bdSMark Fasheh if (status < 0) { 2860ccd979bdSMark Fasheh mlog_errno(status); 2861ccd979bdSMark Fasheh goto bail_up; 2862ccd979bdSMark Fasheh } 2863ccd979bdSMark Fasheh } 2864ccd979bdSMark Fasheh 2865ccd979bdSMark Fasheh bail_up: 28661b1dcc1bSJes Sorensen mutex_unlock(&tl_inode->i_mutex); 2867ccd979bdSMark Fasheh 2868ccd979bdSMark Fasheh mlog_exit(status); 2869ccd979bdSMark Fasheh return status; 2870ccd979bdSMark Fasheh } 2871ccd979bdSMark Fasheh 2872ccd979bdSMark Fasheh void ocfs2_truncate_log_shutdown(struct ocfs2_super *osb) 2873ccd979bdSMark Fasheh { 2874ccd979bdSMark Fasheh int status; 2875ccd979bdSMark Fasheh struct inode *tl_inode = osb->osb_tl_inode; 2876ccd979bdSMark Fasheh 2877ccd979bdSMark Fasheh mlog_entry_void(); 2878ccd979bdSMark Fasheh 2879ccd979bdSMark Fasheh if (tl_inode) { 2880ccd979bdSMark Fasheh cancel_delayed_work(&osb->osb_truncate_log_wq); 2881ccd979bdSMark Fasheh flush_workqueue(ocfs2_wq); 2882ccd979bdSMark Fasheh 2883ccd979bdSMark Fasheh status = ocfs2_flush_truncate_log(osb); 2884ccd979bdSMark Fasheh if (status < 0) 2885ccd979bdSMark Fasheh mlog_errno(status); 2886ccd979bdSMark Fasheh 2887ccd979bdSMark Fasheh brelse(osb->osb_tl_bh); 2888ccd979bdSMark Fasheh iput(osb->osb_tl_inode); 2889ccd979bdSMark Fasheh } 2890ccd979bdSMark Fasheh 2891ccd979bdSMark Fasheh mlog_exit_void(); 2892ccd979bdSMark Fasheh } 2893ccd979bdSMark Fasheh 2894ccd979bdSMark Fasheh int ocfs2_truncate_log_init(struct ocfs2_super *osb) 2895ccd979bdSMark Fasheh { 2896ccd979bdSMark Fasheh int status; 2897ccd979bdSMark Fasheh struct inode *tl_inode = NULL; 2898ccd979bdSMark Fasheh struct buffer_head *tl_bh = NULL; 2899ccd979bdSMark Fasheh 2900ccd979bdSMark Fasheh mlog_entry_void(); 2901ccd979bdSMark Fasheh 2902ccd979bdSMark Fasheh status = ocfs2_get_truncate_log_info(osb, 2903ccd979bdSMark Fasheh osb->slot_num, 2904ccd979bdSMark Fasheh &tl_inode, 2905ccd979bdSMark Fasheh &tl_bh); 2906ccd979bdSMark Fasheh if (status < 0) 2907ccd979bdSMark Fasheh mlog_errno(status); 2908ccd979bdSMark Fasheh 2909ccd979bdSMark Fasheh /* ocfs2_truncate_log_shutdown keys on the existence of 2910ccd979bdSMark Fasheh * osb->osb_tl_inode so we don't set any of the osb variables 2911ccd979bdSMark Fasheh * until we're sure all is well. */ 2912c4028958SDavid Howells INIT_DELAYED_WORK(&osb->osb_truncate_log_wq, 2913c4028958SDavid Howells ocfs2_truncate_log_worker); 2914ccd979bdSMark Fasheh osb->osb_tl_bh = tl_bh; 2915ccd979bdSMark Fasheh osb->osb_tl_inode = tl_inode; 2916ccd979bdSMark Fasheh 2917ccd979bdSMark Fasheh mlog_exit(status); 2918ccd979bdSMark Fasheh return status; 2919ccd979bdSMark Fasheh } 2920ccd979bdSMark Fasheh 2921ccd979bdSMark Fasheh /* This function will figure out whether the currently last extent 2922ccd979bdSMark Fasheh * block will be deleted, and if it will, what the new last extent 2923ccd979bdSMark Fasheh * block will be so we can update his h_next_leaf_blk field, as well 2924ccd979bdSMark Fasheh * as the dinodes i_last_eb_blk */ 2925dcd0538fSMark Fasheh static int ocfs2_find_new_last_ext_blk(struct inode *inode, 29263a0782d0SMark Fasheh unsigned int clusters_to_del, 2927dcd0538fSMark Fasheh struct ocfs2_path *path, 2928ccd979bdSMark Fasheh struct buffer_head **new_last_eb) 2929ccd979bdSMark Fasheh { 29303a0782d0SMark Fasheh int next_free, ret = 0; 2931dcd0538fSMark Fasheh u32 cpos; 29323a0782d0SMark Fasheh struct ocfs2_extent_rec *rec; 2933ccd979bdSMark Fasheh struct ocfs2_extent_block *eb; 2934ccd979bdSMark Fasheh struct ocfs2_extent_list *el; 2935ccd979bdSMark Fasheh struct buffer_head *bh = NULL; 2936ccd979bdSMark Fasheh 2937ccd979bdSMark Fasheh *new_last_eb = NULL; 2938ccd979bdSMark Fasheh 2939ccd979bdSMark Fasheh /* we have no tree, so of course, no last_eb. */ 2940dcd0538fSMark Fasheh if (!path->p_tree_depth) 2941dcd0538fSMark Fasheh goto out; 2942ccd979bdSMark Fasheh 2943ccd979bdSMark Fasheh /* trunc to zero special case - this makes tree_depth = 0 2944ccd979bdSMark Fasheh * regardless of what it is. */ 29453a0782d0SMark Fasheh if (OCFS2_I(inode)->ip_clusters == clusters_to_del) 2946dcd0538fSMark Fasheh goto out; 2947ccd979bdSMark Fasheh 2948dcd0538fSMark Fasheh el = path_leaf_el(path); 2949ccd979bdSMark Fasheh BUG_ON(!el->l_next_free_rec); 2950ccd979bdSMark Fasheh 29513a0782d0SMark Fasheh /* 29523a0782d0SMark Fasheh * Make sure that this extent list will actually be empty 29533a0782d0SMark Fasheh * after we clear away the data. We can shortcut out if 29543a0782d0SMark Fasheh * there's more than one non-empty extent in the 29553a0782d0SMark Fasheh * list. Otherwise, a check of the remaining extent is 29563a0782d0SMark Fasheh * necessary. 29573a0782d0SMark Fasheh */ 29583a0782d0SMark Fasheh next_free = le16_to_cpu(el->l_next_free_rec); 29593a0782d0SMark Fasheh rec = NULL; 2960dcd0538fSMark Fasheh if (ocfs2_is_empty_extent(&el->l_recs[0])) { 29613a0782d0SMark Fasheh if (next_free > 2) 2962dcd0538fSMark Fasheh goto out; 29633a0782d0SMark Fasheh 29643a0782d0SMark Fasheh /* We may have a valid extent in index 1, check it. */ 29653a0782d0SMark Fasheh if (next_free == 2) 29663a0782d0SMark Fasheh rec = &el->l_recs[1]; 29673a0782d0SMark Fasheh 29683a0782d0SMark Fasheh /* 29693a0782d0SMark Fasheh * Fall through - no more nonempty extents, so we want 29703a0782d0SMark Fasheh * to delete this leaf. 29713a0782d0SMark Fasheh */ 29723a0782d0SMark Fasheh } else { 29733a0782d0SMark Fasheh if (next_free > 1) 2974dcd0538fSMark Fasheh goto out; 2975ccd979bdSMark Fasheh 29763a0782d0SMark Fasheh rec = &el->l_recs[0]; 29773a0782d0SMark Fasheh } 29783a0782d0SMark Fasheh 29793a0782d0SMark Fasheh if (rec) { 29803a0782d0SMark Fasheh /* 29813a0782d0SMark Fasheh * Check it we'll only be trimming off the end of this 29823a0782d0SMark Fasheh * cluster. 29833a0782d0SMark Fasheh */ 29843a0782d0SMark Fasheh if (le16_to_cpu(rec->e_clusters) > clusters_to_del) 29853a0782d0SMark Fasheh goto out; 29863a0782d0SMark Fasheh } 29873a0782d0SMark Fasheh 2988dcd0538fSMark Fasheh ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, path, &cpos); 2989dcd0538fSMark Fasheh if (ret) { 2990dcd0538fSMark Fasheh mlog_errno(ret); 2991dcd0538fSMark Fasheh goto out; 2992ccd979bdSMark Fasheh } 2993ccd979bdSMark Fasheh 2994dcd0538fSMark Fasheh ret = ocfs2_find_leaf(inode, path_root_el(path), cpos, &bh); 2995dcd0538fSMark Fasheh if (ret) { 2996dcd0538fSMark Fasheh mlog_errno(ret); 2997dcd0538fSMark Fasheh goto out; 2998ccd979bdSMark Fasheh } 2999dcd0538fSMark Fasheh 3000ccd979bdSMark Fasheh eb = (struct ocfs2_extent_block *) bh->b_data; 3001ccd979bdSMark Fasheh el = &eb->h_list; 3002ccd979bdSMark Fasheh if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { 3003ccd979bdSMark Fasheh OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); 3004dcd0538fSMark Fasheh ret = -EROFS; 3005dcd0538fSMark Fasheh goto out; 3006ccd979bdSMark Fasheh } 3007ccd979bdSMark Fasheh 3008ccd979bdSMark Fasheh *new_last_eb = bh; 3009ccd979bdSMark Fasheh get_bh(*new_last_eb); 3010dcd0538fSMark Fasheh mlog(0, "returning block %llu, (cpos: %u)\n", 3011dcd0538fSMark Fasheh (unsigned long long)le64_to_cpu(eb->h_blkno), cpos); 3012dcd0538fSMark Fasheh out: 3013ccd979bdSMark Fasheh brelse(bh); 3014ccd979bdSMark Fasheh 3015dcd0538fSMark Fasheh return ret; 3016ccd979bdSMark Fasheh } 3017ccd979bdSMark Fasheh 30183a0782d0SMark Fasheh /* 30193a0782d0SMark Fasheh * Trim some clusters off the rightmost edge of a tree. Only called 30203a0782d0SMark Fasheh * during truncate. 30213a0782d0SMark Fasheh * 30223a0782d0SMark Fasheh * The caller needs to: 30233a0782d0SMark Fasheh * - start journaling of each path component. 30243a0782d0SMark Fasheh * - compute and fully set up any new last ext block 30253a0782d0SMark Fasheh */ 30263a0782d0SMark Fasheh static int ocfs2_trim_tree(struct inode *inode, struct ocfs2_path *path, 30273a0782d0SMark Fasheh handle_t *handle, struct ocfs2_truncate_context *tc, 30283a0782d0SMark Fasheh u32 clusters_to_del, u64 *delete_start) 30293a0782d0SMark Fasheh { 30303a0782d0SMark Fasheh int ret, i, index = path->p_tree_depth; 30313a0782d0SMark Fasheh u32 new_edge = 0; 30323a0782d0SMark Fasheh u64 deleted_eb = 0; 30333a0782d0SMark Fasheh struct buffer_head *bh; 30343a0782d0SMark Fasheh struct ocfs2_extent_list *el; 30353a0782d0SMark Fasheh struct ocfs2_extent_rec *rec; 30363a0782d0SMark Fasheh 30373a0782d0SMark Fasheh *delete_start = 0; 30383a0782d0SMark Fasheh 30393a0782d0SMark Fasheh while (index >= 0) { 30403a0782d0SMark Fasheh bh = path->p_node[index].bh; 30413a0782d0SMark Fasheh el = path->p_node[index].el; 30423a0782d0SMark Fasheh 30433a0782d0SMark Fasheh mlog(0, "traveling tree (index = %d, block = %llu)\n", 30443a0782d0SMark Fasheh index, (unsigned long long)bh->b_blocknr); 30453a0782d0SMark Fasheh 30463a0782d0SMark Fasheh BUG_ON(le16_to_cpu(el->l_next_free_rec) == 0); 30473a0782d0SMark Fasheh 30483a0782d0SMark Fasheh if (index != 30493a0782d0SMark Fasheh (path->p_tree_depth - le16_to_cpu(el->l_tree_depth))) { 30503a0782d0SMark Fasheh ocfs2_error(inode->i_sb, 30513a0782d0SMark Fasheh "Inode %lu has invalid ext. block %llu", 30523a0782d0SMark Fasheh inode->i_ino, 30533a0782d0SMark Fasheh (unsigned long long)bh->b_blocknr); 30543a0782d0SMark Fasheh ret = -EROFS; 30553a0782d0SMark Fasheh goto out; 30563a0782d0SMark Fasheh } 30573a0782d0SMark Fasheh 30583a0782d0SMark Fasheh find_tail_record: 30593a0782d0SMark Fasheh i = le16_to_cpu(el->l_next_free_rec) - 1; 30603a0782d0SMark Fasheh rec = &el->l_recs[i]; 30613a0782d0SMark Fasheh 30623a0782d0SMark Fasheh mlog(0, "Extent list before: record %d: (%u, %u, %llu), " 30633a0782d0SMark Fasheh "next = %u\n", i, le32_to_cpu(rec->e_cpos), 30643a0782d0SMark Fasheh le32_to_cpu(rec->e_clusters), 30653a0782d0SMark Fasheh (unsigned long long)le64_to_cpu(rec->e_blkno), 30663a0782d0SMark Fasheh le16_to_cpu(el->l_next_free_rec)); 30673a0782d0SMark Fasheh 30683a0782d0SMark Fasheh BUG_ON(le32_to_cpu(rec->e_clusters) < clusters_to_del); 30693a0782d0SMark Fasheh 30703a0782d0SMark Fasheh if (le16_to_cpu(el->l_tree_depth) == 0) { 30713a0782d0SMark Fasheh /* 30723a0782d0SMark Fasheh * If the leaf block contains a single empty 30733a0782d0SMark Fasheh * extent and no records, we can just remove 30743a0782d0SMark Fasheh * the block. 30753a0782d0SMark Fasheh */ 30763a0782d0SMark Fasheh if (i == 0 && ocfs2_is_empty_extent(rec)) { 30773a0782d0SMark Fasheh memset(rec, 0, 30783a0782d0SMark Fasheh sizeof(struct ocfs2_extent_rec)); 30793a0782d0SMark Fasheh el->l_next_free_rec = cpu_to_le16(0); 30803a0782d0SMark Fasheh 30813a0782d0SMark Fasheh goto delete; 30823a0782d0SMark Fasheh } 30833a0782d0SMark Fasheh 30843a0782d0SMark Fasheh /* 30853a0782d0SMark Fasheh * Remove any empty extents by shifting things 30863a0782d0SMark Fasheh * left. That should make life much easier on 30873a0782d0SMark Fasheh * the code below. This condition is rare 30883a0782d0SMark Fasheh * enough that we shouldn't see a performance 30893a0782d0SMark Fasheh * hit. 30903a0782d0SMark Fasheh */ 30913a0782d0SMark Fasheh if (ocfs2_is_empty_extent(&el->l_recs[0])) { 30923a0782d0SMark Fasheh le16_add_cpu(&el->l_next_free_rec, -1); 30933a0782d0SMark Fasheh 30943a0782d0SMark Fasheh for(i = 0; 30953a0782d0SMark Fasheh i < le16_to_cpu(el->l_next_free_rec); i++) 30963a0782d0SMark Fasheh el->l_recs[i] = el->l_recs[i + 1]; 30973a0782d0SMark Fasheh 30983a0782d0SMark Fasheh memset(&el->l_recs[i], 0, 30993a0782d0SMark Fasheh sizeof(struct ocfs2_extent_rec)); 31003a0782d0SMark Fasheh 31013a0782d0SMark Fasheh /* 31023a0782d0SMark Fasheh * We've modified our extent list. The 31033a0782d0SMark Fasheh * simplest way to handle this change 31043a0782d0SMark Fasheh * is to being the search from the 31053a0782d0SMark Fasheh * start again. 31063a0782d0SMark Fasheh */ 31073a0782d0SMark Fasheh goto find_tail_record; 31083a0782d0SMark Fasheh } 31093a0782d0SMark Fasheh 31103a0782d0SMark Fasheh le32_add_cpu(&rec->e_clusters, -clusters_to_del); 31113a0782d0SMark Fasheh 31123a0782d0SMark Fasheh /* 31133a0782d0SMark Fasheh * We'll use "new_edge" on our way back up the 31143a0782d0SMark Fasheh * tree to know what our rightmost cpos is. 31153a0782d0SMark Fasheh */ 31163a0782d0SMark Fasheh new_edge = le32_to_cpu(rec->e_clusters); 31173a0782d0SMark Fasheh new_edge += le32_to_cpu(rec->e_cpos); 31183a0782d0SMark Fasheh 31193a0782d0SMark Fasheh /* 31203a0782d0SMark Fasheh * The caller will use this to delete data blocks. 31213a0782d0SMark Fasheh */ 31223a0782d0SMark Fasheh *delete_start = le64_to_cpu(rec->e_blkno) 31233a0782d0SMark Fasheh + ocfs2_clusters_to_blocks(inode->i_sb, 31243a0782d0SMark Fasheh le32_to_cpu(rec->e_clusters)); 31253a0782d0SMark Fasheh 31263a0782d0SMark Fasheh /* 31273a0782d0SMark Fasheh * If it's now empty, remove this record. 31283a0782d0SMark Fasheh */ 31293a0782d0SMark Fasheh if (le32_to_cpu(rec->e_clusters) == 0) { 31303a0782d0SMark Fasheh memset(rec, 0, 31313a0782d0SMark Fasheh sizeof(struct ocfs2_extent_rec)); 31323a0782d0SMark Fasheh le16_add_cpu(&el->l_next_free_rec, -1); 31333a0782d0SMark Fasheh } 31343a0782d0SMark Fasheh } else { 31353a0782d0SMark Fasheh if (le64_to_cpu(rec->e_blkno) == deleted_eb) { 31363a0782d0SMark Fasheh memset(rec, 0, 31373a0782d0SMark Fasheh sizeof(struct ocfs2_extent_rec)); 31383a0782d0SMark Fasheh le16_add_cpu(&el->l_next_free_rec, -1); 31393a0782d0SMark Fasheh 31403a0782d0SMark Fasheh goto delete; 31413a0782d0SMark Fasheh } 31423a0782d0SMark Fasheh 31433a0782d0SMark Fasheh /* Can this actually happen? */ 31443a0782d0SMark Fasheh if (le16_to_cpu(el->l_next_free_rec) == 0) 31453a0782d0SMark Fasheh goto delete; 31463a0782d0SMark Fasheh 31473a0782d0SMark Fasheh /* 31483a0782d0SMark Fasheh * We never actually deleted any clusters 31493a0782d0SMark Fasheh * because our leaf was empty. There's no 31503a0782d0SMark Fasheh * reason to adjust the rightmost edge then. 31513a0782d0SMark Fasheh */ 31523a0782d0SMark Fasheh if (new_edge == 0) 31533a0782d0SMark Fasheh goto delete; 31543a0782d0SMark Fasheh 31553a0782d0SMark Fasheh rec->e_clusters = cpu_to_le32(new_edge); 31563a0782d0SMark Fasheh le32_add_cpu(&rec->e_clusters, 31573a0782d0SMark Fasheh -le32_to_cpu(rec->e_cpos)); 31583a0782d0SMark Fasheh 31593a0782d0SMark Fasheh /* 31603a0782d0SMark Fasheh * A deleted child record should have been 31613a0782d0SMark Fasheh * caught above. 31623a0782d0SMark Fasheh */ 31633a0782d0SMark Fasheh BUG_ON(le32_to_cpu(rec->e_clusters) == 0); 31643a0782d0SMark Fasheh } 31653a0782d0SMark Fasheh 31663a0782d0SMark Fasheh delete: 31673a0782d0SMark Fasheh ret = ocfs2_journal_dirty(handle, bh); 31683a0782d0SMark Fasheh if (ret) { 31693a0782d0SMark Fasheh mlog_errno(ret); 31703a0782d0SMark Fasheh goto out; 31713a0782d0SMark Fasheh } 31723a0782d0SMark Fasheh 31733a0782d0SMark Fasheh mlog(0, "extent list container %llu, after: record %d: " 31743a0782d0SMark Fasheh "(%u, %u, %llu), next = %u.\n", 31753a0782d0SMark Fasheh (unsigned long long)bh->b_blocknr, i, 31763a0782d0SMark Fasheh le32_to_cpu(rec->e_cpos), le32_to_cpu(rec->e_clusters), 31773a0782d0SMark Fasheh (unsigned long long)le64_to_cpu(rec->e_blkno), 31783a0782d0SMark Fasheh le16_to_cpu(el->l_next_free_rec)); 31793a0782d0SMark Fasheh 31803a0782d0SMark Fasheh /* 31813a0782d0SMark Fasheh * We must be careful to only attempt delete of an 31823a0782d0SMark Fasheh * extent block (and not the root inode block). 31833a0782d0SMark Fasheh */ 31843a0782d0SMark Fasheh if (index > 0 && le16_to_cpu(el->l_next_free_rec) == 0) { 31853a0782d0SMark Fasheh struct ocfs2_extent_block *eb = 31863a0782d0SMark Fasheh (struct ocfs2_extent_block *)bh->b_data; 31873a0782d0SMark Fasheh 31883a0782d0SMark Fasheh /* 31893a0782d0SMark Fasheh * Save this for use when processing the 31903a0782d0SMark Fasheh * parent block. 31913a0782d0SMark Fasheh */ 31923a0782d0SMark Fasheh deleted_eb = le64_to_cpu(eb->h_blkno); 31933a0782d0SMark Fasheh 31943a0782d0SMark Fasheh mlog(0, "deleting this extent block.\n"); 31953a0782d0SMark Fasheh 31963a0782d0SMark Fasheh ocfs2_remove_from_cache(inode, bh); 31973a0782d0SMark Fasheh 31983a0782d0SMark Fasheh BUG_ON(le32_to_cpu(el->l_recs[0].e_clusters)); 31993a0782d0SMark Fasheh BUG_ON(le32_to_cpu(el->l_recs[0].e_cpos)); 32003a0782d0SMark Fasheh BUG_ON(le64_to_cpu(el->l_recs[0].e_blkno)); 32013a0782d0SMark Fasheh 32023a0782d0SMark Fasheh if (le16_to_cpu(eb->h_suballoc_slot) == 0) { 32033a0782d0SMark Fasheh /* 32043a0782d0SMark Fasheh * This code only understands how to 32053a0782d0SMark Fasheh * lock the suballocator in slot 0, 32063a0782d0SMark Fasheh * which is fine because allocation is 32073a0782d0SMark Fasheh * only ever done out of that 32083a0782d0SMark Fasheh * suballocator too. A future version 32093a0782d0SMark Fasheh * might change that however, so avoid 32103a0782d0SMark Fasheh * a free if we don't know how to 32113a0782d0SMark Fasheh * handle it. This way an fs incompat 32123a0782d0SMark Fasheh * bit will not be necessary. 32133a0782d0SMark Fasheh */ 32143a0782d0SMark Fasheh ret = ocfs2_free_extent_block(handle, 32153a0782d0SMark Fasheh tc->tc_ext_alloc_inode, 32163a0782d0SMark Fasheh tc->tc_ext_alloc_bh, 32173a0782d0SMark Fasheh eb); 32183a0782d0SMark Fasheh 32193a0782d0SMark Fasheh /* An error here is not fatal. */ 32203a0782d0SMark Fasheh if (ret < 0) 32213a0782d0SMark Fasheh mlog_errno(ret); 32223a0782d0SMark Fasheh } 32233a0782d0SMark Fasheh } else { 32243a0782d0SMark Fasheh deleted_eb = 0; 32253a0782d0SMark Fasheh } 32263a0782d0SMark Fasheh 32273a0782d0SMark Fasheh index--; 32283a0782d0SMark Fasheh } 32293a0782d0SMark Fasheh 32303a0782d0SMark Fasheh ret = 0; 32313a0782d0SMark Fasheh out: 32323a0782d0SMark Fasheh return ret; 32333a0782d0SMark Fasheh } 32343a0782d0SMark Fasheh 3235ccd979bdSMark Fasheh static int ocfs2_do_truncate(struct ocfs2_super *osb, 3236ccd979bdSMark Fasheh unsigned int clusters_to_del, 3237ccd979bdSMark Fasheh struct inode *inode, 3238ccd979bdSMark Fasheh struct buffer_head *fe_bh, 32391fabe148SMark Fasheh handle_t *handle, 3240dcd0538fSMark Fasheh struct ocfs2_truncate_context *tc, 3241dcd0538fSMark Fasheh struct ocfs2_path *path) 3242ccd979bdSMark Fasheh { 32433a0782d0SMark Fasheh int status; 3244ccd979bdSMark Fasheh struct ocfs2_dinode *fe; 3245ccd979bdSMark Fasheh struct ocfs2_extent_block *last_eb = NULL; 3246ccd979bdSMark Fasheh struct ocfs2_extent_list *el; 3247ccd979bdSMark Fasheh struct buffer_head *last_eb_bh = NULL; 3248ccd979bdSMark Fasheh u64 delete_blk = 0; 3249ccd979bdSMark Fasheh 3250ccd979bdSMark Fasheh fe = (struct ocfs2_dinode *) fe_bh->b_data; 3251ccd979bdSMark Fasheh 32523a0782d0SMark Fasheh status = ocfs2_find_new_last_ext_blk(inode, clusters_to_del, 3253dcd0538fSMark Fasheh path, &last_eb_bh); 3254ccd979bdSMark Fasheh if (status < 0) { 3255ccd979bdSMark Fasheh mlog_errno(status); 3256ccd979bdSMark Fasheh goto bail; 3257ccd979bdSMark Fasheh } 3258ccd979bdSMark Fasheh 3259dcd0538fSMark Fasheh /* 3260dcd0538fSMark Fasheh * Each component will be touched, so we might as well journal 3261dcd0538fSMark Fasheh * here to avoid having to handle errors later. 3262dcd0538fSMark Fasheh */ 32633a0782d0SMark Fasheh status = ocfs2_journal_access_path(inode, handle, path); 3264ccd979bdSMark Fasheh if (status < 0) { 3265ccd979bdSMark Fasheh mlog_errno(status); 3266ccd979bdSMark Fasheh goto bail; 3267ccd979bdSMark Fasheh } 3268dcd0538fSMark Fasheh 3269dcd0538fSMark Fasheh if (last_eb_bh) { 3270dcd0538fSMark Fasheh status = ocfs2_journal_access(handle, inode, last_eb_bh, 3271dcd0538fSMark Fasheh OCFS2_JOURNAL_ACCESS_WRITE); 3272dcd0538fSMark Fasheh if (status < 0) { 3273dcd0538fSMark Fasheh mlog_errno(status); 3274dcd0538fSMark Fasheh goto bail; 3275dcd0538fSMark Fasheh } 3276dcd0538fSMark Fasheh 3277dcd0538fSMark Fasheh last_eb = (struct ocfs2_extent_block *) last_eb_bh->b_data; 3278dcd0538fSMark Fasheh } 3279dcd0538fSMark Fasheh 3280ccd979bdSMark Fasheh el = &(fe->id2.i_list); 3281ccd979bdSMark Fasheh 3282dcd0538fSMark Fasheh /* 3283dcd0538fSMark Fasheh * Lower levels depend on this never happening, but it's best 3284dcd0538fSMark Fasheh * to check it up here before changing the tree. 3285dcd0538fSMark Fasheh */ 3286dcd0538fSMark Fasheh if (el->l_tree_depth && ocfs2_is_empty_extent(&el->l_recs[0])) { 3287dcd0538fSMark Fasheh ocfs2_error(inode->i_sb, 3288dcd0538fSMark Fasheh "Inode %lu has an empty extent record, depth %u\n", 3289dcd0538fSMark Fasheh inode->i_ino, le16_to_cpu(el->l_tree_depth)); 32903a0782d0SMark Fasheh status = -EROFS; 3291dcd0538fSMark Fasheh goto bail; 3292dcd0538fSMark Fasheh } 3293dcd0538fSMark Fasheh 3294ccd979bdSMark Fasheh spin_lock(&OCFS2_I(inode)->ip_lock); 3295ccd979bdSMark Fasheh OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters) - 3296ccd979bdSMark Fasheh clusters_to_del; 3297ccd979bdSMark Fasheh spin_unlock(&OCFS2_I(inode)->ip_lock); 3298ccd979bdSMark Fasheh le32_add_cpu(&fe->i_clusters, -clusters_to_del); 3299ccd979bdSMark Fasheh 33003a0782d0SMark Fasheh status = ocfs2_trim_tree(inode, path, handle, tc, 33013a0782d0SMark Fasheh clusters_to_del, &delete_blk); 33023a0782d0SMark Fasheh if (status) { 33033a0782d0SMark Fasheh mlog_errno(status); 33043a0782d0SMark Fasheh goto bail; 3305ccd979bdSMark Fasheh } 3306ccd979bdSMark Fasheh 3307dcd0538fSMark Fasheh if (le32_to_cpu(fe->i_clusters) == 0) { 3308ccd979bdSMark Fasheh /* trunc to zero is a special case. */ 3309ccd979bdSMark Fasheh el->l_tree_depth = 0; 3310ccd979bdSMark Fasheh fe->i_last_eb_blk = 0; 3311ccd979bdSMark Fasheh } else if (last_eb) 3312ccd979bdSMark Fasheh fe->i_last_eb_blk = last_eb->h_blkno; 3313ccd979bdSMark Fasheh 3314ccd979bdSMark Fasheh status = ocfs2_journal_dirty(handle, fe_bh); 3315ccd979bdSMark Fasheh if (status < 0) { 3316ccd979bdSMark Fasheh mlog_errno(status); 3317ccd979bdSMark Fasheh goto bail; 3318ccd979bdSMark Fasheh } 3319ccd979bdSMark Fasheh 3320ccd979bdSMark Fasheh if (last_eb) { 3321ccd979bdSMark Fasheh /* If there will be a new last extent block, then by 3322ccd979bdSMark Fasheh * definition, there cannot be any leaves to the right of 3323ccd979bdSMark Fasheh * him. */ 3324ccd979bdSMark Fasheh last_eb->h_next_leaf_blk = 0; 3325ccd979bdSMark Fasheh status = ocfs2_journal_dirty(handle, last_eb_bh); 3326ccd979bdSMark Fasheh if (status < 0) { 3327ccd979bdSMark Fasheh mlog_errno(status); 3328ccd979bdSMark Fasheh goto bail; 3329ccd979bdSMark Fasheh } 3330ccd979bdSMark Fasheh } 3331ccd979bdSMark Fasheh 33323a0782d0SMark Fasheh if (delete_blk) { 3333ccd979bdSMark Fasheh status = ocfs2_truncate_log_append(osb, handle, delete_blk, 3334ccd979bdSMark Fasheh clusters_to_del); 3335ccd979bdSMark Fasheh if (status < 0) { 3336ccd979bdSMark Fasheh mlog_errno(status); 3337ccd979bdSMark Fasheh goto bail; 3338ccd979bdSMark Fasheh } 33393a0782d0SMark Fasheh } 3340ccd979bdSMark Fasheh status = 0; 3341ccd979bdSMark Fasheh bail: 3342dcd0538fSMark Fasheh 3343ccd979bdSMark Fasheh mlog_exit(status); 3344ccd979bdSMark Fasheh return status; 3345ccd979bdSMark Fasheh } 3346ccd979bdSMark Fasheh 334760b11392SMark Fasheh static int ocfs2_writeback_zero_func(handle_t *handle, struct buffer_head *bh) 334860b11392SMark Fasheh { 334960b11392SMark Fasheh set_buffer_uptodate(bh); 335060b11392SMark Fasheh mark_buffer_dirty(bh); 335160b11392SMark Fasheh return 0; 335260b11392SMark Fasheh } 335360b11392SMark Fasheh 335460b11392SMark Fasheh static int ocfs2_ordered_zero_func(handle_t *handle, struct buffer_head *bh) 335560b11392SMark Fasheh { 335660b11392SMark Fasheh set_buffer_uptodate(bh); 335760b11392SMark Fasheh mark_buffer_dirty(bh); 335860b11392SMark Fasheh return ocfs2_journal_dirty_data(handle, bh); 335960b11392SMark Fasheh } 336060b11392SMark Fasheh 336160b11392SMark Fasheh static void ocfs2_zero_cluster_pages(struct inode *inode, loff_t isize, 336260b11392SMark Fasheh struct page **pages, int numpages, 336360b11392SMark Fasheh u64 phys, handle_t *handle) 336460b11392SMark Fasheh { 336560b11392SMark Fasheh int i, ret, partial = 0; 336660b11392SMark Fasheh void *kaddr; 336760b11392SMark Fasheh struct page *page; 336860b11392SMark Fasheh unsigned int from, to = PAGE_CACHE_SIZE; 336960b11392SMark Fasheh struct super_block *sb = inode->i_sb; 337060b11392SMark Fasheh 337160b11392SMark Fasheh BUG_ON(!ocfs2_sparse_alloc(OCFS2_SB(sb))); 337260b11392SMark Fasheh 337360b11392SMark Fasheh if (numpages == 0) 337460b11392SMark Fasheh goto out; 337560b11392SMark Fasheh 337660b11392SMark Fasheh from = isize & (PAGE_CACHE_SIZE - 1); /* 1st page offset */ 337760b11392SMark Fasheh if (PAGE_CACHE_SHIFT > OCFS2_SB(sb)->s_clustersize_bits) { 337860b11392SMark Fasheh /* 337960b11392SMark Fasheh * Since 'from' has been capped to a value below page 338060b11392SMark Fasheh * size, this calculation won't be able to overflow 338160b11392SMark Fasheh * 'to' 338260b11392SMark Fasheh */ 338360b11392SMark Fasheh to = ocfs2_align_bytes_to_clusters(sb, from); 338460b11392SMark Fasheh 338560b11392SMark Fasheh /* 338660b11392SMark Fasheh * The truncate tail in this case should never contain 338760b11392SMark Fasheh * more than one page at maximum. The loop below also 338860b11392SMark Fasheh * assumes this. 338960b11392SMark Fasheh */ 339060b11392SMark Fasheh BUG_ON(numpages != 1); 339160b11392SMark Fasheh } 339260b11392SMark Fasheh 339360b11392SMark Fasheh for(i = 0; i < numpages; i++) { 339460b11392SMark Fasheh page = pages[i]; 339560b11392SMark Fasheh 339660b11392SMark Fasheh BUG_ON(from > PAGE_CACHE_SIZE); 339760b11392SMark Fasheh BUG_ON(to > PAGE_CACHE_SIZE); 339860b11392SMark Fasheh 339960b11392SMark Fasheh ret = ocfs2_map_page_blocks(page, &phys, inode, from, to, 0); 340060b11392SMark Fasheh if (ret) 340160b11392SMark Fasheh mlog_errno(ret); 340260b11392SMark Fasheh 340360b11392SMark Fasheh kaddr = kmap_atomic(page, KM_USER0); 340460b11392SMark Fasheh memset(kaddr + from, 0, to - from); 340560b11392SMark Fasheh kunmap_atomic(kaddr, KM_USER0); 340660b11392SMark Fasheh 340760b11392SMark Fasheh /* 340860b11392SMark Fasheh * Need to set the buffers we zero'd into uptodate 340960b11392SMark Fasheh * here if they aren't - ocfs2_map_page_blocks() 341060b11392SMark Fasheh * might've skipped some 341160b11392SMark Fasheh */ 341260b11392SMark Fasheh if (ocfs2_should_order_data(inode)) { 341360b11392SMark Fasheh ret = walk_page_buffers(handle, 341460b11392SMark Fasheh page_buffers(page), 341560b11392SMark Fasheh from, to, &partial, 341660b11392SMark Fasheh ocfs2_ordered_zero_func); 341760b11392SMark Fasheh if (ret < 0) 341860b11392SMark Fasheh mlog_errno(ret); 341960b11392SMark Fasheh } else { 342060b11392SMark Fasheh ret = walk_page_buffers(handle, page_buffers(page), 342160b11392SMark Fasheh from, to, &partial, 342260b11392SMark Fasheh ocfs2_writeback_zero_func); 342360b11392SMark Fasheh if (ret < 0) 342460b11392SMark Fasheh mlog_errno(ret); 342560b11392SMark Fasheh } 342660b11392SMark Fasheh 342760b11392SMark Fasheh if (!partial) 342860b11392SMark Fasheh SetPageUptodate(page); 342960b11392SMark Fasheh 343060b11392SMark Fasheh flush_dcache_page(page); 343160b11392SMark Fasheh 343260b11392SMark Fasheh /* 343360b11392SMark Fasheh * Every page after the 1st one should be completely zero'd. 343460b11392SMark Fasheh */ 343560b11392SMark Fasheh from = 0; 343660b11392SMark Fasheh } 343760b11392SMark Fasheh out: 343860b11392SMark Fasheh if (pages) { 343960b11392SMark Fasheh for (i = 0; i < numpages; i++) { 344060b11392SMark Fasheh page = pages[i]; 344160b11392SMark Fasheh unlock_page(page); 344260b11392SMark Fasheh mark_page_accessed(page); 344360b11392SMark Fasheh page_cache_release(page); 344460b11392SMark Fasheh } 344560b11392SMark Fasheh } 344660b11392SMark Fasheh } 344760b11392SMark Fasheh 344860b11392SMark Fasheh static int ocfs2_grab_eof_pages(struct inode *inode, loff_t isize, struct page **pages, 344960b11392SMark Fasheh int *num, u64 *phys) 345060b11392SMark Fasheh { 345160b11392SMark Fasheh int i, numpages = 0, ret = 0; 345260b11392SMark Fasheh unsigned int csize = OCFS2_SB(inode->i_sb)->s_clustersize; 345360b11392SMark Fasheh struct super_block *sb = inode->i_sb; 345460b11392SMark Fasheh struct address_space *mapping = inode->i_mapping; 345560b11392SMark Fasheh unsigned long index; 345660b11392SMark Fasheh u64 next_cluster_bytes; 345760b11392SMark Fasheh 345860b11392SMark Fasheh BUG_ON(!ocfs2_sparse_alloc(OCFS2_SB(sb))); 345960b11392SMark Fasheh 346060b11392SMark Fasheh /* Cluster boundary, so we don't need to grab any pages. */ 346160b11392SMark Fasheh if ((isize & (csize - 1)) == 0) 346260b11392SMark Fasheh goto out; 346360b11392SMark Fasheh 346460b11392SMark Fasheh ret = ocfs2_extent_map_get_blocks(inode, isize >> sb->s_blocksize_bits, 346560b11392SMark Fasheh phys, NULL); 346660b11392SMark Fasheh if (ret) { 346760b11392SMark Fasheh mlog_errno(ret); 346860b11392SMark Fasheh goto out; 346960b11392SMark Fasheh } 347060b11392SMark Fasheh 347160b11392SMark Fasheh /* Tail is a hole. */ 347260b11392SMark Fasheh if (*phys == 0) 347360b11392SMark Fasheh goto out; 347460b11392SMark Fasheh 347560b11392SMark Fasheh next_cluster_bytes = ocfs2_align_bytes_to_clusters(inode->i_sb, isize); 347660b11392SMark Fasheh index = isize >> PAGE_CACHE_SHIFT; 347760b11392SMark Fasheh do { 347860b11392SMark Fasheh pages[numpages] = grab_cache_page(mapping, index); 347960b11392SMark Fasheh if (!pages[numpages]) { 348060b11392SMark Fasheh ret = -ENOMEM; 348160b11392SMark Fasheh mlog_errno(ret); 348260b11392SMark Fasheh goto out; 348360b11392SMark Fasheh } 348460b11392SMark Fasheh 348560b11392SMark Fasheh numpages++; 348660b11392SMark Fasheh index++; 348760b11392SMark Fasheh } while (index < (next_cluster_bytes >> PAGE_CACHE_SHIFT)); 348860b11392SMark Fasheh 348960b11392SMark Fasheh out: 349060b11392SMark Fasheh if (ret != 0) { 349160b11392SMark Fasheh if (pages) { 349260b11392SMark Fasheh for (i = 0; i < numpages; i++) { 349360b11392SMark Fasheh if (pages[i]) { 349460b11392SMark Fasheh unlock_page(pages[i]); 349560b11392SMark Fasheh page_cache_release(pages[i]); 349660b11392SMark Fasheh } 349760b11392SMark Fasheh } 349860b11392SMark Fasheh } 349960b11392SMark Fasheh numpages = 0; 350060b11392SMark Fasheh } 350160b11392SMark Fasheh 350260b11392SMark Fasheh *num = numpages; 350360b11392SMark Fasheh 350460b11392SMark Fasheh return ret; 350560b11392SMark Fasheh } 350660b11392SMark Fasheh 350760b11392SMark Fasheh /* 350860b11392SMark Fasheh * Zero the area past i_size but still within an allocated 350960b11392SMark Fasheh * cluster. This avoids exposing nonzero data on subsequent file 351060b11392SMark Fasheh * extends. 351160b11392SMark Fasheh * 351260b11392SMark Fasheh * We need to call this before i_size is updated on the inode because 351360b11392SMark Fasheh * otherwise block_write_full_page() will skip writeout of pages past 351460b11392SMark Fasheh * i_size. The new_i_size parameter is passed for this reason. 351560b11392SMark Fasheh */ 351660b11392SMark Fasheh int ocfs2_zero_tail_for_truncate(struct inode *inode, handle_t *handle, 351760b11392SMark Fasheh u64 new_i_size) 351860b11392SMark Fasheh { 351960b11392SMark Fasheh int ret, numpages; 3520*fa41045fSMark Fasheh loff_t endbyte; 352160b11392SMark Fasheh struct page **pages = NULL; 352260b11392SMark Fasheh u64 phys; 352360b11392SMark Fasheh 352460b11392SMark Fasheh /* 352560b11392SMark Fasheh * File systems which don't support sparse files zero on every 352660b11392SMark Fasheh * extend. 352760b11392SMark Fasheh */ 352860b11392SMark Fasheh if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) 352960b11392SMark Fasheh return 0; 353060b11392SMark Fasheh 353160b11392SMark Fasheh pages = kcalloc(ocfs2_pages_per_cluster(inode->i_sb), 353260b11392SMark Fasheh sizeof(struct page *), GFP_NOFS); 353360b11392SMark Fasheh if (pages == NULL) { 353460b11392SMark Fasheh ret = -ENOMEM; 353560b11392SMark Fasheh mlog_errno(ret); 353660b11392SMark Fasheh goto out; 353760b11392SMark Fasheh } 353860b11392SMark Fasheh 353960b11392SMark Fasheh ret = ocfs2_grab_eof_pages(inode, new_i_size, pages, &numpages, &phys); 354060b11392SMark Fasheh if (ret) { 354160b11392SMark Fasheh mlog_errno(ret); 354260b11392SMark Fasheh goto out; 354360b11392SMark Fasheh } 354460b11392SMark Fasheh 354560b11392SMark Fasheh /* 354660b11392SMark Fasheh * Truncate on an i_size boundary - nothing more to do. 354760b11392SMark Fasheh */ 354860b11392SMark Fasheh if (numpages == 0) 354960b11392SMark Fasheh goto out; 355060b11392SMark Fasheh 355160b11392SMark Fasheh ocfs2_zero_cluster_pages(inode, new_i_size, pages, numpages, phys, 355260b11392SMark Fasheh handle); 355360b11392SMark Fasheh 355460b11392SMark Fasheh /* 355560b11392SMark Fasheh * Initiate writeout of the pages we zero'd here. We don't 355660b11392SMark Fasheh * wait on them - the truncate_inode_pages() call later will 355760b11392SMark Fasheh * do that for us. 355860b11392SMark Fasheh */ 3559*fa41045fSMark Fasheh endbyte = ocfs2_align_bytes_to_clusters(inode->i_sb, new_i_size); 3560*fa41045fSMark Fasheh ret = do_sync_mapping_range(inode->i_mapping, new_i_size, 3561*fa41045fSMark Fasheh endbyte - 1, SYNC_FILE_RANGE_WRITE); 356260b11392SMark Fasheh if (ret) 356360b11392SMark Fasheh mlog_errno(ret); 356460b11392SMark Fasheh 356560b11392SMark Fasheh out: 356660b11392SMark Fasheh if (pages) 356760b11392SMark Fasheh kfree(pages); 356860b11392SMark Fasheh 356960b11392SMark Fasheh return ret; 357060b11392SMark Fasheh } 357160b11392SMark Fasheh 3572ccd979bdSMark Fasheh /* 3573ccd979bdSMark Fasheh * It is expected, that by the time you call this function, 3574ccd979bdSMark Fasheh * inode->i_size and fe->i_size have been adjusted. 3575ccd979bdSMark Fasheh * 3576ccd979bdSMark Fasheh * WARNING: This will kfree the truncate context 3577ccd979bdSMark Fasheh */ 3578ccd979bdSMark Fasheh int ocfs2_commit_truncate(struct ocfs2_super *osb, 3579ccd979bdSMark Fasheh struct inode *inode, 3580ccd979bdSMark Fasheh struct buffer_head *fe_bh, 3581ccd979bdSMark Fasheh struct ocfs2_truncate_context *tc) 3582ccd979bdSMark Fasheh { 3583ccd979bdSMark Fasheh int status, i, credits, tl_sem = 0; 3584dcd0538fSMark Fasheh u32 clusters_to_del, new_highest_cpos, range; 3585ccd979bdSMark Fasheh struct ocfs2_extent_list *el; 35861fabe148SMark Fasheh handle_t *handle = NULL; 3587ccd979bdSMark Fasheh struct inode *tl_inode = osb->osb_tl_inode; 3588dcd0538fSMark Fasheh struct ocfs2_path *path = NULL; 3589ccd979bdSMark Fasheh 3590ccd979bdSMark Fasheh mlog_entry_void(); 3591ccd979bdSMark Fasheh 3592ccd979bdSMark Fasheh down_write(&OCFS2_I(inode)->ip_alloc_sem); 3593ccd979bdSMark Fasheh 3594dcd0538fSMark Fasheh new_highest_cpos = ocfs2_clusters_for_bytes(osb->sb, 3595ccd979bdSMark Fasheh i_size_read(inode)); 3596ccd979bdSMark Fasheh 3597dcd0538fSMark Fasheh path = ocfs2_new_inode_path(fe_bh); 3598dcd0538fSMark Fasheh if (!path) { 3599dcd0538fSMark Fasheh status = -ENOMEM; 3600ccd979bdSMark Fasheh mlog_errno(status); 3601ccd979bdSMark Fasheh goto bail; 3602ccd979bdSMark Fasheh } 3603dcd0538fSMark Fasheh start: 3604dcd0538fSMark Fasheh /* 36053a0782d0SMark Fasheh * Check that we still have allocation to delete. 36063a0782d0SMark Fasheh */ 36073a0782d0SMark Fasheh if (OCFS2_I(inode)->ip_clusters == 0) { 36083a0782d0SMark Fasheh status = 0; 36093a0782d0SMark Fasheh goto bail; 36103a0782d0SMark Fasheh } 36113a0782d0SMark Fasheh 36123a0782d0SMark Fasheh /* 3613dcd0538fSMark Fasheh * Truncate always works against the rightmost tree branch. 3614dcd0538fSMark Fasheh */ 3615dcd0538fSMark Fasheh status = ocfs2_find_path(inode, path, UINT_MAX); 3616dcd0538fSMark Fasheh if (status) { 3617dcd0538fSMark Fasheh mlog_errno(status); 3618ccd979bdSMark Fasheh goto bail; 3619ccd979bdSMark Fasheh } 3620ccd979bdSMark Fasheh 3621dcd0538fSMark Fasheh mlog(0, "inode->ip_clusters = %u, tree_depth = %u\n", 3622dcd0538fSMark Fasheh OCFS2_I(inode)->ip_clusters, path->p_tree_depth); 3623dcd0538fSMark Fasheh 3624dcd0538fSMark Fasheh /* 3625dcd0538fSMark Fasheh * By now, el will point to the extent list on the bottom most 3626dcd0538fSMark Fasheh * portion of this tree. Only the tail record is considered in 3627dcd0538fSMark Fasheh * each pass. 3628dcd0538fSMark Fasheh * 3629dcd0538fSMark Fasheh * We handle the following cases, in order: 3630dcd0538fSMark Fasheh * - empty extent: delete the remaining branch 3631dcd0538fSMark Fasheh * - remove the entire record 3632dcd0538fSMark Fasheh * - remove a partial record 3633dcd0538fSMark Fasheh * - no record needs to be removed (truncate has completed) 3634dcd0538fSMark Fasheh */ 3635dcd0538fSMark Fasheh el = path_leaf_el(path); 36363a0782d0SMark Fasheh if (le16_to_cpu(el->l_next_free_rec) == 0) { 36373a0782d0SMark Fasheh ocfs2_error(inode->i_sb, 36383a0782d0SMark Fasheh "Inode %llu has empty extent block at %llu\n", 36393a0782d0SMark Fasheh (unsigned long long)OCFS2_I(inode)->ip_blkno, 36403a0782d0SMark Fasheh (unsigned long long)path_leaf_bh(path)->b_blocknr); 36413a0782d0SMark Fasheh status = -EROFS; 36423a0782d0SMark Fasheh goto bail; 36433a0782d0SMark Fasheh } 36443a0782d0SMark Fasheh 3645ccd979bdSMark Fasheh i = le16_to_cpu(el->l_next_free_rec) - 1; 3646dcd0538fSMark Fasheh range = le32_to_cpu(el->l_recs[i].e_cpos) + 3647dcd0538fSMark Fasheh le32_to_cpu(el->l_recs[i].e_clusters); 3648dcd0538fSMark Fasheh if (i == 0 && ocfs2_is_empty_extent(&el->l_recs[i])) { 3649dcd0538fSMark Fasheh clusters_to_del = 0; 3650dcd0538fSMark Fasheh } else if (le32_to_cpu(el->l_recs[i].e_cpos) >= new_highest_cpos) { 3651ccd979bdSMark Fasheh clusters_to_del = le32_to_cpu(el->l_recs[i].e_clusters); 3652dcd0538fSMark Fasheh } else if (range > new_highest_cpos) { 3653ccd979bdSMark Fasheh clusters_to_del = (le32_to_cpu(el->l_recs[i].e_clusters) + 3654ccd979bdSMark Fasheh le32_to_cpu(el->l_recs[i].e_cpos)) - 3655dcd0538fSMark Fasheh new_highest_cpos; 3656dcd0538fSMark Fasheh } else { 3657dcd0538fSMark Fasheh status = 0; 3658dcd0538fSMark Fasheh goto bail; 3659dcd0538fSMark Fasheh } 3660ccd979bdSMark Fasheh 3661dcd0538fSMark Fasheh mlog(0, "clusters_to_del = %u in this pass, tail blk=%llu\n", 3662dcd0538fSMark Fasheh clusters_to_del, (unsigned long long)path_leaf_bh(path)->b_blocknr); 3663dcd0538fSMark Fasheh 3664dcd0538fSMark Fasheh BUG_ON(clusters_to_del == 0); 3665ccd979bdSMark Fasheh 36661b1dcc1bSJes Sorensen mutex_lock(&tl_inode->i_mutex); 3667ccd979bdSMark Fasheh tl_sem = 1; 3668ccd979bdSMark Fasheh /* ocfs2_truncate_log_needs_flush guarantees us at least one 3669ccd979bdSMark Fasheh * record is free for use. If there isn't any, we flush to get 3670ccd979bdSMark Fasheh * an empty truncate log. */ 3671ccd979bdSMark Fasheh if (ocfs2_truncate_log_needs_flush(osb)) { 3672ccd979bdSMark Fasheh status = __ocfs2_flush_truncate_log(osb); 3673ccd979bdSMark Fasheh if (status < 0) { 3674ccd979bdSMark Fasheh mlog_errno(status); 3675ccd979bdSMark Fasheh goto bail; 3676ccd979bdSMark Fasheh } 3677ccd979bdSMark Fasheh } 3678ccd979bdSMark Fasheh 3679ccd979bdSMark Fasheh credits = ocfs2_calc_tree_trunc_credits(osb->sb, clusters_to_del, 3680dcd0538fSMark Fasheh (struct ocfs2_dinode *)fe_bh->b_data, 3681dcd0538fSMark Fasheh el); 368265eff9ccSMark Fasheh handle = ocfs2_start_trans(osb, credits); 3683ccd979bdSMark Fasheh if (IS_ERR(handle)) { 3684ccd979bdSMark Fasheh status = PTR_ERR(handle); 3685ccd979bdSMark Fasheh handle = NULL; 3686ccd979bdSMark Fasheh mlog_errno(status); 3687ccd979bdSMark Fasheh goto bail; 3688ccd979bdSMark Fasheh } 3689ccd979bdSMark Fasheh 3690dcd0538fSMark Fasheh status = ocfs2_do_truncate(osb, clusters_to_del, inode, fe_bh, handle, 3691dcd0538fSMark Fasheh tc, path); 3692ccd979bdSMark Fasheh if (status < 0) { 3693ccd979bdSMark Fasheh mlog_errno(status); 3694ccd979bdSMark Fasheh goto bail; 3695ccd979bdSMark Fasheh } 3696ccd979bdSMark Fasheh 36971b1dcc1bSJes Sorensen mutex_unlock(&tl_inode->i_mutex); 3698ccd979bdSMark Fasheh tl_sem = 0; 3699ccd979bdSMark Fasheh 370002dc1af4SMark Fasheh ocfs2_commit_trans(osb, handle); 3701ccd979bdSMark Fasheh handle = NULL; 3702ccd979bdSMark Fasheh 3703dcd0538fSMark Fasheh ocfs2_reinit_path(path, 1); 3704dcd0538fSMark Fasheh 3705dcd0538fSMark Fasheh /* 37063a0782d0SMark Fasheh * The check above will catch the case where we've truncated 37073a0782d0SMark Fasheh * away all allocation. 3708dcd0538fSMark Fasheh */ 3709ccd979bdSMark Fasheh goto start; 37103a0782d0SMark Fasheh 3711ccd979bdSMark Fasheh bail: 3712ccd979bdSMark Fasheh up_write(&OCFS2_I(inode)->ip_alloc_sem); 3713ccd979bdSMark Fasheh 3714ccd979bdSMark Fasheh ocfs2_schedule_truncate_log_flush(osb, 1); 3715ccd979bdSMark Fasheh 3716ccd979bdSMark Fasheh if (tl_sem) 37171b1dcc1bSJes Sorensen mutex_unlock(&tl_inode->i_mutex); 3718ccd979bdSMark Fasheh 3719ccd979bdSMark Fasheh if (handle) 372002dc1af4SMark Fasheh ocfs2_commit_trans(osb, handle); 3721ccd979bdSMark Fasheh 3722dcd0538fSMark Fasheh ocfs2_free_path(path); 3723ccd979bdSMark Fasheh 3724ccd979bdSMark Fasheh /* This will drop the ext_alloc cluster lock for us */ 3725ccd979bdSMark Fasheh ocfs2_free_truncate_context(tc); 3726ccd979bdSMark Fasheh 3727ccd979bdSMark Fasheh mlog_exit(status); 3728ccd979bdSMark Fasheh return status; 3729ccd979bdSMark Fasheh } 3730ccd979bdSMark Fasheh 3731ccd979bdSMark Fasheh /* 3732ccd979bdSMark Fasheh * Expects the inode to already be locked. This will figure out which 3733ccd979bdSMark Fasheh * inodes need to be locked and will put them on the returned truncate 3734ccd979bdSMark Fasheh * context. 3735ccd979bdSMark Fasheh */ 3736ccd979bdSMark Fasheh int ocfs2_prepare_truncate(struct ocfs2_super *osb, 3737ccd979bdSMark Fasheh struct inode *inode, 3738ccd979bdSMark Fasheh struct buffer_head *fe_bh, 3739ccd979bdSMark Fasheh struct ocfs2_truncate_context **tc) 3740ccd979bdSMark Fasheh { 3741dcd0538fSMark Fasheh int status, metadata_delete, i; 3742ccd979bdSMark Fasheh unsigned int new_i_clusters; 3743ccd979bdSMark Fasheh struct ocfs2_dinode *fe; 3744ccd979bdSMark Fasheh struct ocfs2_extent_block *eb; 3745ccd979bdSMark Fasheh struct ocfs2_extent_list *el; 3746ccd979bdSMark Fasheh struct buffer_head *last_eb_bh = NULL; 3747ccd979bdSMark Fasheh struct inode *ext_alloc_inode = NULL; 3748ccd979bdSMark Fasheh struct buffer_head *ext_alloc_bh = NULL; 3749ccd979bdSMark Fasheh 3750ccd979bdSMark Fasheh mlog_entry_void(); 3751ccd979bdSMark Fasheh 3752ccd979bdSMark Fasheh *tc = NULL; 3753ccd979bdSMark Fasheh 3754ccd979bdSMark Fasheh new_i_clusters = ocfs2_clusters_for_bytes(osb->sb, 3755ccd979bdSMark Fasheh i_size_read(inode)); 3756ccd979bdSMark Fasheh fe = (struct ocfs2_dinode *) fe_bh->b_data; 3757ccd979bdSMark Fasheh 3758ccd979bdSMark Fasheh mlog(0, "fe->i_clusters = %u, new_i_clusters = %u, fe->i_size =" 3759b0697053SMark Fasheh "%llu\n", fe->i_clusters, new_i_clusters, 3760b0697053SMark Fasheh (unsigned long long)fe->i_size); 3761ccd979bdSMark Fasheh 3762cd861280SRobert P. J. Day *tc = kzalloc(sizeof(struct ocfs2_truncate_context), GFP_KERNEL); 3763ccd979bdSMark Fasheh if (!(*tc)) { 3764ccd979bdSMark Fasheh status = -ENOMEM; 3765ccd979bdSMark Fasheh mlog_errno(status); 3766ccd979bdSMark Fasheh goto bail; 3767ccd979bdSMark Fasheh } 3768ccd979bdSMark Fasheh 3769ccd979bdSMark Fasheh metadata_delete = 0; 3770ccd979bdSMark Fasheh if (fe->id2.i_list.l_tree_depth) { 3771ccd979bdSMark Fasheh /* If we have a tree, then the truncate may result in 3772ccd979bdSMark Fasheh * metadata deletes. Figure this out from the 3773ccd979bdSMark Fasheh * rightmost leaf block.*/ 3774ccd979bdSMark Fasheh status = ocfs2_read_block(osb, le64_to_cpu(fe->i_last_eb_blk), 3775ccd979bdSMark Fasheh &last_eb_bh, OCFS2_BH_CACHED, inode); 3776ccd979bdSMark Fasheh if (status < 0) { 3777ccd979bdSMark Fasheh mlog_errno(status); 3778ccd979bdSMark Fasheh goto bail; 3779ccd979bdSMark Fasheh } 3780ccd979bdSMark Fasheh eb = (struct ocfs2_extent_block *) last_eb_bh->b_data; 3781ccd979bdSMark Fasheh if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { 3782ccd979bdSMark Fasheh OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); 3783ccd979bdSMark Fasheh 3784ccd979bdSMark Fasheh brelse(last_eb_bh); 3785ccd979bdSMark Fasheh status = -EIO; 3786ccd979bdSMark Fasheh goto bail; 3787ccd979bdSMark Fasheh } 3788ccd979bdSMark Fasheh el = &(eb->h_list); 3789dcd0538fSMark Fasheh 3790dcd0538fSMark Fasheh i = 0; 3791dcd0538fSMark Fasheh if (ocfs2_is_empty_extent(&el->l_recs[0])) 3792dcd0538fSMark Fasheh i = 1; 3793dcd0538fSMark Fasheh /* 3794dcd0538fSMark Fasheh * XXX: Should we check that next_free_rec contains 3795dcd0538fSMark Fasheh * the extent? 3796dcd0538fSMark Fasheh */ 3797dcd0538fSMark Fasheh if (le32_to_cpu(el->l_recs[i].e_cpos) >= new_i_clusters) 3798ccd979bdSMark Fasheh metadata_delete = 1; 3799ccd979bdSMark Fasheh } 3800ccd979bdSMark Fasheh 3801ccd979bdSMark Fasheh (*tc)->tc_last_eb_bh = last_eb_bh; 3802ccd979bdSMark Fasheh 3803ccd979bdSMark Fasheh if (metadata_delete) { 3804ccd979bdSMark Fasheh mlog(0, "Will have to delete metadata for this trunc. " 3805ccd979bdSMark Fasheh "locking allocator.\n"); 3806ccd979bdSMark Fasheh ext_alloc_inode = ocfs2_get_system_file_inode(osb, EXTENT_ALLOC_SYSTEM_INODE, 0); 3807ccd979bdSMark Fasheh if (!ext_alloc_inode) { 3808ccd979bdSMark Fasheh status = -ENOMEM; 3809ccd979bdSMark Fasheh mlog_errno(status); 3810ccd979bdSMark Fasheh goto bail; 3811ccd979bdSMark Fasheh } 3812ccd979bdSMark Fasheh 38131b1dcc1bSJes Sorensen mutex_lock(&ext_alloc_inode->i_mutex); 3814ccd979bdSMark Fasheh (*tc)->tc_ext_alloc_inode = ext_alloc_inode; 3815ccd979bdSMark Fasheh 38164bcec184SMark Fasheh status = ocfs2_meta_lock(ext_alloc_inode, &ext_alloc_bh, 1); 3817ccd979bdSMark Fasheh if (status < 0) { 3818ccd979bdSMark Fasheh mlog_errno(status); 3819ccd979bdSMark Fasheh goto bail; 3820ccd979bdSMark Fasheh } 3821ccd979bdSMark Fasheh (*tc)->tc_ext_alloc_bh = ext_alloc_bh; 3822ccd979bdSMark Fasheh (*tc)->tc_ext_alloc_locked = 1; 3823ccd979bdSMark Fasheh } 3824ccd979bdSMark Fasheh 3825ccd979bdSMark Fasheh status = 0; 3826ccd979bdSMark Fasheh bail: 3827ccd979bdSMark Fasheh if (status < 0) { 3828ccd979bdSMark Fasheh if (*tc) 3829ccd979bdSMark Fasheh ocfs2_free_truncate_context(*tc); 3830ccd979bdSMark Fasheh *tc = NULL; 3831ccd979bdSMark Fasheh } 3832ccd979bdSMark Fasheh mlog_exit_void(); 3833ccd979bdSMark Fasheh return status; 3834ccd979bdSMark Fasheh } 3835ccd979bdSMark Fasheh 3836ccd979bdSMark Fasheh static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc) 3837ccd979bdSMark Fasheh { 3838ccd979bdSMark Fasheh if (tc->tc_ext_alloc_inode) { 3839ccd979bdSMark Fasheh if (tc->tc_ext_alloc_locked) 3840ccd979bdSMark Fasheh ocfs2_meta_unlock(tc->tc_ext_alloc_inode, 1); 3841ccd979bdSMark Fasheh 38421b1dcc1bSJes Sorensen mutex_unlock(&tc->tc_ext_alloc_inode->i_mutex); 3843ccd979bdSMark Fasheh iput(tc->tc_ext_alloc_inode); 3844ccd979bdSMark Fasheh } 3845ccd979bdSMark Fasheh 3846ccd979bdSMark Fasheh if (tc->tc_ext_alloc_bh) 3847ccd979bdSMark Fasheh brelse(tc->tc_ext_alloc_bh); 3848ccd979bdSMark Fasheh 3849ccd979bdSMark Fasheh if (tc->tc_last_eb_bh) 3850ccd979bdSMark Fasheh brelse(tc->tc_last_eb_bh); 3851ccd979bdSMark Fasheh 3852ccd979bdSMark Fasheh kfree(tc); 3853ccd979bdSMark Fasheh } 3854