1ccd979bdSMark Fasheh /* -*- mode: c; c-basic-offset: 8; -*- 2ccd979bdSMark Fasheh * vim: noexpandtab sw=8 ts=8 sts=0: 3ccd979bdSMark Fasheh * 4ccd979bdSMark Fasheh * alloc.c 5ccd979bdSMark Fasheh * 6ccd979bdSMark Fasheh * Extent allocs and frees 7ccd979bdSMark Fasheh * 8ccd979bdSMark Fasheh * Copyright (C) 2002, 2004 Oracle. All rights reserved. 9ccd979bdSMark Fasheh * 10ccd979bdSMark Fasheh * This program is free software; you can redistribute it and/or 11ccd979bdSMark Fasheh * modify it under the terms of the GNU General Public 12ccd979bdSMark Fasheh * License as published by the Free Software Foundation; either 13ccd979bdSMark Fasheh * version 2 of the License, or (at your option) any later version. 14ccd979bdSMark Fasheh * 15ccd979bdSMark Fasheh * This program is distributed in the hope that it will be useful, 16ccd979bdSMark Fasheh * but WITHOUT ANY WARRANTY; without even the implied warranty of 17ccd979bdSMark Fasheh * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18ccd979bdSMark Fasheh * General Public License for more details. 19ccd979bdSMark Fasheh * 20ccd979bdSMark Fasheh * You should have received a copy of the GNU General Public 21ccd979bdSMark Fasheh * License along with this program; if not, write to the 22ccd979bdSMark Fasheh * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 23ccd979bdSMark Fasheh * Boston, MA 021110-1307, USA. 24ccd979bdSMark Fasheh */ 25ccd979bdSMark Fasheh 26ccd979bdSMark Fasheh #include <linux/fs.h> 27ccd979bdSMark Fasheh #include <linux/types.h> 28ccd979bdSMark Fasheh #include <linux/slab.h> 29ccd979bdSMark Fasheh #include <linux/highmem.h> 30ccd979bdSMark Fasheh 31ccd979bdSMark Fasheh #define MLOG_MASK_PREFIX ML_DISK_ALLOC 32ccd979bdSMark Fasheh #include <cluster/masklog.h> 33ccd979bdSMark Fasheh 34ccd979bdSMark Fasheh #include "ocfs2.h" 35ccd979bdSMark Fasheh 36ccd979bdSMark Fasheh #include "alloc.h" 37ccd979bdSMark Fasheh #include "dlmglue.h" 38ccd979bdSMark Fasheh #include "extent_map.h" 39ccd979bdSMark Fasheh #include "inode.h" 40ccd979bdSMark Fasheh #include "journal.h" 41ccd979bdSMark Fasheh #include "localalloc.h" 42ccd979bdSMark Fasheh #include "suballoc.h" 43ccd979bdSMark Fasheh #include "sysfile.h" 44ccd979bdSMark Fasheh #include "file.h" 45ccd979bdSMark Fasheh #include "super.h" 46ccd979bdSMark Fasheh #include "uptodate.h" 47ccd979bdSMark Fasheh 48ccd979bdSMark Fasheh #include "buffer_head_io.h" 49ccd979bdSMark Fasheh 50ccd979bdSMark Fasheh static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc); 51ccd979bdSMark Fasheh 52dcd0538fSMark Fasheh /* 53dcd0538fSMark Fasheh * Structures which describe a path through a btree, and functions to 54dcd0538fSMark Fasheh * manipulate them. 55dcd0538fSMark Fasheh * 56dcd0538fSMark Fasheh * The idea here is to be as generic as possible with the tree 57dcd0538fSMark Fasheh * manipulation code. 58dcd0538fSMark Fasheh */ 59dcd0538fSMark Fasheh struct ocfs2_path_item { 60dcd0538fSMark Fasheh struct buffer_head *bh; 61dcd0538fSMark Fasheh struct ocfs2_extent_list *el; 62dcd0538fSMark Fasheh }; 63dcd0538fSMark Fasheh 64dcd0538fSMark Fasheh #define OCFS2_MAX_PATH_DEPTH 5 65dcd0538fSMark Fasheh 66dcd0538fSMark Fasheh struct ocfs2_path { 67dcd0538fSMark Fasheh int p_tree_depth; 68dcd0538fSMark Fasheh struct ocfs2_path_item p_node[OCFS2_MAX_PATH_DEPTH]; 69dcd0538fSMark Fasheh }; 70dcd0538fSMark Fasheh 71dcd0538fSMark Fasheh #define path_root_bh(_path) ((_path)->p_node[0].bh) 72dcd0538fSMark Fasheh #define path_root_el(_path) ((_path)->p_node[0].el) 73dcd0538fSMark Fasheh #define path_leaf_bh(_path) ((_path)->p_node[(_path)->p_tree_depth].bh) 74dcd0538fSMark Fasheh #define path_leaf_el(_path) ((_path)->p_node[(_path)->p_tree_depth].el) 75dcd0538fSMark Fasheh #define path_num_items(_path) ((_path)->p_tree_depth + 1) 76dcd0538fSMark Fasheh 77dcd0538fSMark Fasheh /* 78dcd0538fSMark Fasheh * Reset the actual path elements so that we can re-use the structure 79dcd0538fSMark Fasheh * to build another path. Generally, this involves freeing the buffer 80dcd0538fSMark Fasheh * heads. 81dcd0538fSMark Fasheh */ 82dcd0538fSMark Fasheh static void ocfs2_reinit_path(struct ocfs2_path *path, int keep_root) 83dcd0538fSMark Fasheh { 84dcd0538fSMark Fasheh int i, start = 0, depth = 0; 85dcd0538fSMark Fasheh struct ocfs2_path_item *node; 86dcd0538fSMark Fasheh 87dcd0538fSMark Fasheh if (keep_root) 88dcd0538fSMark Fasheh start = 1; 89dcd0538fSMark Fasheh 90dcd0538fSMark Fasheh for(i = start; i < path_num_items(path); i++) { 91dcd0538fSMark Fasheh node = &path->p_node[i]; 92dcd0538fSMark Fasheh 93dcd0538fSMark Fasheh brelse(node->bh); 94dcd0538fSMark Fasheh node->bh = NULL; 95dcd0538fSMark Fasheh node->el = NULL; 96dcd0538fSMark Fasheh } 97dcd0538fSMark Fasheh 98dcd0538fSMark Fasheh /* 99dcd0538fSMark Fasheh * Tree depth may change during truncate, or insert. If we're 100dcd0538fSMark Fasheh * keeping the root extent list, then make sure that our path 101dcd0538fSMark Fasheh * structure reflects the proper depth. 102dcd0538fSMark Fasheh */ 103dcd0538fSMark Fasheh if (keep_root) 104dcd0538fSMark Fasheh depth = le16_to_cpu(path_root_el(path)->l_tree_depth); 105dcd0538fSMark Fasheh 106dcd0538fSMark Fasheh path->p_tree_depth = depth; 107dcd0538fSMark Fasheh } 108dcd0538fSMark Fasheh 109dcd0538fSMark Fasheh static void ocfs2_free_path(struct ocfs2_path *path) 110dcd0538fSMark Fasheh { 111dcd0538fSMark Fasheh if (path) { 112dcd0538fSMark Fasheh ocfs2_reinit_path(path, 0); 113dcd0538fSMark Fasheh kfree(path); 114dcd0538fSMark Fasheh } 115dcd0538fSMark Fasheh } 116dcd0538fSMark Fasheh 117dcd0538fSMark Fasheh /* 118dcd0538fSMark Fasheh * Make the *dest path the same as src and re-initialize src path to 119dcd0538fSMark Fasheh * have a root only. 120dcd0538fSMark Fasheh */ 121dcd0538fSMark Fasheh static void ocfs2_mv_path(struct ocfs2_path *dest, struct ocfs2_path *src) 122dcd0538fSMark Fasheh { 123dcd0538fSMark Fasheh int i; 124dcd0538fSMark Fasheh 125dcd0538fSMark Fasheh BUG_ON(path_root_bh(dest) != path_root_bh(src)); 126dcd0538fSMark Fasheh 127dcd0538fSMark Fasheh for(i = 1; i < OCFS2_MAX_PATH_DEPTH; i++) { 128dcd0538fSMark Fasheh brelse(dest->p_node[i].bh); 129dcd0538fSMark Fasheh 130dcd0538fSMark Fasheh dest->p_node[i].bh = src->p_node[i].bh; 131dcd0538fSMark Fasheh dest->p_node[i].el = src->p_node[i].el; 132dcd0538fSMark Fasheh 133dcd0538fSMark Fasheh src->p_node[i].bh = NULL; 134dcd0538fSMark Fasheh src->p_node[i].el = NULL; 135dcd0538fSMark Fasheh } 136dcd0538fSMark Fasheh } 137dcd0538fSMark Fasheh 138dcd0538fSMark Fasheh /* 139dcd0538fSMark Fasheh * Insert an extent block at given index. 140dcd0538fSMark Fasheh * 141dcd0538fSMark Fasheh * This will not take an additional reference on eb_bh. 142dcd0538fSMark Fasheh */ 143dcd0538fSMark Fasheh static inline void ocfs2_path_insert_eb(struct ocfs2_path *path, int index, 144dcd0538fSMark Fasheh struct buffer_head *eb_bh) 145dcd0538fSMark Fasheh { 146dcd0538fSMark Fasheh struct ocfs2_extent_block *eb = (struct ocfs2_extent_block *)eb_bh->b_data; 147dcd0538fSMark Fasheh 148dcd0538fSMark Fasheh /* 149dcd0538fSMark Fasheh * Right now, no root bh is an extent block, so this helps 150dcd0538fSMark Fasheh * catch code errors with dinode trees. The assertion can be 151dcd0538fSMark Fasheh * safely removed if we ever need to insert extent block 152dcd0538fSMark Fasheh * structures at the root. 153dcd0538fSMark Fasheh */ 154dcd0538fSMark Fasheh BUG_ON(index == 0); 155dcd0538fSMark Fasheh 156dcd0538fSMark Fasheh path->p_node[index].bh = eb_bh; 157dcd0538fSMark Fasheh path->p_node[index].el = &eb->h_list; 158dcd0538fSMark Fasheh } 159dcd0538fSMark Fasheh 160dcd0538fSMark Fasheh static struct ocfs2_path *ocfs2_new_path(struct buffer_head *root_bh, 161dcd0538fSMark Fasheh struct ocfs2_extent_list *root_el) 162dcd0538fSMark Fasheh { 163dcd0538fSMark Fasheh struct ocfs2_path *path; 164dcd0538fSMark Fasheh 165dcd0538fSMark Fasheh BUG_ON(le16_to_cpu(root_el->l_tree_depth) >= OCFS2_MAX_PATH_DEPTH); 166dcd0538fSMark Fasheh 167dcd0538fSMark Fasheh path = kzalloc(sizeof(*path), GFP_NOFS); 168dcd0538fSMark Fasheh if (path) { 169dcd0538fSMark Fasheh path->p_tree_depth = le16_to_cpu(root_el->l_tree_depth); 170dcd0538fSMark Fasheh get_bh(root_bh); 171dcd0538fSMark Fasheh path_root_bh(path) = root_bh; 172dcd0538fSMark Fasheh path_root_el(path) = root_el; 173dcd0538fSMark Fasheh } 174dcd0538fSMark Fasheh 175dcd0538fSMark Fasheh return path; 176dcd0538fSMark Fasheh } 177dcd0538fSMark Fasheh 178dcd0538fSMark Fasheh /* 179dcd0538fSMark Fasheh * Allocate and initialize a new path based on a disk inode tree. 180dcd0538fSMark Fasheh */ 181dcd0538fSMark Fasheh static struct ocfs2_path *ocfs2_new_inode_path(struct buffer_head *di_bh) 182dcd0538fSMark Fasheh { 183dcd0538fSMark Fasheh struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 184dcd0538fSMark Fasheh struct ocfs2_extent_list *el = &di->id2.i_list; 185dcd0538fSMark Fasheh 186dcd0538fSMark Fasheh return ocfs2_new_path(di_bh, el); 187dcd0538fSMark Fasheh } 188dcd0538fSMark Fasheh 189dcd0538fSMark Fasheh /* 190dcd0538fSMark Fasheh * Convenience function to journal all components in a path. 191dcd0538fSMark Fasheh */ 192dcd0538fSMark Fasheh static int ocfs2_journal_access_path(struct inode *inode, handle_t *handle, 193dcd0538fSMark Fasheh struct ocfs2_path *path) 194dcd0538fSMark Fasheh { 195dcd0538fSMark Fasheh int i, ret = 0; 196dcd0538fSMark Fasheh 197dcd0538fSMark Fasheh if (!path) 198dcd0538fSMark Fasheh goto out; 199dcd0538fSMark Fasheh 200dcd0538fSMark Fasheh for(i = 0; i < path_num_items(path); i++) { 201dcd0538fSMark Fasheh ret = ocfs2_journal_access(handle, inode, path->p_node[i].bh, 202dcd0538fSMark Fasheh OCFS2_JOURNAL_ACCESS_WRITE); 203dcd0538fSMark Fasheh if (ret < 0) { 204dcd0538fSMark Fasheh mlog_errno(ret); 205dcd0538fSMark Fasheh goto out; 206dcd0538fSMark Fasheh } 207dcd0538fSMark Fasheh } 208dcd0538fSMark Fasheh 209dcd0538fSMark Fasheh out: 210dcd0538fSMark Fasheh return ret; 211dcd0538fSMark Fasheh } 212dcd0538fSMark Fasheh 213dcd0538fSMark Fasheh enum ocfs2_contig_type { 214dcd0538fSMark Fasheh CONTIG_NONE = 0, 215dcd0538fSMark Fasheh CONTIG_LEFT, 216dcd0538fSMark Fasheh CONTIG_RIGHT 217dcd0538fSMark Fasheh }; 218dcd0538fSMark Fasheh 219dcd0538fSMark Fasheh static int ocfs2_block_extent_contig(struct super_block *sb, 220ccd979bdSMark Fasheh struct ocfs2_extent_rec *ext, 221ccd979bdSMark Fasheh u64 blkno) 222ccd979bdSMark Fasheh { 223ccd979bdSMark Fasheh return blkno == (le64_to_cpu(ext->e_blkno) + 224dcd0538fSMark Fasheh ocfs2_clusters_to_blocks(sb, 225ccd979bdSMark Fasheh le32_to_cpu(ext->e_clusters))); 226ccd979bdSMark Fasheh } 227ccd979bdSMark Fasheh 228dcd0538fSMark Fasheh static int ocfs2_extents_adjacent(struct ocfs2_extent_rec *left, 229dcd0538fSMark Fasheh struct ocfs2_extent_rec *right) 230dcd0538fSMark Fasheh { 231dcd0538fSMark Fasheh return (le32_to_cpu(left->e_cpos) + le32_to_cpu(left->e_clusters) == 232dcd0538fSMark Fasheh le32_to_cpu(right->e_cpos)); 233dcd0538fSMark Fasheh } 234dcd0538fSMark Fasheh 235dcd0538fSMark Fasheh static enum ocfs2_contig_type 236dcd0538fSMark Fasheh ocfs2_extent_contig(struct inode *inode, 237dcd0538fSMark Fasheh struct ocfs2_extent_rec *ext, 238dcd0538fSMark Fasheh struct ocfs2_extent_rec *insert_rec) 239dcd0538fSMark Fasheh { 240dcd0538fSMark Fasheh u64 blkno = le64_to_cpu(insert_rec->e_blkno); 241dcd0538fSMark Fasheh 242dcd0538fSMark Fasheh if (ocfs2_extents_adjacent(ext, insert_rec) && 243dcd0538fSMark Fasheh ocfs2_block_extent_contig(inode->i_sb, ext, blkno)) 244dcd0538fSMark Fasheh return CONTIG_RIGHT; 245dcd0538fSMark Fasheh 246dcd0538fSMark Fasheh blkno = le64_to_cpu(ext->e_blkno); 247dcd0538fSMark Fasheh if (ocfs2_extents_adjacent(insert_rec, ext) && 248dcd0538fSMark Fasheh ocfs2_block_extent_contig(inode->i_sb, insert_rec, blkno)) 249dcd0538fSMark Fasheh return CONTIG_LEFT; 250dcd0538fSMark Fasheh 251dcd0538fSMark Fasheh return CONTIG_NONE; 252dcd0538fSMark Fasheh } 253dcd0538fSMark Fasheh 254dcd0538fSMark Fasheh /* 255dcd0538fSMark Fasheh * NOTE: We can have pretty much any combination of contiguousness and 256dcd0538fSMark Fasheh * appending. 257dcd0538fSMark Fasheh * 258dcd0538fSMark Fasheh * The usefulness of APPEND_TAIL is more in that it lets us know that 259dcd0538fSMark Fasheh * we'll have to update the path to that leaf. 260dcd0538fSMark Fasheh */ 261dcd0538fSMark Fasheh enum ocfs2_append_type { 262dcd0538fSMark Fasheh APPEND_NONE = 0, 263dcd0538fSMark Fasheh APPEND_TAIL, 264dcd0538fSMark Fasheh }; 265dcd0538fSMark Fasheh 266dcd0538fSMark Fasheh struct ocfs2_insert_type { 267dcd0538fSMark Fasheh enum ocfs2_append_type ins_appending; 268dcd0538fSMark Fasheh enum ocfs2_contig_type ins_contig; 269dcd0538fSMark Fasheh int ins_contig_index; 270dcd0538fSMark Fasheh int ins_free_records; 271dcd0538fSMark Fasheh int ins_tree_depth; 272dcd0538fSMark Fasheh }; 273dcd0538fSMark Fasheh 274ccd979bdSMark Fasheh /* 275ccd979bdSMark Fasheh * How many free extents have we got before we need more meta data? 276ccd979bdSMark Fasheh */ 277ccd979bdSMark Fasheh int ocfs2_num_free_extents(struct ocfs2_super *osb, 278ccd979bdSMark Fasheh struct inode *inode, 279ccd979bdSMark Fasheh struct ocfs2_dinode *fe) 280ccd979bdSMark Fasheh { 281ccd979bdSMark Fasheh int retval; 282ccd979bdSMark Fasheh struct ocfs2_extent_list *el; 283ccd979bdSMark Fasheh struct ocfs2_extent_block *eb; 284ccd979bdSMark Fasheh struct buffer_head *eb_bh = NULL; 285ccd979bdSMark Fasheh 286ccd979bdSMark Fasheh mlog_entry_void(); 287ccd979bdSMark Fasheh 288ccd979bdSMark Fasheh if (!OCFS2_IS_VALID_DINODE(fe)) { 289ccd979bdSMark Fasheh OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe); 290ccd979bdSMark Fasheh retval = -EIO; 291ccd979bdSMark Fasheh goto bail; 292ccd979bdSMark Fasheh } 293ccd979bdSMark Fasheh 294ccd979bdSMark Fasheh if (fe->i_last_eb_blk) { 295ccd979bdSMark Fasheh retval = ocfs2_read_block(osb, le64_to_cpu(fe->i_last_eb_blk), 296ccd979bdSMark Fasheh &eb_bh, OCFS2_BH_CACHED, inode); 297ccd979bdSMark Fasheh if (retval < 0) { 298ccd979bdSMark Fasheh mlog_errno(retval); 299ccd979bdSMark Fasheh goto bail; 300ccd979bdSMark Fasheh } 301ccd979bdSMark Fasheh eb = (struct ocfs2_extent_block *) eb_bh->b_data; 302ccd979bdSMark Fasheh el = &eb->h_list; 303ccd979bdSMark Fasheh } else 304ccd979bdSMark Fasheh el = &fe->id2.i_list; 305ccd979bdSMark Fasheh 306ccd979bdSMark Fasheh BUG_ON(el->l_tree_depth != 0); 307ccd979bdSMark Fasheh 308ccd979bdSMark Fasheh retval = le16_to_cpu(el->l_count) - le16_to_cpu(el->l_next_free_rec); 309ccd979bdSMark Fasheh bail: 310ccd979bdSMark Fasheh if (eb_bh) 311ccd979bdSMark Fasheh brelse(eb_bh); 312ccd979bdSMark Fasheh 313ccd979bdSMark Fasheh mlog_exit(retval); 314ccd979bdSMark Fasheh return retval; 315ccd979bdSMark Fasheh } 316ccd979bdSMark Fasheh 317ccd979bdSMark Fasheh /* expects array to already be allocated 318ccd979bdSMark Fasheh * 319ccd979bdSMark Fasheh * sets h_signature, h_blkno, h_suballoc_bit, h_suballoc_slot, and 320ccd979bdSMark Fasheh * l_count for you 321ccd979bdSMark Fasheh */ 322ccd979bdSMark Fasheh static int ocfs2_create_new_meta_bhs(struct ocfs2_super *osb, 3231fabe148SMark Fasheh handle_t *handle, 324ccd979bdSMark Fasheh struct inode *inode, 325ccd979bdSMark Fasheh int wanted, 326ccd979bdSMark Fasheh struct ocfs2_alloc_context *meta_ac, 327ccd979bdSMark Fasheh struct buffer_head *bhs[]) 328ccd979bdSMark Fasheh { 329ccd979bdSMark Fasheh int count, status, i; 330ccd979bdSMark Fasheh u16 suballoc_bit_start; 331ccd979bdSMark Fasheh u32 num_got; 332ccd979bdSMark Fasheh u64 first_blkno; 333ccd979bdSMark Fasheh struct ocfs2_extent_block *eb; 334ccd979bdSMark Fasheh 335ccd979bdSMark Fasheh mlog_entry_void(); 336ccd979bdSMark Fasheh 337ccd979bdSMark Fasheh count = 0; 338ccd979bdSMark Fasheh while (count < wanted) { 339ccd979bdSMark Fasheh status = ocfs2_claim_metadata(osb, 340ccd979bdSMark Fasheh handle, 341ccd979bdSMark Fasheh meta_ac, 342ccd979bdSMark Fasheh wanted - count, 343ccd979bdSMark Fasheh &suballoc_bit_start, 344ccd979bdSMark Fasheh &num_got, 345ccd979bdSMark Fasheh &first_blkno); 346ccd979bdSMark Fasheh if (status < 0) { 347ccd979bdSMark Fasheh mlog_errno(status); 348ccd979bdSMark Fasheh goto bail; 349ccd979bdSMark Fasheh } 350ccd979bdSMark Fasheh 351ccd979bdSMark Fasheh for(i = count; i < (num_got + count); i++) { 352ccd979bdSMark Fasheh bhs[i] = sb_getblk(osb->sb, first_blkno); 353ccd979bdSMark Fasheh if (bhs[i] == NULL) { 354ccd979bdSMark Fasheh status = -EIO; 355ccd979bdSMark Fasheh mlog_errno(status); 356ccd979bdSMark Fasheh goto bail; 357ccd979bdSMark Fasheh } 358ccd979bdSMark Fasheh ocfs2_set_new_buffer_uptodate(inode, bhs[i]); 359ccd979bdSMark Fasheh 360ccd979bdSMark Fasheh status = ocfs2_journal_access(handle, inode, bhs[i], 361ccd979bdSMark Fasheh OCFS2_JOURNAL_ACCESS_CREATE); 362ccd979bdSMark Fasheh if (status < 0) { 363ccd979bdSMark Fasheh mlog_errno(status); 364ccd979bdSMark Fasheh goto bail; 365ccd979bdSMark Fasheh } 366ccd979bdSMark Fasheh 367ccd979bdSMark Fasheh memset(bhs[i]->b_data, 0, osb->sb->s_blocksize); 368ccd979bdSMark Fasheh eb = (struct ocfs2_extent_block *) bhs[i]->b_data; 369ccd979bdSMark Fasheh /* Ok, setup the minimal stuff here. */ 370ccd979bdSMark Fasheh strcpy(eb->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE); 371ccd979bdSMark Fasheh eb->h_blkno = cpu_to_le64(first_blkno); 372ccd979bdSMark Fasheh eb->h_fs_generation = cpu_to_le32(osb->fs_generation); 373ccd979bdSMark Fasheh 374ccd979bdSMark Fasheh #ifndef OCFS2_USE_ALL_METADATA_SUBALLOCATORS 375ccd979bdSMark Fasheh /* we always use slot zero's suballocator */ 376ccd979bdSMark Fasheh eb->h_suballoc_slot = 0; 377ccd979bdSMark Fasheh #else 378ccd979bdSMark Fasheh eb->h_suballoc_slot = cpu_to_le16(osb->slot_num); 379ccd979bdSMark Fasheh #endif 380ccd979bdSMark Fasheh eb->h_suballoc_bit = cpu_to_le16(suballoc_bit_start); 381ccd979bdSMark Fasheh eb->h_list.l_count = 382ccd979bdSMark Fasheh cpu_to_le16(ocfs2_extent_recs_per_eb(osb->sb)); 383ccd979bdSMark Fasheh 384ccd979bdSMark Fasheh suballoc_bit_start++; 385ccd979bdSMark Fasheh first_blkno++; 386ccd979bdSMark Fasheh 387ccd979bdSMark Fasheh /* We'll also be dirtied by the caller, so 388ccd979bdSMark Fasheh * this isn't absolutely necessary. */ 389ccd979bdSMark Fasheh status = ocfs2_journal_dirty(handle, bhs[i]); 390ccd979bdSMark Fasheh if (status < 0) { 391ccd979bdSMark Fasheh mlog_errno(status); 392ccd979bdSMark Fasheh goto bail; 393ccd979bdSMark Fasheh } 394ccd979bdSMark Fasheh } 395ccd979bdSMark Fasheh 396ccd979bdSMark Fasheh count += num_got; 397ccd979bdSMark Fasheh } 398ccd979bdSMark Fasheh 399ccd979bdSMark Fasheh status = 0; 400ccd979bdSMark Fasheh bail: 401ccd979bdSMark Fasheh if (status < 0) { 402ccd979bdSMark Fasheh for(i = 0; i < wanted; i++) { 403ccd979bdSMark Fasheh if (bhs[i]) 404ccd979bdSMark Fasheh brelse(bhs[i]); 405ccd979bdSMark Fasheh bhs[i] = NULL; 406ccd979bdSMark Fasheh } 407ccd979bdSMark Fasheh } 408ccd979bdSMark Fasheh mlog_exit(status); 409ccd979bdSMark Fasheh return status; 410ccd979bdSMark Fasheh } 411ccd979bdSMark Fasheh 412ccd979bdSMark Fasheh /* 413dcd0538fSMark Fasheh * Helper function for ocfs2_add_branch() and ocfs2_shift_tree_depth(). 414dcd0538fSMark Fasheh * 415dcd0538fSMark Fasheh * Returns the sum of the rightmost extent rec logical offset and 416dcd0538fSMark Fasheh * cluster count. 417dcd0538fSMark Fasheh * 418dcd0538fSMark Fasheh * ocfs2_add_branch() uses this to determine what logical cluster 419dcd0538fSMark Fasheh * value should be populated into the leftmost new branch records. 420dcd0538fSMark Fasheh * 421dcd0538fSMark Fasheh * ocfs2_shift_tree_depth() uses this to determine the # clusters 422dcd0538fSMark Fasheh * value for the new topmost tree record. 423dcd0538fSMark Fasheh */ 424dcd0538fSMark Fasheh static inline u32 ocfs2_sum_rightmost_rec(struct ocfs2_extent_list *el) 425dcd0538fSMark Fasheh { 426dcd0538fSMark Fasheh int i; 427dcd0538fSMark Fasheh 428dcd0538fSMark Fasheh i = le16_to_cpu(el->l_next_free_rec) - 1; 429dcd0538fSMark Fasheh 430dcd0538fSMark Fasheh return le32_to_cpu(el->l_recs[i].e_cpos) + 431dcd0538fSMark Fasheh le32_to_cpu(el->l_recs[i].e_clusters); 432dcd0538fSMark Fasheh } 433dcd0538fSMark Fasheh 434dcd0538fSMark Fasheh /* 435ccd979bdSMark Fasheh * Add an entire tree branch to our inode. eb_bh is the extent block 436ccd979bdSMark Fasheh * to start at, if we don't want to start the branch at the dinode 437ccd979bdSMark Fasheh * structure. 438ccd979bdSMark Fasheh * 439ccd979bdSMark Fasheh * last_eb_bh is required as we have to update it's next_leaf pointer 440ccd979bdSMark Fasheh * for the new last extent block. 441ccd979bdSMark Fasheh * 442ccd979bdSMark Fasheh * the new branch will be 'empty' in the sense that every block will 443ccd979bdSMark Fasheh * contain a single record with e_clusters == 0. 444ccd979bdSMark Fasheh */ 445ccd979bdSMark Fasheh static int ocfs2_add_branch(struct ocfs2_super *osb, 4461fabe148SMark Fasheh handle_t *handle, 447ccd979bdSMark Fasheh struct inode *inode, 448ccd979bdSMark Fasheh struct buffer_head *fe_bh, 449ccd979bdSMark Fasheh struct buffer_head *eb_bh, 450ccd979bdSMark Fasheh struct buffer_head *last_eb_bh, 451ccd979bdSMark Fasheh struct ocfs2_alloc_context *meta_ac) 452ccd979bdSMark Fasheh { 453ccd979bdSMark Fasheh int status, new_blocks, i; 454ccd979bdSMark Fasheh u64 next_blkno, new_last_eb_blk; 455ccd979bdSMark Fasheh struct buffer_head *bh; 456ccd979bdSMark Fasheh struct buffer_head **new_eb_bhs = NULL; 457ccd979bdSMark Fasheh struct ocfs2_dinode *fe; 458ccd979bdSMark Fasheh struct ocfs2_extent_block *eb; 459ccd979bdSMark Fasheh struct ocfs2_extent_list *eb_el; 460ccd979bdSMark Fasheh struct ocfs2_extent_list *el; 461dcd0538fSMark Fasheh u32 new_cpos; 462ccd979bdSMark Fasheh 463ccd979bdSMark Fasheh mlog_entry_void(); 464ccd979bdSMark Fasheh 465ccd979bdSMark Fasheh BUG_ON(!last_eb_bh); 466ccd979bdSMark Fasheh 467ccd979bdSMark Fasheh fe = (struct ocfs2_dinode *) fe_bh->b_data; 468ccd979bdSMark Fasheh 469ccd979bdSMark Fasheh if (eb_bh) { 470ccd979bdSMark Fasheh eb = (struct ocfs2_extent_block *) eb_bh->b_data; 471ccd979bdSMark Fasheh el = &eb->h_list; 472ccd979bdSMark Fasheh } else 473ccd979bdSMark Fasheh el = &fe->id2.i_list; 474ccd979bdSMark Fasheh 475ccd979bdSMark Fasheh /* we never add a branch to a leaf. */ 476ccd979bdSMark Fasheh BUG_ON(!el->l_tree_depth); 477ccd979bdSMark Fasheh 478ccd979bdSMark Fasheh new_blocks = le16_to_cpu(el->l_tree_depth); 479ccd979bdSMark Fasheh 480ccd979bdSMark Fasheh /* allocate the number of new eb blocks we need */ 481ccd979bdSMark Fasheh new_eb_bhs = kcalloc(new_blocks, sizeof(struct buffer_head *), 482ccd979bdSMark Fasheh GFP_KERNEL); 483ccd979bdSMark Fasheh if (!new_eb_bhs) { 484ccd979bdSMark Fasheh status = -ENOMEM; 485ccd979bdSMark Fasheh mlog_errno(status); 486ccd979bdSMark Fasheh goto bail; 487ccd979bdSMark Fasheh } 488ccd979bdSMark Fasheh 489ccd979bdSMark Fasheh status = ocfs2_create_new_meta_bhs(osb, handle, inode, new_blocks, 490ccd979bdSMark Fasheh meta_ac, new_eb_bhs); 491ccd979bdSMark Fasheh if (status < 0) { 492ccd979bdSMark Fasheh mlog_errno(status); 493ccd979bdSMark Fasheh goto bail; 494ccd979bdSMark Fasheh } 495ccd979bdSMark Fasheh 496dcd0538fSMark Fasheh eb = (struct ocfs2_extent_block *)last_eb_bh->b_data; 497dcd0538fSMark Fasheh new_cpos = ocfs2_sum_rightmost_rec(&eb->h_list); 498dcd0538fSMark Fasheh 499ccd979bdSMark Fasheh /* Note: new_eb_bhs[new_blocks - 1] is the guy which will be 500ccd979bdSMark Fasheh * linked with the rest of the tree. 501ccd979bdSMark Fasheh * conversly, new_eb_bhs[0] is the new bottommost leaf. 502ccd979bdSMark Fasheh * 503ccd979bdSMark Fasheh * when we leave the loop, new_last_eb_blk will point to the 504ccd979bdSMark Fasheh * newest leaf, and next_blkno will point to the topmost extent 505ccd979bdSMark Fasheh * block. */ 506ccd979bdSMark Fasheh next_blkno = new_last_eb_blk = 0; 507ccd979bdSMark Fasheh for(i = 0; i < new_blocks; i++) { 508ccd979bdSMark Fasheh bh = new_eb_bhs[i]; 509ccd979bdSMark Fasheh eb = (struct ocfs2_extent_block *) bh->b_data; 510ccd979bdSMark Fasheh if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { 511ccd979bdSMark Fasheh OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); 512ccd979bdSMark Fasheh status = -EIO; 513ccd979bdSMark Fasheh goto bail; 514ccd979bdSMark Fasheh } 515ccd979bdSMark Fasheh eb_el = &eb->h_list; 516ccd979bdSMark Fasheh 517ccd979bdSMark Fasheh status = ocfs2_journal_access(handle, inode, bh, 518ccd979bdSMark Fasheh OCFS2_JOURNAL_ACCESS_CREATE); 519ccd979bdSMark Fasheh if (status < 0) { 520ccd979bdSMark Fasheh mlog_errno(status); 521ccd979bdSMark Fasheh goto bail; 522ccd979bdSMark Fasheh } 523ccd979bdSMark Fasheh 524ccd979bdSMark Fasheh eb->h_next_leaf_blk = 0; 525ccd979bdSMark Fasheh eb_el->l_tree_depth = cpu_to_le16(i); 526ccd979bdSMark Fasheh eb_el->l_next_free_rec = cpu_to_le16(1); 527dcd0538fSMark Fasheh /* 528dcd0538fSMark Fasheh * This actually counts as an empty extent as 529dcd0538fSMark Fasheh * c_clusters == 0 530dcd0538fSMark Fasheh */ 531dcd0538fSMark Fasheh eb_el->l_recs[0].e_cpos = cpu_to_le32(new_cpos); 532ccd979bdSMark Fasheh eb_el->l_recs[0].e_blkno = cpu_to_le64(next_blkno); 533ccd979bdSMark Fasheh eb_el->l_recs[0].e_clusters = cpu_to_le32(0); 534ccd979bdSMark Fasheh if (!eb_el->l_tree_depth) 535ccd979bdSMark Fasheh new_last_eb_blk = le64_to_cpu(eb->h_blkno); 536ccd979bdSMark Fasheh 537ccd979bdSMark Fasheh status = ocfs2_journal_dirty(handle, bh); 538ccd979bdSMark Fasheh if (status < 0) { 539ccd979bdSMark Fasheh mlog_errno(status); 540ccd979bdSMark Fasheh goto bail; 541ccd979bdSMark Fasheh } 542ccd979bdSMark Fasheh 543ccd979bdSMark Fasheh next_blkno = le64_to_cpu(eb->h_blkno); 544ccd979bdSMark Fasheh } 545ccd979bdSMark Fasheh 546ccd979bdSMark Fasheh /* This is a bit hairy. We want to update up to three blocks 547ccd979bdSMark Fasheh * here without leaving any of them in an inconsistent state 548ccd979bdSMark Fasheh * in case of error. We don't have to worry about 549ccd979bdSMark Fasheh * journal_dirty erroring as it won't unless we've aborted the 550ccd979bdSMark Fasheh * handle (in which case we would never be here) so reserving 551ccd979bdSMark Fasheh * the write with journal_access is all we need to do. */ 552ccd979bdSMark Fasheh status = ocfs2_journal_access(handle, inode, last_eb_bh, 553ccd979bdSMark Fasheh OCFS2_JOURNAL_ACCESS_WRITE); 554ccd979bdSMark Fasheh if (status < 0) { 555ccd979bdSMark Fasheh mlog_errno(status); 556ccd979bdSMark Fasheh goto bail; 557ccd979bdSMark Fasheh } 558ccd979bdSMark Fasheh status = ocfs2_journal_access(handle, inode, fe_bh, 559ccd979bdSMark Fasheh OCFS2_JOURNAL_ACCESS_WRITE); 560ccd979bdSMark Fasheh if (status < 0) { 561ccd979bdSMark Fasheh mlog_errno(status); 562ccd979bdSMark Fasheh goto bail; 563ccd979bdSMark Fasheh } 564ccd979bdSMark Fasheh if (eb_bh) { 565ccd979bdSMark Fasheh status = ocfs2_journal_access(handle, inode, eb_bh, 566ccd979bdSMark Fasheh OCFS2_JOURNAL_ACCESS_WRITE); 567ccd979bdSMark Fasheh if (status < 0) { 568ccd979bdSMark Fasheh mlog_errno(status); 569ccd979bdSMark Fasheh goto bail; 570ccd979bdSMark Fasheh } 571ccd979bdSMark Fasheh } 572ccd979bdSMark Fasheh 573ccd979bdSMark Fasheh /* Link the new branch into the rest of the tree (el will 574ccd979bdSMark Fasheh * either be on the fe, or the extent block passed in. */ 575ccd979bdSMark Fasheh i = le16_to_cpu(el->l_next_free_rec); 576ccd979bdSMark Fasheh el->l_recs[i].e_blkno = cpu_to_le64(next_blkno); 577dcd0538fSMark Fasheh el->l_recs[i].e_cpos = cpu_to_le32(new_cpos); 578ccd979bdSMark Fasheh el->l_recs[i].e_clusters = 0; 579ccd979bdSMark Fasheh le16_add_cpu(&el->l_next_free_rec, 1); 580ccd979bdSMark Fasheh 581ccd979bdSMark Fasheh /* fe needs a new last extent block pointer, as does the 582ccd979bdSMark Fasheh * next_leaf on the previously last-extent-block. */ 583ccd979bdSMark Fasheh fe->i_last_eb_blk = cpu_to_le64(new_last_eb_blk); 584ccd979bdSMark Fasheh 585ccd979bdSMark Fasheh eb = (struct ocfs2_extent_block *) last_eb_bh->b_data; 586ccd979bdSMark Fasheh eb->h_next_leaf_blk = cpu_to_le64(new_last_eb_blk); 587ccd979bdSMark Fasheh 588ccd979bdSMark Fasheh status = ocfs2_journal_dirty(handle, last_eb_bh); 589ccd979bdSMark Fasheh if (status < 0) 590ccd979bdSMark Fasheh mlog_errno(status); 591ccd979bdSMark Fasheh status = ocfs2_journal_dirty(handle, fe_bh); 592ccd979bdSMark Fasheh if (status < 0) 593ccd979bdSMark Fasheh mlog_errno(status); 594ccd979bdSMark Fasheh if (eb_bh) { 595ccd979bdSMark Fasheh status = ocfs2_journal_dirty(handle, eb_bh); 596ccd979bdSMark Fasheh if (status < 0) 597ccd979bdSMark Fasheh mlog_errno(status); 598ccd979bdSMark Fasheh } 599ccd979bdSMark Fasheh 600ccd979bdSMark Fasheh status = 0; 601ccd979bdSMark Fasheh bail: 602ccd979bdSMark Fasheh if (new_eb_bhs) { 603ccd979bdSMark Fasheh for (i = 0; i < new_blocks; i++) 604ccd979bdSMark Fasheh if (new_eb_bhs[i]) 605ccd979bdSMark Fasheh brelse(new_eb_bhs[i]); 606ccd979bdSMark Fasheh kfree(new_eb_bhs); 607ccd979bdSMark Fasheh } 608ccd979bdSMark Fasheh 609ccd979bdSMark Fasheh mlog_exit(status); 610ccd979bdSMark Fasheh return status; 611ccd979bdSMark Fasheh } 612ccd979bdSMark Fasheh 613ccd979bdSMark Fasheh /* 614ccd979bdSMark Fasheh * adds another level to the allocation tree. 615ccd979bdSMark Fasheh * returns back the new extent block so you can add a branch to it 616ccd979bdSMark Fasheh * after this call. 617ccd979bdSMark Fasheh */ 618ccd979bdSMark Fasheh static int ocfs2_shift_tree_depth(struct ocfs2_super *osb, 6191fabe148SMark Fasheh handle_t *handle, 620ccd979bdSMark Fasheh struct inode *inode, 621ccd979bdSMark Fasheh struct buffer_head *fe_bh, 622ccd979bdSMark Fasheh struct ocfs2_alloc_context *meta_ac, 623ccd979bdSMark Fasheh struct buffer_head **ret_new_eb_bh) 624ccd979bdSMark Fasheh { 625ccd979bdSMark Fasheh int status, i; 626dcd0538fSMark Fasheh u32 new_clusters; 627ccd979bdSMark Fasheh struct buffer_head *new_eb_bh = NULL; 628ccd979bdSMark Fasheh struct ocfs2_dinode *fe; 629ccd979bdSMark Fasheh struct ocfs2_extent_block *eb; 630ccd979bdSMark Fasheh struct ocfs2_extent_list *fe_el; 631ccd979bdSMark Fasheh struct ocfs2_extent_list *eb_el; 632ccd979bdSMark Fasheh 633ccd979bdSMark Fasheh mlog_entry_void(); 634ccd979bdSMark Fasheh 635ccd979bdSMark Fasheh status = ocfs2_create_new_meta_bhs(osb, handle, inode, 1, meta_ac, 636ccd979bdSMark Fasheh &new_eb_bh); 637ccd979bdSMark Fasheh if (status < 0) { 638ccd979bdSMark Fasheh mlog_errno(status); 639ccd979bdSMark Fasheh goto bail; 640ccd979bdSMark Fasheh } 641ccd979bdSMark Fasheh 642ccd979bdSMark Fasheh eb = (struct ocfs2_extent_block *) new_eb_bh->b_data; 643ccd979bdSMark Fasheh if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { 644ccd979bdSMark Fasheh OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); 645ccd979bdSMark Fasheh status = -EIO; 646ccd979bdSMark Fasheh goto bail; 647ccd979bdSMark Fasheh } 648ccd979bdSMark Fasheh 649ccd979bdSMark Fasheh eb_el = &eb->h_list; 650ccd979bdSMark Fasheh fe = (struct ocfs2_dinode *) fe_bh->b_data; 651ccd979bdSMark Fasheh fe_el = &fe->id2.i_list; 652ccd979bdSMark Fasheh 653ccd979bdSMark Fasheh status = ocfs2_journal_access(handle, inode, new_eb_bh, 654ccd979bdSMark Fasheh OCFS2_JOURNAL_ACCESS_CREATE); 655ccd979bdSMark Fasheh if (status < 0) { 656ccd979bdSMark Fasheh mlog_errno(status); 657ccd979bdSMark Fasheh goto bail; 658ccd979bdSMark Fasheh } 659ccd979bdSMark Fasheh 660ccd979bdSMark Fasheh /* copy the fe data into the new extent block */ 661ccd979bdSMark Fasheh eb_el->l_tree_depth = fe_el->l_tree_depth; 662ccd979bdSMark Fasheh eb_el->l_next_free_rec = fe_el->l_next_free_rec; 663ccd979bdSMark Fasheh for(i = 0; i < le16_to_cpu(fe_el->l_next_free_rec); i++) { 664ccd979bdSMark Fasheh eb_el->l_recs[i].e_cpos = fe_el->l_recs[i].e_cpos; 665ccd979bdSMark Fasheh eb_el->l_recs[i].e_clusters = fe_el->l_recs[i].e_clusters; 666ccd979bdSMark Fasheh eb_el->l_recs[i].e_blkno = fe_el->l_recs[i].e_blkno; 667ccd979bdSMark Fasheh } 668ccd979bdSMark Fasheh 669ccd979bdSMark Fasheh status = ocfs2_journal_dirty(handle, new_eb_bh); 670ccd979bdSMark Fasheh if (status < 0) { 671ccd979bdSMark Fasheh mlog_errno(status); 672ccd979bdSMark Fasheh goto bail; 673ccd979bdSMark Fasheh } 674ccd979bdSMark Fasheh 675ccd979bdSMark Fasheh status = ocfs2_journal_access(handle, inode, fe_bh, 676ccd979bdSMark Fasheh OCFS2_JOURNAL_ACCESS_WRITE); 677ccd979bdSMark Fasheh if (status < 0) { 678ccd979bdSMark Fasheh mlog_errno(status); 679ccd979bdSMark Fasheh goto bail; 680ccd979bdSMark Fasheh } 681ccd979bdSMark Fasheh 682dcd0538fSMark Fasheh new_clusters = ocfs2_sum_rightmost_rec(eb_el); 683dcd0538fSMark Fasheh 684ccd979bdSMark Fasheh /* update fe now */ 685ccd979bdSMark Fasheh le16_add_cpu(&fe_el->l_tree_depth, 1); 686ccd979bdSMark Fasheh fe_el->l_recs[0].e_cpos = 0; 687ccd979bdSMark Fasheh fe_el->l_recs[0].e_blkno = eb->h_blkno; 688dcd0538fSMark Fasheh fe_el->l_recs[0].e_clusters = cpu_to_le32(new_clusters); 689ccd979bdSMark Fasheh for(i = 1; i < le16_to_cpu(fe_el->l_next_free_rec); i++) { 690ccd979bdSMark Fasheh fe_el->l_recs[i].e_cpos = 0; 691ccd979bdSMark Fasheh fe_el->l_recs[i].e_clusters = 0; 692ccd979bdSMark Fasheh fe_el->l_recs[i].e_blkno = 0; 693ccd979bdSMark Fasheh } 694ccd979bdSMark Fasheh fe_el->l_next_free_rec = cpu_to_le16(1); 695ccd979bdSMark Fasheh 696ccd979bdSMark Fasheh /* If this is our 1st tree depth shift, then last_eb_blk 697ccd979bdSMark Fasheh * becomes the allocated extent block */ 698ccd979bdSMark Fasheh if (fe_el->l_tree_depth == cpu_to_le16(1)) 699ccd979bdSMark Fasheh fe->i_last_eb_blk = eb->h_blkno; 700ccd979bdSMark Fasheh 701ccd979bdSMark Fasheh status = ocfs2_journal_dirty(handle, fe_bh); 702ccd979bdSMark Fasheh if (status < 0) { 703ccd979bdSMark Fasheh mlog_errno(status); 704ccd979bdSMark Fasheh goto bail; 705ccd979bdSMark Fasheh } 706ccd979bdSMark Fasheh 707ccd979bdSMark Fasheh *ret_new_eb_bh = new_eb_bh; 708ccd979bdSMark Fasheh new_eb_bh = NULL; 709ccd979bdSMark Fasheh status = 0; 710ccd979bdSMark Fasheh bail: 711ccd979bdSMark Fasheh if (new_eb_bh) 712ccd979bdSMark Fasheh brelse(new_eb_bh); 713ccd979bdSMark Fasheh 714ccd979bdSMark Fasheh mlog_exit(status); 715ccd979bdSMark Fasheh return status; 716ccd979bdSMark Fasheh } 717ccd979bdSMark Fasheh 718ccd979bdSMark Fasheh /* 719ccd979bdSMark Fasheh * Should only be called when there is no space left in any of the 720ccd979bdSMark Fasheh * leaf nodes. What we want to do is find the lowest tree depth 721ccd979bdSMark Fasheh * non-leaf extent block with room for new records. There are three 722ccd979bdSMark Fasheh * valid results of this search: 723ccd979bdSMark Fasheh * 724ccd979bdSMark Fasheh * 1) a lowest extent block is found, then we pass it back in 725ccd979bdSMark Fasheh * *lowest_eb_bh and return '0' 726ccd979bdSMark Fasheh * 727ccd979bdSMark Fasheh * 2) the search fails to find anything, but the dinode has room. We 728ccd979bdSMark Fasheh * pass NULL back in *lowest_eb_bh, but still return '0' 729ccd979bdSMark Fasheh * 730ccd979bdSMark Fasheh * 3) the search fails to find anything AND the dinode is full, in 731ccd979bdSMark Fasheh * which case we return > 0 732ccd979bdSMark Fasheh * 733ccd979bdSMark Fasheh * return status < 0 indicates an error. 734ccd979bdSMark Fasheh */ 735ccd979bdSMark Fasheh static int ocfs2_find_branch_target(struct ocfs2_super *osb, 736ccd979bdSMark Fasheh struct inode *inode, 737ccd979bdSMark Fasheh struct buffer_head *fe_bh, 738ccd979bdSMark Fasheh struct buffer_head **target_bh) 739ccd979bdSMark Fasheh { 740ccd979bdSMark Fasheh int status = 0, i; 741ccd979bdSMark Fasheh u64 blkno; 742ccd979bdSMark Fasheh struct ocfs2_dinode *fe; 743ccd979bdSMark Fasheh struct ocfs2_extent_block *eb; 744ccd979bdSMark Fasheh struct ocfs2_extent_list *el; 745ccd979bdSMark Fasheh struct buffer_head *bh = NULL; 746ccd979bdSMark Fasheh struct buffer_head *lowest_bh = NULL; 747ccd979bdSMark Fasheh 748ccd979bdSMark Fasheh mlog_entry_void(); 749ccd979bdSMark Fasheh 750ccd979bdSMark Fasheh *target_bh = NULL; 751ccd979bdSMark Fasheh 752ccd979bdSMark Fasheh fe = (struct ocfs2_dinode *) fe_bh->b_data; 753ccd979bdSMark Fasheh el = &fe->id2.i_list; 754ccd979bdSMark Fasheh 755ccd979bdSMark Fasheh while(le16_to_cpu(el->l_tree_depth) > 1) { 756ccd979bdSMark Fasheh if (le16_to_cpu(el->l_next_free_rec) == 0) { 757b0697053SMark Fasheh ocfs2_error(inode->i_sb, "Dinode %llu has empty " 758ccd979bdSMark Fasheh "extent list (next_free_rec == 0)", 759b0697053SMark Fasheh (unsigned long long)OCFS2_I(inode)->ip_blkno); 760ccd979bdSMark Fasheh status = -EIO; 761ccd979bdSMark Fasheh goto bail; 762ccd979bdSMark Fasheh } 763ccd979bdSMark Fasheh i = le16_to_cpu(el->l_next_free_rec) - 1; 764ccd979bdSMark Fasheh blkno = le64_to_cpu(el->l_recs[i].e_blkno); 765ccd979bdSMark Fasheh if (!blkno) { 766b0697053SMark Fasheh ocfs2_error(inode->i_sb, "Dinode %llu has extent " 767ccd979bdSMark Fasheh "list where extent # %d has no physical " 768ccd979bdSMark Fasheh "block start", 769b0697053SMark Fasheh (unsigned long long)OCFS2_I(inode)->ip_blkno, i); 770ccd979bdSMark Fasheh status = -EIO; 771ccd979bdSMark Fasheh goto bail; 772ccd979bdSMark Fasheh } 773ccd979bdSMark Fasheh 774ccd979bdSMark Fasheh if (bh) { 775ccd979bdSMark Fasheh brelse(bh); 776ccd979bdSMark Fasheh bh = NULL; 777ccd979bdSMark Fasheh } 778ccd979bdSMark Fasheh 779ccd979bdSMark Fasheh status = ocfs2_read_block(osb, blkno, &bh, OCFS2_BH_CACHED, 780ccd979bdSMark Fasheh inode); 781ccd979bdSMark Fasheh if (status < 0) { 782ccd979bdSMark Fasheh mlog_errno(status); 783ccd979bdSMark Fasheh goto bail; 784ccd979bdSMark Fasheh } 785ccd979bdSMark Fasheh 786ccd979bdSMark Fasheh eb = (struct ocfs2_extent_block *) bh->b_data; 787ccd979bdSMark Fasheh if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { 788ccd979bdSMark Fasheh OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); 789ccd979bdSMark Fasheh status = -EIO; 790ccd979bdSMark Fasheh goto bail; 791ccd979bdSMark Fasheh } 792ccd979bdSMark Fasheh el = &eb->h_list; 793ccd979bdSMark Fasheh 794ccd979bdSMark Fasheh if (le16_to_cpu(el->l_next_free_rec) < 795ccd979bdSMark Fasheh le16_to_cpu(el->l_count)) { 796ccd979bdSMark Fasheh if (lowest_bh) 797ccd979bdSMark Fasheh brelse(lowest_bh); 798ccd979bdSMark Fasheh lowest_bh = bh; 799ccd979bdSMark Fasheh get_bh(lowest_bh); 800ccd979bdSMark Fasheh } 801ccd979bdSMark Fasheh } 802ccd979bdSMark Fasheh 803ccd979bdSMark Fasheh /* If we didn't find one and the fe doesn't have any room, 804ccd979bdSMark Fasheh * then return '1' */ 805ccd979bdSMark Fasheh if (!lowest_bh 806ccd979bdSMark Fasheh && (fe->id2.i_list.l_next_free_rec == fe->id2.i_list.l_count)) 807ccd979bdSMark Fasheh status = 1; 808ccd979bdSMark Fasheh 809ccd979bdSMark Fasheh *target_bh = lowest_bh; 810ccd979bdSMark Fasheh bail: 811ccd979bdSMark Fasheh if (bh) 812ccd979bdSMark Fasheh brelse(bh); 813ccd979bdSMark Fasheh 814ccd979bdSMark Fasheh mlog_exit(status); 815ccd979bdSMark Fasheh return status; 816ccd979bdSMark Fasheh } 817ccd979bdSMark Fasheh 818dcd0538fSMark Fasheh static inline int ocfs2_is_empty_extent(struct ocfs2_extent_rec *rec) 819dcd0538fSMark Fasheh { 820dcd0538fSMark Fasheh return !rec->e_clusters; 821dcd0538fSMark Fasheh } 822dcd0538fSMark Fasheh 823dcd0538fSMark Fasheh /* 824dcd0538fSMark Fasheh * This function will discard the rightmost extent record. 825dcd0538fSMark Fasheh */ 826dcd0538fSMark Fasheh static void ocfs2_shift_records_right(struct ocfs2_extent_list *el) 827dcd0538fSMark Fasheh { 828dcd0538fSMark Fasheh int next_free = le16_to_cpu(el->l_next_free_rec); 829dcd0538fSMark Fasheh int count = le16_to_cpu(el->l_count); 830dcd0538fSMark Fasheh unsigned int num_bytes; 831dcd0538fSMark Fasheh 832dcd0538fSMark Fasheh BUG_ON(!next_free); 833dcd0538fSMark Fasheh /* This will cause us to go off the end of our extent list. */ 834dcd0538fSMark Fasheh BUG_ON(next_free >= count); 835dcd0538fSMark Fasheh 836dcd0538fSMark Fasheh num_bytes = sizeof(struct ocfs2_extent_rec) * next_free; 837dcd0538fSMark Fasheh 838dcd0538fSMark Fasheh memmove(&el->l_recs[1], &el->l_recs[0], num_bytes); 839dcd0538fSMark Fasheh } 840dcd0538fSMark Fasheh 841dcd0538fSMark Fasheh static void ocfs2_rotate_leaf(struct ocfs2_extent_list *el, 842dcd0538fSMark Fasheh struct ocfs2_extent_rec *insert_rec) 843dcd0538fSMark Fasheh { 844dcd0538fSMark Fasheh int i, insert_index, next_free, has_empty, num_bytes; 845dcd0538fSMark Fasheh u32 insert_cpos = le32_to_cpu(insert_rec->e_cpos); 846dcd0538fSMark Fasheh struct ocfs2_extent_rec *rec; 847dcd0538fSMark Fasheh 848dcd0538fSMark Fasheh next_free = le16_to_cpu(el->l_next_free_rec); 849dcd0538fSMark Fasheh has_empty = ocfs2_is_empty_extent(&el->l_recs[0]); 850dcd0538fSMark Fasheh 851dcd0538fSMark Fasheh BUG_ON(!next_free); 852dcd0538fSMark Fasheh 853dcd0538fSMark Fasheh /* The tree code before us didn't allow enough room in the leaf. */ 854dcd0538fSMark Fasheh if (el->l_next_free_rec == el->l_count && !has_empty) 855dcd0538fSMark Fasheh BUG(); 856dcd0538fSMark Fasheh 857dcd0538fSMark Fasheh /* 858dcd0538fSMark Fasheh * The easiest way to approach this is to just remove the 859dcd0538fSMark Fasheh * empty extent and temporarily decrement next_free. 860dcd0538fSMark Fasheh */ 861dcd0538fSMark Fasheh if (has_empty) { 862dcd0538fSMark Fasheh /* 863dcd0538fSMark Fasheh * If next_free was 1 (only an empty extent), this 864dcd0538fSMark Fasheh * loop won't execute, which is fine. We still want 865dcd0538fSMark Fasheh * the decrement above to happen. 866dcd0538fSMark Fasheh */ 867dcd0538fSMark Fasheh for(i = 0; i < (next_free - 1); i++) 868dcd0538fSMark Fasheh el->l_recs[i] = el->l_recs[i+1]; 869dcd0538fSMark Fasheh 870dcd0538fSMark Fasheh next_free--; 871dcd0538fSMark Fasheh } 872dcd0538fSMark Fasheh 873dcd0538fSMark Fasheh /* 874dcd0538fSMark Fasheh * Figure out what the new record index should be. 875dcd0538fSMark Fasheh */ 876dcd0538fSMark Fasheh for(i = 0; i < next_free; i++) { 877dcd0538fSMark Fasheh rec = &el->l_recs[i]; 878dcd0538fSMark Fasheh 879dcd0538fSMark Fasheh if (insert_cpos < le32_to_cpu(rec->e_cpos)) 880dcd0538fSMark Fasheh break; 881dcd0538fSMark Fasheh } 882dcd0538fSMark Fasheh insert_index = i; 883dcd0538fSMark Fasheh 884dcd0538fSMark Fasheh mlog(0, "ins %u: index %d, has_empty %d, next_free %d, count %d\n", 885dcd0538fSMark Fasheh insert_cpos, insert_index, has_empty, next_free, le16_to_cpu(el->l_count)); 886dcd0538fSMark Fasheh 887dcd0538fSMark Fasheh BUG_ON(insert_index < 0); 888dcd0538fSMark Fasheh BUG_ON(insert_index >= le16_to_cpu(el->l_count)); 889dcd0538fSMark Fasheh BUG_ON(insert_index > next_free); 890dcd0538fSMark Fasheh 891dcd0538fSMark Fasheh /* 892dcd0538fSMark Fasheh * No need to memmove if we're just adding to the tail. 893dcd0538fSMark Fasheh */ 894dcd0538fSMark Fasheh if (insert_index != next_free) { 895dcd0538fSMark Fasheh BUG_ON(next_free >= le16_to_cpu(el->l_count)); 896dcd0538fSMark Fasheh 897dcd0538fSMark Fasheh num_bytes = next_free - insert_index; 898dcd0538fSMark Fasheh num_bytes *= sizeof(struct ocfs2_extent_rec); 899dcd0538fSMark Fasheh memmove(&el->l_recs[insert_index + 1], 900dcd0538fSMark Fasheh &el->l_recs[insert_index], 901dcd0538fSMark Fasheh num_bytes); 902dcd0538fSMark Fasheh } 903dcd0538fSMark Fasheh 904dcd0538fSMark Fasheh /* 905dcd0538fSMark Fasheh * Either we had an empty extent, and need to re-increment or 906dcd0538fSMark Fasheh * there was no empty extent on a non full rightmost leaf node, 907dcd0538fSMark Fasheh * in which case we still need to increment. 908dcd0538fSMark Fasheh */ 909dcd0538fSMark Fasheh next_free++; 910dcd0538fSMark Fasheh el->l_next_free_rec = cpu_to_le16(next_free); 911dcd0538fSMark Fasheh /* 912dcd0538fSMark Fasheh * Make sure none of the math above just messed up our tree. 913dcd0538fSMark Fasheh */ 914dcd0538fSMark Fasheh BUG_ON(le16_to_cpu(el->l_next_free_rec) > le16_to_cpu(el->l_count)); 915dcd0538fSMark Fasheh 916dcd0538fSMark Fasheh el->l_recs[insert_index] = *insert_rec; 917dcd0538fSMark Fasheh 918dcd0538fSMark Fasheh } 919dcd0538fSMark Fasheh 920dcd0538fSMark Fasheh /* 921dcd0538fSMark Fasheh * Create an empty extent record . 922dcd0538fSMark Fasheh * 923dcd0538fSMark Fasheh * l_next_free_rec may be updated. 924dcd0538fSMark Fasheh * 925dcd0538fSMark Fasheh * If an empty extent already exists do nothing. 926dcd0538fSMark Fasheh */ 927dcd0538fSMark Fasheh static void ocfs2_create_empty_extent(struct ocfs2_extent_list *el) 928dcd0538fSMark Fasheh { 929dcd0538fSMark Fasheh int next_free = le16_to_cpu(el->l_next_free_rec); 930dcd0538fSMark Fasheh 931dcd0538fSMark Fasheh if (next_free == 0) 932dcd0538fSMark Fasheh goto set_and_inc; 933dcd0538fSMark Fasheh 934dcd0538fSMark Fasheh if (ocfs2_is_empty_extent(&el->l_recs[0])) 935dcd0538fSMark Fasheh return; 936dcd0538fSMark Fasheh 937dcd0538fSMark Fasheh mlog_bug_on_msg(el->l_count == el->l_next_free_rec, 938dcd0538fSMark Fasheh "Asked to create an empty extent in a full list:\n" 939dcd0538fSMark Fasheh "count = %u, tree depth = %u", 940dcd0538fSMark Fasheh le16_to_cpu(el->l_count), 941dcd0538fSMark Fasheh le16_to_cpu(el->l_tree_depth)); 942dcd0538fSMark Fasheh 943dcd0538fSMark Fasheh ocfs2_shift_records_right(el); 944dcd0538fSMark Fasheh 945dcd0538fSMark Fasheh set_and_inc: 946dcd0538fSMark Fasheh le16_add_cpu(&el->l_next_free_rec, 1); 947dcd0538fSMark Fasheh memset(&el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec)); 948dcd0538fSMark Fasheh } 949dcd0538fSMark Fasheh 950dcd0538fSMark Fasheh /* 951dcd0538fSMark Fasheh * For a rotation which involves two leaf nodes, the "root node" is 952dcd0538fSMark Fasheh * the lowest level tree node which contains a path to both leafs. This 953dcd0538fSMark Fasheh * resulting set of information can be used to form a complete "subtree" 954dcd0538fSMark Fasheh * 955dcd0538fSMark Fasheh * This function is passed two full paths from the dinode down to a 956dcd0538fSMark Fasheh * pair of adjacent leaves. It's task is to figure out which path 957dcd0538fSMark Fasheh * index contains the subtree root - this can be the root index itself 958dcd0538fSMark Fasheh * in a worst-case rotation. 959dcd0538fSMark Fasheh * 960dcd0538fSMark Fasheh * The array index of the subtree root is passed back. 961dcd0538fSMark Fasheh */ 962dcd0538fSMark Fasheh static int ocfs2_find_subtree_root(struct inode *inode, 963dcd0538fSMark Fasheh struct ocfs2_path *left, 964dcd0538fSMark Fasheh struct ocfs2_path *right) 965dcd0538fSMark Fasheh { 966dcd0538fSMark Fasheh int i = 0; 967dcd0538fSMark Fasheh 968dcd0538fSMark Fasheh /* 969dcd0538fSMark Fasheh * Check that the caller passed in two paths from the same tree. 970dcd0538fSMark Fasheh */ 971dcd0538fSMark Fasheh BUG_ON(path_root_bh(left) != path_root_bh(right)); 972dcd0538fSMark Fasheh 973dcd0538fSMark Fasheh do { 974dcd0538fSMark Fasheh i++; 975dcd0538fSMark Fasheh 976dcd0538fSMark Fasheh /* 977dcd0538fSMark Fasheh * The caller didn't pass two adjacent paths. 978dcd0538fSMark Fasheh */ 979dcd0538fSMark Fasheh mlog_bug_on_msg(i > left->p_tree_depth, 980dcd0538fSMark Fasheh "Inode %lu, left depth %u, right depth %u\n" 981dcd0538fSMark Fasheh "left leaf blk %llu, right leaf blk %llu\n", 982dcd0538fSMark Fasheh inode->i_ino, left->p_tree_depth, 983dcd0538fSMark Fasheh right->p_tree_depth, 984dcd0538fSMark Fasheh (unsigned long long)path_leaf_bh(left)->b_blocknr, 985dcd0538fSMark Fasheh (unsigned long long)path_leaf_bh(right)->b_blocknr); 986dcd0538fSMark Fasheh } while (left->p_node[i].bh->b_blocknr == 987dcd0538fSMark Fasheh right->p_node[i].bh->b_blocknr); 988dcd0538fSMark Fasheh 989dcd0538fSMark Fasheh return i - 1; 990dcd0538fSMark Fasheh } 991dcd0538fSMark Fasheh 992dcd0538fSMark Fasheh typedef void (path_insert_t)(void *, struct buffer_head *); 993dcd0538fSMark Fasheh 994dcd0538fSMark Fasheh /* 995dcd0538fSMark Fasheh * Traverse a btree path in search of cpos, starting at root_el. 996dcd0538fSMark Fasheh * 997dcd0538fSMark Fasheh * This code can be called with a cpos larger than the tree, in which 998dcd0538fSMark Fasheh * case it will return the rightmost path. 999dcd0538fSMark Fasheh */ 1000dcd0538fSMark Fasheh static int __ocfs2_find_path(struct inode *inode, 1001dcd0538fSMark Fasheh struct ocfs2_extent_list *root_el, u32 cpos, 1002dcd0538fSMark Fasheh path_insert_t *func, void *data) 1003dcd0538fSMark Fasheh { 1004dcd0538fSMark Fasheh int i, ret = 0; 1005dcd0538fSMark Fasheh u32 range; 1006dcd0538fSMark Fasheh u64 blkno; 1007dcd0538fSMark Fasheh struct buffer_head *bh = NULL; 1008dcd0538fSMark Fasheh struct ocfs2_extent_block *eb; 1009dcd0538fSMark Fasheh struct ocfs2_extent_list *el; 1010dcd0538fSMark Fasheh struct ocfs2_extent_rec *rec; 1011dcd0538fSMark Fasheh struct ocfs2_inode_info *oi = OCFS2_I(inode); 1012dcd0538fSMark Fasheh 1013dcd0538fSMark Fasheh el = root_el; 1014dcd0538fSMark Fasheh while (el->l_tree_depth) { 1015dcd0538fSMark Fasheh if (le16_to_cpu(el->l_next_free_rec) == 0) { 1016dcd0538fSMark Fasheh ocfs2_error(inode->i_sb, 1017dcd0538fSMark Fasheh "Inode %llu has empty extent list at " 1018dcd0538fSMark Fasheh "depth %u\n", 1019dcd0538fSMark Fasheh (unsigned long long)oi->ip_blkno, 1020dcd0538fSMark Fasheh le16_to_cpu(el->l_tree_depth)); 1021dcd0538fSMark Fasheh ret = -EROFS; 1022dcd0538fSMark Fasheh goto out; 1023dcd0538fSMark Fasheh 1024dcd0538fSMark Fasheh } 1025dcd0538fSMark Fasheh 1026dcd0538fSMark Fasheh for(i = 0; i < le16_to_cpu(el->l_next_free_rec) - 1; i++) { 1027dcd0538fSMark Fasheh rec = &el->l_recs[i]; 1028dcd0538fSMark Fasheh 1029dcd0538fSMark Fasheh /* 1030dcd0538fSMark Fasheh * In the case that cpos is off the allocation 1031dcd0538fSMark Fasheh * tree, this should just wind up returning the 1032dcd0538fSMark Fasheh * rightmost record. 1033dcd0538fSMark Fasheh */ 1034dcd0538fSMark Fasheh range = le32_to_cpu(rec->e_cpos) + 1035dcd0538fSMark Fasheh le32_to_cpu(rec->e_clusters); 1036dcd0538fSMark Fasheh if (cpos >= le32_to_cpu(rec->e_cpos) && cpos < range) 1037dcd0538fSMark Fasheh break; 1038dcd0538fSMark Fasheh } 1039dcd0538fSMark Fasheh 1040dcd0538fSMark Fasheh blkno = le64_to_cpu(el->l_recs[i].e_blkno); 1041dcd0538fSMark Fasheh if (blkno == 0) { 1042dcd0538fSMark Fasheh ocfs2_error(inode->i_sb, 1043dcd0538fSMark Fasheh "Inode %llu has bad blkno in extent list " 1044dcd0538fSMark Fasheh "at depth %u (index %d)\n", 1045dcd0538fSMark Fasheh (unsigned long long)oi->ip_blkno, 1046dcd0538fSMark Fasheh le16_to_cpu(el->l_tree_depth), i); 1047dcd0538fSMark Fasheh ret = -EROFS; 1048dcd0538fSMark Fasheh goto out; 1049dcd0538fSMark Fasheh } 1050dcd0538fSMark Fasheh 1051dcd0538fSMark Fasheh brelse(bh); 1052dcd0538fSMark Fasheh bh = NULL; 1053dcd0538fSMark Fasheh ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), blkno, 1054dcd0538fSMark Fasheh &bh, OCFS2_BH_CACHED, inode); 1055dcd0538fSMark Fasheh if (ret) { 1056dcd0538fSMark Fasheh mlog_errno(ret); 1057dcd0538fSMark Fasheh goto out; 1058dcd0538fSMark Fasheh } 1059dcd0538fSMark Fasheh 1060dcd0538fSMark Fasheh eb = (struct ocfs2_extent_block *) bh->b_data; 1061dcd0538fSMark Fasheh el = &eb->h_list; 1062dcd0538fSMark Fasheh if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { 1063dcd0538fSMark Fasheh OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); 1064dcd0538fSMark Fasheh ret = -EIO; 1065dcd0538fSMark Fasheh goto out; 1066dcd0538fSMark Fasheh } 1067dcd0538fSMark Fasheh 1068dcd0538fSMark Fasheh if (le16_to_cpu(el->l_next_free_rec) > 1069dcd0538fSMark Fasheh le16_to_cpu(el->l_count)) { 1070dcd0538fSMark Fasheh ocfs2_error(inode->i_sb, 1071dcd0538fSMark Fasheh "Inode %llu has bad count in extent list " 1072dcd0538fSMark Fasheh "at block %llu (next free=%u, count=%u)\n", 1073dcd0538fSMark Fasheh (unsigned long long)oi->ip_blkno, 1074dcd0538fSMark Fasheh (unsigned long long)bh->b_blocknr, 1075dcd0538fSMark Fasheh le16_to_cpu(el->l_next_free_rec), 1076dcd0538fSMark Fasheh le16_to_cpu(el->l_count)); 1077dcd0538fSMark Fasheh ret = -EROFS; 1078dcd0538fSMark Fasheh goto out; 1079dcd0538fSMark Fasheh } 1080dcd0538fSMark Fasheh 1081dcd0538fSMark Fasheh if (func) 1082dcd0538fSMark Fasheh func(data, bh); 1083dcd0538fSMark Fasheh } 1084dcd0538fSMark Fasheh 1085dcd0538fSMark Fasheh out: 1086dcd0538fSMark Fasheh /* 1087dcd0538fSMark Fasheh * Catch any trailing bh that the loop didn't handle. 1088dcd0538fSMark Fasheh */ 1089dcd0538fSMark Fasheh brelse(bh); 1090dcd0538fSMark Fasheh 1091dcd0538fSMark Fasheh return ret; 1092dcd0538fSMark Fasheh } 1093dcd0538fSMark Fasheh 1094dcd0538fSMark Fasheh /* 1095dcd0538fSMark Fasheh * Given an initialized path (that is, it has a valid root extent 1096dcd0538fSMark Fasheh * list), this function will traverse the btree in search of the path 1097dcd0538fSMark Fasheh * which would contain cpos. 1098dcd0538fSMark Fasheh * 1099dcd0538fSMark Fasheh * The path traveled is recorded in the path structure. 1100dcd0538fSMark Fasheh * 1101dcd0538fSMark Fasheh * Note that this will not do any comparisons on leaf node extent 1102dcd0538fSMark Fasheh * records, so it will work fine in the case that we just added a tree 1103dcd0538fSMark Fasheh * branch. 1104dcd0538fSMark Fasheh */ 1105dcd0538fSMark Fasheh struct find_path_data { 1106dcd0538fSMark Fasheh int index; 1107dcd0538fSMark Fasheh struct ocfs2_path *path; 1108dcd0538fSMark Fasheh }; 1109dcd0538fSMark Fasheh static void find_path_ins(void *data, struct buffer_head *bh) 1110dcd0538fSMark Fasheh { 1111dcd0538fSMark Fasheh struct find_path_data *fp = data; 1112dcd0538fSMark Fasheh 1113dcd0538fSMark Fasheh get_bh(bh); 1114dcd0538fSMark Fasheh ocfs2_path_insert_eb(fp->path, fp->index, bh); 1115dcd0538fSMark Fasheh fp->index++; 1116dcd0538fSMark Fasheh } 1117dcd0538fSMark Fasheh static int ocfs2_find_path(struct inode *inode, struct ocfs2_path *path, 1118dcd0538fSMark Fasheh u32 cpos) 1119dcd0538fSMark Fasheh { 1120dcd0538fSMark Fasheh struct find_path_data data; 1121dcd0538fSMark Fasheh 1122dcd0538fSMark Fasheh data.index = 1; 1123dcd0538fSMark Fasheh data.path = path; 1124dcd0538fSMark Fasheh return __ocfs2_find_path(inode, path_root_el(path), cpos, 1125dcd0538fSMark Fasheh find_path_ins, &data); 1126dcd0538fSMark Fasheh } 1127dcd0538fSMark Fasheh 1128dcd0538fSMark Fasheh static void find_leaf_ins(void *data, struct buffer_head *bh) 1129dcd0538fSMark Fasheh { 1130dcd0538fSMark Fasheh struct ocfs2_extent_block *eb =(struct ocfs2_extent_block *)bh->b_data; 1131dcd0538fSMark Fasheh struct ocfs2_extent_list *el = &eb->h_list; 1132dcd0538fSMark Fasheh struct buffer_head **ret = data; 1133dcd0538fSMark Fasheh 1134dcd0538fSMark Fasheh /* We want to retain only the leaf block. */ 1135dcd0538fSMark Fasheh if (le16_to_cpu(el->l_tree_depth) == 0) { 1136dcd0538fSMark Fasheh get_bh(bh); 1137dcd0538fSMark Fasheh *ret = bh; 1138dcd0538fSMark Fasheh } 1139dcd0538fSMark Fasheh } 1140dcd0538fSMark Fasheh /* 1141dcd0538fSMark Fasheh * Find the leaf block in the tree which would contain cpos. No 1142dcd0538fSMark Fasheh * checking of the actual leaf is done. 1143dcd0538fSMark Fasheh * 1144dcd0538fSMark Fasheh * Some paths want to call this instead of allocating a path structure 1145dcd0538fSMark Fasheh * and calling ocfs2_find_path(). 1146dcd0538fSMark Fasheh * 1147dcd0538fSMark Fasheh * This function doesn't handle non btree extent lists. 1148dcd0538fSMark Fasheh */ 1149363041a5SMark Fasheh int ocfs2_find_leaf(struct inode *inode, struct ocfs2_extent_list *root_el, 1150363041a5SMark Fasheh u32 cpos, struct buffer_head **leaf_bh) 1151dcd0538fSMark Fasheh { 1152dcd0538fSMark Fasheh int ret; 1153dcd0538fSMark Fasheh struct buffer_head *bh = NULL; 1154dcd0538fSMark Fasheh 1155dcd0538fSMark Fasheh ret = __ocfs2_find_path(inode, root_el, cpos, find_leaf_ins, &bh); 1156dcd0538fSMark Fasheh if (ret) { 1157dcd0538fSMark Fasheh mlog_errno(ret); 1158dcd0538fSMark Fasheh goto out; 1159dcd0538fSMark Fasheh } 1160dcd0538fSMark Fasheh 1161dcd0538fSMark Fasheh *leaf_bh = bh; 1162dcd0538fSMark Fasheh out: 1163dcd0538fSMark Fasheh return ret; 1164dcd0538fSMark Fasheh } 1165dcd0538fSMark Fasheh 1166dcd0538fSMark Fasheh /* 1167dcd0538fSMark Fasheh * Adjust the adjacent records (left_rec, right_rec) involved in a rotation. 1168dcd0538fSMark Fasheh * 1169dcd0538fSMark Fasheh * Basically, we've moved stuff around at the bottom of the tree and 1170dcd0538fSMark Fasheh * we need to fix up the extent records above the changes to reflect 1171dcd0538fSMark Fasheh * the new changes. 1172dcd0538fSMark Fasheh * 1173dcd0538fSMark Fasheh * left_rec: the record on the left. 1174dcd0538fSMark Fasheh * left_child_el: is the child list pointed to by left_rec 1175dcd0538fSMark Fasheh * right_rec: the record to the right of left_rec 1176dcd0538fSMark Fasheh * right_child_el: is the child list pointed to by right_rec 1177dcd0538fSMark Fasheh * 1178dcd0538fSMark Fasheh * By definition, this only works on interior nodes. 1179dcd0538fSMark Fasheh */ 1180dcd0538fSMark Fasheh static void ocfs2_adjust_adjacent_records(struct ocfs2_extent_rec *left_rec, 1181dcd0538fSMark Fasheh struct ocfs2_extent_list *left_child_el, 1182dcd0538fSMark Fasheh struct ocfs2_extent_rec *right_rec, 1183dcd0538fSMark Fasheh struct ocfs2_extent_list *right_child_el) 1184dcd0538fSMark Fasheh { 1185dcd0538fSMark Fasheh u32 left_clusters, right_end; 1186dcd0538fSMark Fasheh 1187dcd0538fSMark Fasheh /* 1188dcd0538fSMark Fasheh * Interior nodes never have holes. Their cpos is the cpos of 1189dcd0538fSMark Fasheh * the leftmost record in their child list. Their cluster 1190dcd0538fSMark Fasheh * count covers the full theoretical range of their child list 1191dcd0538fSMark Fasheh * - the range between their cpos and the cpos of the record 1192dcd0538fSMark Fasheh * immediately to their right. 1193dcd0538fSMark Fasheh */ 1194dcd0538fSMark Fasheh left_clusters = le32_to_cpu(right_child_el->l_recs[0].e_cpos); 1195dcd0538fSMark Fasheh left_clusters -= le32_to_cpu(left_rec->e_cpos); 1196dcd0538fSMark Fasheh left_rec->e_clusters = cpu_to_le32(left_clusters); 1197dcd0538fSMark Fasheh 1198dcd0538fSMark Fasheh /* 1199dcd0538fSMark Fasheh * Calculate the rightmost cluster count boundary before 1200dcd0538fSMark Fasheh * moving cpos - we will need to adjust e_clusters after 1201dcd0538fSMark Fasheh * updating e_cpos to keep the same highest cluster count. 1202dcd0538fSMark Fasheh */ 1203dcd0538fSMark Fasheh right_end = le32_to_cpu(right_rec->e_cpos); 1204dcd0538fSMark Fasheh right_end += le32_to_cpu(right_rec->e_clusters); 1205dcd0538fSMark Fasheh 1206dcd0538fSMark Fasheh right_rec->e_cpos = left_rec->e_cpos; 1207dcd0538fSMark Fasheh le32_add_cpu(&right_rec->e_cpos, left_clusters); 1208dcd0538fSMark Fasheh 1209dcd0538fSMark Fasheh right_end -= le32_to_cpu(right_rec->e_cpos); 1210dcd0538fSMark Fasheh right_rec->e_clusters = cpu_to_le32(right_end); 1211dcd0538fSMark Fasheh } 1212dcd0538fSMark Fasheh 1213dcd0538fSMark Fasheh /* 1214dcd0538fSMark Fasheh * Adjust the adjacent root node records involved in a 1215dcd0538fSMark Fasheh * rotation. left_el_blkno is passed in as a key so that we can easily 1216dcd0538fSMark Fasheh * find it's index in the root list. 1217dcd0538fSMark Fasheh */ 1218dcd0538fSMark Fasheh static void ocfs2_adjust_root_records(struct ocfs2_extent_list *root_el, 1219dcd0538fSMark Fasheh struct ocfs2_extent_list *left_el, 1220dcd0538fSMark Fasheh struct ocfs2_extent_list *right_el, 1221dcd0538fSMark Fasheh u64 left_el_blkno) 1222dcd0538fSMark Fasheh { 1223dcd0538fSMark Fasheh int i; 1224dcd0538fSMark Fasheh 1225dcd0538fSMark Fasheh BUG_ON(le16_to_cpu(root_el->l_tree_depth) <= 1226dcd0538fSMark Fasheh le16_to_cpu(left_el->l_tree_depth)); 1227dcd0538fSMark Fasheh 1228dcd0538fSMark Fasheh for(i = 0; i < le16_to_cpu(root_el->l_next_free_rec) - 1; i++) { 1229dcd0538fSMark Fasheh if (le64_to_cpu(root_el->l_recs[i].e_blkno) == left_el_blkno) 1230dcd0538fSMark Fasheh break; 1231dcd0538fSMark Fasheh } 1232dcd0538fSMark Fasheh 1233dcd0538fSMark Fasheh /* 1234dcd0538fSMark Fasheh * The path walking code should have never returned a root and 1235dcd0538fSMark Fasheh * two paths which are not adjacent. 1236dcd0538fSMark Fasheh */ 1237dcd0538fSMark Fasheh BUG_ON(i >= (le16_to_cpu(root_el->l_next_free_rec) - 1)); 1238dcd0538fSMark Fasheh 1239dcd0538fSMark Fasheh ocfs2_adjust_adjacent_records(&root_el->l_recs[i], left_el, 1240dcd0538fSMark Fasheh &root_el->l_recs[i + 1], right_el); 1241dcd0538fSMark Fasheh } 1242dcd0538fSMark Fasheh 1243dcd0538fSMark Fasheh /* 1244dcd0538fSMark Fasheh * We've changed a leaf block (in right_path) and need to reflect that 1245dcd0538fSMark Fasheh * change back up the subtree. 1246dcd0538fSMark Fasheh * 1247dcd0538fSMark Fasheh * This happens in multiple places: 1248dcd0538fSMark Fasheh * - When we've moved an extent record from the left path leaf to the right 1249dcd0538fSMark Fasheh * path leaf to make room for an empty extent in the left path leaf. 1250dcd0538fSMark Fasheh * - When our insert into the right path leaf is at the leftmost edge 1251dcd0538fSMark Fasheh * and requires an update of the path immediately to it's left. This 1252dcd0538fSMark Fasheh * can occur at the end of some types of rotation and appending inserts. 1253dcd0538fSMark Fasheh */ 1254dcd0538fSMark Fasheh static void ocfs2_complete_edge_insert(struct inode *inode, handle_t *handle, 1255dcd0538fSMark Fasheh struct ocfs2_path *left_path, 1256dcd0538fSMark Fasheh struct ocfs2_path *right_path, 1257dcd0538fSMark Fasheh int subtree_index) 1258dcd0538fSMark Fasheh { 1259dcd0538fSMark Fasheh int ret, i, idx; 1260dcd0538fSMark Fasheh struct ocfs2_extent_list *el, *left_el, *right_el; 1261dcd0538fSMark Fasheh struct ocfs2_extent_rec *left_rec, *right_rec; 1262dcd0538fSMark Fasheh struct buffer_head *root_bh = left_path->p_node[subtree_index].bh; 1263dcd0538fSMark Fasheh 1264dcd0538fSMark Fasheh /* 1265dcd0538fSMark Fasheh * Update the counts and position values within all the 1266dcd0538fSMark Fasheh * interior nodes to reflect the leaf rotation we just did. 1267dcd0538fSMark Fasheh * 1268dcd0538fSMark Fasheh * The root node is handled below the loop. 1269dcd0538fSMark Fasheh * 1270dcd0538fSMark Fasheh * We begin the loop with right_el and left_el pointing to the 1271dcd0538fSMark Fasheh * leaf lists and work our way up. 1272dcd0538fSMark Fasheh * 1273dcd0538fSMark Fasheh * NOTE: within this loop, left_el and right_el always refer 1274dcd0538fSMark Fasheh * to the *child* lists. 1275dcd0538fSMark Fasheh */ 1276dcd0538fSMark Fasheh left_el = path_leaf_el(left_path); 1277dcd0538fSMark Fasheh right_el = path_leaf_el(right_path); 1278dcd0538fSMark Fasheh for(i = left_path->p_tree_depth - 1; i > subtree_index; i--) { 1279dcd0538fSMark Fasheh mlog(0, "Adjust records at index %u\n", i); 1280dcd0538fSMark Fasheh 1281dcd0538fSMark Fasheh /* 1282dcd0538fSMark Fasheh * One nice property of knowing that all of these 1283dcd0538fSMark Fasheh * nodes are below the root is that we only deal with 1284dcd0538fSMark Fasheh * the leftmost right node record and the rightmost 1285dcd0538fSMark Fasheh * left node record. 1286dcd0538fSMark Fasheh */ 1287dcd0538fSMark Fasheh el = left_path->p_node[i].el; 1288dcd0538fSMark Fasheh idx = le16_to_cpu(left_el->l_next_free_rec) - 1; 1289dcd0538fSMark Fasheh left_rec = &el->l_recs[idx]; 1290dcd0538fSMark Fasheh 1291dcd0538fSMark Fasheh el = right_path->p_node[i].el; 1292dcd0538fSMark Fasheh right_rec = &el->l_recs[0]; 1293dcd0538fSMark Fasheh 1294dcd0538fSMark Fasheh ocfs2_adjust_adjacent_records(left_rec, left_el, right_rec, 1295dcd0538fSMark Fasheh right_el); 1296dcd0538fSMark Fasheh 1297dcd0538fSMark Fasheh ret = ocfs2_journal_dirty(handle, left_path->p_node[i].bh); 1298dcd0538fSMark Fasheh if (ret) 1299dcd0538fSMark Fasheh mlog_errno(ret); 1300dcd0538fSMark Fasheh 1301dcd0538fSMark Fasheh ret = ocfs2_journal_dirty(handle, right_path->p_node[i].bh); 1302dcd0538fSMark Fasheh if (ret) 1303dcd0538fSMark Fasheh mlog_errno(ret); 1304dcd0538fSMark Fasheh 1305dcd0538fSMark Fasheh /* 1306dcd0538fSMark Fasheh * Setup our list pointers now so that the current 1307dcd0538fSMark Fasheh * parents become children in the next iteration. 1308dcd0538fSMark Fasheh */ 1309dcd0538fSMark Fasheh left_el = left_path->p_node[i].el; 1310dcd0538fSMark Fasheh right_el = right_path->p_node[i].el; 1311dcd0538fSMark Fasheh } 1312dcd0538fSMark Fasheh 1313dcd0538fSMark Fasheh /* 1314dcd0538fSMark Fasheh * At the root node, adjust the two adjacent records which 1315dcd0538fSMark Fasheh * begin our path to the leaves. 1316dcd0538fSMark Fasheh */ 1317dcd0538fSMark Fasheh 1318dcd0538fSMark Fasheh el = left_path->p_node[subtree_index].el; 1319dcd0538fSMark Fasheh left_el = left_path->p_node[subtree_index + 1].el; 1320dcd0538fSMark Fasheh right_el = right_path->p_node[subtree_index + 1].el; 1321dcd0538fSMark Fasheh 1322dcd0538fSMark Fasheh ocfs2_adjust_root_records(el, left_el, right_el, 1323dcd0538fSMark Fasheh left_path->p_node[subtree_index + 1].bh->b_blocknr); 1324dcd0538fSMark Fasheh 1325dcd0538fSMark Fasheh root_bh = left_path->p_node[subtree_index].bh; 1326dcd0538fSMark Fasheh 1327dcd0538fSMark Fasheh ret = ocfs2_journal_dirty(handle, root_bh); 1328dcd0538fSMark Fasheh if (ret) 1329dcd0538fSMark Fasheh mlog_errno(ret); 1330dcd0538fSMark Fasheh } 1331dcd0538fSMark Fasheh 1332dcd0538fSMark Fasheh static int ocfs2_rotate_subtree_right(struct inode *inode, 1333dcd0538fSMark Fasheh handle_t *handle, 1334dcd0538fSMark Fasheh struct ocfs2_path *left_path, 1335dcd0538fSMark Fasheh struct ocfs2_path *right_path, 1336dcd0538fSMark Fasheh int subtree_index) 1337dcd0538fSMark Fasheh { 1338dcd0538fSMark Fasheh int ret, i; 1339dcd0538fSMark Fasheh struct buffer_head *right_leaf_bh; 1340dcd0538fSMark Fasheh struct buffer_head *left_leaf_bh = NULL; 1341dcd0538fSMark Fasheh struct buffer_head *root_bh; 1342dcd0538fSMark Fasheh struct ocfs2_extent_list *right_el, *left_el; 1343dcd0538fSMark Fasheh struct ocfs2_extent_rec move_rec; 1344dcd0538fSMark Fasheh 1345dcd0538fSMark Fasheh left_leaf_bh = path_leaf_bh(left_path); 1346dcd0538fSMark Fasheh left_el = path_leaf_el(left_path); 1347dcd0538fSMark Fasheh 1348dcd0538fSMark Fasheh if (left_el->l_next_free_rec != left_el->l_count) { 1349dcd0538fSMark Fasheh ocfs2_error(inode->i_sb, 1350dcd0538fSMark Fasheh "Inode %llu has non-full interior leaf node %llu" 1351dcd0538fSMark Fasheh "(next free = %u)", 1352dcd0538fSMark Fasheh (unsigned long long)OCFS2_I(inode)->ip_blkno, 1353dcd0538fSMark Fasheh (unsigned long long)left_leaf_bh->b_blocknr, 1354dcd0538fSMark Fasheh le16_to_cpu(left_el->l_next_free_rec)); 1355dcd0538fSMark Fasheh return -EROFS; 1356dcd0538fSMark Fasheh } 1357dcd0538fSMark Fasheh 1358dcd0538fSMark Fasheh /* 1359dcd0538fSMark Fasheh * This extent block may already have an empty record, so we 1360dcd0538fSMark Fasheh * return early if so. 1361dcd0538fSMark Fasheh */ 1362dcd0538fSMark Fasheh if (ocfs2_is_empty_extent(&left_el->l_recs[0])) 1363dcd0538fSMark Fasheh return 0; 1364dcd0538fSMark Fasheh 1365dcd0538fSMark Fasheh root_bh = left_path->p_node[subtree_index].bh; 1366dcd0538fSMark Fasheh BUG_ON(root_bh != right_path->p_node[subtree_index].bh); 1367dcd0538fSMark Fasheh 1368dcd0538fSMark Fasheh ret = ocfs2_journal_access(handle, inode, root_bh, 1369dcd0538fSMark Fasheh OCFS2_JOURNAL_ACCESS_WRITE); 1370dcd0538fSMark Fasheh if (ret) { 1371dcd0538fSMark Fasheh mlog_errno(ret); 1372dcd0538fSMark Fasheh goto out; 1373dcd0538fSMark Fasheh } 1374dcd0538fSMark Fasheh 1375dcd0538fSMark Fasheh for(i = subtree_index + 1; i < path_num_items(right_path); i++) { 1376dcd0538fSMark Fasheh ret = ocfs2_journal_access(handle, inode, 1377dcd0538fSMark Fasheh right_path->p_node[i].bh, 1378dcd0538fSMark Fasheh OCFS2_JOURNAL_ACCESS_WRITE); 1379dcd0538fSMark Fasheh if (ret) { 1380dcd0538fSMark Fasheh mlog_errno(ret); 1381dcd0538fSMark Fasheh goto out; 1382dcd0538fSMark Fasheh } 1383dcd0538fSMark Fasheh 1384dcd0538fSMark Fasheh ret = ocfs2_journal_access(handle, inode, 1385dcd0538fSMark Fasheh left_path->p_node[i].bh, 1386dcd0538fSMark Fasheh OCFS2_JOURNAL_ACCESS_WRITE); 1387dcd0538fSMark Fasheh if (ret) { 1388dcd0538fSMark Fasheh mlog_errno(ret); 1389dcd0538fSMark Fasheh goto out; 1390dcd0538fSMark Fasheh } 1391dcd0538fSMark Fasheh } 1392dcd0538fSMark Fasheh 1393dcd0538fSMark Fasheh right_leaf_bh = path_leaf_bh(right_path); 1394dcd0538fSMark Fasheh right_el = path_leaf_el(right_path); 1395dcd0538fSMark Fasheh 1396dcd0538fSMark Fasheh /* This is a code error, not a disk corruption. */ 1397dcd0538fSMark Fasheh mlog_bug_on_msg(!right_el->l_next_free_rec, "Inode %llu: Rotate fails " 1398dcd0538fSMark Fasheh "because rightmost leaf block %llu is empty\n", 1399dcd0538fSMark Fasheh (unsigned long long)OCFS2_I(inode)->ip_blkno, 1400dcd0538fSMark Fasheh (unsigned long long)right_leaf_bh->b_blocknr); 1401dcd0538fSMark Fasheh 1402dcd0538fSMark Fasheh ocfs2_create_empty_extent(right_el); 1403dcd0538fSMark Fasheh 1404dcd0538fSMark Fasheh ret = ocfs2_journal_dirty(handle, right_leaf_bh); 1405dcd0538fSMark Fasheh if (ret) { 1406dcd0538fSMark Fasheh mlog_errno(ret); 1407dcd0538fSMark Fasheh goto out; 1408dcd0538fSMark Fasheh } 1409dcd0538fSMark Fasheh 1410dcd0538fSMark Fasheh /* Do the copy now. */ 1411dcd0538fSMark Fasheh i = le16_to_cpu(left_el->l_next_free_rec) - 1; 1412dcd0538fSMark Fasheh move_rec = left_el->l_recs[i]; 1413dcd0538fSMark Fasheh right_el->l_recs[0] = move_rec; 1414dcd0538fSMark Fasheh 1415dcd0538fSMark Fasheh /* 1416dcd0538fSMark Fasheh * Clear out the record we just copied and shift everything 1417dcd0538fSMark Fasheh * over, leaving an empty extent in the left leaf. 1418dcd0538fSMark Fasheh * 1419dcd0538fSMark Fasheh * We temporarily subtract from next_free_rec so that the 1420dcd0538fSMark Fasheh * shift will lose the tail record (which is now defunct). 1421dcd0538fSMark Fasheh */ 1422dcd0538fSMark Fasheh le16_add_cpu(&left_el->l_next_free_rec, -1); 1423dcd0538fSMark Fasheh ocfs2_shift_records_right(left_el); 1424dcd0538fSMark Fasheh memset(&left_el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec)); 1425dcd0538fSMark Fasheh le16_add_cpu(&left_el->l_next_free_rec, 1); 1426dcd0538fSMark Fasheh 1427dcd0538fSMark Fasheh ret = ocfs2_journal_dirty(handle, left_leaf_bh); 1428dcd0538fSMark Fasheh if (ret) { 1429dcd0538fSMark Fasheh mlog_errno(ret); 1430dcd0538fSMark Fasheh goto out; 1431dcd0538fSMark Fasheh } 1432dcd0538fSMark Fasheh 1433dcd0538fSMark Fasheh ocfs2_complete_edge_insert(inode, handle, left_path, right_path, 1434dcd0538fSMark Fasheh subtree_index); 1435dcd0538fSMark Fasheh 1436dcd0538fSMark Fasheh out: 1437dcd0538fSMark Fasheh return ret; 1438dcd0538fSMark Fasheh } 1439dcd0538fSMark Fasheh 1440dcd0538fSMark Fasheh /* 1441dcd0538fSMark Fasheh * Given a full path, determine what cpos value would return us a path 1442dcd0538fSMark Fasheh * containing the leaf immediately to the left of the current one. 1443dcd0538fSMark Fasheh * 1444dcd0538fSMark Fasheh * Will return zero if the path passed in is already the leftmost path. 1445dcd0538fSMark Fasheh */ 1446dcd0538fSMark Fasheh static int ocfs2_find_cpos_for_left_leaf(struct super_block *sb, 1447dcd0538fSMark Fasheh struct ocfs2_path *path, u32 *cpos) 1448dcd0538fSMark Fasheh { 1449dcd0538fSMark Fasheh int i, j, ret = 0; 1450dcd0538fSMark Fasheh u64 blkno; 1451dcd0538fSMark Fasheh struct ocfs2_extent_list *el; 1452dcd0538fSMark Fasheh 1453dcd0538fSMark Fasheh *cpos = 0; 1454dcd0538fSMark Fasheh 1455dcd0538fSMark Fasheh blkno = path_leaf_bh(path)->b_blocknr; 1456dcd0538fSMark Fasheh 1457dcd0538fSMark Fasheh /* Start at the tree node just above the leaf and work our way up. */ 1458dcd0538fSMark Fasheh i = path->p_tree_depth - 1; 1459dcd0538fSMark Fasheh while (i >= 0) { 1460dcd0538fSMark Fasheh el = path->p_node[i].el; 1461dcd0538fSMark Fasheh 1462dcd0538fSMark Fasheh /* 1463dcd0538fSMark Fasheh * Find the extent record just before the one in our 1464dcd0538fSMark Fasheh * path. 1465dcd0538fSMark Fasheh */ 1466dcd0538fSMark Fasheh for(j = 0; j < le16_to_cpu(el->l_next_free_rec); j++) { 1467dcd0538fSMark Fasheh if (le64_to_cpu(el->l_recs[j].e_blkno) == blkno) { 1468dcd0538fSMark Fasheh if (j == 0) { 1469dcd0538fSMark Fasheh if (i == 0) { 1470dcd0538fSMark Fasheh /* 1471dcd0538fSMark Fasheh * We've determined that the 1472dcd0538fSMark Fasheh * path specified is already 1473dcd0538fSMark Fasheh * the leftmost one - return a 1474dcd0538fSMark Fasheh * cpos of zero. 1475dcd0538fSMark Fasheh */ 1476dcd0538fSMark Fasheh goto out; 1477dcd0538fSMark Fasheh } 1478dcd0538fSMark Fasheh /* 1479dcd0538fSMark Fasheh * The leftmost record points to our 1480dcd0538fSMark Fasheh * leaf - we need to travel up the 1481dcd0538fSMark Fasheh * tree one level. 1482dcd0538fSMark Fasheh */ 1483dcd0538fSMark Fasheh goto next_node; 1484dcd0538fSMark Fasheh } 1485dcd0538fSMark Fasheh 1486dcd0538fSMark Fasheh *cpos = le32_to_cpu(el->l_recs[j - 1].e_cpos); 1487dcd0538fSMark Fasheh *cpos = *cpos + le32_to_cpu(el->l_recs[j - 1].e_clusters) - 1; 1488dcd0538fSMark Fasheh goto out; 1489dcd0538fSMark Fasheh } 1490dcd0538fSMark Fasheh } 1491dcd0538fSMark Fasheh 1492dcd0538fSMark Fasheh /* 1493dcd0538fSMark Fasheh * If we got here, we never found a valid node where 1494dcd0538fSMark Fasheh * the tree indicated one should be. 1495dcd0538fSMark Fasheh */ 1496dcd0538fSMark Fasheh ocfs2_error(sb, 1497dcd0538fSMark Fasheh "Invalid extent tree at extent block %llu\n", 1498dcd0538fSMark Fasheh (unsigned long long)blkno); 1499dcd0538fSMark Fasheh ret = -EROFS; 1500dcd0538fSMark Fasheh goto out; 1501dcd0538fSMark Fasheh 1502dcd0538fSMark Fasheh next_node: 1503dcd0538fSMark Fasheh blkno = path->p_node[i].bh->b_blocknr; 1504dcd0538fSMark Fasheh i--; 1505dcd0538fSMark Fasheh } 1506dcd0538fSMark Fasheh 1507dcd0538fSMark Fasheh out: 1508dcd0538fSMark Fasheh return ret; 1509dcd0538fSMark Fasheh } 1510dcd0538fSMark Fasheh 1511dcd0538fSMark Fasheh static int ocfs2_extend_rotate_transaction(handle_t *handle, int subtree_depth, 1512dcd0538fSMark Fasheh struct ocfs2_path *path) 1513dcd0538fSMark Fasheh { 1514dcd0538fSMark Fasheh int credits = (path->p_tree_depth - subtree_depth) * 2 + 1; 1515dcd0538fSMark Fasheh 1516dcd0538fSMark Fasheh if (handle->h_buffer_credits < credits) 1517dcd0538fSMark Fasheh return ocfs2_extend_trans(handle, credits); 1518dcd0538fSMark Fasheh 1519dcd0538fSMark Fasheh return 0; 1520dcd0538fSMark Fasheh } 1521dcd0538fSMark Fasheh 1522dcd0538fSMark Fasheh /* 1523dcd0538fSMark Fasheh * Trap the case where we're inserting into the theoretical range past 1524dcd0538fSMark Fasheh * the _actual_ left leaf range. Otherwise, we'll rotate a record 1525dcd0538fSMark Fasheh * whose cpos is less than ours into the right leaf. 1526dcd0538fSMark Fasheh * 1527dcd0538fSMark Fasheh * It's only necessary to look at the rightmost record of the left 1528dcd0538fSMark Fasheh * leaf because the logic that calls us should ensure that the 1529dcd0538fSMark Fasheh * theoretical ranges in the path components above the leaves are 1530dcd0538fSMark Fasheh * correct. 1531dcd0538fSMark Fasheh */ 1532dcd0538fSMark Fasheh static int ocfs2_rotate_requires_path_adjustment(struct ocfs2_path *left_path, 1533dcd0538fSMark Fasheh u32 insert_cpos) 1534dcd0538fSMark Fasheh { 1535dcd0538fSMark Fasheh struct ocfs2_extent_list *left_el; 1536dcd0538fSMark Fasheh struct ocfs2_extent_rec *rec; 1537dcd0538fSMark Fasheh int next_free; 1538dcd0538fSMark Fasheh 1539dcd0538fSMark Fasheh left_el = path_leaf_el(left_path); 1540dcd0538fSMark Fasheh next_free = le16_to_cpu(left_el->l_next_free_rec); 1541dcd0538fSMark Fasheh rec = &left_el->l_recs[next_free - 1]; 1542dcd0538fSMark Fasheh 1543dcd0538fSMark Fasheh if (insert_cpos > le32_to_cpu(rec->e_cpos)) 1544dcd0538fSMark Fasheh return 1; 1545dcd0538fSMark Fasheh return 0; 1546dcd0538fSMark Fasheh } 1547dcd0538fSMark Fasheh 1548dcd0538fSMark Fasheh /* 1549dcd0538fSMark Fasheh * Rotate all the records in a btree right one record, starting at insert_cpos. 1550dcd0538fSMark Fasheh * 1551dcd0538fSMark Fasheh * The path to the rightmost leaf should be passed in. 1552dcd0538fSMark Fasheh * 1553dcd0538fSMark Fasheh * The array is assumed to be large enough to hold an entire path (tree depth). 1554dcd0538fSMark Fasheh * 1555dcd0538fSMark Fasheh * Upon succesful return from this function: 1556dcd0538fSMark Fasheh * 1557dcd0538fSMark Fasheh * - The 'right_path' array will contain a path to the leaf block 1558dcd0538fSMark Fasheh * whose range contains e_cpos. 1559dcd0538fSMark Fasheh * - That leaf block will have a single empty extent in list index 0. 1560dcd0538fSMark Fasheh * - In the case that the rotation requires a post-insert update, 1561dcd0538fSMark Fasheh * *ret_left_path will contain a valid path which can be passed to 1562dcd0538fSMark Fasheh * ocfs2_insert_path(). 1563dcd0538fSMark Fasheh */ 1564dcd0538fSMark Fasheh static int ocfs2_rotate_tree_right(struct inode *inode, 1565dcd0538fSMark Fasheh handle_t *handle, 1566dcd0538fSMark Fasheh u32 insert_cpos, 1567dcd0538fSMark Fasheh struct ocfs2_path *right_path, 1568dcd0538fSMark Fasheh struct ocfs2_path **ret_left_path) 1569dcd0538fSMark Fasheh { 1570dcd0538fSMark Fasheh int ret, start; 1571dcd0538fSMark Fasheh u32 cpos; 1572dcd0538fSMark Fasheh struct ocfs2_path *left_path = NULL; 1573dcd0538fSMark Fasheh 1574dcd0538fSMark Fasheh *ret_left_path = NULL; 1575dcd0538fSMark Fasheh 1576dcd0538fSMark Fasheh left_path = ocfs2_new_path(path_root_bh(right_path), 1577dcd0538fSMark Fasheh path_root_el(right_path)); 1578dcd0538fSMark Fasheh if (!left_path) { 1579dcd0538fSMark Fasheh ret = -ENOMEM; 1580dcd0538fSMark Fasheh mlog_errno(ret); 1581dcd0538fSMark Fasheh goto out; 1582dcd0538fSMark Fasheh } 1583dcd0538fSMark Fasheh 1584dcd0538fSMark Fasheh ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, right_path, &cpos); 1585dcd0538fSMark Fasheh if (ret) { 1586dcd0538fSMark Fasheh mlog_errno(ret); 1587dcd0538fSMark Fasheh goto out; 1588dcd0538fSMark Fasheh } 1589dcd0538fSMark Fasheh 1590dcd0538fSMark Fasheh mlog(0, "Insert: %u, first left path cpos: %u\n", insert_cpos, cpos); 1591dcd0538fSMark Fasheh 1592dcd0538fSMark Fasheh /* 1593dcd0538fSMark Fasheh * What we want to do here is: 1594dcd0538fSMark Fasheh * 1595dcd0538fSMark Fasheh * 1) Start with the rightmost path. 1596dcd0538fSMark Fasheh * 1597dcd0538fSMark Fasheh * 2) Determine a path to the leaf block directly to the left 1598dcd0538fSMark Fasheh * of that leaf. 1599dcd0538fSMark Fasheh * 1600dcd0538fSMark Fasheh * 3) Determine the 'subtree root' - the lowest level tree node 1601dcd0538fSMark Fasheh * which contains a path to both leaves. 1602dcd0538fSMark Fasheh * 1603dcd0538fSMark Fasheh * 4) Rotate the subtree. 1604dcd0538fSMark Fasheh * 1605dcd0538fSMark Fasheh * 5) Find the next subtree by considering the left path to be 1606dcd0538fSMark Fasheh * the new right path. 1607dcd0538fSMark Fasheh * 1608dcd0538fSMark Fasheh * The check at the top of this while loop also accepts 1609dcd0538fSMark Fasheh * insert_cpos == cpos because cpos is only a _theoretical_ 1610dcd0538fSMark Fasheh * value to get us the left path - insert_cpos might very well 1611dcd0538fSMark Fasheh * be filling that hole. 1612dcd0538fSMark Fasheh * 1613dcd0538fSMark Fasheh * Stop at a cpos of '0' because we either started at the 1614dcd0538fSMark Fasheh * leftmost branch (i.e., a tree with one branch and a 1615dcd0538fSMark Fasheh * rotation inside of it), or we've gone as far as we can in 1616dcd0538fSMark Fasheh * rotating subtrees. 1617dcd0538fSMark Fasheh */ 1618dcd0538fSMark Fasheh while (cpos && insert_cpos <= cpos) { 1619dcd0538fSMark Fasheh mlog(0, "Rotating a tree: ins. cpos: %u, left path cpos: %u\n", 1620dcd0538fSMark Fasheh insert_cpos, cpos); 1621dcd0538fSMark Fasheh 1622dcd0538fSMark Fasheh ret = ocfs2_find_path(inode, left_path, cpos); 1623dcd0538fSMark Fasheh if (ret) { 1624dcd0538fSMark Fasheh mlog_errno(ret); 1625dcd0538fSMark Fasheh goto out; 1626dcd0538fSMark Fasheh } 1627dcd0538fSMark Fasheh 1628dcd0538fSMark Fasheh mlog_bug_on_msg(path_leaf_bh(left_path) == 1629dcd0538fSMark Fasheh path_leaf_bh(right_path), 1630dcd0538fSMark Fasheh "Inode %lu: error during insert of %u " 1631dcd0538fSMark Fasheh "(left path cpos %u) results in two identical " 1632dcd0538fSMark Fasheh "paths ending at %llu\n", 1633dcd0538fSMark Fasheh inode->i_ino, insert_cpos, cpos, 1634dcd0538fSMark Fasheh (unsigned long long) 1635dcd0538fSMark Fasheh path_leaf_bh(left_path)->b_blocknr); 1636dcd0538fSMark Fasheh 1637dcd0538fSMark Fasheh if (ocfs2_rotate_requires_path_adjustment(left_path, 1638dcd0538fSMark Fasheh insert_cpos)) { 1639dcd0538fSMark Fasheh mlog(0, "Path adjustment required\n"); 1640dcd0538fSMark Fasheh 1641dcd0538fSMark Fasheh /* 1642dcd0538fSMark Fasheh * We've rotated the tree as much as we 1643dcd0538fSMark Fasheh * should. The rest is up to 1644dcd0538fSMark Fasheh * ocfs2_insert_path() to complete, after the 1645dcd0538fSMark Fasheh * record insertion. We indicate this 1646dcd0538fSMark Fasheh * situation by returning the left path. 1647dcd0538fSMark Fasheh * 1648dcd0538fSMark Fasheh * The reason we don't adjust the records here 1649dcd0538fSMark Fasheh * before the record insert is that an error 1650dcd0538fSMark Fasheh * later might break the rule where a parent 1651dcd0538fSMark Fasheh * record e_cpos will reflect the actual 1652dcd0538fSMark Fasheh * e_cpos of the 1st nonempty record of the 1653dcd0538fSMark Fasheh * child list. 1654dcd0538fSMark Fasheh */ 1655dcd0538fSMark Fasheh *ret_left_path = left_path; 1656dcd0538fSMark Fasheh goto out_ret_path; 1657dcd0538fSMark Fasheh } 1658dcd0538fSMark Fasheh 1659dcd0538fSMark Fasheh start = ocfs2_find_subtree_root(inode, left_path, right_path); 1660dcd0538fSMark Fasheh 1661dcd0538fSMark Fasheh mlog(0, "Subtree root at index %d (blk %llu, depth %d)\n", 1662dcd0538fSMark Fasheh start, 1663dcd0538fSMark Fasheh (unsigned long long) right_path->p_node[start].bh->b_blocknr, 1664dcd0538fSMark Fasheh right_path->p_tree_depth); 1665dcd0538fSMark Fasheh 1666dcd0538fSMark Fasheh ret = ocfs2_extend_rotate_transaction(handle, start, 1667dcd0538fSMark Fasheh right_path); 1668dcd0538fSMark Fasheh if (ret) { 1669dcd0538fSMark Fasheh mlog_errno(ret); 1670dcd0538fSMark Fasheh goto out; 1671dcd0538fSMark Fasheh } 1672dcd0538fSMark Fasheh 1673dcd0538fSMark Fasheh ret = ocfs2_rotate_subtree_right(inode, handle, left_path, 1674dcd0538fSMark Fasheh right_path, start); 1675dcd0538fSMark Fasheh if (ret) { 1676dcd0538fSMark Fasheh mlog_errno(ret); 1677dcd0538fSMark Fasheh goto out; 1678dcd0538fSMark Fasheh } 1679dcd0538fSMark Fasheh 1680dcd0538fSMark Fasheh /* 1681dcd0538fSMark Fasheh * There is no need to re-read the next right path 1682dcd0538fSMark Fasheh * as we know that it'll be our current left 1683dcd0538fSMark Fasheh * path. Optimize by copying values instead. 1684dcd0538fSMark Fasheh */ 1685dcd0538fSMark Fasheh ocfs2_mv_path(right_path, left_path); 1686dcd0538fSMark Fasheh 1687dcd0538fSMark Fasheh ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, right_path, 1688dcd0538fSMark Fasheh &cpos); 1689dcd0538fSMark Fasheh if (ret) { 1690dcd0538fSMark Fasheh mlog_errno(ret); 1691dcd0538fSMark Fasheh goto out; 1692dcd0538fSMark Fasheh } 1693dcd0538fSMark Fasheh } 1694dcd0538fSMark Fasheh 1695dcd0538fSMark Fasheh out: 1696dcd0538fSMark Fasheh ocfs2_free_path(left_path); 1697dcd0538fSMark Fasheh 1698dcd0538fSMark Fasheh out_ret_path: 1699dcd0538fSMark Fasheh return ret; 1700dcd0538fSMark Fasheh } 1701dcd0538fSMark Fasheh 1702dcd0538fSMark Fasheh /* 1703dcd0538fSMark Fasheh * Do the final bits of extent record insertion at the target leaf 1704dcd0538fSMark Fasheh * list. If this leaf is part of an allocation tree, it is assumed 1705dcd0538fSMark Fasheh * that the tree above has been prepared. 1706dcd0538fSMark Fasheh */ 1707dcd0538fSMark Fasheh static void ocfs2_insert_at_leaf(struct ocfs2_extent_rec *insert_rec, 1708dcd0538fSMark Fasheh struct ocfs2_extent_list *el, 1709dcd0538fSMark Fasheh struct ocfs2_insert_type *insert, 1710dcd0538fSMark Fasheh struct inode *inode) 1711dcd0538fSMark Fasheh { 1712dcd0538fSMark Fasheh int i = insert->ins_contig_index; 1713dcd0538fSMark Fasheh unsigned int range; 1714dcd0538fSMark Fasheh struct ocfs2_extent_rec *rec; 1715dcd0538fSMark Fasheh 1716dcd0538fSMark Fasheh BUG_ON(el->l_tree_depth); 1717dcd0538fSMark Fasheh 1718dcd0538fSMark Fasheh /* 1719dcd0538fSMark Fasheh * Contiguous insert - either left or right. 1720dcd0538fSMark Fasheh */ 1721dcd0538fSMark Fasheh if (insert->ins_contig != CONTIG_NONE) { 1722dcd0538fSMark Fasheh rec = &el->l_recs[i]; 1723dcd0538fSMark Fasheh if (insert->ins_contig == CONTIG_LEFT) { 1724dcd0538fSMark Fasheh rec->e_blkno = insert_rec->e_blkno; 1725dcd0538fSMark Fasheh rec->e_cpos = insert_rec->e_cpos; 1726dcd0538fSMark Fasheh } 1727dcd0538fSMark Fasheh le32_add_cpu(&rec->e_clusters, 1728dcd0538fSMark Fasheh le32_to_cpu(insert_rec->e_clusters)); 1729dcd0538fSMark Fasheh return; 1730dcd0538fSMark Fasheh } 1731dcd0538fSMark Fasheh 1732dcd0538fSMark Fasheh /* 1733dcd0538fSMark Fasheh * Handle insert into an empty leaf. 1734dcd0538fSMark Fasheh */ 1735dcd0538fSMark Fasheh if (le16_to_cpu(el->l_next_free_rec) == 0 || 1736dcd0538fSMark Fasheh ((le16_to_cpu(el->l_next_free_rec) == 1) && 1737dcd0538fSMark Fasheh ocfs2_is_empty_extent(&el->l_recs[0]))) { 1738dcd0538fSMark Fasheh el->l_recs[0] = *insert_rec; 1739dcd0538fSMark Fasheh el->l_next_free_rec = cpu_to_le16(1); 1740dcd0538fSMark Fasheh return; 1741dcd0538fSMark Fasheh } 1742dcd0538fSMark Fasheh 1743dcd0538fSMark Fasheh /* 1744dcd0538fSMark Fasheh * Appending insert. 1745dcd0538fSMark Fasheh */ 1746dcd0538fSMark Fasheh if (insert->ins_appending == APPEND_TAIL) { 1747dcd0538fSMark Fasheh i = le16_to_cpu(el->l_next_free_rec) - 1; 1748dcd0538fSMark Fasheh rec = &el->l_recs[i]; 1749dcd0538fSMark Fasheh range = le32_to_cpu(rec->e_cpos) + le32_to_cpu(rec->e_clusters); 1750dcd0538fSMark Fasheh BUG_ON(le32_to_cpu(insert_rec->e_cpos) < range); 1751dcd0538fSMark Fasheh 1752dcd0538fSMark Fasheh mlog_bug_on_msg(le16_to_cpu(el->l_next_free_rec) >= 1753dcd0538fSMark Fasheh le16_to_cpu(el->l_count), 1754dcd0538fSMark Fasheh "inode %lu, depth %u, count %u, next free %u, " 1755dcd0538fSMark Fasheh "rec.cpos %u, rec.clusters %u, " 1756dcd0538fSMark Fasheh "insert.cpos %u, insert.clusters %u\n", 1757dcd0538fSMark Fasheh inode->i_ino, 1758dcd0538fSMark Fasheh le16_to_cpu(el->l_tree_depth), 1759dcd0538fSMark Fasheh le16_to_cpu(el->l_count), 1760dcd0538fSMark Fasheh le16_to_cpu(el->l_next_free_rec), 1761dcd0538fSMark Fasheh le32_to_cpu(el->l_recs[i].e_cpos), 1762dcd0538fSMark Fasheh le32_to_cpu(el->l_recs[i].e_clusters), 1763dcd0538fSMark Fasheh le32_to_cpu(insert_rec->e_cpos), 1764dcd0538fSMark Fasheh le32_to_cpu(insert_rec->e_clusters)); 1765dcd0538fSMark Fasheh i++; 1766dcd0538fSMark Fasheh el->l_recs[i] = *insert_rec; 1767dcd0538fSMark Fasheh le16_add_cpu(&el->l_next_free_rec, 1); 1768dcd0538fSMark Fasheh return; 1769dcd0538fSMark Fasheh } 1770dcd0538fSMark Fasheh 1771dcd0538fSMark Fasheh /* 1772dcd0538fSMark Fasheh * Ok, we have to rotate. 1773dcd0538fSMark Fasheh * 1774dcd0538fSMark Fasheh * At this point, it is safe to assume that inserting into an 1775dcd0538fSMark Fasheh * empty leaf and appending to a leaf have both been handled 1776dcd0538fSMark Fasheh * above. 1777dcd0538fSMark Fasheh * 1778dcd0538fSMark Fasheh * This leaf needs to have space, either by the empty 1st 1779dcd0538fSMark Fasheh * extent record, or by virtue of an l_next_rec < l_count. 1780dcd0538fSMark Fasheh */ 1781dcd0538fSMark Fasheh ocfs2_rotate_leaf(el, insert_rec); 1782dcd0538fSMark Fasheh } 1783dcd0538fSMark Fasheh 1784dcd0538fSMark Fasheh static inline void ocfs2_update_dinode_clusters(struct inode *inode, 1785dcd0538fSMark Fasheh struct ocfs2_dinode *di, 1786dcd0538fSMark Fasheh u32 clusters) 1787dcd0538fSMark Fasheh { 1788dcd0538fSMark Fasheh le32_add_cpu(&di->i_clusters, clusters); 1789dcd0538fSMark Fasheh spin_lock(&OCFS2_I(inode)->ip_lock); 1790dcd0538fSMark Fasheh OCFS2_I(inode)->ip_clusters = le32_to_cpu(di->i_clusters); 1791dcd0538fSMark Fasheh spin_unlock(&OCFS2_I(inode)->ip_lock); 1792dcd0538fSMark Fasheh } 1793dcd0538fSMark Fasheh 1794dcd0538fSMark Fasheh static int ocfs2_append_rec_to_path(struct inode *inode, handle_t *handle, 1795dcd0538fSMark Fasheh struct ocfs2_extent_rec *insert_rec, 1796dcd0538fSMark Fasheh struct ocfs2_path *right_path, 1797dcd0538fSMark Fasheh struct ocfs2_path **ret_left_path) 1798dcd0538fSMark Fasheh { 1799dcd0538fSMark Fasheh int ret, i, next_free; 1800dcd0538fSMark Fasheh struct buffer_head *bh; 1801dcd0538fSMark Fasheh struct ocfs2_extent_list *el; 1802dcd0538fSMark Fasheh struct ocfs2_path *left_path = NULL; 1803dcd0538fSMark Fasheh 1804dcd0538fSMark Fasheh *ret_left_path = NULL; 1805dcd0538fSMark Fasheh 1806dcd0538fSMark Fasheh /* 1807dcd0538fSMark Fasheh * If our appending insert is at the leftmost edge of a leaf, 1808dcd0538fSMark Fasheh * then we might need to update the rightmost records of the 1809dcd0538fSMark Fasheh * neighboring path. 1810dcd0538fSMark Fasheh */ 1811dcd0538fSMark Fasheh el = path_leaf_el(right_path); 1812dcd0538fSMark Fasheh next_free = le16_to_cpu(el->l_next_free_rec); 1813dcd0538fSMark Fasheh if (next_free == 0 || 1814dcd0538fSMark Fasheh (next_free == 1 && ocfs2_is_empty_extent(&el->l_recs[0]))) { 1815dcd0538fSMark Fasheh u32 left_cpos; 1816dcd0538fSMark Fasheh 1817dcd0538fSMark Fasheh ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, right_path, 1818dcd0538fSMark Fasheh &left_cpos); 1819dcd0538fSMark Fasheh if (ret) { 1820dcd0538fSMark Fasheh mlog_errno(ret); 1821dcd0538fSMark Fasheh goto out; 1822dcd0538fSMark Fasheh } 1823dcd0538fSMark Fasheh 1824dcd0538fSMark Fasheh mlog(0, "Append may need a left path update. cpos: %u, " 1825dcd0538fSMark Fasheh "left_cpos: %u\n", le32_to_cpu(insert_rec->e_cpos), 1826dcd0538fSMark Fasheh left_cpos); 1827dcd0538fSMark Fasheh 1828dcd0538fSMark Fasheh /* 1829dcd0538fSMark Fasheh * No need to worry if the append is already in the 1830dcd0538fSMark Fasheh * leftmost leaf. 1831dcd0538fSMark Fasheh */ 1832dcd0538fSMark Fasheh if (left_cpos) { 1833dcd0538fSMark Fasheh left_path = ocfs2_new_path(path_root_bh(right_path), 1834dcd0538fSMark Fasheh path_root_el(right_path)); 1835dcd0538fSMark Fasheh if (!left_path) { 1836dcd0538fSMark Fasheh ret = -ENOMEM; 1837dcd0538fSMark Fasheh mlog_errno(ret); 1838dcd0538fSMark Fasheh goto out; 1839dcd0538fSMark Fasheh } 1840dcd0538fSMark Fasheh 1841dcd0538fSMark Fasheh ret = ocfs2_find_path(inode, left_path, left_cpos); 1842dcd0538fSMark Fasheh if (ret) { 1843dcd0538fSMark Fasheh mlog_errno(ret); 1844dcd0538fSMark Fasheh goto out; 1845dcd0538fSMark Fasheh } 1846dcd0538fSMark Fasheh 1847dcd0538fSMark Fasheh /* 1848dcd0538fSMark Fasheh * ocfs2_insert_path() will pass the left_path to the 1849dcd0538fSMark Fasheh * journal for us. 1850dcd0538fSMark Fasheh */ 1851dcd0538fSMark Fasheh } 1852dcd0538fSMark Fasheh } 1853dcd0538fSMark Fasheh 1854dcd0538fSMark Fasheh ret = ocfs2_journal_access_path(inode, handle, right_path); 1855dcd0538fSMark Fasheh if (ret) { 1856dcd0538fSMark Fasheh mlog_errno(ret); 1857dcd0538fSMark Fasheh goto out; 1858dcd0538fSMark Fasheh } 1859dcd0538fSMark Fasheh 1860dcd0538fSMark Fasheh el = path_root_el(right_path); 1861dcd0538fSMark Fasheh bh = path_root_bh(right_path); 1862dcd0538fSMark Fasheh i = 0; 1863dcd0538fSMark Fasheh while (1) { 1864dcd0538fSMark Fasheh next_free = le16_to_cpu(el->l_next_free_rec); 1865dcd0538fSMark Fasheh if (next_free == 0) { 1866dcd0538fSMark Fasheh ocfs2_error(inode->i_sb, 1867dcd0538fSMark Fasheh "Dinode %llu has a bad extent list", 1868dcd0538fSMark Fasheh (unsigned long long)OCFS2_I(inode)->ip_blkno); 1869dcd0538fSMark Fasheh ret = -EIO; 1870dcd0538fSMark Fasheh goto out; 1871dcd0538fSMark Fasheh } 1872dcd0538fSMark Fasheh 1873dcd0538fSMark Fasheh el->l_recs[next_free - 1].e_clusters = insert_rec->e_cpos; 1874dcd0538fSMark Fasheh le32_add_cpu(&el->l_recs[next_free - 1].e_clusters, 1875dcd0538fSMark Fasheh le32_to_cpu(insert_rec->e_clusters)); 1876dcd0538fSMark Fasheh le32_add_cpu(&el->l_recs[next_free - 1].e_clusters, 1877dcd0538fSMark Fasheh -le32_to_cpu(el->l_recs[next_free - 1].e_cpos)); 1878dcd0538fSMark Fasheh 1879dcd0538fSMark Fasheh ret = ocfs2_journal_dirty(handle, bh); 1880dcd0538fSMark Fasheh if (ret) 1881dcd0538fSMark Fasheh mlog_errno(ret); 1882dcd0538fSMark Fasheh 1883dcd0538fSMark Fasheh if (++i >= right_path->p_tree_depth) 1884dcd0538fSMark Fasheh break; 1885dcd0538fSMark Fasheh 1886dcd0538fSMark Fasheh bh = right_path->p_node[i].bh; 1887dcd0538fSMark Fasheh el = right_path->p_node[i].el; 1888dcd0538fSMark Fasheh } 1889dcd0538fSMark Fasheh 1890dcd0538fSMark Fasheh *ret_left_path = left_path; 1891dcd0538fSMark Fasheh ret = 0; 1892dcd0538fSMark Fasheh out: 1893dcd0538fSMark Fasheh if (ret != 0) 1894dcd0538fSMark Fasheh ocfs2_free_path(left_path); 1895dcd0538fSMark Fasheh 1896dcd0538fSMark Fasheh return ret; 1897dcd0538fSMark Fasheh } 1898dcd0538fSMark Fasheh 1899dcd0538fSMark Fasheh /* 1900dcd0538fSMark Fasheh * This function only does inserts on an allocation b-tree. For dinode 1901dcd0538fSMark Fasheh * lists, ocfs2_insert_at_leaf() is called directly. 1902dcd0538fSMark Fasheh * 1903dcd0538fSMark Fasheh * right_path is the path we want to do the actual insert 1904dcd0538fSMark Fasheh * in. left_path should only be passed in if we need to update that 1905dcd0538fSMark Fasheh * portion of the tree after an edge insert. 1906dcd0538fSMark Fasheh */ 1907dcd0538fSMark Fasheh static int ocfs2_insert_path(struct inode *inode, 1908dcd0538fSMark Fasheh handle_t *handle, 1909dcd0538fSMark Fasheh struct ocfs2_path *left_path, 1910dcd0538fSMark Fasheh struct ocfs2_path *right_path, 1911dcd0538fSMark Fasheh struct ocfs2_extent_rec *insert_rec, 1912dcd0538fSMark Fasheh struct ocfs2_insert_type *insert) 1913dcd0538fSMark Fasheh { 1914dcd0538fSMark Fasheh int ret, subtree_index; 1915dcd0538fSMark Fasheh struct buffer_head *leaf_bh = path_leaf_bh(right_path); 1916dcd0538fSMark Fasheh struct ocfs2_extent_list *el; 1917dcd0538fSMark Fasheh 1918dcd0538fSMark Fasheh /* 1919dcd0538fSMark Fasheh * Pass both paths to the journal. The majority of inserts 1920dcd0538fSMark Fasheh * will be touching all components anyway. 1921dcd0538fSMark Fasheh */ 1922dcd0538fSMark Fasheh ret = ocfs2_journal_access_path(inode, handle, right_path); 1923dcd0538fSMark Fasheh if (ret < 0) { 1924dcd0538fSMark Fasheh mlog_errno(ret); 1925dcd0538fSMark Fasheh goto out; 1926dcd0538fSMark Fasheh } 1927dcd0538fSMark Fasheh 1928dcd0538fSMark Fasheh if (left_path) { 1929dcd0538fSMark Fasheh int credits = handle->h_buffer_credits; 1930dcd0538fSMark Fasheh 1931dcd0538fSMark Fasheh /* 1932dcd0538fSMark Fasheh * There's a chance that left_path got passed back to 1933dcd0538fSMark Fasheh * us without being accounted for in the 1934dcd0538fSMark Fasheh * journal. Extend our transaction here to be sure we 1935dcd0538fSMark Fasheh * can change those blocks. 1936dcd0538fSMark Fasheh */ 1937dcd0538fSMark Fasheh credits += left_path->p_tree_depth; 1938dcd0538fSMark Fasheh 1939dcd0538fSMark Fasheh ret = ocfs2_extend_trans(handle, credits); 1940dcd0538fSMark Fasheh if (ret < 0) { 1941dcd0538fSMark Fasheh mlog_errno(ret); 1942dcd0538fSMark Fasheh goto out; 1943dcd0538fSMark Fasheh } 1944dcd0538fSMark Fasheh 1945dcd0538fSMark Fasheh ret = ocfs2_journal_access_path(inode, handle, left_path); 1946dcd0538fSMark Fasheh if (ret < 0) { 1947dcd0538fSMark Fasheh mlog_errno(ret); 1948dcd0538fSMark Fasheh goto out; 1949dcd0538fSMark Fasheh } 1950dcd0538fSMark Fasheh } 1951dcd0538fSMark Fasheh 1952dcd0538fSMark Fasheh el = path_leaf_el(right_path); 1953dcd0538fSMark Fasheh 1954dcd0538fSMark Fasheh ocfs2_insert_at_leaf(insert_rec, el, insert, inode); 1955dcd0538fSMark Fasheh ret = ocfs2_journal_dirty(handle, leaf_bh); 1956dcd0538fSMark Fasheh if (ret) 1957dcd0538fSMark Fasheh mlog_errno(ret); 1958dcd0538fSMark Fasheh 1959dcd0538fSMark Fasheh if (left_path) { 1960dcd0538fSMark Fasheh /* 1961dcd0538fSMark Fasheh * The rotate code has indicated that we need to fix 1962dcd0538fSMark Fasheh * up portions of the tree after the insert. 1963dcd0538fSMark Fasheh * 1964dcd0538fSMark Fasheh * XXX: Should we extend the transaction here? 1965dcd0538fSMark Fasheh */ 1966dcd0538fSMark Fasheh subtree_index = ocfs2_find_subtree_root(inode, left_path, 1967dcd0538fSMark Fasheh right_path); 1968dcd0538fSMark Fasheh ocfs2_complete_edge_insert(inode, handle, left_path, 1969dcd0538fSMark Fasheh right_path, subtree_index); 1970dcd0538fSMark Fasheh } 1971dcd0538fSMark Fasheh 1972dcd0538fSMark Fasheh ret = 0; 1973dcd0538fSMark Fasheh out: 1974dcd0538fSMark Fasheh return ret; 1975dcd0538fSMark Fasheh } 1976dcd0538fSMark Fasheh 1977dcd0538fSMark Fasheh static int ocfs2_do_insert_extent(struct inode *inode, 1978dcd0538fSMark Fasheh handle_t *handle, 1979dcd0538fSMark Fasheh struct buffer_head *di_bh, 1980dcd0538fSMark Fasheh struct ocfs2_extent_rec *insert_rec, 1981dcd0538fSMark Fasheh struct ocfs2_insert_type *type) 1982dcd0538fSMark Fasheh { 1983dcd0538fSMark Fasheh int ret, rotate = 0; 1984dcd0538fSMark Fasheh u32 cpos; 1985dcd0538fSMark Fasheh struct ocfs2_path *right_path = NULL; 1986dcd0538fSMark Fasheh struct ocfs2_path *left_path = NULL; 1987dcd0538fSMark Fasheh struct ocfs2_dinode *di; 1988dcd0538fSMark Fasheh struct ocfs2_extent_list *el; 1989dcd0538fSMark Fasheh 1990dcd0538fSMark Fasheh di = (struct ocfs2_dinode *) di_bh->b_data; 1991dcd0538fSMark Fasheh el = &di->id2.i_list; 1992dcd0538fSMark Fasheh 1993dcd0538fSMark Fasheh ret = ocfs2_journal_access(handle, inode, di_bh, 1994dcd0538fSMark Fasheh OCFS2_JOURNAL_ACCESS_WRITE); 1995dcd0538fSMark Fasheh if (ret) { 1996dcd0538fSMark Fasheh mlog_errno(ret); 1997dcd0538fSMark Fasheh goto out; 1998dcd0538fSMark Fasheh } 1999dcd0538fSMark Fasheh 2000dcd0538fSMark Fasheh if (le16_to_cpu(el->l_tree_depth) == 0) { 2001dcd0538fSMark Fasheh ocfs2_insert_at_leaf(insert_rec, el, type, inode); 2002dcd0538fSMark Fasheh goto out_update_clusters; 2003dcd0538fSMark Fasheh } 2004dcd0538fSMark Fasheh 2005dcd0538fSMark Fasheh right_path = ocfs2_new_inode_path(di_bh); 2006dcd0538fSMark Fasheh if (!right_path) { 2007dcd0538fSMark Fasheh ret = -ENOMEM; 2008dcd0538fSMark Fasheh mlog_errno(ret); 2009dcd0538fSMark Fasheh goto out; 2010dcd0538fSMark Fasheh } 2011dcd0538fSMark Fasheh 2012dcd0538fSMark Fasheh /* 2013dcd0538fSMark Fasheh * Determine the path to start with. Rotations need the 2014dcd0538fSMark Fasheh * rightmost path, everything else can go directly to the 2015dcd0538fSMark Fasheh * target leaf. 2016dcd0538fSMark Fasheh */ 2017dcd0538fSMark Fasheh cpos = le32_to_cpu(insert_rec->e_cpos); 2018dcd0538fSMark Fasheh if (type->ins_appending == APPEND_NONE && 2019dcd0538fSMark Fasheh type->ins_contig == CONTIG_NONE) { 2020dcd0538fSMark Fasheh rotate = 1; 2021dcd0538fSMark Fasheh cpos = UINT_MAX; 2022dcd0538fSMark Fasheh } 2023dcd0538fSMark Fasheh 2024dcd0538fSMark Fasheh ret = ocfs2_find_path(inode, right_path, cpos); 2025dcd0538fSMark Fasheh if (ret) { 2026dcd0538fSMark Fasheh mlog_errno(ret); 2027dcd0538fSMark Fasheh goto out; 2028dcd0538fSMark Fasheh } 2029dcd0538fSMark Fasheh 2030dcd0538fSMark Fasheh /* 2031dcd0538fSMark Fasheh * Rotations and appends need special treatment - they modify 2032dcd0538fSMark Fasheh * parts of the tree's above them. 2033dcd0538fSMark Fasheh * 2034dcd0538fSMark Fasheh * Both might pass back a path immediate to the left of the 2035dcd0538fSMark Fasheh * one being inserted to. This will be cause 2036dcd0538fSMark Fasheh * ocfs2_insert_path() to modify the rightmost records of 2037dcd0538fSMark Fasheh * left_path to account for an edge insert. 2038dcd0538fSMark Fasheh * 2039dcd0538fSMark Fasheh * XXX: When modifying this code, keep in mind that an insert 2040dcd0538fSMark Fasheh * can wind up skipping both of these two special cases... 2041dcd0538fSMark Fasheh */ 2042dcd0538fSMark Fasheh if (rotate) { 2043dcd0538fSMark Fasheh ret = ocfs2_rotate_tree_right(inode, handle, 2044dcd0538fSMark Fasheh le32_to_cpu(insert_rec->e_cpos), 2045dcd0538fSMark Fasheh right_path, &left_path); 2046dcd0538fSMark Fasheh if (ret) { 2047dcd0538fSMark Fasheh mlog_errno(ret); 2048dcd0538fSMark Fasheh goto out; 2049dcd0538fSMark Fasheh } 2050dcd0538fSMark Fasheh } else if (type->ins_appending == APPEND_TAIL 2051dcd0538fSMark Fasheh && type->ins_contig != CONTIG_LEFT) { 2052dcd0538fSMark Fasheh ret = ocfs2_append_rec_to_path(inode, handle, insert_rec, 2053dcd0538fSMark Fasheh right_path, &left_path); 2054dcd0538fSMark Fasheh if (ret) { 2055dcd0538fSMark Fasheh mlog_errno(ret); 2056dcd0538fSMark Fasheh goto out; 2057dcd0538fSMark Fasheh } 2058dcd0538fSMark Fasheh } 2059dcd0538fSMark Fasheh 2060dcd0538fSMark Fasheh ret = ocfs2_insert_path(inode, handle, left_path, right_path, 2061dcd0538fSMark Fasheh insert_rec, type); 2062dcd0538fSMark Fasheh if (ret) { 2063dcd0538fSMark Fasheh mlog_errno(ret); 2064dcd0538fSMark Fasheh goto out; 2065dcd0538fSMark Fasheh } 2066dcd0538fSMark Fasheh 2067dcd0538fSMark Fasheh out_update_clusters: 2068dcd0538fSMark Fasheh ocfs2_update_dinode_clusters(inode, di, 2069dcd0538fSMark Fasheh le32_to_cpu(insert_rec->e_clusters)); 2070dcd0538fSMark Fasheh 2071dcd0538fSMark Fasheh ret = ocfs2_journal_dirty(handle, di_bh); 2072dcd0538fSMark Fasheh if (ret) 2073dcd0538fSMark Fasheh mlog_errno(ret); 2074dcd0538fSMark Fasheh 2075dcd0538fSMark Fasheh out: 2076dcd0538fSMark Fasheh ocfs2_free_path(left_path); 2077dcd0538fSMark Fasheh ocfs2_free_path(right_path); 2078dcd0538fSMark Fasheh 2079dcd0538fSMark Fasheh return ret; 2080dcd0538fSMark Fasheh } 2081dcd0538fSMark Fasheh 2082dcd0538fSMark Fasheh static void ocfs2_figure_contig_type(struct inode *inode, 2083dcd0538fSMark Fasheh struct ocfs2_insert_type *insert, 2084dcd0538fSMark Fasheh struct ocfs2_extent_list *el, 2085dcd0538fSMark Fasheh struct ocfs2_extent_rec *insert_rec) 2086dcd0538fSMark Fasheh { 2087dcd0538fSMark Fasheh int i; 2088dcd0538fSMark Fasheh enum ocfs2_contig_type contig_type = CONTIG_NONE; 2089dcd0538fSMark Fasheh 2090dcd0538fSMark Fasheh for(i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) { 2091dcd0538fSMark Fasheh contig_type = ocfs2_extent_contig(inode, &el->l_recs[i], 2092dcd0538fSMark Fasheh insert_rec); 2093dcd0538fSMark Fasheh if (contig_type != CONTIG_NONE) { 2094dcd0538fSMark Fasheh insert->ins_contig_index = i; 2095dcd0538fSMark Fasheh break; 2096dcd0538fSMark Fasheh } 2097dcd0538fSMark Fasheh } 2098dcd0538fSMark Fasheh insert->ins_contig = contig_type; 2099dcd0538fSMark Fasheh } 2100dcd0538fSMark Fasheh 2101dcd0538fSMark Fasheh /* 2102dcd0538fSMark Fasheh * This should only be called against the righmost leaf extent list. 2103dcd0538fSMark Fasheh * 2104dcd0538fSMark Fasheh * ocfs2_figure_appending_type() will figure out whether we'll have to 2105dcd0538fSMark Fasheh * insert at the tail of the rightmost leaf. 2106dcd0538fSMark Fasheh * 2107dcd0538fSMark Fasheh * This should also work against the dinode list for tree's with 0 2108dcd0538fSMark Fasheh * depth. If we consider the dinode list to be the rightmost leaf node 2109dcd0538fSMark Fasheh * then the logic here makes sense. 2110dcd0538fSMark Fasheh */ 2111dcd0538fSMark Fasheh static void ocfs2_figure_appending_type(struct ocfs2_insert_type *insert, 2112dcd0538fSMark Fasheh struct ocfs2_extent_list *el, 2113dcd0538fSMark Fasheh struct ocfs2_extent_rec *insert_rec) 2114dcd0538fSMark Fasheh { 2115dcd0538fSMark Fasheh int i; 2116dcd0538fSMark Fasheh u32 cpos = le32_to_cpu(insert_rec->e_cpos); 2117dcd0538fSMark Fasheh struct ocfs2_extent_rec *rec; 2118dcd0538fSMark Fasheh 2119dcd0538fSMark Fasheh insert->ins_appending = APPEND_NONE; 2120dcd0538fSMark Fasheh 2121dcd0538fSMark Fasheh BUG_ON(el->l_tree_depth); 2122dcd0538fSMark Fasheh 2123dcd0538fSMark Fasheh if (!el->l_next_free_rec) 2124dcd0538fSMark Fasheh goto set_tail_append; 2125dcd0538fSMark Fasheh 2126dcd0538fSMark Fasheh if (ocfs2_is_empty_extent(&el->l_recs[0])) { 2127dcd0538fSMark Fasheh /* Were all records empty? */ 2128dcd0538fSMark Fasheh if (le16_to_cpu(el->l_next_free_rec) == 1) 2129dcd0538fSMark Fasheh goto set_tail_append; 2130dcd0538fSMark Fasheh } 2131dcd0538fSMark Fasheh 2132dcd0538fSMark Fasheh i = le16_to_cpu(el->l_next_free_rec) - 1; 2133dcd0538fSMark Fasheh rec = &el->l_recs[i]; 2134dcd0538fSMark Fasheh 2135dcd0538fSMark Fasheh if (cpos >= (le32_to_cpu(rec->e_cpos) + le32_to_cpu(rec->e_clusters))) 2136dcd0538fSMark Fasheh goto set_tail_append; 2137dcd0538fSMark Fasheh 2138dcd0538fSMark Fasheh return; 2139dcd0538fSMark Fasheh 2140dcd0538fSMark Fasheh set_tail_append: 2141dcd0538fSMark Fasheh insert->ins_appending = APPEND_TAIL; 2142dcd0538fSMark Fasheh } 2143dcd0538fSMark Fasheh 2144dcd0538fSMark Fasheh /* 2145dcd0538fSMark Fasheh * Helper function called at the begining of an insert. 2146dcd0538fSMark Fasheh * 2147dcd0538fSMark Fasheh * This computes a few things that are commonly used in the process of 2148dcd0538fSMark Fasheh * inserting into the btree: 2149dcd0538fSMark Fasheh * - Whether the new extent is contiguous with an existing one. 2150dcd0538fSMark Fasheh * - The current tree depth. 2151dcd0538fSMark Fasheh * - Whether the insert is an appending one. 2152dcd0538fSMark Fasheh * - The total # of free records in the tree. 2153dcd0538fSMark Fasheh * 2154dcd0538fSMark Fasheh * All of the information is stored on the ocfs2_insert_type 2155dcd0538fSMark Fasheh * structure. 2156dcd0538fSMark Fasheh */ 2157dcd0538fSMark Fasheh static int ocfs2_figure_insert_type(struct inode *inode, 2158dcd0538fSMark Fasheh struct buffer_head *di_bh, 2159dcd0538fSMark Fasheh struct buffer_head **last_eb_bh, 2160dcd0538fSMark Fasheh struct ocfs2_extent_rec *insert_rec, 2161dcd0538fSMark Fasheh struct ocfs2_insert_type *insert) 2162dcd0538fSMark Fasheh { 2163dcd0538fSMark Fasheh int ret; 2164dcd0538fSMark Fasheh struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 2165dcd0538fSMark Fasheh struct ocfs2_extent_block *eb; 2166dcd0538fSMark Fasheh struct ocfs2_extent_list *el; 2167dcd0538fSMark Fasheh struct ocfs2_path *path = NULL; 2168dcd0538fSMark Fasheh struct buffer_head *bh = NULL; 2169dcd0538fSMark Fasheh 2170dcd0538fSMark Fasheh el = &di->id2.i_list; 2171dcd0538fSMark Fasheh insert->ins_tree_depth = le16_to_cpu(el->l_tree_depth); 2172dcd0538fSMark Fasheh 2173dcd0538fSMark Fasheh if (el->l_tree_depth) { 2174dcd0538fSMark Fasheh /* 2175dcd0538fSMark Fasheh * If we have tree depth, we read in the 2176dcd0538fSMark Fasheh * rightmost extent block ahead of time as 2177dcd0538fSMark Fasheh * ocfs2_figure_insert_type() and ocfs2_add_branch() 2178dcd0538fSMark Fasheh * may want it later. 2179dcd0538fSMark Fasheh */ 2180dcd0538fSMark Fasheh ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), 2181dcd0538fSMark Fasheh le64_to_cpu(di->i_last_eb_blk), &bh, 2182dcd0538fSMark Fasheh OCFS2_BH_CACHED, inode); 2183dcd0538fSMark Fasheh if (ret) { 2184dcd0538fSMark Fasheh mlog_exit(ret); 2185dcd0538fSMark Fasheh goto out; 2186dcd0538fSMark Fasheh } 2187dcd0538fSMark Fasheh eb = (struct ocfs2_extent_block *) bh->b_data; 2188dcd0538fSMark Fasheh el = &eb->h_list; 2189dcd0538fSMark Fasheh } 2190dcd0538fSMark Fasheh 2191dcd0538fSMark Fasheh /* 2192dcd0538fSMark Fasheh * Unless we have a contiguous insert, we'll need to know if 2193dcd0538fSMark Fasheh * there is room left in our allocation tree for another 2194dcd0538fSMark Fasheh * extent record. 2195dcd0538fSMark Fasheh * 2196dcd0538fSMark Fasheh * XXX: This test is simplistic, we can search for empty 2197dcd0538fSMark Fasheh * extent records too. 2198dcd0538fSMark Fasheh */ 2199dcd0538fSMark Fasheh insert->ins_free_records = le16_to_cpu(el->l_count) - 2200dcd0538fSMark Fasheh le16_to_cpu(el->l_next_free_rec); 2201dcd0538fSMark Fasheh 2202dcd0538fSMark Fasheh if (!insert->ins_tree_depth) { 2203dcd0538fSMark Fasheh ocfs2_figure_contig_type(inode, insert, el, insert_rec); 2204dcd0538fSMark Fasheh ocfs2_figure_appending_type(insert, el, insert_rec); 2205dcd0538fSMark Fasheh return 0; 2206dcd0538fSMark Fasheh } 2207dcd0538fSMark Fasheh 2208dcd0538fSMark Fasheh path = ocfs2_new_inode_path(di_bh); 2209dcd0538fSMark Fasheh if (!path) { 2210dcd0538fSMark Fasheh ret = -ENOMEM; 2211dcd0538fSMark Fasheh mlog_errno(ret); 2212dcd0538fSMark Fasheh goto out; 2213dcd0538fSMark Fasheh } 2214dcd0538fSMark Fasheh 2215dcd0538fSMark Fasheh /* 2216dcd0538fSMark Fasheh * In the case that we're inserting past what the tree 2217dcd0538fSMark Fasheh * currently accounts for, ocfs2_find_path() will return for 2218dcd0538fSMark Fasheh * us the rightmost tree path. This is accounted for below in 2219dcd0538fSMark Fasheh * the appending code. 2220dcd0538fSMark Fasheh */ 2221dcd0538fSMark Fasheh ret = ocfs2_find_path(inode, path, le32_to_cpu(insert_rec->e_cpos)); 2222dcd0538fSMark Fasheh if (ret) { 2223dcd0538fSMark Fasheh mlog_errno(ret); 2224dcd0538fSMark Fasheh goto out; 2225dcd0538fSMark Fasheh } 2226dcd0538fSMark Fasheh 2227dcd0538fSMark Fasheh el = path_leaf_el(path); 2228dcd0538fSMark Fasheh 2229dcd0538fSMark Fasheh /* 2230dcd0538fSMark Fasheh * Now that we have the path, there's two things we want to determine: 2231dcd0538fSMark Fasheh * 1) Contiguousness (also set contig_index if this is so) 2232dcd0538fSMark Fasheh * 2233dcd0538fSMark Fasheh * 2) Are we doing an append? We can trivially break this up 2234dcd0538fSMark Fasheh * into two types of appends: simple record append, or a 2235dcd0538fSMark Fasheh * rotate inside the tail leaf. 2236dcd0538fSMark Fasheh */ 2237dcd0538fSMark Fasheh ocfs2_figure_contig_type(inode, insert, el, insert_rec); 2238dcd0538fSMark Fasheh 2239dcd0538fSMark Fasheh /* 2240dcd0538fSMark Fasheh * The insert code isn't quite ready to deal with all cases of 2241dcd0538fSMark Fasheh * left contiguousness. Specifically, if it's an insert into 2242dcd0538fSMark Fasheh * the 1st record in a leaf, it will require the adjustment of 2243dcd0538fSMark Fasheh * e_clusters on the last record of the path directly to it's 2244dcd0538fSMark Fasheh * left. For now, just catch that case and fool the layers 2245dcd0538fSMark Fasheh * above us. This works just fine for tree_depth == 0, which 2246dcd0538fSMark Fasheh * is why we allow that above. 2247dcd0538fSMark Fasheh */ 2248dcd0538fSMark Fasheh if (insert->ins_contig == CONTIG_LEFT && 2249dcd0538fSMark Fasheh insert->ins_contig_index == 0) 2250dcd0538fSMark Fasheh insert->ins_contig = CONTIG_NONE; 2251dcd0538fSMark Fasheh 2252dcd0538fSMark Fasheh /* 2253dcd0538fSMark Fasheh * Ok, so we can simply compare against last_eb to figure out 2254dcd0538fSMark Fasheh * whether the path doesn't exist. This will only happen in 2255dcd0538fSMark Fasheh * the case that we're doing a tail append, so maybe we can 2256dcd0538fSMark Fasheh * take advantage of that information somehow. 2257dcd0538fSMark Fasheh */ 2258dcd0538fSMark Fasheh if (le64_to_cpu(di->i_last_eb_blk) == path_leaf_bh(path)->b_blocknr) { 2259dcd0538fSMark Fasheh /* 2260dcd0538fSMark Fasheh * Ok, ocfs2_find_path() returned us the rightmost 2261dcd0538fSMark Fasheh * tree path. This might be an appending insert. There are 2262dcd0538fSMark Fasheh * two cases: 2263dcd0538fSMark Fasheh * 1) We're doing a true append at the tail: 2264dcd0538fSMark Fasheh * -This might even be off the end of the leaf 2265dcd0538fSMark Fasheh * 2) We're "appending" by rotating in the tail 2266dcd0538fSMark Fasheh */ 2267dcd0538fSMark Fasheh ocfs2_figure_appending_type(insert, el, insert_rec); 2268dcd0538fSMark Fasheh } 2269dcd0538fSMark Fasheh 2270dcd0538fSMark Fasheh out: 2271dcd0538fSMark Fasheh ocfs2_free_path(path); 2272dcd0538fSMark Fasheh 2273dcd0538fSMark Fasheh if (ret == 0) 2274dcd0538fSMark Fasheh *last_eb_bh = bh; 2275dcd0538fSMark Fasheh else 2276dcd0538fSMark Fasheh brelse(bh); 2277dcd0538fSMark Fasheh return ret; 2278dcd0538fSMark Fasheh } 2279dcd0538fSMark Fasheh 2280dcd0538fSMark Fasheh /* 2281dcd0538fSMark Fasheh * Insert an extent into an inode btree. 2282dcd0538fSMark Fasheh * 2283dcd0538fSMark Fasheh * The caller needs to update fe->i_clusters 2284dcd0538fSMark Fasheh */ 2285ccd979bdSMark Fasheh int ocfs2_insert_extent(struct ocfs2_super *osb, 22861fabe148SMark Fasheh handle_t *handle, 2287ccd979bdSMark Fasheh struct inode *inode, 2288ccd979bdSMark Fasheh struct buffer_head *fe_bh, 2289dcd0538fSMark Fasheh u32 cpos, 2290ccd979bdSMark Fasheh u64 start_blk, 2291ccd979bdSMark Fasheh u32 new_clusters, 2292ccd979bdSMark Fasheh struct ocfs2_alloc_context *meta_ac) 2293ccd979bdSMark Fasheh { 2294dcd0538fSMark Fasheh int status, shift; 2295ccd979bdSMark Fasheh struct buffer_head *last_eb_bh = NULL; 2296ccd979bdSMark Fasheh struct buffer_head *bh = NULL; 2297dcd0538fSMark Fasheh struct ocfs2_insert_type insert = {0, }; 2298dcd0538fSMark Fasheh struct ocfs2_extent_rec rec; 2299ccd979bdSMark Fasheh 2300dcd0538fSMark Fasheh mlog(0, "add %u clusters at position %u to inode %llu\n", 2301dcd0538fSMark Fasheh new_clusters, cpos, (unsigned long long)OCFS2_I(inode)->ip_blkno); 2302ccd979bdSMark Fasheh 2303dcd0538fSMark Fasheh mlog_bug_on_msg(!ocfs2_sparse_alloc(osb) && 2304dcd0538fSMark Fasheh (OCFS2_I(inode)->ip_clusters != cpos), 2305dcd0538fSMark Fasheh "Device %s, asking for sparse allocation: inode %llu, " 2306dcd0538fSMark Fasheh "cpos %u, clusters %u\n", 2307dcd0538fSMark Fasheh osb->dev_str, 2308dcd0538fSMark Fasheh (unsigned long long)OCFS2_I(inode)->ip_blkno, cpos, 2309dcd0538fSMark Fasheh OCFS2_I(inode)->ip_clusters); 2310ccd979bdSMark Fasheh 2311dcd0538fSMark Fasheh rec.e_cpos = cpu_to_le32(cpos); 2312dcd0538fSMark Fasheh rec.e_blkno = cpu_to_le64(start_blk); 2313dcd0538fSMark Fasheh rec.e_clusters = cpu_to_le32(new_clusters); 2314ccd979bdSMark Fasheh 2315dcd0538fSMark Fasheh status = ocfs2_figure_insert_type(inode, fe_bh, &last_eb_bh, &rec, 2316dcd0538fSMark Fasheh &insert); 2317ccd979bdSMark Fasheh if (status < 0) { 2318dcd0538fSMark Fasheh mlog_errno(status); 2319ccd979bdSMark Fasheh goto bail; 2320ccd979bdSMark Fasheh } 2321ccd979bdSMark Fasheh 2322dcd0538fSMark Fasheh mlog(0, "Insert.appending: %u, Insert.Contig: %u, " 2323dcd0538fSMark Fasheh "Insert.contig_index: %d, Insert.free_records: %d, " 2324dcd0538fSMark Fasheh "Insert.tree_depth: %d\n", 2325dcd0538fSMark Fasheh insert.ins_appending, insert.ins_contig, insert.ins_contig_index, 2326dcd0538fSMark Fasheh insert.ins_free_records, insert.ins_tree_depth); 2327dcd0538fSMark Fasheh 2328dcd0538fSMark Fasheh /* 2329dcd0538fSMark Fasheh * Avoid growing the tree unless we're out of records and the 2330dcd0538fSMark Fasheh * insert type requres one. 2331dcd0538fSMark Fasheh */ 2332dcd0538fSMark Fasheh if (insert.ins_contig != CONTIG_NONE || insert.ins_free_records) 2333ccd979bdSMark Fasheh goto out_add; 2334ccd979bdSMark Fasheh 2335ccd979bdSMark Fasheh shift = ocfs2_find_branch_target(osb, inode, fe_bh, &bh); 2336ccd979bdSMark Fasheh if (shift < 0) { 2337ccd979bdSMark Fasheh status = shift; 2338ccd979bdSMark Fasheh mlog_errno(status); 2339ccd979bdSMark Fasheh goto bail; 2340ccd979bdSMark Fasheh } 2341ccd979bdSMark Fasheh 2342ccd979bdSMark Fasheh /* We traveled all the way to the bottom of the allocation tree 2343ccd979bdSMark Fasheh * and didn't find room for any more extents - we need to add 2344ccd979bdSMark Fasheh * another tree level */ 2345ccd979bdSMark Fasheh if (shift) { 2346ccd979bdSMark Fasheh BUG_ON(bh); 2347dcd0538fSMark Fasheh mlog(0, "need to shift tree depth " 2348dcd0538fSMark Fasheh "(current = %d)\n", insert.ins_tree_depth); 2349ccd979bdSMark Fasheh 2350ccd979bdSMark Fasheh /* ocfs2_shift_tree_depth will return us a buffer with 2351ccd979bdSMark Fasheh * the new extent block (so we can pass that to 2352ccd979bdSMark Fasheh * ocfs2_add_branch). */ 2353ccd979bdSMark Fasheh status = ocfs2_shift_tree_depth(osb, handle, inode, fe_bh, 2354ccd979bdSMark Fasheh meta_ac, &bh); 2355ccd979bdSMark Fasheh if (status < 0) { 2356ccd979bdSMark Fasheh mlog_errno(status); 2357ccd979bdSMark Fasheh goto bail; 2358ccd979bdSMark Fasheh } 2359dcd0538fSMark Fasheh insert.ins_tree_depth++; 2360ccd979bdSMark Fasheh /* Special case: we have room now if we shifted from 2361ccd979bdSMark Fasheh * tree_depth 0 */ 2362dcd0538fSMark Fasheh if (insert.ins_tree_depth == 1) 2363ccd979bdSMark Fasheh goto out_add; 2364ccd979bdSMark Fasheh } 2365ccd979bdSMark Fasheh 2366ccd979bdSMark Fasheh /* call ocfs2_add_branch to add the final part of the tree with 2367ccd979bdSMark Fasheh * the new data. */ 2368dcd0538fSMark Fasheh mlog(0, "add branch. bh = %p\n", bh); 2369ccd979bdSMark Fasheh status = ocfs2_add_branch(osb, handle, inode, fe_bh, bh, last_eb_bh, 2370ccd979bdSMark Fasheh meta_ac); 2371ccd979bdSMark Fasheh if (status < 0) { 2372ccd979bdSMark Fasheh mlog_errno(status); 2373ccd979bdSMark Fasheh goto bail; 2374ccd979bdSMark Fasheh } 2375ccd979bdSMark Fasheh 2376ccd979bdSMark Fasheh out_add: 2377dcd0538fSMark Fasheh /* Finally, we can add clusters. This might rotate the tree for us. */ 2378dcd0538fSMark Fasheh status = ocfs2_do_insert_extent(inode, handle, fe_bh, &rec, &insert); 2379ccd979bdSMark Fasheh if (status < 0) 2380ccd979bdSMark Fasheh mlog_errno(status); 2381ccd979bdSMark Fasheh 2382ccd979bdSMark Fasheh bail: 2383ccd979bdSMark Fasheh if (bh) 2384ccd979bdSMark Fasheh brelse(bh); 2385ccd979bdSMark Fasheh 2386ccd979bdSMark Fasheh if (last_eb_bh) 2387ccd979bdSMark Fasheh brelse(last_eb_bh); 2388ccd979bdSMark Fasheh 2389ccd979bdSMark Fasheh mlog_exit(status); 2390ccd979bdSMark Fasheh return status; 2391ccd979bdSMark Fasheh } 2392ccd979bdSMark Fasheh 2393ccd979bdSMark Fasheh static inline int ocfs2_truncate_log_needs_flush(struct ocfs2_super *osb) 2394ccd979bdSMark Fasheh { 2395ccd979bdSMark Fasheh struct buffer_head *tl_bh = osb->osb_tl_bh; 2396ccd979bdSMark Fasheh struct ocfs2_dinode *di; 2397ccd979bdSMark Fasheh struct ocfs2_truncate_log *tl; 2398ccd979bdSMark Fasheh 2399ccd979bdSMark Fasheh di = (struct ocfs2_dinode *) tl_bh->b_data; 2400ccd979bdSMark Fasheh tl = &di->id2.i_dealloc; 2401ccd979bdSMark Fasheh 2402ccd979bdSMark Fasheh mlog_bug_on_msg(le16_to_cpu(tl->tl_used) > le16_to_cpu(tl->tl_count), 2403ccd979bdSMark Fasheh "slot %d, invalid truncate log parameters: used = " 2404ccd979bdSMark Fasheh "%u, count = %u\n", osb->slot_num, 2405ccd979bdSMark Fasheh le16_to_cpu(tl->tl_used), le16_to_cpu(tl->tl_count)); 2406ccd979bdSMark Fasheh return le16_to_cpu(tl->tl_used) == le16_to_cpu(tl->tl_count); 2407ccd979bdSMark Fasheh } 2408ccd979bdSMark Fasheh 2409ccd979bdSMark Fasheh static int ocfs2_truncate_log_can_coalesce(struct ocfs2_truncate_log *tl, 2410ccd979bdSMark Fasheh unsigned int new_start) 2411ccd979bdSMark Fasheh { 2412ccd979bdSMark Fasheh unsigned int tail_index; 2413ccd979bdSMark Fasheh unsigned int current_tail; 2414ccd979bdSMark Fasheh 2415ccd979bdSMark Fasheh /* No records, nothing to coalesce */ 2416ccd979bdSMark Fasheh if (!le16_to_cpu(tl->tl_used)) 2417ccd979bdSMark Fasheh return 0; 2418ccd979bdSMark Fasheh 2419ccd979bdSMark Fasheh tail_index = le16_to_cpu(tl->tl_used) - 1; 2420ccd979bdSMark Fasheh current_tail = le32_to_cpu(tl->tl_recs[tail_index].t_start); 2421ccd979bdSMark Fasheh current_tail += le32_to_cpu(tl->tl_recs[tail_index].t_clusters); 2422ccd979bdSMark Fasheh 2423ccd979bdSMark Fasheh return current_tail == new_start; 2424ccd979bdSMark Fasheh } 2425ccd979bdSMark Fasheh 2426ccd979bdSMark Fasheh static int ocfs2_truncate_log_append(struct ocfs2_super *osb, 24271fabe148SMark Fasheh handle_t *handle, 2428ccd979bdSMark Fasheh u64 start_blk, 2429ccd979bdSMark Fasheh unsigned int num_clusters) 2430ccd979bdSMark Fasheh { 2431ccd979bdSMark Fasheh int status, index; 2432ccd979bdSMark Fasheh unsigned int start_cluster, tl_count; 2433ccd979bdSMark Fasheh struct inode *tl_inode = osb->osb_tl_inode; 2434ccd979bdSMark Fasheh struct buffer_head *tl_bh = osb->osb_tl_bh; 2435ccd979bdSMark Fasheh struct ocfs2_dinode *di; 2436ccd979bdSMark Fasheh struct ocfs2_truncate_log *tl; 2437ccd979bdSMark Fasheh 2438b0697053SMark Fasheh mlog_entry("start_blk = %llu, num_clusters = %u\n", 2439b0697053SMark Fasheh (unsigned long long)start_blk, num_clusters); 2440ccd979bdSMark Fasheh 24411b1dcc1bSJes Sorensen BUG_ON(mutex_trylock(&tl_inode->i_mutex)); 2442ccd979bdSMark Fasheh 2443ccd979bdSMark Fasheh start_cluster = ocfs2_blocks_to_clusters(osb->sb, start_blk); 2444ccd979bdSMark Fasheh 2445ccd979bdSMark Fasheh di = (struct ocfs2_dinode *) tl_bh->b_data; 2446ccd979bdSMark Fasheh tl = &di->id2.i_dealloc; 2447ccd979bdSMark Fasheh if (!OCFS2_IS_VALID_DINODE(di)) { 2448ccd979bdSMark Fasheh OCFS2_RO_ON_INVALID_DINODE(osb->sb, di); 2449ccd979bdSMark Fasheh status = -EIO; 2450ccd979bdSMark Fasheh goto bail; 2451ccd979bdSMark Fasheh } 2452ccd979bdSMark Fasheh 2453ccd979bdSMark Fasheh tl_count = le16_to_cpu(tl->tl_count); 2454ccd979bdSMark Fasheh mlog_bug_on_msg(tl_count > ocfs2_truncate_recs_per_inode(osb->sb) || 2455ccd979bdSMark Fasheh tl_count == 0, 2456b0697053SMark Fasheh "Truncate record count on #%llu invalid " 2457b0697053SMark Fasheh "wanted %u, actual %u\n", 2458b0697053SMark Fasheh (unsigned long long)OCFS2_I(tl_inode)->ip_blkno, 2459ccd979bdSMark Fasheh ocfs2_truncate_recs_per_inode(osb->sb), 2460ccd979bdSMark Fasheh le16_to_cpu(tl->tl_count)); 2461ccd979bdSMark Fasheh 2462ccd979bdSMark Fasheh /* Caller should have known to flush before calling us. */ 2463ccd979bdSMark Fasheh index = le16_to_cpu(tl->tl_used); 2464ccd979bdSMark Fasheh if (index >= tl_count) { 2465ccd979bdSMark Fasheh status = -ENOSPC; 2466ccd979bdSMark Fasheh mlog_errno(status); 2467ccd979bdSMark Fasheh goto bail; 2468ccd979bdSMark Fasheh } 2469ccd979bdSMark Fasheh 2470ccd979bdSMark Fasheh status = ocfs2_journal_access(handle, tl_inode, tl_bh, 2471ccd979bdSMark Fasheh OCFS2_JOURNAL_ACCESS_WRITE); 2472ccd979bdSMark Fasheh if (status < 0) { 2473ccd979bdSMark Fasheh mlog_errno(status); 2474ccd979bdSMark Fasheh goto bail; 2475ccd979bdSMark Fasheh } 2476ccd979bdSMark Fasheh 2477ccd979bdSMark Fasheh mlog(0, "Log truncate of %u clusters starting at cluster %u to " 2478b0697053SMark Fasheh "%llu (index = %d)\n", num_clusters, start_cluster, 2479b0697053SMark Fasheh (unsigned long long)OCFS2_I(tl_inode)->ip_blkno, index); 2480ccd979bdSMark Fasheh 2481ccd979bdSMark Fasheh if (ocfs2_truncate_log_can_coalesce(tl, start_cluster)) { 2482ccd979bdSMark Fasheh /* 2483ccd979bdSMark Fasheh * Move index back to the record we are coalescing with. 2484ccd979bdSMark Fasheh * ocfs2_truncate_log_can_coalesce() guarantees nonzero 2485ccd979bdSMark Fasheh */ 2486ccd979bdSMark Fasheh index--; 2487ccd979bdSMark Fasheh 2488ccd979bdSMark Fasheh num_clusters += le32_to_cpu(tl->tl_recs[index].t_clusters); 2489ccd979bdSMark Fasheh mlog(0, "Coalesce with index %u (start = %u, clusters = %u)\n", 2490ccd979bdSMark Fasheh index, le32_to_cpu(tl->tl_recs[index].t_start), 2491ccd979bdSMark Fasheh num_clusters); 2492ccd979bdSMark Fasheh } else { 2493ccd979bdSMark Fasheh tl->tl_recs[index].t_start = cpu_to_le32(start_cluster); 2494ccd979bdSMark Fasheh tl->tl_used = cpu_to_le16(index + 1); 2495ccd979bdSMark Fasheh } 2496ccd979bdSMark Fasheh tl->tl_recs[index].t_clusters = cpu_to_le32(num_clusters); 2497ccd979bdSMark Fasheh 2498ccd979bdSMark Fasheh status = ocfs2_journal_dirty(handle, tl_bh); 2499ccd979bdSMark Fasheh if (status < 0) { 2500ccd979bdSMark Fasheh mlog_errno(status); 2501ccd979bdSMark Fasheh goto bail; 2502ccd979bdSMark Fasheh } 2503ccd979bdSMark Fasheh 2504ccd979bdSMark Fasheh bail: 2505ccd979bdSMark Fasheh mlog_exit(status); 2506ccd979bdSMark Fasheh return status; 2507ccd979bdSMark Fasheh } 2508ccd979bdSMark Fasheh 2509ccd979bdSMark Fasheh static int ocfs2_replay_truncate_records(struct ocfs2_super *osb, 25101fabe148SMark Fasheh handle_t *handle, 2511ccd979bdSMark Fasheh struct inode *data_alloc_inode, 2512ccd979bdSMark Fasheh struct buffer_head *data_alloc_bh) 2513ccd979bdSMark Fasheh { 2514ccd979bdSMark Fasheh int status = 0; 2515ccd979bdSMark Fasheh int i; 2516ccd979bdSMark Fasheh unsigned int num_clusters; 2517ccd979bdSMark Fasheh u64 start_blk; 2518ccd979bdSMark Fasheh struct ocfs2_truncate_rec rec; 2519ccd979bdSMark Fasheh struct ocfs2_dinode *di; 2520ccd979bdSMark Fasheh struct ocfs2_truncate_log *tl; 2521ccd979bdSMark Fasheh struct inode *tl_inode = osb->osb_tl_inode; 2522ccd979bdSMark Fasheh struct buffer_head *tl_bh = osb->osb_tl_bh; 2523ccd979bdSMark Fasheh 2524ccd979bdSMark Fasheh mlog_entry_void(); 2525ccd979bdSMark Fasheh 2526ccd979bdSMark Fasheh di = (struct ocfs2_dinode *) tl_bh->b_data; 2527ccd979bdSMark Fasheh tl = &di->id2.i_dealloc; 2528ccd979bdSMark Fasheh i = le16_to_cpu(tl->tl_used) - 1; 2529ccd979bdSMark Fasheh while (i >= 0) { 2530ccd979bdSMark Fasheh /* Caller has given us at least enough credits to 2531ccd979bdSMark Fasheh * update the truncate log dinode */ 2532ccd979bdSMark Fasheh status = ocfs2_journal_access(handle, tl_inode, tl_bh, 2533ccd979bdSMark Fasheh OCFS2_JOURNAL_ACCESS_WRITE); 2534ccd979bdSMark Fasheh if (status < 0) { 2535ccd979bdSMark Fasheh mlog_errno(status); 2536ccd979bdSMark Fasheh goto bail; 2537ccd979bdSMark Fasheh } 2538ccd979bdSMark Fasheh 2539ccd979bdSMark Fasheh tl->tl_used = cpu_to_le16(i); 2540ccd979bdSMark Fasheh 2541ccd979bdSMark Fasheh status = ocfs2_journal_dirty(handle, tl_bh); 2542ccd979bdSMark Fasheh if (status < 0) { 2543ccd979bdSMark Fasheh mlog_errno(status); 2544ccd979bdSMark Fasheh goto bail; 2545ccd979bdSMark Fasheh } 2546ccd979bdSMark Fasheh 2547ccd979bdSMark Fasheh /* TODO: Perhaps we can calculate the bulk of the 2548ccd979bdSMark Fasheh * credits up front rather than extending like 2549ccd979bdSMark Fasheh * this. */ 2550ccd979bdSMark Fasheh status = ocfs2_extend_trans(handle, 2551ccd979bdSMark Fasheh OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC); 2552ccd979bdSMark Fasheh if (status < 0) { 2553ccd979bdSMark Fasheh mlog_errno(status); 2554ccd979bdSMark Fasheh goto bail; 2555ccd979bdSMark Fasheh } 2556ccd979bdSMark Fasheh 2557ccd979bdSMark Fasheh rec = tl->tl_recs[i]; 2558ccd979bdSMark Fasheh start_blk = ocfs2_clusters_to_blocks(data_alloc_inode->i_sb, 2559ccd979bdSMark Fasheh le32_to_cpu(rec.t_start)); 2560ccd979bdSMark Fasheh num_clusters = le32_to_cpu(rec.t_clusters); 2561ccd979bdSMark Fasheh 2562ccd979bdSMark Fasheh /* if start_blk is not set, we ignore the record as 2563ccd979bdSMark Fasheh * invalid. */ 2564ccd979bdSMark Fasheh if (start_blk) { 2565ccd979bdSMark Fasheh mlog(0, "free record %d, start = %u, clusters = %u\n", 2566ccd979bdSMark Fasheh i, le32_to_cpu(rec.t_start), num_clusters); 2567ccd979bdSMark Fasheh 2568ccd979bdSMark Fasheh status = ocfs2_free_clusters(handle, data_alloc_inode, 2569ccd979bdSMark Fasheh data_alloc_bh, start_blk, 2570ccd979bdSMark Fasheh num_clusters); 2571ccd979bdSMark Fasheh if (status < 0) { 2572ccd979bdSMark Fasheh mlog_errno(status); 2573ccd979bdSMark Fasheh goto bail; 2574ccd979bdSMark Fasheh } 2575ccd979bdSMark Fasheh } 2576ccd979bdSMark Fasheh i--; 2577ccd979bdSMark Fasheh } 2578ccd979bdSMark Fasheh 2579ccd979bdSMark Fasheh bail: 2580ccd979bdSMark Fasheh mlog_exit(status); 2581ccd979bdSMark Fasheh return status; 2582ccd979bdSMark Fasheh } 2583ccd979bdSMark Fasheh 25841b1dcc1bSJes Sorensen /* Expects you to already be holding tl_inode->i_mutex */ 2585ccd979bdSMark Fasheh static int __ocfs2_flush_truncate_log(struct ocfs2_super *osb) 2586ccd979bdSMark Fasheh { 2587ccd979bdSMark Fasheh int status; 2588ccd979bdSMark Fasheh unsigned int num_to_flush; 25891fabe148SMark Fasheh handle_t *handle; 2590ccd979bdSMark Fasheh struct inode *tl_inode = osb->osb_tl_inode; 2591ccd979bdSMark Fasheh struct inode *data_alloc_inode = NULL; 2592ccd979bdSMark Fasheh struct buffer_head *tl_bh = osb->osb_tl_bh; 2593ccd979bdSMark Fasheh struct buffer_head *data_alloc_bh = NULL; 2594ccd979bdSMark Fasheh struct ocfs2_dinode *di; 2595ccd979bdSMark Fasheh struct ocfs2_truncate_log *tl; 2596ccd979bdSMark Fasheh 2597ccd979bdSMark Fasheh mlog_entry_void(); 2598ccd979bdSMark Fasheh 25991b1dcc1bSJes Sorensen BUG_ON(mutex_trylock(&tl_inode->i_mutex)); 2600ccd979bdSMark Fasheh 2601ccd979bdSMark Fasheh di = (struct ocfs2_dinode *) tl_bh->b_data; 2602ccd979bdSMark Fasheh tl = &di->id2.i_dealloc; 2603ccd979bdSMark Fasheh if (!OCFS2_IS_VALID_DINODE(di)) { 2604ccd979bdSMark Fasheh OCFS2_RO_ON_INVALID_DINODE(osb->sb, di); 2605ccd979bdSMark Fasheh status = -EIO; 2606e08dc8b9SMark Fasheh goto out; 2607ccd979bdSMark Fasheh } 2608ccd979bdSMark Fasheh 2609ccd979bdSMark Fasheh num_to_flush = le16_to_cpu(tl->tl_used); 2610b0697053SMark Fasheh mlog(0, "Flush %u records from truncate log #%llu\n", 2611b0697053SMark Fasheh num_to_flush, (unsigned long long)OCFS2_I(tl_inode)->ip_blkno); 2612ccd979bdSMark Fasheh if (!num_to_flush) { 2613ccd979bdSMark Fasheh status = 0; 2614e08dc8b9SMark Fasheh goto out; 2615ccd979bdSMark Fasheh } 2616ccd979bdSMark Fasheh 2617ccd979bdSMark Fasheh data_alloc_inode = ocfs2_get_system_file_inode(osb, 2618ccd979bdSMark Fasheh GLOBAL_BITMAP_SYSTEM_INODE, 2619ccd979bdSMark Fasheh OCFS2_INVALID_SLOT); 2620ccd979bdSMark Fasheh if (!data_alloc_inode) { 2621ccd979bdSMark Fasheh status = -EINVAL; 2622ccd979bdSMark Fasheh mlog(ML_ERROR, "Could not get bitmap inode!\n"); 2623e08dc8b9SMark Fasheh goto out; 2624ccd979bdSMark Fasheh } 2625ccd979bdSMark Fasheh 2626e08dc8b9SMark Fasheh mutex_lock(&data_alloc_inode->i_mutex); 2627e08dc8b9SMark Fasheh 26284bcec184SMark Fasheh status = ocfs2_meta_lock(data_alloc_inode, &data_alloc_bh, 1); 2629ccd979bdSMark Fasheh if (status < 0) { 2630ccd979bdSMark Fasheh mlog_errno(status); 2631e08dc8b9SMark Fasheh goto out_mutex; 2632ccd979bdSMark Fasheh } 2633ccd979bdSMark Fasheh 263465eff9ccSMark Fasheh handle = ocfs2_start_trans(osb, OCFS2_TRUNCATE_LOG_UPDATE); 2635ccd979bdSMark Fasheh if (IS_ERR(handle)) { 2636ccd979bdSMark Fasheh status = PTR_ERR(handle); 2637ccd979bdSMark Fasheh mlog_errno(status); 2638e08dc8b9SMark Fasheh goto out_unlock; 2639ccd979bdSMark Fasheh } 2640ccd979bdSMark Fasheh 2641ccd979bdSMark Fasheh status = ocfs2_replay_truncate_records(osb, handle, data_alloc_inode, 2642ccd979bdSMark Fasheh data_alloc_bh); 2643e08dc8b9SMark Fasheh if (status < 0) 2644ccd979bdSMark Fasheh mlog_errno(status); 2645ccd979bdSMark Fasheh 264602dc1af4SMark Fasheh ocfs2_commit_trans(osb, handle); 2647ccd979bdSMark Fasheh 2648e08dc8b9SMark Fasheh out_unlock: 2649e08dc8b9SMark Fasheh brelse(data_alloc_bh); 2650e08dc8b9SMark Fasheh ocfs2_meta_unlock(data_alloc_inode, 1); 2651e08dc8b9SMark Fasheh 2652e08dc8b9SMark Fasheh out_mutex: 2653e08dc8b9SMark Fasheh mutex_unlock(&data_alloc_inode->i_mutex); 2654ccd979bdSMark Fasheh iput(data_alloc_inode); 2655ccd979bdSMark Fasheh 2656e08dc8b9SMark Fasheh out: 2657ccd979bdSMark Fasheh mlog_exit(status); 2658ccd979bdSMark Fasheh return status; 2659ccd979bdSMark Fasheh } 2660ccd979bdSMark Fasheh 2661ccd979bdSMark Fasheh int ocfs2_flush_truncate_log(struct ocfs2_super *osb) 2662ccd979bdSMark Fasheh { 2663ccd979bdSMark Fasheh int status; 2664ccd979bdSMark Fasheh struct inode *tl_inode = osb->osb_tl_inode; 2665ccd979bdSMark Fasheh 26661b1dcc1bSJes Sorensen mutex_lock(&tl_inode->i_mutex); 2667ccd979bdSMark Fasheh status = __ocfs2_flush_truncate_log(osb); 26681b1dcc1bSJes Sorensen mutex_unlock(&tl_inode->i_mutex); 2669ccd979bdSMark Fasheh 2670ccd979bdSMark Fasheh return status; 2671ccd979bdSMark Fasheh } 2672ccd979bdSMark Fasheh 2673c4028958SDavid Howells static void ocfs2_truncate_log_worker(struct work_struct *work) 2674ccd979bdSMark Fasheh { 2675ccd979bdSMark Fasheh int status; 2676c4028958SDavid Howells struct ocfs2_super *osb = 2677c4028958SDavid Howells container_of(work, struct ocfs2_super, 2678c4028958SDavid Howells osb_truncate_log_wq.work); 2679ccd979bdSMark Fasheh 2680ccd979bdSMark Fasheh mlog_entry_void(); 2681ccd979bdSMark Fasheh 2682ccd979bdSMark Fasheh status = ocfs2_flush_truncate_log(osb); 2683ccd979bdSMark Fasheh if (status < 0) 2684ccd979bdSMark Fasheh mlog_errno(status); 2685ccd979bdSMark Fasheh 2686ccd979bdSMark Fasheh mlog_exit(status); 2687ccd979bdSMark Fasheh } 2688ccd979bdSMark Fasheh 2689ccd979bdSMark Fasheh #define OCFS2_TRUNCATE_LOG_FLUSH_INTERVAL (2 * HZ) 2690ccd979bdSMark Fasheh void ocfs2_schedule_truncate_log_flush(struct ocfs2_super *osb, 2691ccd979bdSMark Fasheh int cancel) 2692ccd979bdSMark Fasheh { 2693ccd979bdSMark Fasheh if (osb->osb_tl_inode) { 2694ccd979bdSMark Fasheh /* We want to push off log flushes while truncates are 2695ccd979bdSMark Fasheh * still running. */ 2696ccd979bdSMark Fasheh if (cancel) 2697ccd979bdSMark Fasheh cancel_delayed_work(&osb->osb_truncate_log_wq); 2698ccd979bdSMark Fasheh 2699ccd979bdSMark Fasheh queue_delayed_work(ocfs2_wq, &osb->osb_truncate_log_wq, 2700ccd979bdSMark Fasheh OCFS2_TRUNCATE_LOG_FLUSH_INTERVAL); 2701ccd979bdSMark Fasheh } 2702ccd979bdSMark Fasheh } 2703ccd979bdSMark Fasheh 2704ccd979bdSMark Fasheh static int ocfs2_get_truncate_log_info(struct ocfs2_super *osb, 2705ccd979bdSMark Fasheh int slot_num, 2706ccd979bdSMark Fasheh struct inode **tl_inode, 2707ccd979bdSMark Fasheh struct buffer_head **tl_bh) 2708ccd979bdSMark Fasheh { 2709ccd979bdSMark Fasheh int status; 2710ccd979bdSMark Fasheh struct inode *inode = NULL; 2711ccd979bdSMark Fasheh struct buffer_head *bh = NULL; 2712ccd979bdSMark Fasheh 2713ccd979bdSMark Fasheh inode = ocfs2_get_system_file_inode(osb, 2714ccd979bdSMark Fasheh TRUNCATE_LOG_SYSTEM_INODE, 2715ccd979bdSMark Fasheh slot_num); 2716ccd979bdSMark Fasheh if (!inode) { 2717ccd979bdSMark Fasheh status = -EINVAL; 2718ccd979bdSMark Fasheh mlog(ML_ERROR, "Could not get load truncate log inode!\n"); 2719ccd979bdSMark Fasheh goto bail; 2720ccd979bdSMark Fasheh } 2721ccd979bdSMark Fasheh 2722ccd979bdSMark Fasheh status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &bh, 2723ccd979bdSMark Fasheh OCFS2_BH_CACHED, inode); 2724ccd979bdSMark Fasheh if (status < 0) { 2725ccd979bdSMark Fasheh iput(inode); 2726ccd979bdSMark Fasheh mlog_errno(status); 2727ccd979bdSMark Fasheh goto bail; 2728ccd979bdSMark Fasheh } 2729ccd979bdSMark Fasheh 2730ccd979bdSMark Fasheh *tl_inode = inode; 2731ccd979bdSMark Fasheh *tl_bh = bh; 2732ccd979bdSMark Fasheh bail: 2733ccd979bdSMark Fasheh mlog_exit(status); 2734ccd979bdSMark Fasheh return status; 2735ccd979bdSMark Fasheh } 2736ccd979bdSMark Fasheh 2737ccd979bdSMark Fasheh /* called during the 1st stage of node recovery. we stamp a clean 2738ccd979bdSMark Fasheh * truncate log and pass back a copy for processing later. if the 2739ccd979bdSMark Fasheh * truncate log does not require processing, a *tl_copy is set to 2740ccd979bdSMark Fasheh * NULL. */ 2741ccd979bdSMark Fasheh int ocfs2_begin_truncate_log_recovery(struct ocfs2_super *osb, 2742ccd979bdSMark Fasheh int slot_num, 2743ccd979bdSMark Fasheh struct ocfs2_dinode **tl_copy) 2744ccd979bdSMark Fasheh { 2745ccd979bdSMark Fasheh int status; 2746ccd979bdSMark Fasheh struct inode *tl_inode = NULL; 2747ccd979bdSMark Fasheh struct buffer_head *tl_bh = NULL; 2748ccd979bdSMark Fasheh struct ocfs2_dinode *di; 2749ccd979bdSMark Fasheh struct ocfs2_truncate_log *tl; 2750ccd979bdSMark Fasheh 2751ccd979bdSMark Fasheh *tl_copy = NULL; 2752ccd979bdSMark Fasheh 2753ccd979bdSMark Fasheh mlog(0, "recover truncate log from slot %d\n", slot_num); 2754ccd979bdSMark Fasheh 2755ccd979bdSMark Fasheh status = ocfs2_get_truncate_log_info(osb, slot_num, &tl_inode, &tl_bh); 2756ccd979bdSMark Fasheh if (status < 0) { 2757ccd979bdSMark Fasheh mlog_errno(status); 2758ccd979bdSMark Fasheh goto bail; 2759ccd979bdSMark Fasheh } 2760ccd979bdSMark Fasheh 2761ccd979bdSMark Fasheh di = (struct ocfs2_dinode *) tl_bh->b_data; 2762ccd979bdSMark Fasheh tl = &di->id2.i_dealloc; 2763ccd979bdSMark Fasheh if (!OCFS2_IS_VALID_DINODE(di)) { 2764ccd979bdSMark Fasheh OCFS2_RO_ON_INVALID_DINODE(tl_inode->i_sb, di); 2765ccd979bdSMark Fasheh status = -EIO; 2766ccd979bdSMark Fasheh goto bail; 2767ccd979bdSMark Fasheh } 2768ccd979bdSMark Fasheh 2769ccd979bdSMark Fasheh if (le16_to_cpu(tl->tl_used)) { 2770ccd979bdSMark Fasheh mlog(0, "We'll have %u logs to recover\n", 2771ccd979bdSMark Fasheh le16_to_cpu(tl->tl_used)); 2772ccd979bdSMark Fasheh 2773ccd979bdSMark Fasheh *tl_copy = kmalloc(tl_bh->b_size, GFP_KERNEL); 2774ccd979bdSMark Fasheh if (!(*tl_copy)) { 2775ccd979bdSMark Fasheh status = -ENOMEM; 2776ccd979bdSMark Fasheh mlog_errno(status); 2777ccd979bdSMark Fasheh goto bail; 2778ccd979bdSMark Fasheh } 2779ccd979bdSMark Fasheh 2780ccd979bdSMark Fasheh /* Assuming the write-out below goes well, this copy 2781ccd979bdSMark Fasheh * will be passed back to recovery for processing. */ 2782ccd979bdSMark Fasheh memcpy(*tl_copy, tl_bh->b_data, tl_bh->b_size); 2783ccd979bdSMark Fasheh 2784ccd979bdSMark Fasheh /* All we need to do to clear the truncate log is set 2785ccd979bdSMark Fasheh * tl_used. */ 2786ccd979bdSMark Fasheh tl->tl_used = 0; 2787ccd979bdSMark Fasheh 2788ccd979bdSMark Fasheh status = ocfs2_write_block(osb, tl_bh, tl_inode); 2789ccd979bdSMark Fasheh if (status < 0) { 2790ccd979bdSMark Fasheh mlog_errno(status); 2791ccd979bdSMark Fasheh goto bail; 2792ccd979bdSMark Fasheh } 2793ccd979bdSMark Fasheh } 2794ccd979bdSMark Fasheh 2795ccd979bdSMark Fasheh bail: 2796ccd979bdSMark Fasheh if (tl_inode) 2797ccd979bdSMark Fasheh iput(tl_inode); 2798ccd979bdSMark Fasheh if (tl_bh) 2799ccd979bdSMark Fasheh brelse(tl_bh); 2800ccd979bdSMark Fasheh 2801ccd979bdSMark Fasheh if (status < 0 && (*tl_copy)) { 2802ccd979bdSMark Fasheh kfree(*tl_copy); 2803ccd979bdSMark Fasheh *tl_copy = NULL; 2804ccd979bdSMark Fasheh } 2805ccd979bdSMark Fasheh 2806ccd979bdSMark Fasheh mlog_exit(status); 2807ccd979bdSMark Fasheh return status; 2808ccd979bdSMark Fasheh } 2809ccd979bdSMark Fasheh 2810ccd979bdSMark Fasheh int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb, 2811ccd979bdSMark Fasheh struct ocfs2_dinode *tl_copy) 2812ccd979bdSMark Fasheh { 2813ccd979bdSMark Fasheh int status = 0; 2814ccd979bdSMark Fasheh int i; 2815ccd979bdSMark Fasheh unsigned int clusters, num_recs, start_cluster; 2816ccd979bdSMark Fasheh u64 start_blk; 28171fabe148SMark Fasheh handle_t *handle; 2818ccd979bdSMark Fasheh struct inode *tl_inode = osb->osb_tl_inode; 2819ccd979bdSMark Fasheh struct ocfs2_truncate_log *tl; 2820ccd979bdSMark Fasheh 2821ccd979bdSMark Fasheh mlog_entry_void(); 2822ccd979bdSMark Fasheh 2823ccd979bdSMark Fasheh if (OCFS2_I(tl_inode)->ip_blkno == le64_to_cpu(tl_copy->i_blkno)) { 2824ccd979bdSMark Fasheh mlog(ML_ERROR, "Asked to recover my own truncate log!\n"); 2825ccd979bdSMark Fasheh return -EINVAL; 2826ccd979bdSMark Fasheh } 2827ccd979bdSMark Fasheh 2828ccd979bdSMark Fasheh tl = &tl_copy->id2.i_dealloc; 2829ccd979bdSMark Fasheh num_recs = le16_to_cpu(tl->tl_used); 2830b0697053SMark Fasheh mlog(0, "cleanup %u records from %llu\n", num_recs, 2831b0697053SMark Fasheh (unsigned long long)tl_copy->i_blkno); 2832ccd979bdSMark Fasheh 28331b1dcc1bSJes Sorensen mutex_lock(&tl_inode->i_mutex); 2834ccd979bdSMark Fasheh for(i = 0; i < num_recs; i++) { 2835ccd979bdSMark Fasheh if (ocfs2_truncate_log_needs_flush(osb)) { 2836ccd979bdSMark Fasheh status = __ocfs2_flush_truncate_log(osb); 2837ccd979bdSMark Fasheh if (status < 0) { 2838ccd979bdSMark Fasheh mlog_errno(status); 2839ccd979bdSMark Fasheh goto bail_up; 2840ccd979bdSMark Fasheh } 2841ccd979bdSMark Fasheh } 2842ccd979bdSMark Fasheh 284365eff9ccSMark Fasheh handle = ocfs2_start_trans(osb, OCFS2_TRUNCATE_LOG_UPDATE); 2844ccd979bdSMark Fasheh if (IS_ERR(handle)) { 2845ccd979bdSMark Fasheh status = PTR_ERR(handle); 2846ccd979bdSMark Fasheh mlog_errno(status); 2847ccd979bdSMark Fasheh goto bail_up; 2848ccd979bdSMark Fasheh } 2849ccd979bdSMark Fasheh 2850ccd979bdSMark Fasheh clusters = le32_to_cpu(tl->tl_recs[i].t_clusters); 2851ccd979bdSMark Fasheh start_cluster = le32_to_cpu(tl->tl_recs[i].t_start); 2852ccd979bdSMark Fasheh start_blk = ocfs2_clusters_to_blocks(osb->sb, start_cluster); 2853ccd979bdSMark Fasheh 2854ccd979bdSMark Fasheh status = ocfs2_truncate_log_append(osb, handle, 2855ccd979bdSMark Fasheh start_blk, clusters); 285602dc1af4SMark Fasheh ocfs2_commit_trans(osb, handle); 2857ccd979bdSMark Fasheh if (status < 0) { 2858ccd979bdSMark Fasheh mlog_errno(status); 2859ccd979bdSMark Fasheh goto bail_up; 2860ccd979bdSMark Fasheh } 2861ccd979bdSMark Fasheh } 2862ccd979bdSMark Fasheh 2863ccd979bdSMark Fasheh bail_up: 28641b1dcc1bSJes Sorensen mutex_unlock(&tl_inode->i_mutex); 2865ccd979bdSMark Fasheh 2866ccd979bdSMark Fasheh mlog_exit(status); 2867ccd979bdSMark Fasheh return status; 2868ccd979bdSMark Fasheh } 2869ccd979bdSMark Fasheh 2870ccd979bdSMark Fasheh void ocfs2_truncate_log_shutdown(struct ocfs2_super *osb) 2871ccd979bdSMark Fasheh { 2872ccd979bdSMark Fasheh int status; 2873ccd979bdSMark Fasheh struct inode *tl_inode = osb->osb_tl_inode; 2874ccd979bdSMark Fasheh 2875ccd979bdSMark Fasheh mlog_entry_void(); 2876ccd979bdSMark Fasheh 2877ccd979bdSMark Fasheh if (tl_inode) { 2878ccd979bdSMark Fasheh cancel_delayed_work(&osb->osb_truncate_log_wq); 2879ccd979bdSMark Fasheh flush_workqueue(ocfs2_wq); 2880ccd979bdSMark Fasheh 2881ccd979bdSMark Fasheh status = ocfs2_flush_truncate_log(osb); 2882ccd979bdSMark Fasheh if (status < 0) 2883ccd979bdSMark Fasheh mlog_errno(status); 2884ccd979bdSMark Fasheh 2885ccd979bdSMark Fasheh brelse(osb->osb_tl_bh); 2886ccd979bdSMark Fasheh iput(osb->osb_tl_inode); 2887ccd979bdSMark Fasheh } 2888ccd979bdSMark Fasheh 2889ccd979bdSMark Fasheh mlog_exit_void(); 2890ccd979bdSMark Fasheh } 2891ccd979bdSMark Fasheh 2892ccd979bdSMark Fasheh int ocfs2_truncate_log_init(struct ocfs2_super *osb) 2893ccd979bdSMark Fasheh { 2894ccd979bdSMark Fasheh int status; 2895ccd979bdSMark Fasheh struct inode *tl_inode = NULL; 2896ccd979bdSMark Fasheh struct buffer_head *tl_bh = NULL; 2897ccd979bdSMark Fasheh 2898ccd979bdSMark Fasheh mlog_entry_void(); 2899ccd979bdSMark Fasheh 2900ccd979bdSMark Fasheh status = ocfs2_get_truncate_log_info(osb, 2901ccd979bdSMark Fasheh osb->slot_num, 2902ccd979bdSMark Fasheh &tl_inode, 2903ccd979bdSMark Fasheh &tl_bh); 2904ccd979bdSMark Fasheh if (status < 0) 2905ccd979bdSMark Fasheh mlog_errno(status); 2906ccd979bdSMark Fasheh 2907ccd979bdSMark Fasheh /* ocfs2_truncate_log_shutdown keys on the existence of 2908ccd979bdSMark Fasheh * osb->osb_tl_inode so we don't set any of the osb variables 2909ccd979bdSMark Fasheh * until we're sure all is well. */ 2910c4028958SDavid Howells INIT_DELAYED_WORK(&osb->osb_truncate_log_wq, 2911c4028958SDavid Howells ocfs2_truncate_log_worker); 2912ccd979bdSMark Fasheh osb->osb_tl_bh = tl_bh; 2913ccd979bdSMark Fasheh osb->osb_tl_inode = tl_inode; 2914ccd979bdSMark Fasheh 2915ccd979bdSMark Fasheh mlog_exit(status); 2916ccd979bdSMark Fasheh return status; 2917ccd979bdSMark Fasheh } 2918ccd979bdSMark Fasheh 2919ccd979bdSMark Fasheh /* This function will figure out whether the currently last extent 2920ccd979bdSMark Fasheh * block will be deleted, and if it will, what the new last extent 2921ccd979bdSMark Fasheh * block will be so we can update his h_next_leaf_blk field, as well 2922ccd979bdSMark Fasheh * as the dinodes i_last_eb_blk */ 2923dcd0538fSMark Fasheh static int ocfs2_find_new_last_ext_blk(struct inode *inode, 2924*3a0782d0SMark Fasheh unsigned int clusters_to_del, 2925dcd0538fSMark Fasheh struct ocfs2_path *path, 2926ccd979bdSMark Fasheh struct buffer_head **new_last_eb) 2927ccd979bdSMark Fasheh { 2928*3a0782d0SMark Fasheh int next_free, ret = 0; 2929dcd0538fSMark Fasheh u32 cpos; 2930*3a0782d0SMark Fasheh struct ocfs2_extent_rec *rec; 2931ccd979bdSMark Fasheh struct ocfs2_extent_block *eb; 2932ccd979bdSMark Fasheh struct ocfs2_extent_list *el; 2933ccd979bdSMark Fasheh struct buffer_head *bh = NULL; 2934ccd979bdSMark Fasheh 2935ccd979bdSMark Fasheh *new_last_eb = NULL; 2936ccd979bdSMark Fasheh 2937ccd979bdSMark Fasheh /* we have no tree, so of course, no last_eb. */ 2938dcd0538fSMark Fasheh if (!path->p_tree_depth) 2939dcd0538fSMark Fasheh goto out; 2940ccd979bdSMark Fasheh 2941ccd979bdSMark Fasheh /* trunc to zero special case - this makes tree_depth = 0 2942ccd979bdSMark Fasheh * regardless of what it is. */ 2943*3a0782d0SMark Fasheh if (OCFS2_I(inode)->ip_clusters == clusters_to_del) 2944dcd0538fSMark Fasheh goto out; 2945ccd979bdSMark Fasheh 2946dcd0538fSMark Fasheh el = path_leaf_el(path); 2947ccd979bdSMark Fasheh BUG_ON(!el->l_next_free_rec); 2948ccd979bdSMark Fasheh 2949*3a0782d0SMark Fasheh /* 2950*3a0782d0SMark Fasheh * Make sure that this extent list will actually be empty 2951*3a0782d0SMark Fasheh * after we clear away the data. We can shortcut out if 2952*3a0782d0SMark Fasheh * there's more than one non-empty extent in the 2953*3a0782d0SMark Fasheh * list. Otherwise, a check of the remaining extent is 2954*3a0782d0SMark Fasheh * necessary. 2955*3a0782d0SMark Fasheh */ 2956*3a0782d0SMark Fasheh next_free = le16_to_cpu(el->l_next_free_rec); 2957*3a0782d0SMark Fasheh rec = NULL; 2958dcd0538fSMark Fasheh if (ocfs2_is_empty_extent(&el->l_recs[0])) { 2959*3a0782d0SMark Fasheh if (next_free > 2) 2960dcd0538fSMark Fasheh goto out; 2961*3a0782d0SMark Fasheh 2962*3a0782d0SMark Fasheh /* We may have a valid extent in index 1, check it. */ 2963*3a0782d0SMark Fasheh if (next_free == 2) 2964*3a0782d0SMark Fasheh rec = &el->l_recs[1]; 2965*3a0782d0SMark Fasheh 2966*3a0782d0SMark Fasheh /* 2967*3a0782d0SMark Fasheh * Fall through - no more nonempty extents, so we want 2968*3a0782d0SMark Fasheh * to delete this leaf. 2969*3a0782d0SMark Fasheh */ 2970*3a0782d0SMark Fasheh } else { 2971*3a0782d0SMark Fasheh if (next_free > 1) 2972dcd0538fSMark Fasheh goto out; 2973ccd979bdSMark Fasheh 2974*3a0782d0SMark Fasheh rec = &el->l_recs[0]; 2975*3a0782d0SMark Fasheh } 2976*3a0782d0SMark Fasheh 2977*3a0782d0SMark Fasheh if (rec) { 2978*3a0782d0SMark Fasheh /* 2979*3a0782d0SMark Fasheh * Check it we'll only be trimming off the end of this 2980*3a0782d0SMark Fasheh * cluster. 2981*3a0782d0SMark Fasheh */ 2982*3a0782d0SMark Fasheh if (le16_to_cpu(rec->e_clusters) > clusters_to_del) 2983*3a0782d0SMark Fasheh goto out; 2984*3a0782d0SMark Fasheh } 2985*3a0782d0SMark Fasheh 2986dcd0538fSMark Fasheh ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, path, &cpos); 2987dcd0538fSMark Fasheh if (ret) { 2988dcd0538fSMark Fasheh mlog_errno(ret); 2989dcd0538fSMark Fasheh goto out; 2990ccd979bdSMark Fasheh } 2991ccd979bdSMark Fasheh 2992dcd0538fSMark Fasheh ret = ocfs2_find_leaf(inode, path_root_el(path), cpos, &bh); 2993dcd0538fSMark Fasheh if (ret) { 2994dcd0538fSMark Fasheh mlog_errno(ret); 2995dcd0538fSMark Fasheh goto out; 2996ccd979bdSMark Fasheh } 2997dcd0538fSMark Fasheh 2998ccd979bdSMark Fasheh eb = (struct ocfs2_extent_block *) bh->b_data; 2999ccd979bdSMark Fasheh el = &eb->h_list; 3000ccd979bdSMark Fasheh if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { 3001ccd979bdSMark Fasheh OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); 3002dcd0538fSMark Fasheh ret = -EROFS; 3003dcd0538fSMark Fasheh goto out; 3004ccd979bdSMark Fasheh } 3005ccd979bdSMark Fasheh 3006ccd979bdSMark Fasheh *new_last_eb = bh; 3007ccd979bdSMark Fasheh get_bh(*new_last_eb); 3008dcd0538fSMark Fasheh mlog(0, "returning block %llu, (cpos: %u)\n", 3009dcd0538fSMark Fasheh (unsigned long long)le64_to_cpu(eb->h_blkno), cpos); 3010dcd0538fSMark Fasheh out: 3011ccd979bdSMark Fasheh brelse(bh); 3012ccd979bdSMark Fasheh 3013dcd0538fSMark Fasheh return ret; 3014ccd979bdSMark Fasheh } 3015ccd979bdSMark Fasheh 3016*3a0782d0SMark Fasheh /* 3017*3a0782d0SMark Fasheh * Trim some clusters off the rightmost edge of a tree. Only called 3018*3a0782d0SMark Fasheh * during truncate. 3019*3a0782d0SMark Fasheh * 3020*3a0782d0SMark Fasheh * The caller needs to: 3021*3a0782d0SMark Fasheh * - start journaling of each path component. 3022*3a0782d0SMark Fasheh * - compute and fully set up any new last ext block 3023*3a0782d0SMark Fasheh */ 3024*3a0782d0SMark Fasheh static int ocfs2_trim_tree(struct inode *inode, struct ocfs2_path *path, 3025*3a0782d0SMark Fasheh handle_t *handle, struct ocfs2_truncate_context *tc, 3026*3a0782d0SMark Fasheh u32 clusters_to_del, u64 *delete_start) 3027*3a0782d0SMark Fasheh { 3028*3a0782d0SMark Fasheh int ret, i, index = path->p_tree_depth; 3029*3a0782d0SMark Fasheh u32 new_edge = 0; 3030*3a0782d0SMark Fasheh u64 deleted_eb = 0; 3031*3a0782d0SMark Fasheh struct buffer_head *bh; 3032*3a0782d0SMark Fasheh struct ocfs2_extent_list *el; 3033*3a0782d0SMark Fasheh struct ocfs2_extent_rec *rec; 3034*3a0782d0SMark Fasheh 3035*3a0782d0SMark Fasheh *delete_start = 0; 3036*3a0782d0SMark Fasheh 3037*3a0782d0SMark Fasheh while (index >= 0) { 3038*3a0782d0SMark Fasheh bh = path->p_node[index].bh; 3039*3a0782d0SMark Fasheh el = path->p_node[index].el; 3040*3a0782d0SMark Fasheh 3041*3a0782d0SMark Fasheh mlog(0, "traveling tree (index = %d, block = %llu)\n", 3042*3a0782d0SMark Fasheh index, (unsigned long long)bh->b_blocknr); 3043*3a0782d0SMark Fasheh 3044*3a0782d0SMark Fasheh BUG_ON(le16_to_cpu(el->l_next_free_rec) == 0); 3045*3a0782d0SMark Fasheh 3046*3a0782d0SMark Fasheh if (index != 3047*3a0782d0SMark Fasheh (path->p_tree_depth - le16_to_cpu(el->l_tree_depth))) { 3048*3a0782d0SMark Fasheh ocfs2_error(inode->i_sb, 3049*3a0782d0SMark Fasheh "Inode %lu has invalid ext. block %llu", 3050*3a0782d0SMark Fasheh inode->i_ino, 3051*3a0782d0SMark Fasheh (unsigned long long)bh->b_blocknr); 3052*3a0782d0SMark Fasheh ret = -EROFS; 3053*3a0782d0SMark Fasheh goto out; 3054*3a0782d0SMark Fasheh } 3055*3a0782d0SMark Fasheh 3056*3a0782d0SMark Fasheh find_tail_record: 3057*3a0782d0SMark Fasheh i = le16_to_cpu(el->l_next_free_rec) - 1; 3058*3a0782d0SMark Fasheh rec = &el->l_recs[i]; 3059*3a0782d0SMark Fasheh 3060*3a0782d0SMark Fasheh mlog(0, "Extent list before: record %d: (%u, %u, %llu), " 3061*3a0782d0SMark Fasheh "next = %u\n", i, le32_to_cpu(rec->e_cpos), 3062*3a0782d0SMark Fasheh le32_to_cpu(rec->e_clusters), 3063*3a0782d0SMark Fasheh (unsigned long long)le64_to_cpu(rec->e_blkno), 3064*3a0782d0SMark Fasheh le16_to_cpu(el->l_next_free_rec)); 3065*3a0782d0SMark Fasheh 3066*3a0782d0SMark Fasheh BUG_ON(le32_to_cpu(rec->e_clusters) < clusters_to_del); 3067*3a0782d0SMark Fasheh 3068*3a0782d0SMark Fasheh if (le16_to_cpu(el->l_tree_depth) == 0) { 3069*3a0782d0SMark Fasheh /* 3070*3a0782d0SMark Fasheh * If the leaf block contains a single empty 3071*3a0782d0SMark Fasheh * extent and no records, we can just remove 3072*3a0782d0SMark Fasheh * the block. 3073*3a0782d0SMark Fasheh */ 3074*3a0782d0SMark Fasheh if (i == 0 && ocfs2_is_empty_extent(rec)) { 3075*3a0782d0SMark Fasheh memset(rec, 0, 3076*3a0782d0SMark Fasheh sizeof(struct ocfs2_extent_rec)); 3077*3a0782d0SMark Fasheh el->l_next_free_rec = cpu_to_le16(0); 3078*3a0782d0SMark Fasheh 3079*3a0782d0SMark Fasheh goto delete; 3080*3a0782d0SMark Fasheh } 3081*3a0782d0SMark Fasheh 3082*3a0782d0SMark Fasheh /* 3083*3a0782d0SMark Fasheh * Remove any empty extents by shifting things 3084*3a0782d0SMark Fasheh * left. That should make life much easier on 3085*3a0782d0SMark Fasheh * the code below. This condition is rare 3086*3a0782d0SMark Fasheh * enough that we shouldn't see a performance 3087*3a0782d0SMark Fasheh * hit. 3088*3a0782d0SMark Fasheh */ 3089*3a0782d0SMark Fasheh if (ocfs2_is_empty_extent(&el->l_recs[0])) { 3090*3a0782d0SMark Fasheh le16_add_cpu(&el->l_next_free_rec, -1); 3091*3a0782d0SMark Fasheh 3092*3a0782d0SMark Fasheh for(i = 0; 3093*3a0782d0SMark Fasheh i < le16_to_cpu(el->l_next_free_rec); i++) 3094*3a0782d0SMark Fasheh el->l_recs[i] = el->l_recs[i + 1]; 3095*3a0782d0SMark Fasheh 3096*3a0782d0SMark Fasheh memset(&el->l_recs[i], 0, 3097*3a0782d0SMark Fasheh sizeof(struct ocfs2_extent_rec)); 3098*3a0782d0SMark Fasheh 3099*3a0782d0SMark Fasheh /* 3100*3a0782d0SMark Fasheh * We've modified our extent list. The 3101*3a0782d0SMark Fasheh * simplest way to handle this change 3102*3a0782d0SMark Fasheh * is to being the search from the 3103*3a0782d0SMark Fasheh * start again. 3104*3a0782d0SMark Fasheh */ 3105*3a0782d0SMark Fasheh goto find_tail_record; 3106*3a0782d0SMark Fasheh } 3107*3a0782d0SMark Fasheh 3108*3a0782d0SMark Fasheh le32_add_cpu(&rec->e_clusters, -clusters_to_del); 3109*3a0782d0SMark Fasheh 3110*3a0782d0SMark Fasheh /* 3111*3a0782d0SMark Fasheh * We'll use "new_edge" on our way back up the 3112*3a0782d0SMark Fasheh * tree to know what our rightmost cpos is. 3113*3a0782d0SMark Fasheh */ 3114*3a0782d0SMark Fasheh new_edge = le32_to_cpu(rec->e_clusters); 3115*3a0782d0SMark Fasheh new_edge += le32_to_cpu(rec->e_cpos); 3116*3a0782d0SMark Fasheh 3117*3a0782d0SMark Fasheh /* 3118*3a0782d0SMark Fasheh * The caller will use this to delete data blocks. 3119*3a0782d0SMark Fasheh */ 3120*3a0782d0SMark Fasheh *delete_start = le64_to_cpu(rec->e_blkno) 3121*3a0782d0SMark Fasheh + ocfs2_clusters_to_blocks(inode->i_sb, 3122*3a0782d0SMark Fasheh le32_to_cpu(rec->e_clusters)); 3123*3a0782d0SMark Fasheh 3124*3a0782d0SMark Fasheh /* 3125*3a0782d0SMark Fasheh * If it's now empty, remove this record. 3126*3a0782d0SMark Fasheh */ 3127*3a0782d0SMark Fasheh if (le32_to_cpu(rec->e_clusters) == 0) { 3128*3a0782d0SMark Fasheh memset(rec, 0, 3129*3a0782d0SMark Fasheh sizeof(struct ocfs2_extent_rec)); 3130*3a0782d0SMark Fasheh le16_add_cpu(&el->l_next_free_rec, -1); 3131*3a0782d0SMark Fasheh } 3132*3a0782d0SMark Fasheh } else { 3133*3a0782d0SMark Fasheh if (le64_to_cpu(rec->e_blkno) == deleted_eb) { 3134*3a0782d0SMark Fasheh memset(rec, 0, 3135*3a0782d0SMark Fasheh sizeof(struct ocfs2_extent_rec)); 3136*3a0782d0SMark Fasheh le16_add_cpu(&el->l_next_free_rec, -1); 3137*3a0782d0SMark Fasheh 3138*3a0782d0SMark Fasheh goto delete; 3139*3a0782d0SMark Fasheh } 3140*3a0782d0SMark Fasheh 3141*3a0782d0SMark Fasheh /* Can this actually happen? */ 3142*3a0782d0SMark Fasheh if (le16_to_cpu(el->l_next_free_rec) == 0) 3143*3a0782d0SMark Fasheh goto delete; 3144*3a0782d0SMark Fasheh 3145*3a0782d0SMark Fasheh /* 3146*3a0782d0SMark Fasheh * We never actually deleted any clusters 3147*3a0782d0SMark Fasheh * because our leaf was empty. There's no 3148*3a0782d0SMark Fasheh * reason to adjust the rightmost edge then. 3149*3a0782d0SMark Fasheh */ 3150*3a0782d0SMark Fasheh if (new_edge == 0) 3151*3a0782d0SMark Fasheh goto delete; 3152*3a0782d0SMark Fasheh 3153*3a0782d0SMark Fasheh rec->e_clusters = cpu_to_le32(new_edge); 3154*3a0782d0SMark Fasheh le32_add_cpu(&rec->e_clusters, 3155*3a0782d0SMark Fasheh -le32_to_cpu(rec->e_cpos)); 3156*3a0782d0SMark Fasheh 3157*3a0782d0SMark Fasheh /* 3158*3a0782d0SMark Fasheh * A deleted child record should have been 3159*3a0782d0SMark Fasheh * caught above. 3160*3a0782d0SMark Fasheh */ 3161*3a0782d0SMark Fasheh BUG_ON(le32_to_cpu(rec->e_clusters) == 0); 3162*3a0782d0SMark Fasheh } 3163*3a0782d0SMark Fasheh 3164*3a0782d0SMark Fasheh delete: 3165*3a0782d0SMark Fasheh ret = ocfs2_journal_dirty(handle, bh); 3166*3a0782d0SMark Fasheh if (ret) { 3167*3a0782d0SMark Fasheh mlog_errno(ret); 3168*3a0782d0SMark Fasheh goto out; 3169*3a0782d0SMark Fasheh } 3170*3a0782d0SMark Fasheh 3171*3a0782d0SMark Fasheh mlog(0, "extent list container %llu, after: record %d: " 3172*3a0782d0SMark Fasheh "(%u, %u, %llu), next = %u.\n", 3173*3a0782d0SMark Fasheh (unsigned long long)bh->b_blocknr, i, 3174*3a0782d0SMark Fasheh le32_to_cpu(rec->e_cpos), le32_to_cpu(rec->e_clusters), 3175*3a0782d0SMark Fasheh (unsigned long long)le64_to_cpu(rec->e_blkno), 3176*3a0782d0SMark Fasheh le16_to_cpu(el->l_next_free_rec)); 3177*3a0782d0SMark Fasheh 3178*3a0782d0SMark Fasheh /* 3179*3a0782d0SMark Fasheh * We must be careful to only attempt delete of an 3180*3a0782d0SMark Fasheh * extent block (and not the root inode block). 3181*3a0782d0SMark Fasheh */ 3182*3a0782d0SMark Fasheh if (index > 0 && le16_to_cpu(el->l_next_free_rec) == 0) { 3183*3a0782d0SMark Fasheh struct ocfs2_extent_block *eb = 3184*3a0782d0SMark Fasheh (struct ocfs2_extent_block *)bh->b_data; 3185*3a0782d0SMark Fasheh 3186*3a0782d0SMark Fasheh /* 3187*3a0782d0SMark Fasheh * Save this for use when processing the 3188*3a0782d0SMark Fasheh * parent block. 3189*3a0782d0SMark Fasheh */ 3190*3a0782d0SMark Fasheh deleted_eb = le64_to_cpu(eb->h_blkno); 3191*3a0782d0SMark Fasheh 3192*3a0782d0SMark Fasheh mlog(0, "deleting this extent block.\n"); 3193*3a0782d0SMark Fasheh 3194*3a0782d0SMark Fasheh ocfs2_remove_from_cache(inode, bh); 3195*3a0782d0SMark Fasheh 3196*3a0782d0SMark Fasheh BUG_ON(le32_to_cpu(el->l_recs[0].e_clusters)); 3197*3a0782d0SMark Fasheh BUG_ON(le32_to_cpu(el->l_recs[0].e_cpos)); 3198*3a0782d0SMark Fasheh BUG_ON(le64_to_cpu(el->l_recs[0].e_blkno)); 3199*3a0782d0SMark Fasheh 3200*3a0782d0SMark Fasheh if (le16_to_cpu(eb->h_suballoc_slot) == 0) { 3201*3a0782d0SMark Fasheh /* 3202*3a0782d0SMark Fasheh * This code only understands how to 3203*3a0782d0SMark Fasheh * lock the suballocator in slot 0, 3204*3a0782d0SMark Fasheh * which is fine because allocation is 3205*3a0782d0SMark Fasheh * only ever done out of that 3206*3a0782d0SMark Fasheh * suballocator too. A future version 3207*3a0782d0SMark Fasheh * might change that however, so avoid 3208*3a0782d0SMark Fasheh * a free if we don't know how to 3209*3a0782d0SMark Fasheh * handle it. This way an fs incompat 3210*3a0782d0SMark Fasheh * bit will not be necessary. 3211*3a0782d0SMark Fasheh */ 3212*3a0782d0SMark Fasheh ret = ocfs2_free_extent_block(handle, 3213*3a0782d0SMark Fasheh tc->tc_ext_alloc_inode, 3214*3a0782d0SMark Fasheh tc->tc_ext_alloc_bh, 3215*3a0782d0SMark Fasheh eb); 3216*3a0782d0SMark Fasheh 3217*3a0782d0SMark Fasheh /* An error here is not fatal. */ 3218*3a0782d0SMark Fasheh if (ret < 0) 3219*3a0782d0SMark Fasheh mlog_errno(ret); 3220*3a0782d0SMark Fasheh } 3221*3a0782d0SMark Fasheh } else { 3222*3a0782d0SMark Fasheh deleted_eb = 0; 3223*3a0782d0SMark Fasheh } 3224*3a0782d0SMark Fasheh 3225*3a0782d0SMark Fasheh index--; 3226*3a0782d0SMark Fasheh } 3227*3a0782d0SMark Fasheh 3228*3a0782d0SMark Fasheh ret = 0; 3229*3a0782d0SMark Fasheh out: 3230*3a0782d0SMark Fasheh return ret; 3231*3a0782d0SMark Fasheh } 3232*3a0782d0SMark Fasheh 3233ccd979bdSMark Fasheh static int ocfs2_do_truncate(struct ocfs2_super *osb, 3234ccd979bdSMark Fasheh unsigned int clusters_to_del, 3235ccd979bdSMark Fasheh struct inode *inode, 3236ccd979bdSMark Fasheh struct buffer_head *fe_bh, 32371fabe148SMark Fasheh handle_t *handle, 3238dcd0538fSMark Fasheh struct ocfs2_truncate_context *tc, 3239dcd0538fSMark Fasheh struct ocfs2_path *path) 3240ccd979bdSMark Fasheh { 3241*3a0782d0SMark Fasheh int status; 3242ccd979bdSMark Fasheh struct ocfs2_dinode *fe; 3243ccd979bdSMark Fasheh struct ocfs2_extent_block *last_eb = NULL; 3244ccd979bdSMark Fasheh struct ocfs2_extent_list *el; 3245ccd979bdSMark Fasheh struct buffer_head *last_eb_bh = NULL; 3246ccd979bdSMark Fasheh u64 delete_blk = 0; 3247ccd979bdSMark Fasheh 3248ccd979bdSMark Fasheh fe = (struct ocfs2_dinode *) fe_bh->b_data; 3249ccd979bdSMark Fasheh 3250*3a0782d0SMark Fasheh status = ocfs2_find_new_last_ext_blk(inode, clusters_to_del, 3251dcd0538fSMark Fasheh path, &last_eb_bh); 3252ccd979bdSMark Fasheh if (status < 0) { 3253ccd979bdSMark Fasheh mlog_errno(status); 3254ccd979bdSMark Fasheh goto bail; 3255ccd979bdSMark Fasheh } 3256ccd979bdSMark Fasheh 3257dcd0538fSMark Fasheh /* 3258dcd0538fSMark Fasheh * Each component will be touched, so we might as well journal 3259dcd0538fSMark Fasheh * here to avoid having to handle errors later. 3260dcd0538fSMark Fasheh */ 3261*3a0782d0SMark Fasheh status = ocfs2_journal_access_path(inode, handle, path); 3262ccd979bdSMark Fasheh if (status < 0) { 3263ccd979bdSMark Fasheh mlog_errno(status); 3264ccd979bdSMark Fasheh goto bail; 3265ccd979bdSMark Fasheh } 3266dcd0538fSMark Fasheh 3267dcd0538fSMark Fasheh if (last_eb_bh) { 3268dcd0538fSMark Fasheh status = ocfs2_journal_access(handle, inode, last_eb_bh, 3269dcd0538fSMark Fasheh OCFS2_JOURNAL_ACCESS_WRITE); 3270dcd0538fSMark Fasheh if (status < 0) { 3271dcd0538fSMark Fasheh mlog_errno(status); 3272dcd0538fSMark Fasheh goto bail; 3273dcd0538fSMark Fasheh } 3274dcd0538fSMark Fasheh 3275dcd0538fSMark Fasheh last_eb = (struct ocfs2_extent_block *) last_eb_bh->b_data; 3276dcd0538fSMark Fasheh } 3277dcd0538fSMark Fasheh 3278ccd979bdSMark Fasheh el = &(fe->id2.i_list); 3279ccd979bdSMark Fasheh 3280dcd0538fSMark Fasheh /* 3281dcd0538fSMark Fasheh * Lower levels depend on this never happening, but it's best 3282dcd0538fSMark Fasheh * to check it up here before changing the tree. 3283dcd0538fSMark Fasheh */ 3284dcd0538fSMark Fasheh if (el->l_tree_depth && ocfs2_is_empty_extent(&el->l_recs[0])) { 3285dcd0538fSMark Fasheh ocfs2_error(inode->i_sb, 3286dcd0538fSMark Fasheh "Inode %lu has an empty extent record, depth %u\n", 3287dcd0538fSMark Fasheh inode->i_ino, le16_to_cpu(el->l_tree_depth)); 3288*3a0782d0SMark Fasheh status = -EROFS; 3289dcd0538fSMark Fasheh goto bail; 3290dcd0538fSMark Fasheh } 3291dcd0538fSMark Fasheh 3292ccd979bdSMark Fasheh spin_lock(&OCFS2_I(inode)->ip_lock); 3293ccd979bdSMark Fasheh OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters) - 3294ccd979bdSMark Fasheh clusters_to_del; 3295ccd979bdSMark Fasheh spin_unlock(&OCFS2_I(inode)->ip_lock); 3296ccd979bdSMark Fasheh le32_add_cpu(&fe->i_clusters, -clusters_to_del); 3297ccd979bdSMark Fasheh 3298*3a0782d0SMark Fasheh status = ocfs2_trim_tree(inode, path, handle, tc, 3299*3a0782d0SMark Fasheh clusters_to_del, &delete_blk); 3300*3a0782d0SMark Fasheh if (status) { 3301*3a0782d0SMark Fasheh mlog_errno(status); 3302*3a0782d0SMark Fasheh goto bail; 3303ccd979bdSMark Fasheh } 3304ccd979bdSMark Fasheh 3305dcd0538fSMark Fasheh if (le32_to_cpu(fe->i_clusters) == 0) { 3306ccd979bdSMark Fasheh /* trunc to zero is a special case. */ 3307ccd979bdSMark Fasheh el->l_tree_depth = 0; 3308ccd979bdSMark Fasheh fe->i_last_eb_blk = 0; 3309ccd979bdSMark Fasheh } else if (last_eb) 3310ccd979bdSMark Fasheh fe->i_last_eb_blk = last_eb->h_blkno; 3311ccd979bdSMark Fasheh 3312ccd979bdSMark Fasheh status = ocfs2_journal_dirty(handle, fe_bh); 3313ccd979bdSMark Fasheh if (status < 0) { 3314ccd979bdSMark Fasheh mlog_errno(status); 3315ccd979bdSMark Fasheh goto bail; 3316ccd979bdSMark Fasheh } 3317ccd979bdSMark Fasheh 3318ccd979bdSMark Fasheh if (last_eb) { 3319ccd979bdSMark Fasheh /* If there will be a new last extent block, then by 3320ccd979bdSMark Fasheh * definition, there cannot be any leaves to the right of 3321ccd979bdSMark Fasheh * him. */ 3322ccd979bdSMark Fasheh last_eb->h_next_leaf_blk = 0; 3323ccd979bdSMark Fasheh status = ocfs2_journal_dirty(handle, last_eb_bh); 3324ccd979bdSMark Fasheh if (status < 0) { 3325ccd979bdSMark Fasheh mlog_errno(status); 3326ccd979bdSMark Fasheh goto bail; 3327ccd979bdSMark Fasheh } 3328ccd979bdSMark Fasheh } 3329ccd979bdSMark Fasheh 3330*3a0782d0SMark Fasheh if (delete_blk) { 3331ccd979bdSMark Fasheh status = ocfs2_truncate_log_append(osb, handle, delete_blk, 3332ccd979bdSMark Fasheh clusters_to_del); 3333ccd979bdSMark Fasheh if (status < 0) { 3334ccd979bdSMark Fasheh mlog_errno(status); 3335ccd979bdSMark Fasheh goto bail; 3336ccd979bdSMark Fasheh } 3337*3a0782d0SMark Fasheh } 3338ccd979bdSMark Fasheh status = 0; 3339ccd979bdSMark Fasheh bail: 3340dcd0538fSMark Fasheh 3341ccd979bdSMark Fasheh mlog_exit(status); 3342ccd979bdSMark Fasheh return status; 3343ccd979bdSMark Fasheh } 3344ccd979bdSMark Fasheh 3345ccd979bdSMark Fasheh /* 3346ccd979bdSMark Fasheh * It is expected, that by the time you call this function, 3347ccd979bdSMark Fasheh * inode->i_size and fe->i_size have been adjusted. 3348ccd979bdSMark Fasheh * 3349ccd979bdSMark Fasheh * WARNING: This will kfree the truncate context 3350ccd979bdSMark Fasheh */ 3351ccd979bdSMark Fasheh int ocfs2_commit_truncate(struct ocfs2_super *osb, 3352ccd979bdSMark Fasheh struct inode *inode, 3353ccd979bdSMark Fasheh struct buffer_head *fe_bh, 3354ccd979bdSMark Fasheh struct ocfs2_truncate_context *tc) 3355ccd979bdSMark Fasheh { 3356ccd979bdSMark Fasheh int status, i, credits, tl_sem = 0; 3357dcd0538fSMark Fasheh u32 clusters_to_del, new_highest_cpos, range; 3358ccd979bdSMark Fasheh struct ocfs2_extent_list *el; 33591fabe148SMark Fasheh handle_t *handle = NULL; 3360ccd979bdSMark Fasheh struct inode *tl_inode = osb->osb_tl_inode; 3361dcd0538fSMark Fasheh struct ocfs2_path *path = NULL; 3362ccd979bdSMark Fasheh 3363ccd979bdSMark Fasheh mlog_entry_void(); 3364ccd979bdSMark Fasheh 3365ccd979bdSMark Fasheh down_write(&OCFS2_I(inode)->ip_alloc_sem); 3366ccd979bdSMark Fasheh 3367dcd0538fSMark Fasheh new_highest_cpos = ocfs2_clusters_for_bytes(osb->sb, 3368ccd979bdSMark Fasheh i_size_read(inode)); 3369ccd979bdSMark Fasheh 3370dcd0538fSMark Fasheh path = ocfs2_new_inode_path(fe_bh); 3371dcd0538fSMark Fasheh if (!path) { 3372dcd0538fSMark Fasheh status = -ENOMEM; 3373ccd979bdSMark Fasheh mlog_errno(status); 3374ccd979bdSMark Fasheh goto bail; 3375ccd979bdSMark Fasheh } 3376dcd0538fSMark Fasheh start: 3377dcd0538fSMark Fasheh /* 3378*3a0782d0SMark Fasheh * Check that we still have allocation to delete. 3379*3a0782d0SMark Fasheh */ 3380*3a0782d0SMark Fasheh if (OCFS2_I(inode)->ip_clusters == 0) { 3381*3a0782d0SMark Fasheh status = 0; 3382*3a0782d0SMark Fasheh goto bail; 3383*3a0782d0SMark Fasheh } 3384*3a0782d0SMark Fasheh 3385*3a0782d0SMark Fasheh /* 3386dcd0538fSMark Fasheh * Truncate always works against the rightmost tree branch. 3387dcd0538fSMark Fasheh */ 3388dcd0538fSMark Fasheh status = ocfs2_find_path(inode, path, UINT_MAX); 3389dcd0538fSMark Fasheh if (status) { 3390dcd0538fSMark Fasheh mlog_errno(status); 3391ccd979bdSMark Fasheh goto bail; 3392ccd979bdSMark Fasheh } 3393ccd979bdSMark Fasheh 3394dcd0538fSMark Fasheh mlog(0, "inode->ip_clusters = %u, tree_depth = %u\n", 3395dcd0538fSMark Fasheh OCFS2_I(inode)->ip_clusters, path->p_tree_depth); 3396dcd0538fSMark Fasheh 3397dcd0538fSMark Fasheh /* 3398dcd0538fSMark Fasheh * By now, el will point to the extent list on the bottom most 3399dcd0538fSMark Fasheh * portion of this tree. Only the tail record is considered in 3400dcd0538fSMark Fasheh * each pass. 3401dcd0538fSMark Fasheh * 3402dcd0538fSMark Fasheh * We handle the following cases, in order: 3403dcd0538fSMark Fasheh * - empty extent: delete the remaining branch 3404dcd0538fSMark Fasheh * - remove the entire record 3405dcd0538fSMark Fasheh * - remove a partial record 3406dcd0538fSMark Fasheh * - no record needs to be removed (truncate has completed) 3407dcd0538fSMark Fasheh */ 3408dcd0538fSMark Fasheh el = path_leaf_el(path); 3409*3a0782d0SMark Fasheh if (le16_to_cpu(el->l_next_free_rec) == 0) { 3410*3a0782d0SMark Fasheh ocfs2_error(inode->i_sb, 3411*3a0782d0SMark Fasheh "Inode %llu has empty extent block at %llu\n", 3412*3a0782d0SMark Fasheh (unsigned long long)OCFS2_I(inode)->ip_blkno, 3413*3a0782d0SMark Fasheh (unsigned long long)path_leaf_bh(path)->b_blocknr); 3414*3a0782d0SMark Fasheh status = -EROFS; 3415*3a0782d0SMark Fasheh goto bail; 3416*3a0782d0SMark Fasheh } 3417*3a0782d0SMark Fasheh 3418ccd979bdSMark Fasheh i = le16_to_cpu(el->l_next_free_rec) - 1; 3419dcd0538fSMark Fasheh range = le32_to_cpu(el->l_recs[i].e_cpos) + 3420dcd0538fSMark Fasheh le32_to_cpu(el->l_recs[i].e_clusters); 3421dcd0538fSMark Fasheh if (i == 0 && ocfs2_is_empty_extent(&el->l_recs[i])) { 3422dcd0538fSMark Fasheh clusters_to_del = 0; 3423dcd0538fSMark Fasheh } else if (le32_to_cpu(el->l_recs[i].e_cpos) >= new_highest_cpos) { 3424ccd979bdSMark Fasheh clusters_to_del = le32_to_cpu(el->l_recs[i].e_clusters); 3425dcd0538fSMark Fasheh } else if (range > new_highest_cpos) { 3426ccd979bdSMark Fasheh clusters_to_del = (le32_to_cpu(el->l_recs[i].e_clusters) + 3427ccd979bdSMark Fasheh le32_to_cpu(el->l_recs[i].e_cpos)) - 3428dcd0538fSMark Fasheh new_highest_cpos; 3429dcd0538fSMark Fasheh } else { 3430dcd0538fSMark Fasheh status = 0; 3431dcd0538fSMark Fasheh goto bail; 3432dcd0538fSMark Fasheh } 3433ccd979bdSMark Fasheh 3434dcd0538fSMark Fasheh mlog(0, "clusters_to_del = %u in this pass, tail blk=%llu\n", 3435dcd0538fSMark Fasheh clusters_to_del, (unsigned long long)path_leaf_bh(path)->b_blocknr); 3436dcd0538fSMark Fasheh 3437dcd0538fSMark Fasheh BUG_ON(clusters_to_del == 0); 3438ccd979bdSMark Fasheh 34391b1dcc1bSJes Sorensen mutex_lock(&tl_inode->i_mutex); 3440ccd979bdSMark Fasheh tl_sem = 1; 3441ccd979bdSMark Fasheh /* ocfs2_truncate_log_needs_flush guarantees us at least one 3442ccd979bdSMark Fasheh * record is free for use. If there isn't any, we flush to get 3443ccd979bdSMark Fasheh * an empty truncate log. */ 3444ccd979bdSMark Fasheh if (ocfs2_truncate_log_needs_flush(osb)) { 3445ccd979bdSMark Fasheh status = __ocfs2_flush_truncate_log(osb); 3446ccd979bdSMark Fasheh if (status < 0) { 3447ccd979bdSMark Fasheh mlog_errno(status); 3448ccd979bdSMark Fasheh goto bail; 3449ccd979bdSMark Fasheh } 3450ccd979bdSMark Fasheh } 3451ccd979bdSMark Fasheh 3452ccd979bdSMark Fasheh credits = ocfs2_calc_tree_trunc_credits(osb->sb, clusters_to_del, 3453dcd0538fSMark Fasheh (struct ocfs2_dinode *)fe_bh->b_data, 3454dcd0538fSMark Fasheh el); 345565eff9ccSMark Fasheh handle = ocfs2_start_trans(osb, credits); 3456ccd979bdSMark Fasheh if (IS_ERR(handle)) { 3457ccd979bdSMark Fasheh status = PTR_ERR(handle); 3458ccd979bdSMark Fasheh handle = NULL; 3459ccd979bdSMark Fasheh mlog_errno(status); 3460ccd979bdSMark Fasheh goto bail; 3461ccd979bdSMark Fasheh } 3462ccd979bdSMark Fasheh 3463dcd0538fSMark Fasheh status = ocfs2_do_truncate(osb, clusters_to_del, inode, fe_bh, handle, 3464dcd0538fSMark Fasheh tc, path); 3465ccd979bdSMark Fasheh if (status < 0) { 3466ccd979bdSMark Fasheh mlog_errno(status); 3467ccd979bdSMark Fasheh goto bail; 3468ccd979bdSMark Fasheh } 3469ccd979bdSMark Fasheh 34701b1dcc1bSJes Sorensen mutex_unlock(&tl_inode->i_mutex); 3471ccd979bdSMark Fasheh tl_sem = 0; 3472ccd979bdSMark Fasheh 347302dc1af4SMark Fasheh ocfs2_commit_trans(osb, handle); 3474ccd979bdSMark Fasheh handle = NULL; 3475ccd979bdSMark Fasheh 3476dcd0538fSMark Fasheh ocfs2_reinit_path(path, 1); 3477dcd0538fSMark Fasheh 3478dcd0538fSMark Fasheh /* 3479*3a0782d0SMark Fasheh * The check above will catch the case where we've truncated 3480*3a0782d0SMark Fasheh * away all allocation. 3481dcd0538fSMark Fasheh */ 3482ccd979bdSMark Fasheh goto start; 3483*3a0782d0SMark Fasheh 3484ccd979bdSMark Fasheh bail: 3485ccd979bdSMark Fasheh up_write(&OCFS2_I(inode)->ip_alloc_sem); 3486ccd979bdSMark Fasheh 3487ccd979bdSMark Fasheh ocfs2_schedule_truncate_log_flush(osb, 1); 3488ccd979bdSMark Fasheh 3489ccd979bdSMark Fasheh if (tl_sem) 34901b1dcc1bSJes Sorensen mutex_unlock(&tl_inode->i_mutex); 3491ccd979bdSMark Fasheh 3492ccd979bdSMark Fasheh if (handle) 349302dc1af4SMark Fasheh ocfs2_commit_trans(osb, handle); 3494ccd979bdSMark Fasheh 3495dcd0538fSMark Fasheh ocfs2_free_path(path); 3496ccd979bdSMark Fasheh 3497ccd979bdSMark Fasheh /* This will drop the ext_alloc cluster lock for us */ 3498ccd979bdSMark Fasheh ocfs2_free_truncate_context(tc); 3499ccd979bdSMark Fasheh 3500ccd979bdSMark Fasheh mlog_exit(status); 3501ccd979bdSMark Fasheh return status; 3502ccd979bdSMark Fasheh } 3503ccd979bdSMark Fasheh 3504ccd979bdSMark Fasheh /* 3505ccd979bdSMark Fasheh * Expects the inode to already be locked. This will figure out which 3506ccd979bdSMark Fasheh * inodes need to be locked and will put them on the returned truncate 3507ccd979bdSMark Fasheh * context. 3508ccd979bdSMark Fasheh */ 3509ccd979bdSMark Fasheh int ocfs2_prepare_truncate(struct ocfs2_super *osb, 3510ccd979bdSMark Fasheh struct inode *inode, 3511ccd979bdSMark Fasheh struct buffer_head *fe_bh, 3512ccd979bdSMark Fasheh struct ocfs2_truncate_context **tc) 3513ccd979bdSMark Fasheh { 3514dcd0538fSMark Fasheh int status, metadata_delete, i; 3515ccd979bdSMark Fasheh unsigned int new_i_clusters; 3516ccd979bdSMark Fasheh struct ocfs2_dinode *fe; 3517ccd979bdSMark Fasheh struct ocfs2_extent_block *eb; 3518ccd979bdSMark Fasheh struct ocfs2_extent_list *el; 3519ccd979bdSMark Fasheh struct buffer_head *last_eb_bh = NULL; 3520ccd979bdSMark Fasheh struct inode *ext_alloc_inode = NULL; 3521ccd979bdSMark Fasheh struct buffer_head *ext_alloc_bh = NULL; 3522ccd979bdSMark Fasheh 3523ccd979bdSMark Fasheh mlog_entry_void(); 3524ccd979bdSMark Fasheh 3525ccd979bdSMark Fasheh *tc = NULL; 3526ccd979bdSMark Fasheh 3527ccd979bdSMark Fasheh new_i_clusters = ocfs2_clusters_for_bytes(osb->sb, 3528ccd979bdSMark Fasheh i_size_read(inode)); 3529ccd979bdSMark Fasheh fe = (struct ocfs2_dinode *) fe_bh->b_data; 3530ccd979bdSMark Fasheh 3531ccd979bdSMark Fasheh mlog(0, "fe->i_clusters = %u, new_i_clusters = %u, fe->i_size =" 3532b0697053SMark Fasheh "%llu\n", fe->i_clusters, new_i_clusters, 3533b0697053SMark Fasheh (unsigned long long)fe->i_size); 3534ccd979bdSMark Fasheh 3535cd861280SRobert P. J. Day *tc = kzalloc(sizeof(struct ocfs2_truncate_context), GFP_KERNEL); 3536ccd979bdSMark Fasheh if (!(*tc)) { 3537ccd979bdSMark Fasheh status = -ENOMEM; 3538ccd979bdSMark Fasheh mlog_errno(status); 3539ccd979bdSMark Fasheh goto bail; 3540ccd979bdSMark Fasheh } 3541ccd979bdSMark Fasheh 3542ccd979bdSMark Fasheh metadata_delete = 0; 3543ccd979bdSMark Fasheh if (fe->id2.i_list.l_tree_depth) { 3544ccd979bdSMark Fasheh /* If we have a tree, then the truncate may result in 3545ccd979bdSMark Fasheh * metadata deletes. Figure this out from the 3546ccd979bdSMark Fasheh * rightmost leaf block.*/ 3547ccd979bdSMark Fasheh status = ocfs2_read_block(osb, le64_to_cpu(fe->i_last_eb_blk), 3548ccd979bdSMark Fasheh &last_eb_bh, OCFS2_BH_CACHED, inode); 3549ccd979bdSMark Fasheh if (status < 0) { 3550ccd979bdSMark Fasheh mlog_errno(status); 3551ccd979bdSMark Fasheh goto bail; 3552ccd979bdSMark Fasheh } 3553ccd979bdSMark Fasheh eb = (struct ocfs2_extent_block *) last_eb_bh->b_data; 3554ccd979bdSMark Fasheh if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { 3555ccd979bdSMark Fasheh OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); 3556ccd979bdSMark Fasheh 3557ccd979bdSMark Fasheh brelse(last_eb_bh); 3558ccd979bdSMark Fasheh status = -EIO; 3559ccd979bdSMark Fasheh goto bail; 3560ccd979bdSMark Fasheh } 3561ccd979bdSMark Fasheh el = &(eb->h_list); 3562dcd0538fSMark Fasheh 3563dcd0538fSMark Fasheh i = 0; 3564dcd0538fSMark Fasheh if (ocfs2_is_empty_extent(&el->l_recs[0])) 3565dcd0538fSMark Fasheh i = 1; 3566dcd0538fSMark Fasheh /* 3567dcd0538fSMark Fasheh * XXX: Should we check that next_free_rec contains 3568dcd0538fSMark Fasheh * the extent? 3569dcd0538fSMark Fasheh */ 3570dcd0538fSMark Fasheh if (le32_to_cpu(el->l_recs[i].e_cpos) >= new_i_clusters) 3571ccd979bdSMark Fasheh metadata_delete = 1; 3572ccd979bdSMark Fasheh } 3573ccd979bdSMark Fasheh 3574ccd979bdSMark Fasheh (*tc)->tc_last_eb_bh = last_eb_bh; 3575ccd979bdSMark Fasheh 3576ccd979bdSMark Fasheh if (metadata_delete) { 3577ccd979bdSMark Fasheh mlog(0, "Will have to delete metadata for this trunc. " 3578ccd979bdSMark Fasheh "locking allocator.\n"); 3579ccd979bdSMark Fasheh ext_alloc_inode = ocfs2_get_system_file_inode(osb, EXTENT_ALLOC_SYSTEM_INODE, 0); 3580ccd979bdSMark Fasheh if (!ext_alloc_inode) { 3581ccd979bdSMark Fasheh status = -ENOMEM; 3582ccd979bdSMark Fasheh mlog_errno(status); 3583ccd979bdSMark Fasheh goto bail; 3584ccd979bdSMark Fasheh } 3585ccd979bdSMark Fasheh 35861b1dcc1bSJes Sorensen mutex_lock(&ext_alloc_inode->i_mutex); 3587ccd979bdSMark Fasheh (*tc)->tc_ext_alloc_inode = ext_alloc_inode; 3588ccd979bdSMark Fasheh 35894bcec184SMark Fasheh status = ocfs2_meta_lock(ext_alloc_inode, &ext_alloc_bh, 1); 3590ccd979bdSMark Fasheh if (status < 0) { 3591ccd979bdSMark Fasheh mlog_errno(status); 3592ccd979bdSMark Fasheh goto bail; 3593ccd979bdSMark Fasheh } 3594ccd979bdSMark Fasheh (*tc)->tc_ext_alloc_bh = ext_alloc_bh; 3595ccd979bdSMark Fasheh (*tc)->tc_ext_alloc_locked = 1; 3596ccd979bdSMark Fasheh } 3597ccd979bdSMark Fasheh 3598ccd979bdSMark Fasheh status = 0; 3599ccd979bdSMark Fasheh bail: 3600ccd979bdSMark Fasheh if (status < 0) { 3601ccd979bdSMark Fasheh if (*tc) 3602ccd979bdSMark Fasheh ocfs2_free_truncate_context(*tc); 3603ccd979bdSMark Fasheh *tc = NULL; 3604ccd979bdSMark Fasheh } 3605ccd979bdSMark Fasheh mlog_exit_void(); 3606ccd979bdSMark Fasheh return status; 3607ccd979bdSMark Fasheh } 3608ccd979bdSMark Fasheh 3609ccd979bdSMark Fasheh static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc) 3610ccd979bdSMark Fasheh { 3611ccd979bdSMark Fasheh if (tc->tc_ext_alloc_inode) { 3612ccd979bdSMark Fasheh if (tc->tc_ext_alloc_locked) 3613ccd979bdSMark Fasheh ocfs2_meta_unlock(tc->tc_ext_alloc_inode, 1); 3614ccd979bdSMark Fasheh 36151b1dcc1bSJes Sorensen mutex_unlock(&tc->tc_ext_alloc_inode->i_mutex); 3616ccd979bdSMark Fasheh iput(tc->tc_ext_alloc_inode); 3617ccd979bdSMark Fasheh } 3618ccd979bdSMark Fasheh 3619ccd979bdSMark Fasheh if (tc->tc_ext_alloc_bh) 3620ccd979bdSMark Fasheh brelse(tc->tc_ext_alloc_bh); 3621ccd979bdSMark Fasheh 3622ccd979bdSMark Fasheh if (tc->tc_last_eb_bh) 3623ccd979bdSMark Fasheh brelse(tc->tc_last_eb_bh); 3624ccd979bdSMark Fasheh 3625ccd979bdSMark Fasheh kfree(tc); 3626ccd979bdSMark Fasheh } 3627