xref: /openbmc/linux/fs/ocfs2/alloc.c (revision b0697053)
1ccd979bdSMark Fasheh /* -*- mode: c; c-basic-offset: 8; -*-
2ccd979bdSMark Fasheh  * vim: noexpandtab sw=8 ts=8 sts=0:
3ccd979bdSMark Fasheh  *
4ccd979bdSMark Fasheh  * alloc.c
5ccd979bdSMark Fasheh  *
6ccd979bdSMark Fasheh  * Extent allocs and frees
7ccd979bdSMark Fasheh  *
8ccd979bdSMark Fasheh  * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
9ccd979bdSMark Fasheh  *
10ccd979bdSMark Fasheh  * This program is free software; you can redistribute it and/or
11ccd979bdSMark Fasheh  * modify it under the terms of the GNU General Public
12ccd979bdSMark Fasheh  * License as published by the Free Software Foundation; either
13ccd979bdSMark Fasheh  * version 2 of the License, or (at your option) any later version.
14ccd979bdSMark Fasheh  *
15ccd979bdSMark Fasheh  * This program is distributed in the hope that it will be useful,
16ccd979bdSMark Fasheh  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17ccd979bdSMark Fasheh  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18ccd979bdSMark Fasheh  * General Public License for more details.
19ccd979bdSMark Fasheh  *
20ccd979bdSMark Fasheh  * You should have received a copy of the GNU General Public
21ccd979bdSMark Fasheh  * License along with this program; if not, write to the
22ccd979bdSMark Fasheh  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23ccd979bdSMark Fasheh  * Boston, MA 02111-1307, USA.
24ccd979bdSMark Fasheh  */
25ccd979bdSMark Fasheh 
26ccd979bdSMark Fasheh #include <linux/fs.h>
27ccd979bdSMark Fasheh #include <linux/types.h>
28ccd979bdSMark Fasheh #include <linux/slab.h>
29ccd979bdSMark Fasheh #include <linux/highmem.h>
30ccd979bdSMark Fasheh 
31ccd979bdSMark Fasheh #define MLOG_MASK_PREFIX ML_DISK_ALLOC
32ccd979bdSMark Fasheh #include <cluster/masklog.h>
33ccd979bdSMark Fasheh 
34ccd979bdSMark Fasheh #include "ocfs2.h"
35ccd979bdSMark Fasheh 
36ccd979bdSMark Fasheh #include "alloc.h"
37ccd979bdSMark Fasheh #include "dlmglue.h"
38ccd979bdSMark Fasheh #include "extent_map.h"
39ccd979bdSMark Fasheh #include "inode.h"
40ccd979bdSMark Fasheh #include "journal.h"
41ccd979bdSMark Fasheh #include "localalloc.h"
42ccd979bdSMark Fasheh #include "suballoc.h"
43ccd979bdSMark Fasheh #include "sysfile.h"
44ccd979bdSMark Fasheh #include "file.h"
45ccd979bdSMark Fasheh #include "super.h"
46ccd979bdSMark Fasheh #include "uptodate.h"
47ccd979bdSMark Fasheh 
48ccd979bdSMark Fasheh #include "buffer_head_io.h"
49ccd979bdSMark Fasheh 
50ccd979bdSMark Fasheh static int ocfs2_extent_contig(struct inode *inode,
51ccd979bdSMark Fasheh 			       struct ocfs2_extent_rec *ext,
52ccd979bdSMark Fasheh 			       u64 blkno);
53ccd979bdSMark Fasheh 
54ccd979bdSMark Fasheh static int ocfs2_create_new_meta_bhs(struct ocfs2_super *osb,
55ccd979bdSMark Fasheh 				     struct ocfs2_journal_handle *handle,
56ccd979bdSMark Fasheh 				     struct inode *inode,
57ccd979bdSMark Fasheh 				     int wanted,
58ccd979bdSMark Fasheh 				     struct ocfs2_alloc_context *meta_ac,
59ccd979bdSMark Fasheh 				     struct buffer_head *bhs[]);
60ccd979bdSMark Fasheh 
61ccd979bdSMark Fasheh static int ocfs2_add_branch(struct ocfs2_super *osb,
62ccd979bdSMark Fasheh 			    struct ocfs2_journal_handle *handle,
63ccd979bdSMark Fasheh 			    struct inode *inode,
64ccd979bdSMark Fasheh 			    struct buffer_head *fe_bh,
65ccd979bdSMark Fasheh 			    struct buffer_head *eb_bh,
66ccd979bdSMark Fasheh 			    struct buffer_head *last_eb_bh,
67ccd979bdSMark Fasheh 			    struct ocfs2_alloc_context *meta_ac);
68ccd979bdSMark Fasheh 
69ccd979bdSMark Fasheh static int ocfs2_shift_tree_depth(struct ocfs2_super *osb,
70ccd979bdSMark Fasheh 				  struct ocfs2_journal_handle *handle,
71ccd979bdSMark Fasheh 				  struct inode *inode,
72ccd979bdSMark Fasheh 				  struct buffer_head *fe_bh,
73ccd979bdSMark Fasheh 				  struct ocfs2_alloc_context *meta_ac,
74ccd979bdSMark Fasheh 				  struct buffer_head **ret_new_eb_bh);
75ccd979bdSMark Fasheh 
76ccd979bdSMark Fasheh static int ocfs2_do_insert_extent(struct ocfs2_super *osb,
77ccd979bdSMark Fasheh 				  struct ocfs2_journal_handle *handle,
78ccd979bdSMark Fasheh 				  struct inode *inode,
79ccd979bdSMark Fasheh 				  struct buffer_head *fe_bh,
80ccd979bdSMark Fasheh 				  u64 blkno,
81ccd979bdSMark Fasheh 				  u32 new_clusters);
82ccd979bdSMark Fasheh 
83ccd979bdSMark Fasheh static int ocfs2_find_branch_target(struct ocfs2_super *osb,
84ccd979bdSMark Fasheh 				    struct inode *inode,
85ccd979bdSMark Fasheh 				    struct buffer_head *fe_bh,
86ccd979bdSMark Fasheh 				    struct buffer_head **target_bh);
87ccd979bdSMark Fasheh 
88ccd979bdSMark Fasheh static int ocfs2_find_new_last_ext_blk(struct ocfs2_super *osb,
89ccd979bdSMark Fasheh 				       struct inode *inode,
90ccd979bdSMark Fasheh 				       struct ocfs2_dinode *fe,
91ccd979bdSMark Fasheh 				       unsigned int new_i_clusters,
92ccd979bdSMark Fasheh 				       struct buffer_head *old_last_eb,
93ccd979bdSMark Fasheh 				       struct buffer_head **new_last_eb);
94ccd979bdSMark Fasheh 
95ccd979bdSMark Fasheh static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc);
96ccd979bdSMark Fasheh 
97ccd979bdSMark Fasheh static int ocfs2_extent_contig(struct inode *inode,
98ccd979bdSMark Fasheh 			       struct ocfs2_extent_rec *ext,
99ccd979bdSMark Fasheh 			       u64 blkno)
100ccd979bdSMark Fasheh {
101ccd979bdSMark Fasheh 	return blkno == (le64_to_cpu(ext->e_blkno) +
102ccd979bdSMark Fasheh 			 ocfs2_clusters_to_blocks(inode->i_sb,
103ccd979bdSMark Fasheh 						  le32_to_cpu(ext->e_clusters)));
104ccd979bdSMark Fasheh }
105ccd979bdSMark Fasheh 
106ccd979bdSMark Fasheh /*
107ccd979bdSMark Fasheh  * How many free extents have we got before we need more meta data?
108ccd979bdSMark Fasheh  */
109ccd979bdSMark Fasheh int ocfs2_num_free_extents(struct ocfs2_super *osb,
110ccd979bdSMark Fasheh 			   struct inode *inode,
111ccd979bdSMark Fasheh 			   struct ocfs2_dinode *fe)
112ccd979bdSMark Fasheh {
113ccd979bdSMark Fasheh 	int retval;
114ccd979bdSMark Fasheh 	struct ocfs2_extent_list *el;
115ccd979bdSMark Fasheh 	struct ocfs2_extent_block *eb;
116ccd979bdSMark Fasheh 	struct buffer_head *eb_bh = NULL;
117ccd979bdSMark Fasheh 
118ccd979bdSMark Fasheh 	mlog_entry_void();
119ccd979bdSMark Fasheh 
120ccd979bdSMark Fasheh 	if (!OCFS2_IS_VALID_DINODE(fe)) {
121ccd979bdSMark Fasheh 		OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe);
122ccd979bdSMark Fasheh 		retval = -EIO;
123ccd979bdSMark Fasheh 		goto bail;
124ccd979bdSMark Fasheh 	}
125ccd979bdSMark Fasheh 
126ccd979bdSMark Fasheh 	if (fe->i_last_eb_blk) {
127ccd979bdSMark Fasheh 		retval = ocfs2_read_block(osb, le64_to_cpu(fe->i_last_eb_blk),
128ccd979bdSMark Fasheh 					  &eb_bh, OCFS2_BH_CACHED, inode);
129ccd979bdSMark Fasheh 		if (retval < 0) {
130ccd979bdSMark Fasheh 			mlog_errno(retval);
131ccd979bdSMark Fasheh 			goto bail;
132ccd979bdSMark Fasheh 		}
133ccd979bdSMark Fasheh 		eb = (struct ocfs2_extent_block *) eb_bh->b_data;
134ccd979bdSMark Fasheh 		el = &eb->h_list;
135ccd979bdSMark Fasheh 	} else
136ccd979bdSMark Fasheh 		el = &fe->id2.i_list;
137ccd979bdSMark Fasheh 
138ccd979bdSMark Fasheh 	BUG_ON(el->l_tree_depth != 0);
139ccd979bdSMark Fasheh 
140ccd979bdSMark Fasheh 	retval = le16_to_cpu(el->l_count) - le16_to_cpu(el->l_next_free_rec);
141ccd979bdSMark Fasheh bail:
142ccd979bdSMark Fasheh 	if (eb_bh)
143ccd979bdSMark Fasheh 		brelse(eb_bh);
144ccd979bdSMark Fasheh 
145ccd979bdSMark Fasheh 	mlog_exit(retval);
146ccd979bdSMark Fasheh 	return retval;
147ccd979bdSMark Fasheh }
148ccd979bdSMark Fasheh 
149ccd979bdSMark Fasheh /* expects array to already be allocated
150ccd979bdSMark Fasheh  *
151ccd979bdSMark Fasheh  * sets h_signature, h_blkno, h_suballoc_bit, h_suballoc_slot, and
152ccd979bdSMark Fasheh  * l_count for you
153ccd979bdSMark Fasheh  */
154ccd979bdSMark Fasheh static int ocfs2_create_new_meta_bhs(struct ocfs2_super *osb,
155ccd979bdSMark Fasheh 				     struct ocfs2_journal_handle *handle,
156ccd979bdSMark Fasheh 				     struct inode *inode,
157ccd979bdSMark Fasheh 				     int wanted,
158ccd979bdSMark Fasheh 				     struct ocfs2_alloc_context *meta_ac,
159ccd979bdSMark Fasheh 				     struct buffer_head *bhs[])
160ccd979bdSMark Fasheh {
161ccd979bdSMark Fasheh 	int count, status, i;
162ccd979bdSMark Fasheh 	u16 suballoc_bit_start;
163ccd979bdSMark Fasheh 	u32 num_got;
164ccd979bdSMark Fasheh 	u64 first_blkno;
165ccd979bdSMark Fasheh 	struct ocfs2_extent_block *eb;
166ccd979bdSMark Fasheh 
167ccd979bdSMark Fasheh 	mlog_entry_void();
168ccd979bdSMark Fasheh 
169ccd979bdSMark Fasheh 	count = 0;
170ccd979bdSMark Fasheh 	while (count < wanted) {
171ccd979bdSMark Fasheh 		status = ocfs2_claim_metadata(osb,
172ccd979bdSMark Fasheh 					      handle,
173ccd979bdSMark Fasheh 					      meta_ac,
174ccd979bdSMark Fasheh 					      wanted - count,
175ccd979bdSMark Fasheh 					      &suballoc_bit_start,
176ccd979bdSMark Fasheh 					      &num_got,
177ccd979bdSMark Fasheh 					      &first_blkno);
178ccd979bdSMark Fasheh 		if (status < 0) {
179ccd979bdSMark Fasheh 			mlog_errno(status);
180ccd979bdSMark Fasheh 			goto bail;
181ccd979bdSMark Fasheh 		}
182ccd979bdSMark Fasheh 
183ccd979bdSMark Fasheh 		for(i = count;  i < (num_got + count); i++) {
184ccd979bdSMark Fasheh 			bhs[i] = sb_getblk(osb->sb, first_blkno);
185ccd979bdSMark Fasheh 			if (bhs[i] == NULL) {
186ccd979bdSMark Fasheh 				status = -EIO;
187ccd979bdSMark Fasheh 				mlog_errno(status);
188ccd979bdSMark Fasheh 				goto bail;
189ccd979bdSMark Fasheh 			}
190ccd979bdSMark Fasheh 			ocfs2_set_new_buffer_uptodate(inode, bhs[i]);
191ccd979bdSMark Fasheh 
192ccd979bdSMark Fasheh 			status = ocfs2_journal_access(handle, inode, bhs[i],
193ccd979bdSMark Fasheh 						      OCFS2_JOURNAL_ACCESS_CREATE);
194ccd979bdSMark Fasheh 			if (status < 0) {
195ccd979bdSMark Fasheh 				mlog_errno(status);
196ccd979bdSMark Fasheh 				goto bail;
197ccd979bdSMark Fasheh 			}
198ccd979bdSMark Fasheh 
199ccd979bdSMark Fasheh 			memset(bhs[i]->b_data, 0, osb->sb->s_blocksize);
200ccd979bdSMark Fasheh 			eb = (struct ocfs2_extent_block *) bhs[i]->b_data;
201ccd979bdSMark Fasheh 			/* Ok, setup the minimal stuff here. */
202ccd979bdSMark Fasheh 			strcpy(eb->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE);
203ccd979bdSMark Fasheh 			eb->h_blkno = cpu_to_le64(first_blkno);
204ccd979bdSMark Fasheh 			eb->h_fs_generation = cpu_to_le32(osb->fs_generation);
205ccd979bdSMark Fasheh 
206ccd979bdSMark Fasheh #ifndef OCFS2_USE_ALL_METADATA_SUBALLOCATORS
207ccd979bdSMark Fasheh 			/* we always use slot zero's suballocator */
208ccd979bdSMark Fasheh 			eb->h_suballoc_slot = 0;
209ccd979bdSMark Fasheh #else
210ccd979bdSMark Fasheh 			eb->h_suballoc_slot = cpu_to_le16(osb->slot_num);
211ccd979bdSMark Fasheh #endif
212ccd979bdSMark Fasheh 			eb->h_suballoc_bit = cpu_to_le16(suballoc_bit_start);
213ccd979bdSMark Fasheh 			eb->h_list.l_count =
214ccd979bdSMark Fasheh 				cpu_to_le16(ocfs2_extent_recs_per_eb(osb->sb));
215ccd979bdSMark Fasheh 
216ccd979bdSMark Fasheh 			suballoc_bit_start++;
217ccd979bdSMark Fasheh 			first_blkno++;
218ccd979bdSMark Fasheh 
219ccd979bdSMark Fasheh 			/* We'll also be dirtied by the caller, so
220ccd979bdSMark Fasheh 			 * this isn't absolutely necessary. */
221ccd979bdSMark Fasheh 			status = ocfs2_journal_dirty(handle, bhs[i]);
222ccd979bdSMark Fasheh 			if (status < 0) {
223ccd979bdSMark Fasheh 				mlog_errno(status);
224ccd979bdSMark Fasheh 				goto bail;
225ccd979bdSMark Fasheh 			}
226ccd979bdSMark Fasheh 		}
227ccd979bdSMark Fasheh 
228ccd979bdSMark Fasheh 		count += num_got;
229ccd979bdSMark Fasheh 	}
230ccd979bdSMark Fasheh 
231ccd979bdSMark Fasheh 	status = 0;
232ccd979bdSMark Fasheh bail:
233ccd979bdSMark Fasheh 	if (status < 0) {
234ccd979bdSMark Fasheh 		for(i = 0; i < wanted; i++) {
235ccd979bdSMark Fasheh 			if (bhs[i])
236ccd979bdSMark Fasheh 				brelse(bhs[i]);
237ccd979bdSMark Fasheh 			bhs[i] = NULL;
238ccd979bdSMark Fasheh 		}
239ccd979bdSMark Fasheh 	}
240ccd979bdSMark Fasheh 	mlog_exit(status);
241ccd979bdSMark Fasheh 	return status;
242ccd979bdSMark Fasheh }
243ccd979bdSMark Fasheh 
244ccd979bdSMark Fasheh /*
245ccd979bdSMark Fasheh  * Add an entire tree branch to our inode. eb_bh is the extent block
246ccd979bdSMark Fasheh  * to start at, if we don't want to start the branch at the dinode
247ccd979bdSMark Fasheh  * structure.
248ccd979bdSMark Fasheh  *
249ccd979bdSMark Fasheh  * last_eb_bh is required as we have to update it's next_leaf pointer
250ccd979bdSMark Fasheh  * for the new last extent block.
251ccd979bdSMark Fasheh  *
252ccd979bdSMark Fasheh  * the new branch will be 'empty' in the sense that every block will
253ccd979bdSMark Fasheh  * contain a single record with e_clusters == 0.
254ccd979bdSMark Fasheh  */
255ccd979bdSMark Fasheh static int ocfs2_add_branch(struct ocfs2_super *osb,
256ccd979bdSMark Fasheh 			    struct ocfs2_journal_handle *handle,
257ccd979bdSMark Fasheh 			    struct inode *inode,
258ccd979bdSMark Fasheh 			    struct buffer_head *fe_bh,
259ccd979bdSMark Fasheh 			    struct buffer_head *eb_bh,
260ccd979bdSMark Fasheh 			    struct buffer_head *last_eb_bh,
261ccd979bdSMark Fasheh 			    struct ocfs2_alloc_context *meta_ac)
262ccd979bdSMark Fasheh {
263ccd979bdSMark Fasheh 	int status, new_blocks, i;
264ccd979bdSMark Fasheh 	u64 next_blkno, new_last_eb_blk;
265ccd979bdSMark Fasheh 	struct buffer_head *bh;
266ccd979bdSMark Fasheh 	struct buffer_head **new_eb_bhs = NULL;
267ccd979bdSMark Fasheh 	struct ocfs2_dinode *fe;
268ccd979bdSMark Fasheh 	struct ocfs2_extent_block *eb;
269ccd979bdSMark Fasheh 	struct ocfs2_extent_list  *eb_el;
270ccd979bdSMark Fasheh 	struct ocfs2_extent_list  *el;
271ccd979bdSMark Fasheh 
272ccd979bdSMark Fasheh 	mlog_entry_void();
273ccd979bdSMark Fasheh 
274ccd979bdSMark Fasheh 	BUG_ON(!last_eb_bh);
275ccd979bdSMark Fasheh 
276ccd979bdSMark Fasheh 	fe = (struct ocfs2_dinode *) fe_bh->b_data;
277ccd979bdSMark Fasheh 
278ccd979bdSMark Fasheh 	if (eb_bh) {
279ccd979bdSMark Fasheh 		eb = (struct ocfs2_extent_block *) eb_bh->b_data;
280ccd979bdSMark Fasheh 		el = &eb->h_list;
281ccd979bdSMark Fasheh 	} else
282ccd979bdSMark Fasheh 		el = &fe->id2.i_list;
283ccd979bdSMark Fasheh 
284ccd979bdSMark Fasheh 	/* we never add a branch to a leaf. */
285ccd979bdSMark Fasheh 	BUG_ON(!el->l_tree_depth);
286ccd979bdSMark Fasheh 
287ccd979bdSMark Fasheh 	new_blocks = le16_to_cpu(el->l_tree_depth);
288ccd979bdSMark Fasheh 
289ccd979bdSMark Fasheh 	/* allocate the number of new eb blocks we need */
290ccd979bdSMark Fasheh 	new_eb_bhs = kcalloc(new_blocks, sizeof(struct buffer_head *),
291ccd979bdSMark Fasheh 			     GFP_KERNEL);
292ccd979bdSMark Fasheh 	if (!new_eb_bhs) {
293ccd979bdSMark Fasheh 		status = -ENOMEM;
294ccd979bdSMark Fasheh 		mlog_errno(status);
295ccd979bdSMark Fasheh 		goto bail;
296ccd979bdSMark Fasheh 	}
297ccd979bdSMark Fasheh 
298ccd979bdSMark Fasheh 	status = ocfs2_create_new_meta_bhs(osb, handle, inode, new_blocks,
299ccd979bdSMark Fasheh 					   meta_ac, new_eb_bhs);
300ccd979bdSMark Fasheh 	if (status < 0) {
301ccd979bdSMark Fasheh 		mlog_errno(status);
302ccd979bdSMark Fasheh 		goto bail;
303ccd979bdSMark Fasheh 	}
304ccd979bdSMark Fasheh 
305ccd979bdSMark Fasheh 	/* Note: new_eb_bhs[new_blocks - 1] is the guy which will be
306ccd979bdSMark Fasheh 	 * linked with the rest of the tree.
307ccd979bdSMark Fasheh 	 * conversly, new_eb_bhs[0] is the new bottommost leaf.
308ccd979bdSMark Fasheh 	 *
309ccd979bdSMark Fasheh 	 * when we leave the loop, new_last_eb_blk will point to the
310ccd979bdSMark Fasheh 	 * newest leaf, and next_blkno will point to the topmost extent
311ccd979bdSMark Fasheh 	 * block. */
312ccd979bdSMark Fasheh 	next_blkno = new_last_eb_blk = 0;
313ccd979bdSMark Fasheh 	for(i = 0; i < new_blocks; i++) {
314ccd979bdSMark Fasheh 		bh = new_eb_bhs[i];
315ccd979bdSMark Fasheh 		eb = (struct ocfs2_extent_block *) bh->b_data;
316ccd979bdSMark Fasheh 		if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
317ccd979bdSMark Fasheh 			OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
318ccd979bdSMark Fasheh 			status = -EIO;
319ccd979bdSMark Fasheh 			goto bail;
320ccd979bdSMark Fasheh 		}
321ccd979bdSMark Fasheh 		eb_el = &eb->h_list;
322ccd979bdSMark Fasheh 
323ccd979bdSMark Fasheh 		status = ocfs2_journal_access(handle, inode, bh,
324ccd979bdSMark Fasheh 					      OCFS2_JOURNAL_ACCESS_CREATE);
325ccd979bdSMark Fasheh 		if (status < 0) {
326ccd979bdSMark Fasheh 			mlog_errno(status);
327ccd979bdSMark Fasheh 			goto bail;
328ccd979bdSMark Fasheh 		}
329ccd979bdSMark Fasheh 
330ccd979bdSMark Fasheh 		eb->h_next_leaf_blk = 0;
331ccd979bdSMark Fasheh 		eb_el->l_tree_depth = cpu_to_le16(i);
332ccd979bdSMark Fasheh 		eb_el->l_next_free_rec = cpu_to_le16(1);
333ccd979bdSMark Fasheh 		eb_el->l_recs[0].e_cpos = fe->i_clusters;
334ccd979bdSMark Fasheh 		eb_el->l_recs[0].e_blkno = cpu_to_le64(next_blkno);
335ccd979bdSMark Fasheh 		eb_el->l_recs[0].e_clusters = cpu_to_le32(0);
336ccd979bdSMark Fasheh 		if (!eb_el->l_tree_depth)
337ccd979bdSMark Fasheh 			new_last_eb_blk = le64_to_cpu(eb->h_blkno);
338ccd979bdSMark Fasheh 
339ccd979bdSMark Fasheh 		status = ocfs2_journal_dirty(handle, bh);
340ccd979bdSMark Fasheh 		if (status < 0) {
341ccd979bdSMark Fasheh 			mlog_errno(status);
342ccd979bdSMark Fasheh 			goto bail;
343ccd979bdSMark Fasheh 		}
344ccd979bdSMark Fasheh 
345ccd979bdSMark Fasheh 		next_blkno = le64_to_cpu(eb->h_blkno);
346ccd979bdSMark Fasheh 	}
347ccd979bdSMark Fasheh 
348ccd979bdSMark Fasheh 	/* This is a bit hairy. We want to update up to three blocks
349ccd979bdSMark Fasheh 	 * here without leaving any of them in an inconsistent state
350ccd979bdSMark Fasheh 	 * in case of error. We don't have to worry about
351ccd979bdSMark Fasheh 	 * journal_dirty erroring as it won't unless we've aborted the
352ccd979bdSMark Fasheh 	 * handle (in which case we would never be here) so reserving
353ccd979bdSMark Fasheh 	 * the write with journal_access is all we need to do. */
354ccd979bdSMark Fasheh 	status = ocfs2_journal_access(handle, inode, last_eb_bh,
355ccd979bdSMark Fasheh 				      OCFS2_JOURNAL_ACCESS_WRITE);
356ccd979bdSMark Fasheh 	if (status < 0) {
357ccd979bdSMark Fasheh 		mlog_errno(status);
358ccd979bdSMark Fasheh 		goto bail;
359ccd979bdSMark Fasheh 	}
360ccd979bdSMark Fasheh 	status = ocfs2_journal_access(handle, inode, fe_bh,
361ccd979bdSMark Fasheh 				      OCFS2_JOURNAL_ACCESS_WRITE);
362ccd979bdSMark Fasheh 	if (status < 0) {
363ccd979bdSMark Fasheh 		mlog_errno(status);
364ccd979bdSMark Fasheh 		goto bail;
365ccd979bdSMark Fasheh 	}
366ccd979bdSMark Fasheh 	if (eb_bh) {
367ccd979bdSMark Fasheh 		status = ocfs2_journal_access(handle, inode, eb_bh,
368ccd979bdSMark Fasheh 					      OCFS2_JOURNAL_ACCESS_WRITE);
369ccd979bdSMark Fasheh 		if (status < 0) {
370ccd979bdSMark Fasheh 			mlog_errno(status);
371ccd979bdSMark Fasheh 			goto bail;
372ccd979bdSMark Fasheh 		}
373ccd979bdSMark Fasheh 	}
374ccd979bdSMark Fasheh 
375ccd979bdSMark Fasheh 	/* Link the new branch into the rest of the tree (el will
376ccd979bdSMark Fasheh 	 * either be on the fe, or the extent block passed in. */
377ccd979bdSMark Fasheh 	i = le16_to_cpu(el->l_next_free_rec);
378ccd979bdSMark Fasheh 	el->l_recs[i].e_blkno = cpu_to_le64(next_blkno);
379ccd979bdSMark Fasheh 	el->l_recs[i].e_cpos = fe->i_clusters;
380ccd979bdSMark Fasheh 	el->l_recs[i].e_clusters = 0;
381ccd979bdSMark Fasheh 	le16_add_cpu(&el->l_next_free_rec, 1);
382ccd979bdSMark Fasheh 
383ccd979bdSMark Fasheh 	/* fe needs a new last extent block pointer, as does the
384ccd979bdSMark Fasheh 	 * next_leaf on the previously last-extent-block. */
385ccd979bdSMark Fasheh 	fe->i_last_eb_blk = cpu_to_le64(new_last_eb_blk);
386ccd979bdSMark Fasheh 
387ccd979bdSMark Fasheh 	eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
388ccd979bdSMark Fasheh 	eb->h_next_leaf_blk = cpu_to_le64(new_last_eb_blk);
389ccd979bdSMark Fasheh 
390ccd979bdSMark Fasheh 	status = ocfs2_journal_dirty(handle, last_eb_bh);
391ccd979bdSMark Fasheh 	if (status < 0)
392ccd979bdSMark Fasheh 		mlog_errno(status);
393ccd979bdSMark Fasheh 	status = ocfs2_journal_dirty(handle, fe_bh);
394ccd979bdSMark Fasheh 	if (status < 0)
395ccd979bdSMark Fasheh 		mlog_errno(status);
396ccd979bdSMark Fasheh 	if (eb_bh) {
397ccd979bdSMark Fasheh 		status = ocfs2_journal_dirty(handle, eb_bh);
398ccd979bdSMark Fasheh 		if (status < 0)
399ccd979bdSMark Fasheh 			mlog_errno(status);
400ccd979bdSMark Fasheh 	}
401ccd979bdSMark Fasheh 
402ccd979bdSMark Fasheh 	status = 0;
403ccd979bdSMark Fasheh bail:
404ccd979bdSMark Fasheh 	if (new_eb_bhs) {
405ccd979bdSMark Fasheh 		for (i = 0; i < new_blocks; i++)
406ccd979bdSMark Fasheh 			if (new_eb_bhs[i])
407ccd979bdSMark Fasheh 				brelse(new_eb_bhs[i]);
408ccd979bdSMark Fasheh 		kfree(new_eb_bhs);
409ccd979bdSMark Fasheh 	}
410ccd979bdSMark Fasheh 
411ccd979bdSMark Fasheh 	mlog_exit(status);
412ccd979bdSMark Fasheh 	return status;
413ccd979bdSMark Fasheh }
414ccd979bdSMark Fasheh 
415ccd979bdSMark Fasheh /*
416ccd979bdSMark Fasheh  * adds another level to the allocation tree.
417ccd979bdSMark Fasheh  * returns back the new extent block so you can add a branch to it
418ccd979bdSMark Fasheh  * after this call.
419ccd979bdSMark Fasheh  */
420ccd979bdSMark Fasheh static int ocfs2_shift_tree_depth(struct ocfs2_super *osb,
421ccd979bdSMark Fasheh 				  struct ocfs2_journal_handle *handle,
422ccd979bdSMark Fasheh 				  struct inode *inode,
423ccd979bdSMark Fasheh 				  struct buffer_head *fe_bh,
424ccd979bdSMark Fasheh 				  struct ocfs2_alloc_context *meta_ac,
425ccd979bdSMark Fasheh 				  struct buffer_head **ret_new_eb_bh)
426ccd979bdSMark Fasheh {
427ccd979bdSMark Fasheh 	int status, i;
428ccd979bdSMark Fasheh 	struct buffer_head *new_eb_bh = NULL;
429ccd979bdSMark Fasheh 	struct ocfs2_dinode *fe;
430ccd979bdSMark Fasheh 	struct ocfs2_extent_block *eb;
431ccd979bdSMark Fasheh 	struct ocfs2_extent_list  *fe_el;
432ccd979bdSMark Fasheh 	struct ocfs2_extent_list  *eb_el;
433ccd979bdSMark Fasheh 
434ccd979bdSMark Fasheh 	mlog_entry_void();
435ccd979bdSMark Fasheh 
436ccd979bdSMark Fasheh 	status = ocfs2_create_new_meta_bhs(osb, handle, inode, 1, meta_ac,
437ccd979bdSMark Fasheh 					   &new_eb_bh);
438ccd979bdSMark Fasheh 	if (status < 0) {
439ccd979bdSMark Fasheh 		mlog_errno(status);
440ccd979bdSMark Fasheh 		goto bail;
441ccd979bdSMark Fasheh 	}
442ccd979bdSMark Fasheh 
443ccd979bdSMark Fasheh 	eb = (struct ocfs2_extent_block *) new_eb_bh->b_data;
444ccd979bdSMark Fasheh 	if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
445ccd979bdSMark Fasheh 		OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
446ccd979bdSMark Fasheh 		status = -EIO;
447ccd979bdSMark Fasheh 		goto bail;
448ccd979bdSMark Fasheh 	}
449ccd979bdSMark Fasheh 
450ccd979bdSMark Fasheh 	eb_el = &eb->h_list;
451ccd979bdSMark Fasheh 	fe = (struct ocfs2_dinode *) fe_bh->b_data;
452ccd979bdSMark Fasheh 	fe_el = &fe->id2.i_list;
453ccd979bdSMark Fasheh 
454ccd979bdSMark Fasheh 	status = ocfs2_journal_access(handle, inode, new_eb_bh,
455ccd979bdSMark Fasheh 				      OCFS2_JOURNAL_ACCESS_CREATE);
456ccd979bdSMark Fasheh 	if (status < 0) {
457ccd979bdSMark Fasheh 		mlog_errno(status);
458ccd979bdSMark Fasheh 		goto bail;
459ccd979bdSMark Fasheh 	}
460ccd979bdSMark Fasheh 
461ccd979bdSMark Fasheh 	/* copy the fe data into the new extent block */
462ccd979bdSMark Fasheh 	eb_el->l_tree_depth = fe_el->l_tree_depth;
463ccd979bdSMark Fasheh 	eb_el->l_next_free_rec = fe_el->l_next_free_rec;
464ccd979bdSMark Fasheh 	for(i = 0; i < le16_to_cpu(fe_el->l_next_free_rec); i++) {
465ccd979bdSMark Fasheh 		eb_el->l_recs[i].e_cpos = fe_el->l_recs[i].e_cpos;
466ccd979bdSMark Fasheh 		eb_el->l_recs[i].e_clusters = fe_el->l_recs[i].e_clusters;
467ccd979bdSMark Fasheh 		eb_el->l_recs[i].e_blkno = fe_el->l_recs[i].e_blkno;
468ccd979bdSMark Fasheh 	}
469ccd979bdSMark Fasheh 
470ccd979bdSMark Fasheh 	status = ocfs2_journal_dirty(handle, new_eb_bh);
471ccd979bdSMark Fasheh 	if (status < 0) {
472ccd979bdSMark Fasheh 		mlog_errno(status);
473ccd979bdSMark Fasheh 		goto bail;
474ccd979bdSMark Fasheh 	}
475ccd979bdSMark Fasheh 
476ccd979bdSMark Fasheh 	status = ocfs2_journal_access(handle, inode, fe_bh,
477ccd979bdSMark Fasheh 				      OCFS2_JOURNAL_ACCESS_WRITE);
478ccd979bdSMark Fasheh 	if (status < 0) {
479ccd979bdSMark Fasheh 		mlog_errno(status);
480ccd979bdSMark Fasheh 		goto bail;
481ccd979bdSMark Fasheh 	}
482ccd979bdSMark Fasheh 
483ccd979bdSMark Fasheh 	/* update fe now */
484ccd979bdSMark Fasheh 	le16_add_cpu(&fe_el->l_tree_depth, 1);
485ccd979bdSMark Fasheh 	fe_el->l_recs[0].e_cpos = 0;
486ccd979bdSMark Fasheh 	fe_el->l_recs[0].e_blkno = eb->h_blkno;
487ccd979bdSMark Fasheh 	fe_el->l_recs[0].e_clusters = fe->i_clusters;
488ccd979bdSMark Fasheh 	for(i = 1; i < le16_to_cpu(fe_el->l_next_free_rec); i++) {
489ccd979bdSMark Fasheh 		fe_el->l_recs[i].e_cpos = 0;
490ccd979bdSMark Fasheh 		fe_el->l_recs[i].e_clusters = 0;
491ccd979bdSMark Fasheh 		fe_el->l_recs[i].e_blkno = 0;
492ccd979bdSMark Fasheh 	}
493ccd979bdSMark Fasheh 	fe_el->l_next_free_rec = cpu_to_le16(1);
494ccd979bdSMark Fasheh 
495ccd979bdSMark Fasheh 	/* If this is our 1st tree depth shift, then last_eb_blk
496ccd979bdSMark Fasheh 	 * becomes the allocated extent block */
497ccd979bdSMark Fasheh 	if (fe_el->l_tree_depth == cpu_to_le16(1))
498ccd979bdSMark Fasheh 		fe->i_last_eb_blk = eb->h_blkno;
499ccd979bdSMark Fasheh 
500ccd979bdSMark Fasheh 	status = ocfs2_journal_dirty(handle, fe_bh);
501ccd979bdSMark Fasheh 	if (status < 0) {
502ccd979bdSMark Fasheh 		mlog_errno(status);
503ccd979bdSMark Fasheh 		goto bail;
504ccd979bdSMark Fasheh 	}
505ccd979bdSMark Fasheh 
506ccd979bdSMark Fasheh 	*ret_new_eb_bh = new_eb_bh;
507ccd979bdSMark Fasheh 	new_eb_bh = NULL;
508ccd979bdSMark Fasheh 	status = 0;
509ccd979bdSMark Fasheh bail:
510ccd979bdSMark Fasheh 	if (new_eb_bh)
511ccd979bdSMark Fasheh 		brelse(new_eb_bh);
512ccd979bdSMark Fasheh 
513ccd979bdSMark Fasheh 	mlog_exit(status);
514ccd979bdSMark Fasheh 	return status;
515ccd979bdSMark Fasheh }
516ccd979bdSMark Fasheh 
517ccd979bdSMark Fasheh /*
518ccd979bdSMark Fasheh  * Expects the tree to already have room in the rightmost leaf for the
519ccd979bdSMark Fasheh  * extent.  Updates all the extent blocks (and the dinode) on the way
520ccd979bdSMark Fasheh  * down.
521ccd979bdSMark Fasheh  */
522ccd979bdSMark Fasheh static int ocfs2_do_insert_extent(struct ocfs2_super *osb,
523ccd979bdSMark Fasheh 				  struct ocfs2_journal_handle *handle,
524ccd979bdSMark Fasheh 				  struct inode *inode,
525ccd979bdSMark Fasheh 				  struct buffer_head *fe_bh,
526ccd979bdSMark Fasheh 				  u64 start_blk,
527ccd979bdSMark Fasheh 				  u32 new_clusters)
528ccd979bdSMark Fasheh {
529ccd979bdSMark Fasheh 	int status, i, num_bhs = 0;
530ccd979bdSMark Fasheh 	u64 next_blkno;
531ccd979bdSMark Fasheh 	u16 next_free;
532ccd979bdSMark Fasheh 	struct buffer_head **eb_bhs = NULL;
533ccd979bdSMark Fasheh 	struct ocfs2_dinode *fe;
534ccd979bdSMark Fasheh 	struct ocfs2_extent_block *eb;
535ccd979bdSMark Fasheh 	struct ocfs2_extent_list  *el;
536ccd979bdSMark Fasheh 
537ccd979bdSMark Fasheh 	mlog_entry_void();
538ccd979bdSMark Fasheh 
539ccd979bdSMark Fasheh 	status = ocfs2_journal_access(handle, inode, fe_bh,
540ccd979bdSMark Fasheh 				      OCFS2_JOURNAL_ACCESS_WRITE);
541ccd979bdSMark Fasheh 	if (status < 0) {
542ccd979bdSMark Fasheh 		mlog_errno(status);
543ccd979bdSMark Fasheh 		goto bail;
544ccd979bdSMark Fasheh 	}
545ccd979bdSMark Fasheh 
546ccd979bdSMark Fasheh 	fe = (struct ocfs2_dinode *) fe_bh->b_data;
547ccd979bdSMark Fasheh 	el = &fe->id2.i_list;
548ccd979bdSMark Fasheh 	if (el->l_tree_depth) {
549ccd979bdSMark Fasheh 		/* This is another operation where we want to be
550ccd979bdSMark Fasheh 		 * careful about our tree updates. An error here means
551ccd979bdSMark Fasheh 		 * none of the previous changes we made should roll
552ccd979bdSMark Fasheh 		 * forward. As a result, we have to record the buffers
553ccd979bdSMark Fasheh 		 * for this part of the tree in an array and reserve a
554ccd979bdSMark Fasheh 		 * journal write to them before making any changes. */
555ccd979bdSMark Fasheh 		num_bhs = le16_to_cpu(fe->id2.i_list.l_tree_depth);
556ccd979bdSMark Fasheh 		eb_bhs = kcalloc(num_bhs, sizeof(struct buffer_head *),
557ccd979bdSMark Fasheh 				 GFP_KERNEL);
558ccd979bdSMark Fasheh 		if (!eb_bhs) {
559ccd979bdSMark Fasheh 			status = -ENOMEM;
560ccd979bdSMark Fasheh 			mlog_errno(status);
561ccd979bdSMark Fasheh 			goto bail;
562ccd979bdSMark Fasheh 		}
563ccd979bdSMark Fasheh 
564ccd979bdSMark Fasheh 		i = 0;
565ccd979bdSMark Fasheh 		while(el->l_tree_depth) {
566ccd979bdSMark Fasheh 			next_free = le16_to_cpu(el->l_next_free_rec);
567ccd979bdSMark Fasheh 			if (next_free == 0) {
568ccd979bdSMark Fasheh 				ocfs2_error(inode->i_sb,
569b0697053SMark Fasheh 					    "Dinode %llu has a bad extent list",
570b0697053SMark Fasheh 					    (unsigned long long)OCFS2_I(inode)->ip_blkno);
571ccd979bdSMark Fasheh 				status = -EIO;
572ccd979bdSMark Fasheh 				goto bail;
573ccd979bdSMark Fasheh 			}
574ccd979bdSMark Fasheh 			next_blkno = le64_to_cpu(el->l_recs[next_free - 1].e_blkno);
575ccd979bdSMark Fasheh 
576ccd979bdSMark Fasheh 			BUG_ON(i >= num_bhs);
577ccd979bdSMark Fasheh 			status = ocfs2_read_block(osb, next_blkno, &eb_bhs[i],
578ccd979bdSMark Fasheh 						  OCFS2_BH_CACHED, inode);
579ccd979bdSMark Fasheh 			if (status < 0) {
580ccd979bdSMark Fasheh 				mlog_errno(status);
581ccd979bdSMark Fasheh 				goto bail;
582ccd979bdSMark Fasheh 			}
583ccd979bdSMark Fasheh 			eb = (struct ocfs2_extent_block *) eb_bhs[i]->b_data;
584ccd979bdSMark Fasheh 			if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
585ccd979bdSMark Fasheh 				OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb,
586ccd979bdSMark Fasheh 								 eb);
587ccd979bdSMark Fasheh 				status = -EIO;
588ccd979bdSMark Fasheh 				goto bail;
589ccd979bdSMark Fasheh 			}
590ccd979bdSMark Fasheh 
591ccd979bdSMark Fasheh 			status = ocfs2_journal_access(handle, inode, eb_bhs[i],
592ccd979bdSMark Fasheh 						      OCFS2_JOURNAL_ACCESS_WRITE);
593ccd979bdSMark Fasheh 			if (status < 0) {
594ccd979bdSMark Fasheh 				mlog_errno(status);
595ccd979bdSMark Fasheh 				goto bail;
596ccd979bdSMark Fasheh 			}
597ccd979bdSMark Fasheh 
598ccd979bdSMark Fasheh 			el = &eb->h_list;
599ccd979bdSMark Fasheh 			i++;
600ccd979bdSMark Fasheh 			/* When we leave this loop, eb_bhs[num_bhs - 1] will
601ccd979bdSMark Fasheh 			 * hold the bottom-most leaf extent block. */
602ccd979bdSMark Fasheh 		}
603ccd979bdSMark Fasheh 		BUG_ON(el->l_tree_depth);
604ccd979bdSMark Fasheh 
605ccd979bdSMark Fasheh 		el = &fe->id2.i_list;
606ccd979bdSMark Fasheh 		/* If we have tree depth, then the fe update is
607ccd979bdSMark Fasheh 		 * trivial, and we want to switch el out for the
608ccd979bdSMark Fasheh 		 * bottom-most leaf in order to update it with the
609ccd979bdSMark Fasheh 		 * actual extent data below. */
610ccd979bdSMark Fasheh 		next_free = le16_to_cpu(el->l_next_free_rec);
611ccd979bdSMark Fasheh 		if (next_free == 0) {
612ccd979bdSMark Fasheh 			ocfs2_error(inode->i_sb,
613b0697053SMark Fasheh 				    "Dinode %llu has a bad extent list",
614b0697053SMark Fasheh 				    (unsigned long long)OCFS2_I(inode)->ip_blkno);
615ccd979bdSMark Fasheh 			status = -EIO;
616ccd979bdSMark Fasheh 			goto bail;
617ccd979bdSMark Fasheh 		}
618ccd979bdSMark Fasheh 		le32_add_cpu(&el->l_recs[next_free - 1].e_clusters,
619ccd979bdSMark Fasheh 			     new_clusters);
620ccd979bdSMark Fasheh 		/* (num_bhs - 1) to avoid the leaf */
621ccd979bdSMark Fasheh 		for(i = 0; i < (num_bhs - 1); i++) {
622ccd979bdSMark Fasheh 			eb = (struct ocfs2_extent_block *) eb_bhs[i]->b_data;
623ccd979bdSMark Fasheh 			el = &eb->h_list;
624ccd979bdSMark Fasheh 
625ccd979bdSMark Fasheh 			/* finally, make our actual change to the
626ccd979bdSMark Fasheh 			 * intermediate extent blocks. */
627ccd979bdSMark Fasheh 			next_free = le16_to_cpu(el->l_next_free_rec);
628ccd979bdSMark Fasheh 			le32_add_cpu(&el->l_recs[next_free - 1].e_clusters,
629ccd979bdSMark Fasheh 				     new_clusters);
630ccd979bdSMark Fasheh 
631ccd979bdSMark Fasheh 			status = ocfs2_journal_dirty(handle, eb_bhs[i]);
632ccd979bdSMark Fasheh 			if (status < 0)
633ccd979bdSMark Fasheh 				mlog_errno(status);
634ccd979bdSMark Fasheh 		}
635ccd979bdSMark Fasheh 		BUG_ON(i != (num_bhs - 1));
636ccd979bdSMark Fasheh 		/* note that the leaf block wasn't touched in
637ccd979bdSMark Fasheh 		 * the loop above */
638ccd979bdSMark Fasheh 		eb = (struct ocfs2_extent_block *) eb_bhs[num_bhs - 1]->b_data;
639ccd979bdSMark Fasheh 		el = &eb->h_list;
640ccd979bdSMark Fasheh 		BUG_ON(el->l_tree_depth);
641ccd979bdSMark Fasheh 	}
642ccd979bdSMark Fasheh 
643ccd979bdSMark Fasheh 	/* yay, we can finally add the actual extent now! */
644ccd979bdSMark Fasheh 	i = le16_to_cpu(el->l_next_free_rec) - 1;
645ccd979bdSMark Fasheh 	if (le16_to_cpu(el->l_next_free_rec) &&
646ccd979bdSMark Fasheh 	    ocfs2_extent_contig(inode, &el->l_recs[i], start_blk)) {
647ccd979bdSMark Fasheh 		le32_add_cpu(&el->l_recs[i].e_clusters, new_clusters);
648ccd979bdSMark Fasheh 	} else if (le16_to_cpu(el->l_next_free_rec) &&
649ccd979bdSMark Fasheh 		   (le32_to_cpu(el->l_recs[i].e_clusters) == 0)) {
650ccd979bdSMark Fasheh 		/* having an empty extent at eof is legal. */
651ccd979bdSMark Fasheh 		if (el->l_recs[i].e_cpos != fe->i_clusters) {
652ccd979bdSMark Fasheh 			ocfs2_error(inode->i_sb,
653b0697053SMark Fasheh 				    "Dinode %llu trailing extent is bad: "
654ccd979bdSMark Fasheh 				    "cpos (%u) != number of clusters (%u)",
655b0697053SMark Fasheh 				    (unsigned long long)OCFS2_I(inode)->ip_blkno,
656ccd979bdSMark Fasheh 				    le32_to_cpu(el->l_recs[i].e_cpos),
657ccd979bdSMark Fasheh 				    le32_to_cpu(fe->i_clusters));
658ccd979bdSMark Fasheh 			status = -EIO;
659ccd979bdSMark Fasheh 			goto bail;
660ccd979bdSMark Fasheh 		}
661ccd979bdSMark Fasheh 		el->l_recs[i].e_blkno = cpu_to_le64(start_blk);
662ccd979bdSMark Fasheh 		el->l_recs[i].e_clusters = cpu_to_le32(new_clusters);
663ccd979bdSMark Fasheh 	} else {
664ccd979bdSMark Fasheh 		/* No contiguous record, or no empty record at eof, so
665ccd979bdSMark Fasheh 		 * we add a new one. */
666ccd979bdSMark Fasheh 
667ccd979bdSMark Fasheh 		BUG_ON(le16_to_cpu(el->l_next_free_rec) >=
668ccd979bdSMark Fasheh 		       le16_to_cpu(el->l_count));
669ccd979bdSMark Fasheh 		i = le16_to_cpu(el->l_next_free_rec);
670ccd979bdSMark Fasheh 
671ccd979bdSMark Fasheh 		el->l_recs[i].e_blkno = cpu_to_le64(start_blk);
672ccd979bdSMark Fasheh 		el->l_recs[i].e_clusters = cpu_to_le32(new_clusters);
673ccd979bdSMark Fasheh 		el->l_recs[i].e_cpos = fe->i_clusters;
674ccd979bdSMark Fasheh 		le16_add_cpu(&el->l_next_free_rec, 1);
675ccd979bdSMark Fasheh 	}
676ccd979bdSMark Fasheh 
677ccd979bdSMark Fasheh 	/*
678ccd979bdSMark Fasheh 	 * extent_map errors are not fatal, so they are ignored outside
679ccd979bdSMark Fasheh 	 * of flushing the thing.
680ccd979bdSMark Fasheh 	 */
681ccd979bdSMark Fasheh 	status = ocfs2_extent_map_append(inode, &el->l_recs[i],
682ccd979bdSMark Fasheh 					 new_clusters);
683ccd979bdSMark Fasheh 	if (status) {
684ccd979bdSMark Fasheh 		mlog_errno(status);
685ccd979bdSMark Fasheh 		ocfs2_extent_map_drop(inode, le32_to_cpu(fe->i_clusters));
686ccd979bdSMark Fasheh 	}
687ccd979bdSMark Fasheh 
688ccd979bdSMark Fasheh 	status = ocfs2_journal_dirty(handle, fe_bh);
689ccd979bdSMark Fasheh 	if (status < 0)
690ccd979bdSMark Fasheh 		mlog_errno(status);
691ccd979bdSMark Fasheh 	if (fe->id2.i_list.l_tree_depth) {
692ccd979bdSMark Fasheh 		status = ocfs2_journal_dirty(handle, eb_bhs[num_bhs - 1]);
693ccd979bdSMark Fasheh 		if (status < 0)
694ccd979bdSMark Fasheh 			mlog_errno(status);
695ccd979bdSMark Fasheh 	}
696ccd979bdSMark Fasheh 
697ccd979bdSMark Fasheh 	status = 0;
698ccd979bdSMark Fasheh bail:
699ccd979bdSMark Fasheh 	if (eb_bhs) {
700ccd979bdSMark Fasheh 		for (i = 0; i < num_bhs; i++)
701ccd979bdSMark Fasheh 			if (eb_bhs[i])
702ccd979bdSMark Fasheh 				brelse(eb_bhs[i]);
703ccd979bdSMark Fasheh 		kfree(eb_bhs);
704ccd979bdSMark Fasheh 	}
705ccd979bdSMark Fasheh 
706ccd979bdSMark Fasheh 	mlog_exit(status);
707ccd979bdSMark Fasheh 	return status;
708ccd979bdSMark Fasheh }
709ccd979bdSMark Fasheh 
710ccd979bdSMark Fasheh /*
711ccd979bdSMark Fasheh  * Should only be called when there is no space left in any of the
712ccd979bdSMark Fasheh  * leaf nodes. What we want to do is find the lowest tree depth
713ccd979bdSMark Fasheh  * non-leaf extent block with room for new records. There are three
714ccd979bdSMark Fasheh  * valid results of this search:
715ccd979bdSMark Fasheh  *
716ccd979bdSMark Fasheh  * 1) a lowest extent block is found, then we pass it back in
717ccd979bdSMark Fasheh  *    *lowest_eb_bh and return '0'
718ccd979bdSMark Fasheh  *
719ccd979bdSMark Fasheh  * 2) the search fails to find anything, but the dinode has room. We
720ccd979bdSMark Fasheh  *    pass NULL back in *lowest_eb_bh, but still return '0'
721ccd979bdSMark Fasheh  *
722ccd979bdSMark Fasheh  * 3) the search fails to find anything AND the dinode is full, in
723ccd979bdSMark Fasheh  *    which case we return > 0
724ccd979bdSMark Fasheh  *
725ccd979bdSMark Fasheh  * return status < 0 indicates an error.
726ccd979bdSMark Fasheh  */
727ccd979bdSMark Fasheh static int ocfs2_find_branch_target(struct ocfs2_super *osb,
728ccd979bdSMark Fasheh 				    struct inode *inode,
729ccd979bdSMark Fasheh 				    struct buffer_head *fe_bh,
730ccd979bdSMark Fasheh 				    struct buffer_head **target_bh)
731ccd979bdSMark Fasheh {
732ccd979bdSMark Fasheh 	int status = 0, i;
733ccd979bdSMark Fasheh 	u64 blkno;
734ccd979bdSMark Fasheh 	struct ocfs2_dinode *fe;
735ccd979bdSMark Fasheh 	struct ocfs2_extent_block *eb;
736ccd979bdSMark Fasheh 	struct ocfs2_extent_list  *el;
737ccd979bdSMark Fasheh 	struct buffer_head *bh = NULL;
738ccd979bdSMark Fasheh 	struct buffer_head *lowest_bh = NULL;
739ccd979bdSMark Fasheh 
740ccd979bdSMark Fasheh 	mlog_entry_void();
741ccd979bdSMark Fasheh 
742ccd979bdSMark Fasheh 	*target_bh = NULL;
743ccd979bdSMark Fasheh 
744ccd979bdSMark Fasheh 	fe = (struct ocfs2_dinode *) fe_bh->b_data;
745ccd979bdSMark Fasheh 	el = &fe->id2.i_list;
746ccd979bdSMark Fasheh 
747ccd979bdSMark Fasheh 	while(le16_to_cpu(el->l_tree_depth) > 1) {
748ccd979bdSMark Fasheh 		if (le16_to_cpu(el->l_next_free_rec) == 0) {
749b0697053SMark Fasheh 			ocfs2_error(inode->i_sb, "Dinode %llu has empty "
750ccd979bdSMark Fasheh 				    "extent list (next_free_rec == 0)",
751b0697053SMark Fasheh 				    (unsigned long long)OCFS2_I(inode)->ip_blkno);
752ccd979bdSMark Fasheh 			status = -EIO;
753ccd979bdSMark Fasheh 			goto bail;
754ccd979bdSMark Fasheh 		}
755ccd979bdSMark Fasheh 		i = le16_to_cpu(el->l_next_free_rec) - 1;
756ccd979bdSMark Fasheh 		blkno = le64_to_cpu(el->l_recs[i].e_blkno);
757ccd979bdSMark Fasheh 		if (!blkno) {
758b0697053SMark Fasheh 			ocfs2_error(inode->i_sb, "Dinode %llu has extent "
759ccd979bdSMark Fasheh 				    "list where extent # %d has no physical "
760ccd979bdSMark Fasheh 				    "block start",
761b0697053SMark Fasheh 				    (unsigned long long)OCFS2_I(inode)->ip_blkno, i);
762ccd979bdSMark Fasheh 			status = -EIO;
763ccd979bdSMark Fasheh 			goto bail;
764ccd979bdSMark Fasheh 		}
765ccd979bdSMark Fasheh 
766ccd979bdSMark Fasheh 		if (bh) {
767ccd979bdSMark Fasheh 			brelse(bh);
768ccd979bdSMark Fasheh 			bh = NULL;
769ccd979bdSMark Fasheh 		}
770ccd979bdSMark Fasheh 
771ccd979bdSMark Fasheh 		status = ocfs2_read_block(osb, blkno, &bh, OCFS2_BH_CACHED,
772ccd979bdSMark Fasheh 					  inode);
773ccd979bdSMark Fasheh 		if (status < 0) {
774ccd979bdSMark Fasheh 			mlog_errno(status);
775ccd979bdSMark Fasheh 			goto bail;
776ccd979bdSMark Fasheh 		}
777ccd979bdSMark Fasheh 
778ccd979bdSMark Fasheh 		eb = (struct ocfs2_extent_block *) bh->b_data;
779ccd979bdSMark Fasheh 		if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
780ccd979bdSMark Fasheh 			OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
781ccd979bdSMark Fasheh 			status = -EIO;
782ccd979bdSMark Fasheh 			goto bail;
783ccd979bdSMark Fasheh 		}
784ccd979bdSMark Fasheh 		el = &eb->h_list;
785ccd979bdSMark Fasheh 
786ccd979bdSMark Fasheh 		if (le16_to_cpu(el->l_next_free_rec) <
787ccd979bdSMark Fasheh 		    le16_to_cpu(el->l_count)) {
788ccd979bdSMark Fasheh 			if (lowest_bh)
789ccd979bdSMark Fasheh 				brelse(lowest_bh);
790ccd979bdSMark Fasheh 			lowest_bh = bh;
791ccd979bdSMark Fasheh 			get_bh(lowest_bh);
792ccd979bdSMark Fasheh 		}
793ccd979bdSMark Fasheh 	}
794ccd979bdSMark Fasheh 
795ccd979bdSMark Fasheh 	/* If we didn't find one and the fe doesn't have any room,
796ccd979bdSMark Fasheh 	 * then return '1' */
797ccd979bdSMark Fasheh 	if (!lowest_bh
798ccd979bdSMark Fasheh 	    && (fe->id2.i_list.l_next_free_rec == fe->id2.i_list.l_count))
799ccd979bdSMark Fasheh 		status = 1;
800ccd979bdSMark Fasheh 
801ccd979bdSMark Fasheh 	*target_bh = lowest_bh;
802ccd979bdSMark Fasheh bail:
803ccd979bdSMark Fasheh 	if (bh)
804ccd979bdSMark Fasheh 		brelse(bh);
805ccd979bdSMark Fasheh 
806ccd979bdSMark Fasheh 	mlog_exit(status);
807ccd979bdSMark Fasheh 	return status;
808ccd979bdSMark Fasheh }
809ccd979bdSMark Fasheh 
810ccd979bdSMark Fasheh /* the caller needs to update fe->i_clusters */
811ccd979bdSMark Fasheh int ocfs2_insert_extent(struct ocfs2_super *osb,
812ccd979bdSMark Fasheh 			struct ocfs2_journal_handle *handle,
813ccd979bdSMark Fasheh 			struct inode *inode,
814ccd979bdSMark Fasheh 			struct buffer_head *fe_bh,
815ccd979bdSMark Fasheh 			u64 start_blk,
816ccd979bdSMark Fasheh 			u32 new_clusters,
817ccd979bdSMark Fasheh 			struct ocfs2_alloc_context *meta_ac)
818ccd979bdSMark Fasheh {
819ccd979bdSMark Fasheh 	int status, i, shift;
820ccd979bdSMark Fasheh 	struct buffer_head *last_eb_bh = NULL;
821ccd979bdSMark Fasheh 	struct buffer_head *bh = NULL;
822ccd979bdSMark Fasheh 	struct ocfs2_dinode *fe;
823ccd979bdSMark Fasheh 	struct ocfs2_extent_block *eb;
824ccd979bdSMark Fasheh 	struct ocfs2_extent_list  *el;
825ccd979bdSMark Fasheh 
826ccd979bdSMark Fasheh 	mlog_entry_void();
827ccd979bdSMark Fasheh 
828b0697053SMark Fasheh 	mlog(0, "add %u clusters starting at block %llu to inode %llu\n",
829b0697053SMark Fasheh 	     new_clusters, (unsigned long long)start_blk,
830b0697053SMark Fasheh 	     (unsigned long long)OCFS2_I(inode)->ip_blkno);
831ccd979bdSMark Fasheh 
832ccd979bdSMark Fasheh 	fe = (struct ocfs2_dinode *) fe_bh->b_data;
833ccd979bdSMark Fasheh 	el = &fe->id2.i_list;
834ccd979bdSMark Fasheh 
835ccd979bdSMark Fasheh 	if (el->l_tree_depth) {
836ccd979bdSMark Fasheh 		/* jump to end of tree */
837ccd979bdSMark Fasheh 		status = ocfs2_read_block(osb, le64_to_cpu(fe->i_last_eb_blk),
838ccd979bdSMark Fasheh 					  &last_eb_bh, OCFS2_BH_CACHED, inode);
839ccd979bdSMark Fasheh 		if (status < 0) {
840ccd979bdSMark Fasheh 			mlog_exit(status);
841ccd979bdSMark Fasheh 			goto bail;
842ccd979bdSMark Fasheh 		}
843ccd979bdSMark Fasheh 		eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
844ccd979bdSMark Fasheh 		el = &eb->h_list;
845ccd979bdSMark Fasheh 	}
846ccd979bdSMark Fasheh 
847ccd979bdSMark Fasheh 	/* Can we allocate without adding/shifting tree bits? */
848ccd979bdSMark Fasheh 	i = le16_to_cpu(el->l_next_free_rec) - 1;
849ccd979bdSMark Fasheh 	if (le16_to_cpu(el->l_next_free_rec) == 0
850ccd979bdSMark Fasheh 	    || (le16_to_cpu(el->l_next_free_rec) < le16_to_cpu(el->l_count))
851ccd979bdSMark Fasheh 	    || le32_to_cpu(el->l_recs[i].e_clusters) == 0
852ccd979bdSMark Fasheh 	    || ocfs2_extent_contig(inode, &el->l_recs[i], start_blk))
853ccd979bdSMark Fasheh 		goto out_add;
854ccd979bdSMark Fasheh 
855ccd979bdSMark Fasheh 	mlog(0, "ocfs2_allocate_extent: couldn't do a simple add, traversing "
856ccd979bdSMark Fasheh 	     "tree now.\n");
857ccd979bdSMark Fasheh 
858ccd979bdSMark Fasheh 	shift = ocfs2_find_branch_target(osb, inode, fe_bh, &bh);
859ccd979bdSMark Fasheh 	if (shift < 0) {
860ccd979bdSMark Fasheh 		status = shift;
861ccd979bdSMark Fasheh 		mlog_errno(status);
862ccd979bdSMark Fasheh 		goto bail;
863ccd979bdSMark Fasheh 	}
864ccd979bdSMark Fasheh 
865ccd979bdSMark Fasheh 	/* We traveled all the way to the bottom of the allocation tree
866ccd979bdSMark Fasheh 	 * and didn't find room for any more extents - we need to add
867ccd979bdSMark Fasheh 	 * another tree level */
868ccd979bdSMark Fasheh 	if (shift) {
869ccd979bdSMark Fasheh 		/* if we hit a leaf, we'd better be empty :) */
870ccd979bdSMark Fasheh 		BUG_ON(le16_to_cpu(el->l_next_free_rec) !=
871ccd979bdSMark Fasheh 		       le16_to_cpu(el->l_count));
872ccd979bdSMark Fasheh 		BUG_ON(bh);
873ccd979bdSMark Fasheh 		mlog(0, "ocfs2_allocate_extent: need to shift tree depth "
874ccd979bdSMark Fasheh 		     "(current = %u)\n",
875ccd979bdSMark Fasheh 		     le16_to_cpu(fe->id2.i_list.l_tree_depth));
876ccd979bdSMark Fasheh 
877ccd979bdSMark Fasheh 		/* ocfs2_shift_tree_depth will return us a buffer with
878ccd979bdSMark Fasheh 		 * the new extent block (so we can pass that to
879ccd979bdSMark Fasheh 		 * ocfs2_add_branch). */
880ccd979bdSMark Fasheh 		status = ocfs2_shift_tree_depth(osb, handle, inode, fe_bh,
881ccd979bdSMark Fasheh 						meta_ac, &bh);
882ccd979bdSMark Fasheh 		if (status < 0) {
883ccd979bdSMark Fasheh 			mlog_errno(status);
884ccd979bdSMark Fasheh 			goto bail;
885ccd979bdSMark Fasheh 		}
886ccd979bdSMark Fasheh 		/* Special case: we have room now if we shifted from
887ccd979bdSMark Fasheh 		 * tree_depth 0 */
888ccd979bdSMark Fasheh 		if (fe->id2.i_list.l_tree_depth == cpu_to_le16(1))
889ccd979bdSMark Fasheh 			goto out_add;
890ccd979bdSMark Fasheh 	}
891ccd979bdSMark Fasheh 
892ccd979bdSMark Fasheh 	/* call ocfs2_add_branch to add the final part of the tree with
893ccd979bdSMark Fasheh 	 * the new data. */
894ccd979bdSMark Fasheh 	mlog(0, "ocfs2_allocate_extent: add branch. bh = %p\n", bh);
895ccd979bdSMark Fasheh 	status = ocfs2_add_branch(osb, handle, inode, fe_bh, bh, last_eb_bh,
896ccd979bdSMark Fasheh 				  meta_ac);
897ccd979bdSMark Fasheh 	if (status < 0) {
898ccd979bdSMark Fasheh 		mlog_errno(status);
899ccd979bdSMark Fasheh 		goto bail;
900ccd979bdSMark Fasheh 	}
901ccd979bdSMark Fasheh 
902ccd979bdSMark Fasheh out_add:
903ccd979bdSMark Fasheh 	/* Finally, we can add clusters. */
904ccd979bdSMark Fasheh 	status = ocfs2_do_insert_extent(osb, handle, inode, fe_bh,
905ccd979bdSMark Fasheh 					start_blk, new_clusters);
906ccd979bdSMark Fasheh 	if (status < 0)
907ccd979bdSMark Fasheh 		mlog_errno(status);
908ccd979bdSMark Fasheh 
909ccd979bdSMark Fasheh bail:
910ccd979bdSMark Fasheh 	if (bh)
911ccd979bdSMark Fasheh 		brelse(bh);
912ccd979bdSMark Fasheh 
913ccd979bdSMark Fasheh 	if (last_eb_bh)
914ccd979bdSMark Fasheh 		brelse(last_eb_bh);
915ccd979bdSMark Fasheh 
916ccd979bdSMark Fasheh 	mlog_exit(status);
917ccd979bdSMark Fasheh 	return status;
918ccd979bdSMark Fasheh }
919ccd979bdSMark Fasheh 
920ccd979bdSMark Fasheh static inline int ocfs2_truncate_log_needs_flush(struct ocfs2_super *osb)
921ccd979bdSMark Fasheh {
922ccd979bdSMark Fasheh 	struct buffer_head *tl_bh = osb->osb_tl_bh;
923ccd979bdSMark Fasheh 	struct ocfs2_dinode *di;
924ccd979bdSMark Fasheh 	struct ocfs2_truncate_log *tl;
925ccd979bdSMark Fasheh 
926ccd979bdSMark Fasheh 	di = (struct ocfs2_dinode *) tl_bh->b_data;
927ccd979bdSMark Fasheh 	tl = &di->id2.i_dealloc;
928ccd979bdSMark Fasheh 
929ccd979bdSMark Fasheh 	mlog_bug_on_msg(le16_to_cpu(tl->tl_used) > le16_to_cpu(tl->tl_count),
930ccd979bdSMark Fasheh 			"slot %d, invalid truncate log parameters: used = "
931ccd979bdSMark Fasheh 			"%u, count = %u\n", osb->slot_num,
932ccd979bdSMark Fasheh 			le16_to_cpu(tl->tl_used), le16_to_cpu(tl->tl_count));
933ccd979bdSMark Fasheh 	return le16_to_cpu(tl->tl_used) == le16_to_cpu(tl->tl_count);
934ccd979bdSMark Fasheh }
935ccd979bdSMark Fasheh 
936ccd979bdSMark Fasheh static int ocfs2_truncate_log_can_coalesce(struct ocfs2_truncate_log *tl,
937ccd979bdSMark Fasheh 					   unsigned int new_start)
938ccd979bdSMark Fasheh {
939ccd979bdSMark Fasheh 	unsigned int tail_index;
940ccd979bdSMark Fasheh 	unsigned int current_tail;
941ccd979bdSMark Fasheh 
942ccd979bdSMark Fasheh 	/* No records, nothing to coalesce */
943ccd979bdSMark Fasheh 	if (!le16_to_cpu(tl->tl_used))
944ccd979bdSMark Fasheh 		return 0;
945ccd979bdSMark Fasheh 
946ccd979bdSMark Fasheh 	tail_index = le16_to_cpu(tl->tl_used) - 1;
947ccd979bdSMark Fasheh 	current_tail = le32_to_cpu(tl->tl_recs[tail_index].t_start);
948ccd979bdSMark Fasheh 	current_tail += le32_to_cpu(tl->tl_recs[tail_index].t_clusters);
949ccd979bdSMark Fasheh 
950ccd979bdSMark Fasheh 	return current_tail == new_start;
951ccd979bdSMark Fasheh }
952ccd979bdSMark Fasheh 
953ccd979bdSMark Fasheh static int ocfs2_truncate_log_append(struct ocfs2_super *osb,
954ccd979bdSMark Fasheh 				     struct ocfs2_journal_handle *handle,
955ccd979bdSMark Fasheh 				     u64 start_blk,
956ccd979bdSMark Fasheh 				     unsigned int num_clusters)
957ccd979bdSMark Fasheh {
958ccd979bdSMark Fasheh 	int status, index;
959ccd979bdSMark Fasheh 	unsigned int start_cluster, tl_count;
960ccd979bdSMark Fasheh 	struct inode *tl_inode = osb->osb_tl_inode;
961ccd979bdSMark Fasheh 	struct buffer_head *tl_bh = osb->osb_tl_bh;
962ccd979bdSMark Fasheh 	struct ocfs2_dinode *di;
963ccd979bdSMark Fasheh 	struct ocfs2_truncate_log *tl;
964ccd979bdSMark Fasheh 
965b0697053SMark Fasheh 	mlog_entry("start_blk = %llu, num_clusters = %u\n",
966b0697053SMark Fasheh 		   (unsigned long long)start_blk, num_clusters);
967ccd979bdSMark Fasheh 
9681b1dcc1bSJes Sorensen 	BUG_ON(mutex_trylock(&tl_inode->i_mutex));
969ccd979bdSMark Fasheh 
970ccd979bdSMark Fasheh 	start_cluster = ocfs2_blocks_to_clusters(osb->sb, start_blk);
971ccd979bdSMark Fasheh 
972ccd979bdSMark Fasheh 	di = (struct ocfs2_dinode *) tl_bh->b_data;
973ccd979bdSMark Fasheh 	tl = &di->id2.i_dealloc;
974ccd979bdSMark Fasheh 	if (!OCFS2_IS_VALID_DINODE(di)) {
975ccd979bdSMark Fasheh 		OCFS2_RO_ON_INVALID_DINODE(osb->sb, di);
976ccd979bdSMark Fasheh 		status = -EIO;
977ccd979bdSMark Fasheh 		goto bail;
978ccd979bdSMark Fasheh 	}
979ccd979bdSMark Fasheh 
980ccd979bdSMark Fasheh 	tl_count = le16_to_cpu(tl->tl_count);
981ccd979bdSMark Fasheh 	mlog_bug_on_msg(tl_count > ocfs2_truncate_recs_per_inode(osb->sb) ||
982ccd979bdSMark Fasheh 			tl_count == 0,
983b0697053SMark Fasheh 			"Truncate record count on #%llu invalid "
984b0697053SMark Fasheh 			"wanted %u, actual %u\n",
985b0697053SMark Fasheh 			(unsigned long long)OCFS2_I(tl_inode)->ip_blkno,
986ccd979bdSMark Fasheh 			ocfs2_truncate_recs_per_inode(osb->sb),
987ccd979bdSMark Fasheh 			le16_to_cpu(tl->tl_count));
988ccd979bdSMark Fasheh 
989ccd979bdSMark Fasheh 	/* Caller should have known to flush before calling us. */
990ccd979bdSMark Fasheh 	index = le16_to_cpu(tl->tl_used);
991ccd979bdSMark Fasheh 	if (index >= tl_count) {
992ccd979bdSMark Fasheh 		status = -ENOSPC;
993ccd979bdSMark Fasheh 		mlog_errno(status);
994ccd979bdSMark Fasheh 		goto bail;
995ccd979bdSMark Fasheh 	}
996ccd979bdSMark Fasheh 
997ccd979bdSMark Fasheh 	status = ocfs2_journal_access(handle, tl_inode, tl_bh,
998ccd979bdSMark Fasheh 				      OCFS2_JOURNAL_ACCESS_WRITE);
999ccd979bdSMark Fasheh 	if (status < 0) {
1000ccd979bdSMark Fasheh 		mlog_errno(status);
1001ccd979bdSMark Fasheh 		goto bail;
1002ccd979bdSMark Fasheh 	}
1003ccd979bdSMark Fasheh 
1004ccd979bdSMark Fasheh 	mlog(0, "Log truncate of %u clusters starting at cluster %u to "
1005b0697053SMark Fasheh 	     "%llu (index = %d)\n", num_clusters, start_cluster,
1006b0697053SMark Fasheh 	     (unsigned long long)OCFS2_I(tl_inode)->ip_blkno, index);
1007ccd979bdSMark Fasheh 
1008ccd979bdSMark Fasheh 	if (ocfs2_truncate_log_can_coalesce(tl, start_cluster)) {
1009ccd979bdSMark Fasheh 		/*
1010ccd979bdSMark Fasheh 		 * Move index back to the record we are coalescing with.
1011ccd979bdSMark Fasheh 		 * ocfs2_truncate_log_can_coalesce() guarantees nonzero
1012ccd979bdSMark Fasheh 		 */
1013ccd979bdSMark Fasheh 		index--;
1014ccd979bdSMark Fasheh 
1015ccd979bdSMark Fasheh 		num_clusters += le32_to_cpu(tl->tl_recs[index].t_clusters);
1016ccd979bdSMark Fasheh 		mlog(0, "Coalesce with index %u (start = %u, clusters = %u)\n",
1017ccd979bdSMark Fasheh 		     index, le32_to_cpu(tl->tl_recs[index].t_start),
1018ccd979bdSMark Fasheh 		     num_clusters);
1019ccd979bdSMark Fasheh 	} else {
1020ccd979bdSMark Fasheh 		tl->tl_recs[index].t_start = cpu_to_le32(start_cluster);
1021ccd979bdSMark Fasheh 		tl->tl_used = cpu_to_le16(index + 1);
1022ccd979bdSMark Fasheh 	}
1023ccd979bdSMark Fasheh 	tl->tl_recs[index].t_clusters = cpu_to_le32(num_clusters);
1024ccd979bdSMark Fasheh 
1025ccd979bdSMark Fasheh 	status = ocfs2_journal_dirty(handle, tl_bh);
1026ccd979bdSMark Fasheh 	if (status < 0) {
1027ccd979bdSMark Fasheh 		mlog_errno(status);
1028ccd979bdSMark Fasheh 		goto bail;
1029ccd979bdSMark Fasheh 	}
1030ccd979bdSMark Fasheh 
1031ccd979bdSMark Fasheh bail:
1032ccd979bdSMark Fasheh 	mlog_exit(status);
1033ccd979bdSMark Fasheh 	return status;
1034ccd979bdSMark Fasheh }
1035ccd979bdSMark Fasheh 
1036ccd979bdSMark Fasheh static int ocfs2_replay_truncate_records(struct ocfs2_super *osb,
1037ccd979bdSMark Fasheh 					 struct ocfs2_journal_handle *handle,
1038ccd979bdSMark Fasheh 					 struct inode *data_alloc_inode,
1039ccd979bdSMark Fasheh 					 struct buffer_head *data_alloc_bh)
1040ccd979bdSMark Fasheh {
1041ccd979bdSMark Fasheh 	int status = 0;
1042ccd979bdSMark Fasheh 	int i;
1043ccd979bdSMark Fasheh 	unsigned int num_clusters;
1044ccd979bdSMark Fasheh 	u64 start_blk;
1045ccd979bdSMark Fasheh 	struct ocfs2_truncate_rec rec;
1046ccd979bdSMark Fasheh 	struct ocfs2_dinode *di;
1047ccd979bdSMark Fasheh 	struct ocfs2_truncate_log *tl;
1048ccd979bdSMark Fasheh 	struct inode *tl_inode = osb->osb_tl_inode;
1049ccd979bdSMark Fasheh 	struct buffer_head *tl_bh = osb->osb_tl_bh;
1050ccd979bdSMark Fasheh 
1051ccd979bdSMark Fasheh 	mlog_entry_void();
1052ccd979bdSMark Fasheh 
1053ccd979bdSMark Fasheh 	di = (struct ocfs2_dinode *) tl_bh->b_data;
1054ccd979bdSMark Fasheh 	tl = &di->id2.i_dealloc;
1055ccd979bdSMark Fasheh 	i = le16_to_cpu(tl->tl_used) - 1;
1056ccd979bdSMark Fasheh 	while (i >= 0) {
1057ccd979bdSMark Fasheh 		/* Caller has given us at least enough credits to
1058ccd979bdSMark Fasheh 		 * update the truncate log dinode */
1059ccd979bdSMark Fasheh 		status = ocfs2_journal_access(handle, tl_inode, tl_bh,
1060ccd979bdSMark Fasheh 					      OCFS2_JOURNAL_ACCESS_WRITE);
1061ccd979bdSMark Fasheh 		if (status < 0) {
1062ccd979bdSMark Fasheh 			mlog_errno(status);
1063ccd979bdSMark Fasheh 			goto bail;
1064ccd979bdSMark Fasheh 		}
1065ccd979bdSMark Fasheh 
1066ccd979bdSMark Fasheh 		tl->tl_used = cpu_to_le16(i);
1067ccd979bdSMark Fasheh 
1068ccd979bdSMark Fasheh 		status = ocfs2_journal_dirty(handle, tl_bh);
1069ccd979bdSMark Fasheh 		if (status < 0) {
1070ccd979bdSMark Fasheh 			mlog_errno(status);
1071ccd979bdSMark Fasheh 			goto bail;
1072ccd979bdSMark Fasheh 		}
1073ccd979bdSMark Fasheh 
1074ccd979bdSMark Fasheh 		/* TODO: Perhaps we can calculate the bulk of the
1075ccd979bdSMark Fasheh 		 * credits up front rather than extending like
1076ccd979bdSMark Fasheh 		 * this. */
1077ccd979bdSMark Fasheh 		status = ocfs2_extend_trans(handle,
1078ccd979bdSMark Fasheh 					    OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC);
1079ccd979bdSMark Fasheh 		if (status < 0) {
1080ccd979bdSMark Fasheh 			mlog_errno(status);
1081ccd979bdSMark Fasheh 			goto bail;
1082ccd979bdSMark Fasheh 		}
1083ccd979bdSMark Fasheh 
1084ccd979bdSMark Fasheh 		rec = tl->tl_recs[i];
1085ccd979bdSMark Fasheh 		start_blk = ocfs2_clusters_to_blocks(data_alloc_inode->i_sb,
1086ccd979bdSMark Fasheh 						    le32_to_cpu(rec.t_start));
1087ccd979bdSMark Fasheh 		num_clusters = le32_to_cpu(rec.t_clusters);
1088ccd979bdSMark Fasheh 
1089ccd979bdSMark Fasheh 		/* if start_blk is not set, we ignore the record as
1090ccd979bdSMark Fasheh 		 * invalid. */
1091ccd979bdSMark Fasheh 		if (start_blk) {
1092ccd979bdSMark Fasheh 			mlog(0, "free record %d, start = %u, clusters = %u\n",
1093ccd979bdSMark Fasheh 			     i, le32_to_cpu(rec.t_start), num_clusters);
1094ccd979bdSMark Fasheh 
1095ccd979bdSMark Fasheh 			status = ocfs2_free_clusters(handle, data_alloc_inode,
1096ccd979bdSMark Fasheh 						     data_alloc_bh, start_blk,
1097ccd979bdSMark Fasheh 						     num_clusters);
1098ccd979bdSMark Fasheh 			if (status < 0) {
1099ccd979bdSMark Fasheh 				mlog_errno(status);
1100ccd979bdSMark Fasheh 				goto bail;
1101ccd979bdSMark Fasheh 			}
1102ccd979bdSMark Fasheh 		}
1103ccd979bdSMark Fasheh 		i--;
1104ccd979bdSMark Fasheh 	}
1105ccd979bdSMark Fasheh 
1106ccd979bdSMark Fasheh bail:
1107ccd979bdSMark Fasheh 	mlog_exit(status);
1108ccd979bdSMark Fasheh 	return status;
1109ccd979bdSMark Fasheh }
1110ccd979bdSMark Fasheh 
11111b1dcc1bSJes Sorensen /* Expects you to already be holding tl_inode->i_mutex */
1112ccd979bdSMark Fasheh static int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
1113ccd979bdSMark Fasheh {
1114ccd979bdSMark Fasheh 	int status;
1115ccd979bdSMark Fasheh 	unsigned int num_to_flush;
1116ccd979bdSMark Fasheh 	struct ocfs2_journal_handle *handle = NULL;
1117ccd979bdSMark Fasheh 	struct inode *tl_inode = osb->osb_tl_inode;
1118ccd979bdSMark Fasheh 	struct inode *data_alloc_inode = NULL;
1119ccd979bdSMark Fasheh 	struct buffer_head *tl_bh = osb->osb_tl_bh;
1120ccd979bdSMark Fasheh 	struct buffer_head *data_alloc_bh = NULL;
1121ccd979bdSMark Fasheh 	struct ocfs2_dinode *di;
1122ccd979bdSMark Fasheh 	struct ocfs2_truncate_log *tl;
1123ccd979bdSMark Fasheh 
1124ccd979bdSMark Fasheh 	mlog_entry_void();
1125ccd979bdSMark Fasheh 
11261b1dcc1bSJes Sorensen 	BUG_ON(mutex_trylock(&tl_inode->i_mutex));
1127ccd979bdSMark Fasheh 
1128ccd979bdSMark Fasheh 	di = (struct ocfs2_dinode *) tl_bh->b_data;
1129ccd979bdSMark Fasheh 	tl = &di->id2.i_dealloc;
1130ccd979bdSMark Fasheh 	if (!OCFS2_IS_VALID_DINODE(di)) {
1131ccd979bdSMark Fasheh 		OCFS2_RO_ON_INVALID_DINODE(osb->sb, di);
1132ccd979bdSMark Fasheh 		status = -EIO;
1133ccd979bdSMark Fasheh 		goto bail;
1134ccd979bdSMark Fasheh 	}
1135ccd979bdSMark Fasheh 
1136ccd979bdSMark Fasheh 	num_to_flush = le16_to_cpu(tl->tl_used);
1137b0697053SMark Fasheh 	mlog(0, "Flush %u records from truncate log #%llu\n",
1138b0697053SMark Fasheh 	     num_to_flush, (unsigned long long)OCFS2_I(tl_inode)->ip_blkno);
1139ccd979bdSMark Fasheh 	if (!num_to_flush) {
1140ccd979bdSMark Fasheh 		status = 0;
1141ccd979bdSMark Fasheh 		goto bail;
1142ccd979bdSMark Fasheh 	}
1143ccd979bdSMark Fasheh 
1144ccd979bdSMark Fasheh 	handle = ocfs2_alloc_handle(osb);
1145ccd979bdSMark Fasheh 	if (!handle) {
1146ccd979bdSMark Fasheh 		status = -ENOMEM;
1147ccd979bdSMark Fasheh 		mlog_errno(status);
1148ccd979bdSMark Fasheh 		goto bail;
1149ccd979bdSMark Fasheh 	}
1150ccd979bdSMark Fasheh 
1151ccd979bdSMark Fasheh 	data_alloc_inode = ocfs2_get_system_file_inode(osb,
1152ccd979bdSMark Fasheh 						       GLOBAL_BITMAP_SYSTEM_INODE,
1153ccd979bdSMark Fasheh 						       OCFS2_INVALID_SLOT);
1154ccd979bdSMark Fasheh 	if (!data_alloc_inode) {
1155ccd979bdSMark Fasheh 		status = -EINVAL;
1156ccd979bdSMark Fasheh 		mlog(ML_ERROR, "Could not get bitmap inode!\n");
1157ccd979bdSMark Fasheh 		goto bail;
1158ccd979bdSMark Fasheh 	}
1159ccd979bdSMark Fasheh 
1160ccd979bdSMark Fasheh 	ocfs2_handle_add_inode(handle, data_alloc_inode);
1161ccd979bdSMark Fasheh 	status = ocfs2_meta_lock(data_alloc_inode, handle, &data_alloc_bh, 1);
1162ccd979bdSMark Fasheh 	if (status < 0) {
1163ccd979bdSMark Fasheh 		mlog_errno(status);
1164ccd979bdSMark Fasheh 		goto bail;
1165ccd979bdSMark Fasheh 	}
1166ccd979bdSMark Fasheh 
1167ccd979bdSMark Fasheh 	handle = ocfs2_start_trans(osb, handle, OCFS2_TRUNCATE_LOG_UPDATE);
1168ccd979bdSMark Fasheh 	if (IS_ERR(handle)) {
1169ccd979bdSMark Fasheh 		status = PTR_ERR(handle);
1170ccd979bdSMark Fasheh 		handle = NULL;
1171ccd979bdSMark Fasheh 		mlog_errno(status);
1172ccd979bdSMark Fasheh 		goto bail;
1173ccd979bdSMark Fasheh 	}
1174ccd979bdSMark Fasheh 
1175ccd979bdSMark Fasheh 	status = ocfs2_replay_truncate_records(osb, handle, data_alloc_inode,
1176ccd979bdSMark Fasheh 					       data_alloc_bh);
1177ccd979bdSMark Fasheh 	if (status < 0) {
1178ccd979bdSMark Fasheh 		mlog_errno(status);
1179ccd979bdSMark Fasheh 		goto bail;
1180ccd979bdSMark Fasheh 	}
1181ccd979bdSMark Fasheh 
1182ccd979bdSMark Fasheh bail:
1183ccd979bdSMark Fasheh 	if (handle)
1184ccd979bdSMark Fasheh 		ocfs2_commit_trans(handle);
1185ccd979bdSMark Fasheh 
1186ccd979bdSMark Fasheh 	if (data_alloc_inode)
1187ccd979bdSMark Fasheh 		iput(data_alloc_inode);
1188ccd979bdSMark Fasheh 
1189ccd979bdSMark Fasheh 	if (data_alloc_bh)
1190ccd979bdSMark Fasheh 		brelse(data_alloc_bh);
1191ccd979bdSMark Fasheh 
1192ccd979bdSMark Fasheh 	mlog_exit(status);
1193ccd979bdSMark Fasheh 	return status;
1194ccd979bdSMark Fasheh }
1195ccd979bdSMark Fasheh 
1196ccd979bdSMark Fasheh int ocfs2_flush_truncate_log(struct ocfs2_super *osb)
1197ccd979bdSMark Fasheh {
1198ccd979bdSMark Fasheh 	int status;
1199ccd979bdSMark Fasheh 	struct inode *tl_inode = osb->osb_tl_inode;
1200ccd979bdSMark Fasheh 
12011b1dcc1bSJes Sorensen 	mutex_lock(&tl_inode->i_mutex);
1202ccd979bdSMark Fasheh 	status = __ocfs2_flush_truncate_log(osb);
12031b1dcc1bSJes Sorensen 	mutex_unlock(&tl_inode->i_mutex);
1204ccd979bdSMark Fasheh 
1205ccd979bdSMark Fasheh 	return status;
1206ccd979bdSMark Fasheh }
1207ccd979bdSMark Fasheh 
/*
 * Work callback: data is the struct ocfs2_super whose truncate log is
 * to be flushed. A flush failure is only logged; nothing is returned.
 */
static void ocfs2_truncate_log_worker(void *data)
{
	struct ocfs2_super *osb = data;
	int ret;

	mlog_entry_void();

	ret = ocfs2_flush_truncate_log(osb);
	if (ret < 0)
		mlog_errno(ret);

	mlog_exit(ret);
}
1221ccd979bdSMark Fasheh 
1222ccd979bdSMark Fasheh #define OCFS2_TRUNCATE_LOG_FLUSH_INTERVAL (2 * HZ)
1223ccd979bdSMark Fasheh void ocfs2_schedule_truncate_log_flush(struct ocfs2_super *osb,
1224ccd979bdSMark Fasheh 				       int cancel)
1225ccd979bdSMark Fasheh {
1226ccd979bdSMark Fasheh 	if (osb->osb_tl_inode) {
1227ccd979bdSMark Fasheh 		/* We want to push off log flushes while truncates are
1228ccd979bdSMark Fasheh 		 * still running. */
1229ccd979bdSMark Fasheh 		if (cancel)
1230ccd979bdSMark Fasheh 			cancel_delayed_work(&osb->osb_truncate_log_wq);
1231ccd979bdSMark Fasheh 
1232ccd979bdSMark Fasheh 		queue_delayed_work(ocfs2_wq, &osb->osb_truncate_log_wq,
1233ccd979bdSMark Fasheh 				   OCFS2_TRUNCATE_LOG_FLUSH_INTERVAL);
1234ccd979bdSMark Fasheh 	}
1235ccd979bdSMark Fasheh }
1236ccd979bdSMark Fasheh 
1237ccd979bdSMark Fasheh static int ocfs2_get_truncate_log_info(struct ocfs2_super *osb,
1238ccd979bdSMark Fasheh 				       int slot_num,
1239ccd979bdSMark Fasheh 				       struct inode **tl_inode,
1240ccd979bdSMark Fasheh 				       struct buffer_head **tl_bh)
1241ccd979bdSMark Fasheh {
1242ccd979bdSMark Fasheh 	int status;
1243ccd979bdSMark Fasheh 	struct inode *inode = NULL;
1244ccd979bdSMark Fasheh 	struct buffer_head *bh = NULL;
1245ccd979bdSMark Fasheh 
1246ccd979bdSMark Fasheh 	inode = ocfs2_get_system_file_inode(osb,
1247ccd979bdSMark Fasheh 					   TRUNCATE_LOG_SYSTEM_INODE,
1248ccd979bdSMark Fasheh 					   slot_num);
1249ccd979bdSMark Fasheh 	if (!inode) {
1250ccd979bdSMark Fasheh 		status = -EINVAL;
1251ccd979bdSMark Fasheh 		mlog(ML_ERROR, "Could not get load truncate log inode!\n");
1252ccd979bdSMark Fasheh 		goto bail;
1253ccd979bdSMark Fasheh 	}
1254ccd979bdSMark Fasheh 
1255ccd979bdSMark Fasheh 	status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &bh,
1256ccd979bdSMark Fasheh 				  OCFS2_BH_CACHED, inode);
1257ccd979bdSMark Fasheh 	if (status < 0) {
1258ccd979bdSMark Fasheh 		iput(inode);
1259ccd979bdSMark Fasheh 		mlog_errno(status);
1260ccd979bdSMark Fasheh 		goto bail;
1261ccd979bdSMark Fasheh 	}
1262ccd979bdSMark Fasheh 
1263ccd979bdSMark Fasheh 	*tl_inode = inode;
1264ccd979bdSMark Fasheh 	*tl_bh    = bh;
1265ccd979bdSMark Fasheh bail:
1266ccd979bdSMark Fasheh 	mlog_exit(status);
1267ccd979bdSMark Fasheh 	return status;
1268ccd979bdSMark Fasheh }
1269ccd979bdSMark Fasheh 
1270ccd979bdSMark Fasheh /* called during the 1st stage of node recovery. we stamp a clean
1271ccd979bdSMark Fasheh  * truncate log and pass back a copy for processing later. if the
1272ccd979bdSMark Fasheh  * truncate log does not require processing, a *tl_copy is set to
1273ccd979bdSMark Fasheh  * NULL. */
1274ccd979bdSMark Fasheh int ocfs2_begin_truncate_log_recovery(struct ocfs2_super *osb,
1275ccd979bdSMark Fasheh 				      int slot_num,
1276ccd979bdSMark Fasheh 				      struct ocfs2_dinode **tl_copy)
1277ccd979bdSMark Fasheh {
1278ccd979bdSMark Fasheh 	int status;
1279ccd979bdSMark Fasheh 	struct inode *tl_inode = NULL;
1280ccd979bdSMark Fasheh 	struct buffer_head *tl_bh = NULL;
1281ccd979bdSMark Fasheh 	struct ocfs2_dinode *di;
1282ccd979bdSMark Fasheh 	struct ocfs2_truncate_log *tl;
1283ccd979bdSMark Fasheh 
1284ccd979bdSMark Fasheh 	*tl_copy = NULL;
1285ccd979bdSMark Fasheh 
1286ccd979bdSMark Fasheh 	mlog(0, "recover truncate log from slot %d\n", slot_num);
1287ccd979bdSMark Fasheh 
1288ccd979bdSMark Fasheh 	status = ocfs2_get_truncate_log_info(osb, slot_num, &tl_inode, &tl_bh);
1289ccd979bdSMark Fasheh 	if (status < 0) {
1290ccd979bdSMark Fasheh 		mlog_errno(status);
1291ccd979bdSMark Fasheh 		goto bail;
1292ccd979bdSMark Fasheh 	}
1293ccd979bdSMark Fasheh 
1294ccd979bdSMark Fasheh 	di = (struct ocfs2_dinode *) tl_bh->b_data;
1295ccd979bdSMark Fasheh 	tl = &di->id2.i_dealloc;
1296ccd979bdSMark Fasheh 	if (!OCFS2_IS_VALID_DINODE(di)) {
1297ccd979bdSMark Fasheh 		OCFS2_RO_ON_INVALID_DINODE(tl_inode->i_sb, di);
1298ccd979bdSMark Fasheh 		status = -EIO;
1299ccd979bdSMark Fasheh 		goto bail;
1300ccd979bdSMark Fasheh 	}
1301ccd979bdSMark Fasheh 
1302ccd979bdSMark Fasheh 	if (le16_to_cpu(tl->tl_used)) {
1303ccd979bdSMark Fasheh 		mlog(0, "We'll have %u logs to recover\n",
1304ccd979bdSMark Fasheh 		     le16_to_cpu(tl->tl_used));
1305ccd979bdSMark Fasheh 
1306ccd979bdSMark Fasheh 		*tl_copy = kmalloc(tl_bh->b_size, GFP_KERNEL);
1307ccd979bdSMark Fasheh 		if (!(*tl_copy)) {
1308ccd979bdSMark Fasheh 			status = -ENOMEM;
1309ccd979bdSMark Fasheh 			mlog_errno(status);
1310ccd979bdSMark Fasheh 			goto bail;
1311ccd979bdSMark Fasheh 		}
1312ccd979bdSMark Fasheh 
1313ccd979bdSMark Fasheh 		/* Assuming the write-out below goes well, this copy
1314ccd979bdSMark Fasheh 		 * will be passed back to recovery for processing. */
1315ccd979bdSMark Fasheh 		memcpy(*tl_copy, tl_bh->b_data, tl_bh->b_size);
1316ccd979bdSMark Fasheh 
1317ccd979bdSMark Fasheh 		/* All we need to do to clear the truncate log is set
1318ccd979bdSMark Fasheh 		 * tl_used. */
1319ccd979bdSMark Fasheh 		tl->tl_used = 0;
1320ccd979bdSMark Fasheh 
1321ccd979bdSMark Fasheh 		status = ocfs2_write_block(osb, tl_bh, tl_inode);
1322ccd979bdSMark Fasheh 		if (status < 0) {
1323ccd979bdSMark Fasheh 			mlog_errno(status);
1324ccd979bdSMark Fasheh 			goto bail;
1325ccd979bdSMark Fasheh 		}
1326ccd979bdSMark Fasheh 	}
1327ccd979bdSMark Fasheh 
1328ccd979bdSMark Fasheh bail:
1329ccd979bdSMark Fasheh 	if (tl_inode)
1330ccd979bdSMark Fasheh 		iput(tl_inode);
1331ccd979bdSMark Fasheh 	if (tl_bh)
1332ccd979bdSMark Fasheh 		brelse(tl_bh);
1333ccd979bdSMark Fasheh 
1334ccd979bdSMark Fasheh 	if (status < 0 && (*tl_copy)) {
1335ccd979bdSMark Fasheh 		kfree(*tl_copy);
1336ccd979bdSMark Fasheh 		*tl_copy = NULL;
1337ccd979bdSMark Fasheh 	}
1338ccd979bdSMark Fasheh 
1339ccd979bdSMark Fasheh 	mlog_exit(status);
1340ccd979bdSMark Fasheh 	return status;
1341ccd979bdSMark Fasheh }
1342ccd979bdSMark Fasheh 
1343ccd979bdSMark Fasheh int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb,
1344ccd979bdSMark Fasheh 					 struct ocfs2_dinode *tl_copy)
1345ccd979bdSMark Fasheh {
1346ccd979bdSMark Fasheh 	int status = 0;
1347ccd979bdSMark Fasheh 	int i;
1348ccd979bdSMark Fasheh 	unsigned int clusters, num_recs, start_cluster;
1349ccd979bdSMark Fasheh 	u64 start_blk;
1350ccd979bdSMark Fasheh 	struct ocfs2_journal_handle *handle;
1351ccd979bdSMark Fasheh 	struct inode *tl_inode = osb->osb_tl_inode;
1352ccd979bdSMark Fasheh 	struct ocfs2_truncate_log *tl;
1353ccd979bdSMark Fasheh 
1354ccd979bdSMark Fasheh 	mlog_entry_void();
1355ccd979bdSMark Fasheh 
1356ccd979bdSMark Fasheh 	if (OCFS2_I(tl_inode)->ip_blkno == le64_to_cpu(tl_copy->i_blkno)) {
1357ccd979bdSMark Fasheh 		mlog(ML_ERROR, "Asked to recover my own truncate log!\n");
1358ccd979bdSMark Fasheh 		return -EINVAL;
1359ccd979bdSMark Fasheh 	}
1360ccd979bdSMark Fasheh 
1361ccd979bdSMark Fasheh 	tl = &tl_copy->id2.i_dealloc;
1362ccd979bdSMark Fasheh 	num_recs = le16_to_cpu(tl->tl_used);
1363b0697053SMark Fasheh 	mlog(0, "cleanup %u records from %llu\n", num_recs,
1364b0697053SMark Fasheh 	     (unsigned long long)tl_copy->i_blkno);
1365ccd979bdSMark Fasheh 
13661b1dcc1bSJes Sorensen 	mutex_lock(&tl_inode->i_mutex);
1367ccd979bdSMark Fasheh 	for(i = 0; i < num_recs; i++) {
1368ccd979bdSMark Fasheh 		if (ocfs2_truncate_log_needs_flush(osb)) {
1369ccd979bdSMark Fasheh 			status = __ocfs2_flush_truncate_log(osb);
1370ccd979bdSMark Fasheh 			if (status < 0) {
1371ccd979bdSMark Fasheh 				mlog_errno(status);
1372ccd979bdSMark Fasheh 				goto bail_up;
1373ccd979bdSMark Fasheh 			}
1374ccd979bdSMark Fasheh 		}
1375ccd979bdSMark Fasheh 
1376ccd979bdSMark Fasheh 		handle = ocfs2_start_trans(osb, NULL,
1377ccd979bdSMark Fasheh 					   OCFS2_TRUNCATE_LOG_UPDATE);
1378ccd979bdSMark Fasheh 		if (IS_ERR(handle)) {
1379ccd979bdSMark Fasheh 			status = PTR_ERR(handle);
1380ccd979bdSMark Fasheh 			mlog_errno(status);
1381ccd979bdSMark Fasheh 			goto bail_up;
1382ccd979bdSMark Fasheh 		}
1383ccd979bdSMark Fasheh 
1384ccd979bdSMark Fasheh 		clusters = le32_to_cpu(tl->tl_recs[i].t_clusters);
1385ccd979bdSMark Fasheh 		start_cluster = le32_to_cpu(tl->tl_recs[i].t_start);
1386ccd979bdSMark Fasheh 		start_blk = ocfs2_clusters_to_blocks(osb->sb, start_cluster);
1387ccd979bdSMark Fasheh 
1388ccd979bdSMark Fasheh 		status = ocfs2_truncate_log_append(osb, handle,
1389ccd979bdSMark Fasheh 						   start_blk, clusters);
1390ccd979bdSMark Fasheh 		ocfs2_commit_trans(handle);
1391ccd979bdSMark Fasheh 		if (status < 0) {
1392ccd979bdSMark Fasheh 			mlog_errno(status);
1393ccd979bdSMark Fasheh 			goto bail_up;
1394ccd979bdSMark Fasheh 		}
1395ccd979bdSMark Fasheh 	}
1396ccd979bdSMark Fasheh 
1397ccd979bdSMark Fasheh bail_up:
13981b1dcc1bSJes Sorensen 	mutex_unlock(&tl_inode->i_mutex);
1399ccd979bdSMark Fasheh 
1400ccd979bdSMark Fasheh 	mlog_exit(status);
1401ccd979bdSMark Fasheh 	return status;
1402ccd979bdSMark Fasheh }
1403ccd979bdSMark Fasheh 
1404ccd979bdSMark Fasheh void ocfs2_truncate_log_shutdown(struct ocfs2_super *osb)
1405ccd979bdSMark Fasheh {
1406ccd979bdSMark Fasheh 	int status;
1407ccd979bdSMark Fasheh 	struct inode *tl_inode = osb->osb_tl_inode;
1408ccd979bdSMark Fasheh 
1409ccd979bdSMark Fasheh 	mlog_entry_void();
1410ccd979bdSMark Fasheh 
1411ccd979bdSMark Fasheh 	if (tl_inode) {
1412ccd979bdSMark Fasheh 		cancel_delayed_work(&osb->osb_truncate_log_wq);
1413ccd979bdSMark Fasheh 		flush_workqueue(ocfs2_wq);
1414ccd979bdSMark Fasheh 
1415ccd979bdSMark Fasheh 		status = ocfs2_flush_truncate_log(osb);
1416ccd979bdSMark Fasheh 		if (status < 0)
1417ccd979bdSMark Fasheh 			mlog_errno(status);
1418ccd979bdSMark Fasheh 
1419ccd979bdSMark Fasheh 		brelse(osb->osb_tl_bh);
1420ccd979bdSMark Fasheh 		iput(osb->osb_tl_inode);
1421ccd979bdSMark Fasheh 	}
1422ccd979bdSMark Fasheh 
1423ccd979bdSMark Fasheh 	mlog_exit_void();
1424ccd979bdSMark Fasheh }
1425ccd979bdSMark Fasheh 
1426ccd979bdSMark Fasheh int ocfs2_truncate_log_init(struct ocfs2_super *osb)
1427ccd979bdSMark Fasheh {
1428ccd979bdSMark Fasheh 	int status;
1429ccd979bdSMark Fasheh 	struct inode *tl_inode = NULL;
1430ccd979bdSMark Fasheh 	struct buffer_head *tl_bh = NULL;
1431ccd979bdSMark Fasheh 
1432ccd979bdSMark Fasheh 	mlog_entry_void();
1433ccd979bdSMark Fasheh 
1434ccd979bdSMark Fasheh 	status = ocfs2_get_truncate_log_info(osb,
1435ccd979bdSMark Fasheh 					     osb->slot_num,
1436ccd979bdSMark Fasheh 					     &tl_inode,
1437ccd979bdSMark Fasheh 					     &tl_bh);
1438ccd979bdSMark Fasheh 	if (status < 0)
1439ccd979bdSMark Fasheh 		mlog_errno(status);
1440ccd979bdSMark Fasheh 
1441ccd979bdSMark Fasheh 	/* ocfs2_truncate_log_shutdown keys on the existence of
1442ccd979bdSMark Fasheh 	 * osb->osb_tl_inode so we don't set any of the osb variables
1443ccd979bdSMark Fasheh 	 * until we're sure all is well. */
1444ccd979bdSMark Fasheh 	INIT_WORK(&osb->osb_truncate_log_wq, ocfs2_truncate_log_worker, osb);
1445ccd979bdSMark Fasheh 	osb->osb_tl_bh    = tl_bh;
1446ccd979bdSMark Fasheh 	osb->osb_tl_inode = tl_inode;
1447ccd979bdSMark Fasheh 
1448ccd979bdSMark Fasheh 	mlog_exit(status);
1449ccd979bdSMark Fasheh 	return status;
1450ccd979bdSMark Fasheh }
1451ccd979bdSMark Fasheh 
1452ccd979bdSMark Fasheh /* This function will figure out whether the currently last extent
1453ccd979bdSMark Fasheh  * block will be deleted, and if it will, what the new last extent
1454ccd979bdSMark Fasheh  * block will be so we can update his h_next_leaf_blk field, as well
1455ccd979bdSMark Fasheh  * as the dinodes i_last_eb_blk */
1456ccd979bdSMark Fasheh static int ocfs2_find_new_last_ext_blk(struct ocfs2_super *osb,
1457ccd979bdSMark Fasheh 				       struct inode *inode,
1458ccd979bdSMark Fasheh 				       struct ocfs2_dinode *fe,
1459ccd979bdSMark Fasheh 				       u32 new_i_clusters,
1460ccd979bdSMark Fasheh 				       struct buffer_head *old_last_eb,
1461ccd979bdSMark Fasheh 				       struct buffer_head **new_last_eb)
1462ccd979bdSMark Fasheh {
1463ccd979bdSMark Fasheh 	int i, status = 0;
1464ccd979bdSMark Fasheh 	u64 block = 0;
1465ccd979bdSMark Fasheh 	struct ocfs2_extent_block *eb;
1466ccd979bdSMark Fasheh 	struct ocfs2_extent_list *el;
1467ccd979bdSMark Fasheh 	struct buffer_head *bh = NULL;
1468ccd979bdSMark Fasheh 
1469ccd979bdSMark Fasheh 	*new_last_eb = NULL;
1470ccd979bdSMark Fasheh 
1471ccd979bdSMark Fasheh 	if (!OCFS2_IS_VALID_DINODE(fe)) {
1472ccd979bdSMark Fasheh 		OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe);
1473ccd979bdSMark Fasheh 		status = -EIO;
1474ccd979bdSMark Fasheh 		goto bail;
1475ccd979bdSMark Fasheh 	}
1476ccd979bdSMark Fasheh 
1477ccd979bdSMark Fasheh 	/* we have no tree, so of course, no last_eb. */
1478ccd979bdSMark Fasheh 	if (!fe->id2.i_list.l_tree_depth)
1479ccd979bdSMark Fasheh 		goto bail;
1480ccd979bdSMark Fasheh 
1481ccd979bdSMark Fasheh 	/* trunc to zero special case - this makes tree_depth = 0
1482ccd979bdSMark Fasheh 	 * regardless of what it is.  */
1483ccd979bdSMark Fasheh 	if (!new_i_clusters)
1484ccd979bdSMark Fasheh 		goto bail;
1485ccd979bdSMark Fasheh 
1486ccd979bdSMark Fasheh 	eb = (struct ocfs2_extent_block *) old_last_eb->b_data;
1487ccd979bdSMark Fasheh 	el = &(eb->h_list);
1488ccd979bdSMark Fasheh 	BUG_ON(!el->l_next_free_rec);
1489ccd979bdSMark Fasheh 
1490ccd979bdSMark Fasheh 	/* Make sure that this guy will actually be empty after we
1491ccd979bdSMark Fasheh 	 * clear away the data. */
1492ccd979bdSMark Fasheh 	if (le32_to_cpu(el->l_recs[0].e_cpos) < new_i_clusters)
1493ccd979bdSMark Fasheh 		goto bail;
1494ccd979bdSMark Fasheh 
1495ccd979bdSMark Fasheh 	/* Ok, at this point, we know that last_eb will definitely
1496ccd979bdSMark Fasheh 	 * change, so lets traverse the tree and find the second to
1497ccd979bdSMark Fasheh 	 * last extent block. */
1498ccd979bdSMark Fasheh 	el = &(fe->id2.i_list);
1499ccd979bdSMark Fasheh 	/* go down the tree, */
1500ccd979bdSMark Fasheh 	do {
1501ccd979bdSMark Fasheh 		for(i = (le16_to_cpu(el->l_next_free_rec) - 1); i >= 0; i--) {
1502ccd979bdSMark Fasheh 			if (le32_to_cpu(el->l_recs[i].e_cpos) <
1503ccd979bdSMark Fasheh 			    new_i_clusters) {
1504ccd979bdSMark Fasheh 				block = le64_to_cpu(el->l_recs[i].e_blkno);
1505ccd979bdSMark Fasheh 				break;
1506ccd979bdSMark Fasheh 			}
1507ccd979bdSMark Fasheh 		}
1508ccd979bdSMark Fasheh 		BUG_ON(i < 0);
1509ccd979bdSMark Fasheh 
1510ccd979bdSMark Fasheh 		if (bh) {
1511ccd979bdSMark Fasheh 			brelse(bh);
1512ccd979bdSMark Fasheh 			bh = NULL;
1513ccd979bdSMark Fasheh 		}
1514ccd979bdSMark Fasheh 
1515ccd979bdSMark Fasheh 		status = ocfs2_read_block(osb, block, &bh, OCFS2_BH_CACHED,
1516ccd979bdSMark Fasheh 					 inode);
1517ccd979bdSMark Fasheh 		if (status < 0) {
1518ccd979bdSMark Fasheh 			mlog_errno(status);
1519ccd979bdSMark Fasheh 			goto bail;
1520ccd979bdSMark Fasheh 		}
1521ccd979bdSMark Fasheh 		eb = (struct ocfs2_extent_block *) bh->b_data;
1522ccd979bdSMark Fasheh 		el = &eb->h_list;
1523ccd979bdSMark Fasheh 		if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
1524ccd979bdSMark Fasheh 			OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
1525ccd979bdSMark Fasheh 			status = -EIO;
1526ccd979bdSMark Fasheh 			goto bail;
1527ccd979bdSMark Fasheh 		}
1528ccd979bdSMark Fasheh 	} while (el->l_tree_depth);
1529ccd979bdSMark Fasheh 
1530ccd979bdSMark Fasheh 	*new_last_eb = bh;
1531ccd979bdSMark Fasheh 	get_bh(*new_last_eb);
1532b0697053SMark Fasheh 	mlog(0, "returning block %llu\n",
1533b0697053SMark Fasheh 	     (unsigned long long)le64_to_cpu(eb->h_blkno));
1534ccd979bdSMark Fasheh bail:
1535ccd979bdSMark Fasheh 	if (bh)
1536ccd979bdSMark Fasheh 		brelse(bh);
1537ccd979bdSMark Fasheh 
1538ccd979bdSMark Fasheh 	return status;
1539ccd979bdSMark Fasheh }
1540ccd979bdSMark Fasheh 
1541ccd979bdSMark Fasheh static int ocfs2_do_truncate(struct ocfs2_super *osb,
1542ccd979bdSMark Fasheh 			     unsigned int clusters_to_del,
1543ccd979bdSMark Fasheh 			     struct inode *inode,
1544ccd979bdSMark Fasheh 			     struct buffer_head *fe_bh,
1545ccd979bdSMark Fasheh 			     struct buffer_head *old_last_eb_bh,
1546ccd979bdSMark Fasheh 			     struct ocfs2_journal_handle *handle,
1547ccd979bdSMark Fasheh 			     struct ocfs2_truncate_context *tc)
1548ccd979bdSMark Fasheh {
1549ccd979bdSMark Fasheh 	int status, i, depth;
1550ccd979bdSMark Fasheh 	struct ocfs2_dinode *fe;
1551ccd979bdSMark Fasheh 	struct ocfs2_extent_block *eb;
1552ccd979bdSMark Fasheh 	struct ocfs2_extent_block *last_eb = NULL;
1553ccd979bdSMark Fasheh 	struct ocfs2_extent_list *el;
1554ccd979bdSMark Fasheh 	struct buffer_head *eb_bh = NULL;
1555ccd979bdSMark Fasheh 	struct buffer_head *last_eb_bh = NULL;
1556ccd979bdSMark Fasheh 	u64 next_eb = 0;
1557ccd979bdSMark Fasheh 	u64 delete_blk = 0;
1558ccd979bdSMark Fasheh 
1559ccd979bdSMark Fasheh 	fe = (struct ocfs2_dinode *) fe_bh->b_data;
1560ccd979bdSMark Fasheh 
1561ccd979bdSMark Fasheh 	status = ocfs2_find_new_last_ext_blk(osb,
1562ccd979bdSMark Fasheh 					     inode,
1563ccd979bdSMark Fasheh 					     fe,
1564ccd979bdSMark Fasheh 					     le32_to_cpu(fe->i_clusters) -
1565ccd979bdSMark Fasheh 					     		clusters_to_del,
1566ccd979bdSMark Fasheh 					     old_last_eb_bh,
1567ccd979bdSMark Fasheh 					     &last_eb_bh);
1568ccd979bdSMark Fasheh 	if (status < 0) {
1569ccd979bdSMark Fasheh 		mlog_errno(status);
1570ccd979bdSMark Fasheh 		goto bail;
1571ccd979bdSMark Fasheh 	}
1572ccd979bdSMark Fasheh 	if (last_eb_bh)
1573ccd979bdSMark Fasheh 		last_eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
1574ccd979bdSMark Fasheh 
1575ccd979bdSMark Fasheh 	status = ocfs2_journal_access(handle, inode, fe_bh,
1576ccd979bdSMark Fasheh 				      OCFS2_JOURNAL_ACCESS_WRITE);
1577ccd979bdSMark Fasheh 	if (status < 0) {
1578ccd979bdSMark Fasheh 		mlog_errno(status);
1579ccd979bdSMark Fasheh 		goto bail;
1580ccd979bdSMark Fasheh 	}
1581ccd979bdSMark Fasheh 	el = &(fe->id2.i_list);
1582ccd979bdSMark Fasheh 
1583ccd979bdSMark Fasheh 	spin_lock(&OCFS2_I(inode)->ip_lock);
1584ccd979bdSMark Fasheh 	OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters) -
1585ccd979bdSMark Fasheh 				      clusters_to_del;
1586ccd979bdSMark Fasheh 	spin_unlock(&OCFS2_I(inode)->ip_lock);
1587ccd979bdSMark Fasheh 	le32_add_cpu(&fe->i_clusters, -clusters_to_del);
1588ccd979bdSMark Fasheh 	fe->i_mtime = cpu_to_le64(CURRENT_TIME.tv_sec);
1589ccd979bdSMark Fasheh 	fe->i_mtime_nsec = cpu_to_le32(CURRENT_TIME.tv_nsec);
1590ccd979bdSMark Fasheh 
1591ccd979bdSMark Fasheh 	i = le16_to_cpu(el->l_next_free_rec) - 1;
1592ccd979bdSMark Fasheh 
1593ccd979bdSMark Fasheh 	BUG_ON(le32_to_cpu(el->l_recs[i].e_clusters) < clusters_to_del);
1594ccd979bdSMark Fasheh 	le32_add_cpu(&el->l_recs[i].e_clusters, -clusters_to_del);
1595ccd979bdSMark Fasheh 	/* tree depth zero, we can just delete the clusters, otherwise
1596ccd979bdSMark Fasheh 	 * we need to record the offset of the next level extent block
1597ccd979bdSMark Fasheh 	 * as we may overwrite it. */
1598ccd979bdSMark Fasheh 	if (!el->l_tree_depth)
1599ccd979bdSMark Fasheh 		delete_blk = le64_to_cpu(el->l_recs[i].e_blkno)
1600ccd979bdSMark Fasheh 			+ ocfs2_clusters_to_blocks(osb->sb,
1601ccd979bdSMark Fasheh 					le32_to_cpu(el->l_recs[i].e_clusters));
1602ccd979bdSMark Fasheh 	else
1603ccd979bdSMark Fasheh 		next_eb = le64_to_cpu(el->l_recs[i].e_blkno);
1604ccd979bdSMark Fasheh 
1605ccd979bdSMark Fasheh 	if (!el->l_recs[i].e_clusters) {
1606ccd979bdSMark Fasheh 		/* if we deleted the whole extent record, then clear
1607ccd979bdSMark Fasheh 		 * out the other fields and update the extent
1608ccd979bdSMark Fasheh 		 * list. For depth > 0 trees, we've already recorded
1609ccd979bdSMark Fasheh 		 * the extent block in 'next_eb' */
1610ccd979bdSMark Fasheh 		el->l_recs[i].e_cpos = 0;
1611ccd979bdSMark Fasheh 		el->l_recs[i].e_blkno = 0;
1612ccd979bdSMark Fasheh 		BUG_ON(!el->l_next_free_rec);
1613ccd979bdSMark Fasheh 		le16_add_cpu(&el->l_next_free_rec, -1);
1614ccd979bdSMark Fasheh 	}
1615ccd979bdSMark Fasheh 
1616ccd979bdSMark Fasheh 	depth = le16_to_cpu(el->l_tree_depth);
1617ccd979bdSMark Fasheh 	if (!fe->i_clusters) {
1618ccd979bdSMark Fasheh 		/* trunc to zero is a special case. */
1619ccd979bdSMark Fasheh 		el->l_tree_depth = 0;
1620ccd979bdSMark Fasheh 		fe->i_last_eb_blk = 0;
1621ccd979bdSMark Fasheh 	} else if (last_eb)
1622ccd979bdSMark Fasheh 		fe->i_last_eb_blk = last_eb->h_blkno;
1623ccd979bdSMark Fasheh 
1624ccd979bdSMark Fasheh 	status = ocfs2_journal_dirty(handle, fe_bh);
1625ccd979bdSMark Fasheh 	if (status < 0) {
1626ccd979bdSMark Fasheh 		mlog_errno(status);
1627ccd979bdSMark Fasheh 		goto bail;
1628ccd979bdSMark Fasheh 	}
1629ccd979bdSMark Fasheh 
1630ccd979bdSMark Fasheh 	if (last_eb) {
1631ccd979bdSMark Fasheh 		/* If there will be a new last extent block, then by
1632ccd979bdSMark Fasheh 		 * definition, there cannot be any leaves to the right of
1633ccd979bdSMark Fasheh 		 * him. */
1634ccd979bdSMark Fasheh 		status = ocfs2_journal_access(handle, inode, last_eb_bh,
1635ccd979bdSMark Fasheh 					      OCFS2_JOURNAL_ACCESS_WRITE);
1636ccd979bdSMark Fasheh 		if (status < 0) {
1637ccd979bdSMark Fasheh 			mlog_errno(status);
1638ccd979bdSMark Fasheh 			goto bail;
1639ccd979bdSMark Fasheh 		}
1640ccd979bdSMark Fasheh 		last_eb->h_next_leaf_blk = 0;
1641ccd979bdSMark Fasheh 		status = ocfs2_journal_dirty(handle, last_eb_bh);
1642ccd979bdSMark Fasheh 		if (status < 0) {
1643ccd979bdSMark Fasheh 			mlog_errno(status);
1644ccd979bdSMark Fasheh 			goto bail;
1645ccd979bdSMark Fasheh 		}
1646ccd979bdSMark Fasheh 	}
1647ccd979bdSMark Fasheh 
1648ccd979bdSMark Fasheh 	/* if our tree depth > 0, update all the tree blocks below us. */
1649ccd979bdSMark Fasheh 	while (depth) {
1650b0697053SMark Fasheh 		mlog(0, "traveling tree (depth = %d, next_eb = %llu)\n",
1651b0697053SMark Fasheh 		     depth,  (unsigned long long)next_eb);
1652ccd979bdSMark Fasheh 		status = ocfs2_read_block(osb, next_eb, &eb_bh,
1653ccd979bdSMark Fasheh 					  OCFS2_BH_CACHED, inode);
1654ccd979bdSMark Fasheh 		if (status < 0) {
1655ccd979bdSMark Fasheh 			mlog_errno(status);
1656ccd979bdSMark Fasheh 			goto bail;
1657ccd979bdSMark Fasheh 		}
1658ccd979bdSMark Fasheh 		eb = (struct ocfs2_extent_block *)eb_bh->b_data;
1659ccd979bdSMark Fasheh 		if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
1660ccd979bdSMark Fasheh 			OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
1661ccd979bdSMark Fasheh 			status = -EIO;
1662ccd979bdSMark Fasheh 			goto bail;
1663ccd979bdSMark Fasheh 		}
1664ccd979bdSMark Fasheh 		el = &(eb->h_list);
1665ccd979bdSMark Fasheh 
1666ccd979bdSMark Fasheh 		status = ocfs2_journal_access(handle, inode, eb_bh,
1667ccd979bdSMark Fasheh 					      OCFS2_JOURNAL_ACCESS_WRITE);
1668ccd979bdSMark Fasheh 		if (status < 0) {
1669ccd979bdSMark Fasheh 			mlog_errno(status);
1670ccd979bdSMark Fasheh 			goto bail;
1671ccd979bdSMark Fasheh 		}
1672ccd979bdSMark Fasheh 
1673ccd979bdSMark Fasheh 		BUG_ON(le16_to_cpu(el->l_next_free_rec) == 0);
1674ccd979bdSMark Fasheh 		BUG_ON(depth != (le16_to_cpu(el->l_tree_depth) + 1));
1675ccd979bdSMark Fasheh 
1676ccd979bdSMark Fasheh 		i = le16_to_cpu(el->l_next_free_rec) - 1;
1677ccd979bdSMark Fasheh 
1678b0697053SMark Fasheh 		mlog(0, "extent block %llu, before: record %d: "
1679b0697053SMark Fasheh 		     "(%u, %u, %llu), next = %u\n",
1680b0697053SMark Fasheh 		     (unsigned long long)le64_to_cpu(eb->h_blkno), i,
1681ccd979bdSMark Fasheh 		     le32_to_cpu(el->l_recs[i].e_cpos),
1682ccd979bdSMark Fasheh 		     le32_to_cpu(el->l_recs[i].e_clusters),
1683b0697053SMark Fasheh 		     (unsigned long long)le64_to_cpu(el->l_recs[i].e_blkno),
1684ccd979bdSMark Fasheh 		     le16_to_cpu(el->l_next_free_rec));
1685ccd979bdSMark Fasheh 
1686ccd979bdSMark Fasheh 		BUG_ON(le32_to_cpu(el->l_recs[i].e_clusters) < clusters_to_del);
1687ccd979bdSMark Fasheh 		le32_add_cpu(&el->l_recs[i].e_clusters, -clusters_to_del);
1688ccd979bdSMark Fasheh 
1689ccd979bdSMark Fasheh 		next_eb = le64_to_cpu(el->l_recs[i].e_blkno);
1690ccd979bdSMark Fasheh 		/* bottom-most block requires us to delete data.*/
1691ccd979bdSMark Fasheh 		if (!el->l_tree_depth)
1692ccd979bdSMark Fasheh 			delete_blk = le64_to_cpu(el->l_recs[i].e_blkno)
1693ccd979bdSMark Fasheh 				+ ocfs2_clusters_to_blocks(osb->sb,
1694ccd979bdSMark Fasheh 					le32_to_cpu(el->l_recs[i].e_clusters));
1695ccd979bdSMark Fasheh 		if (!el->l_recs[i].e_clusters) {
1696ccd979bdSMark Fasheh 			el->l_recs[i].e_cpos = 0;
1697ccd979bdSMark Fasheh 			el->l_recs[i].e_blkno = 0;
1698ccd979bdSMark Fasheh 			BUG_ON(!el->l_next_free_rec);
1699ccd979bdSMark Fasheh 			le16_add_cpu(&el->l_next_free_rec, -1);
1700ccd979bdSMark Fasheh 		}
1701b0697053SMark Fasheh 		mlog(0, "extent block %llu, after: record %d: "
1702b0697053SMark Fasheh 		     "(%u, %u, %llu), next = %u\n",
1703b0697053SMark Fasheh 		     (unsigned long long)le64_to_cpu(eb->h_blkno), i,
1704ccd979bdSMark Fasheh 		     le32_to_cpu(el->l_recs[i].e_cpos),
1705ccd979bdSMark Fasheh 		     le32_to_cpu(el->l_recs[i].e_clusters),
1706b0697053SMark Fasheh 		     (unsigned long long)le64_to_cpu(el->l_recs[i].e_blkno),
1707ccd979bdSMark Fasheh 		     le16_to_cpu(el->l_next_free_rec));
1708ccd979bdSMark Fasheh 
1709ccd979bdSMark Fasheh 		status = ocfs2_journal_dirty(handle, eb_bh);
1710ccd979bdSMark Fasheh 		if (status < 0) {
1711ccd979bdSMark Fasheh 			mlog_errno(status);
1712ccd979bdSMark Fasheh 			goto bail;
1713ccd979bdSMark Fasheh 		}
1714ccd979bdSMark Fasheh 
1715ccd979bdSMark Fasheh 		if (!el->l_next_free_rec) {
1716ccd979bdSMark Fasheh 			mlog(0, "deleting this extent block.\n");
1717ccd979bdSMark Fasheh 
1718ccd979bdSMark Fasheh 			ocfs2_remove_from_cache(inode, eb_bh);
1719ccd979bdSMark Fasheh 
1720ccd979bdSMark Fasheh 			BUG_ON(eb->h_suballoc_slot);
1721ccd979bdSMark Fasheh 			BUG_ON(el->l_recs[0].e_clusters);
1722ccd979bdSMark Fasheh 			BUG_ON(el->l_recs[0].e_cpos);
1723ccd979bdSMark Fasheh 			BUG_ON(el->l_recs[0].e_blkno);
1724ccd979bdSMark Fasheh 			status = ocfs2_free_extent_block(handle,
1725ccd979bdSMark Fasheh 							 tc->tc_ext_alloc_inode,
1726ccd979bdSMark Fasheh 							 tc->tc_ext_alloc_bh,
1727ccd979bdSMark Fasheh 							 eb);
1728ccd979bdSMark Fasheh 			if (status < 0) {
1729ccd979bdSMark Fasheh 				mlog_errno(status);
1730ccd979bdSMark Fasheh 				goto bail;
1731ccd979bdSMark Fasheh 			}
1732ccd979bdSMark Fasheh 		}
1733ccd979bdSMark Fasheh 		brelse(eb_bh);
1734ccd979bdSMark Fasheh 		eb_bh = NULL;
1735ccd979bdSMark Fasheh 		depth--;
1736ccd979bdSMark Fasheh 	}
1737ccd979bdSMark Fasheh 
1738ccd979bdSMark Fasheh 	BUG_ON(!delete_blk);
1739ccd979bdSMark Fasheh 	status = ocfs2_truncate_log_append(osb, handle, delete_blk,
1740ccd979bdSMark Fasheh 					   clusters_to_del);
1741ccd979bdSMark Fasheh 	if (status < 0) {
1742ccd979bdSMark Fasheh 		mlog_errno(status);
1743ccd979bdSMark Fasheh 		goto bail;
1744ccd979bdSMark Fasheh 	}
1745ccd979bdSMark Fasheh 	status = 0;
1746ccd979bdSMark Fasheh bail:
1747ccd979bdSMark Fasheh 	if (!status)
1748ccd979bdSMark Fasheh 		ocfs2_extent_map_trunc(inode, le32_to_cpu(fe->i_clusters));
1749ccd979bdSMark Fasheh 	else
1750ccd979bdSMark Fasheh 		ocfs2_extent_map_drop(inode, 0);
1751ccd979bdSMark Fasheh 	mlog_exit(status);
1752ccd979bdSMark Fasheh 	return status;
1753ccd979bdSMark Fasheh }
1754ccd979bdSMark Fasheh 
1755ccd979bdSMark Fasheh /*
1756ccd979bdSMark Fasheh  * It is expected, that by the time you call this function,
1757ccd979bdSMark Fasheh  * inode->i_size and fe->i_size have been adjusted.
1758ccd979bdSMark Fasheh  *
1759ccd979bdSMark Fasheh  * WARNING: This will kfree the truncate context
1760ccd979bdSMark Fasheh  */
1761ccd979bdSMark Fasheh int ocfs2_commit_truncate(struct ocfs2_super *osb,
1762ccd979bdSMark Fasheh 			  struct inode *inode,
1763ccd979bdSMark Fasheh 			  struct buffer_head *fe_bh,
1764ccd979bdSMark Fasheh 			  struct ocfs2_truncate_context *tc)
1765ccd979bdSMark Fasheh {
1766ccd979bdSMark Fasheh 	int status, i, credits, tl_sem = 0;
1767ccd979bdSMark Fasheh 	u32 clusters_to_del, target_i_clusters;
1768ccd979bdSMark Fasheh 	u64 last_eb = 0;
1769ccd979bdSMark Fasheh 	struct ocfs2_dinode *fe;
1770ccd979bdSMark Fasheh 	struct ocfs2_extent_block *eb;
1771ccd979bdSMark Fasheh 	struct ocfs2_extent_list *el;
1772ccd979bdSMark Fasheh 	struct buffer_head *last_eb_bh;
1773ccd979bdSMark Fasheh 	struct ocfs2_journal_handle *handle = NULL;
1774ccd979bdSMark Fasheh 	struct inode *tl_inode = osb->osb_tl_inode;
1775ccd979bdSMark Fasheh 
1776ccd979bdSMark Fasheh 	mlog_entry_void();
1777ccd979bdSMark Fasheh 
1778ccd979bdSMark Fasheh 	down_write(&OCFS2_I(inode)->ip_alloc_sem);
1779ccd979bdSMark Fasheh 
1780ccd979bdSMark Fasheh 	target_i_clusters = ocfs2_clusters_for_bytes(osb->sb,
1781ccd979bdSMark Fasheh 						     i_size_read(inode));
1782ccd979bdSMark Fasheh 
1783ccd979bdSMark Fasheh 	last_eb_bh = tc->tc_last_eb_bh;
1784ccd979bdSMark Fasheh 	tc->tc_last_eb_bh = NULL;
1785ccd979bdSMark Fasheh 
1786ccd979bdSMark Fasheh 	fe = (struct ocfs2_dinode *) fe_bh->b_data;
1787ccd979bdSMark Fasheh 
1788ccd979bdSMark Fasheh 	if (fe->id2.i_list.l_tree_depth) {
1789ccd979bdSMark Fasheh 		eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
1790ccd979bdSMark Fasheh 		el = &eb->h_list;
1791ccd979bdSMark Fasheh 	} else
1792ccd979bdSMark Fasheh 		el = &fe->id2.i_list;
1793ccd979bdSMark Fasheh 	last_eb = le64_to_cpu(fe->i_last_eb_blk);
1794ccd979bdSMark Fasheh start:
1795ccd979bdSMark Fasheh 	mlog(0, "ocfs2_commit_truncate: fe->i_clusters = %u, "
1796b0697053SMark Fasheh 	     "last_eb = %llu, fe->i_last_eb_blk = %llu, "
1797ccd979bdSMark Fasheh 	     "fe->id2.i_list.l_tree_depth = %u last_eb_bh = %p\n",
1798b0697053SMark Fasheh 	     le32_to_cpu(fe->i_clusters), (unsigned long long)last_eb,
1799b0697053SMark Fasheh 	     (unsigned long long)le64_to_cpu(fe->i_last_eb_blk),
1800ccd979bdSMark Fasheh 	     le16_to_cpu(fe->id2.i_list.l_tree_depth), last_eb_bh);
1801ccd979bdSMark Fasheh 
1802ccd979bdSMark Fasheh 	if (last_eb != le64_to_cpu(fe->i_last_eb_blk)) {
1803ccd979bdSMark Fasheh 		mlog(0, "last_eb changed!\n");
1804ccd979bdSMark Fasheh 		BUG_ON(!fe->id2.i_list.l_tree_depth);
1805ccd979bdSMark Fasheh 		last_eb = le64_to_cpu(fe->i_last_eb_blk);
1806ccd979bdSMark Fasheh 		/* i_last_eb_blk may have changed, read it if
1807ccd979bdSMark Fasheh 		 * necessary. We don't have to worry about the
1808ccd979bdSMark Fasheh 		 * truncate to zero case here (where there becomes no
1809ccd979bdSMark Fasheh 		 * last_eb) because we never loop back after our work
1810ccd979bdSMark Fasheh 		 * is done. */
1811ccd979bdSMark Fasheh 		if (last_eb_bh) {
1812ccd979bdSMark Fasheh 			brelse(last_eb_bh);
1813ccd979bdSMark Fasheh 			last_eb_bh = NULL;
1814ccd979bdSMark Fasheh 		}
1815ccd979bdSMark Fasheh 
1816ccd979bdSMark Fasheh 		status = ocfs2_read_block(osb, last_eb,
1817ccd979bdSMark Fasheh 					  &last_eb_bh, OCFS2_BH_CACHED,
1818ccd979bdSMark Fasheh 					  inode);
1819ccd979bdSMark Fasheh 		if (status < 0) {
1820ccd979bdSMark Fasheh 			mlog_errno(status);
1821ccd979bdSMark Fasheh 			goto bail;
1822ccd979bdSMark Fasheh 		}
1823ccd979bdSMark Fasheh 		eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
1824ccd979bdSMark Fasheh 		if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
1825ccd979bdSMark Fasheh 			OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
1826ccd979bdSMark Fasheh 			status = -EIO;
1827ccd979bdSMark Fasheh 			goto bail;
1828ccd979bdSMark Fasheh 		}
1829ccd979bdSMark Fasheh 		el = &(eb->h_list);
1830ccd979bdSMark Fasheh 	}
1831ccd979bdSMark Fasheh 
1832ccd979bdSMark Fasheh 	/* by now, el will point to the extent list on the bottom most
1833ccd979bdSMark Fasheh 	 * portion of this tree. */
1834ccd979bdSMark Fasheh 	i = le16_to_cpu(el->l_next_free_rec) - 1;
1835ccd979bdSMark Fasheh 	if (le32_to_cpu(el->l_recs[i].e_cpos) >= target_i_clusters)
1836ccd979bdSMark Fasheh 		clusters_to_del = le32_to_cpu(el->l_recs[i].e_clusters);
1837ccd979bdSMark Fasheh 	else
1838ccd979bdSMark Fasheh 		clusters_to_del = (le32_to_cpu(el->l_recs[i].e_clusters) +
1839ccd979bdSMark Fasheh 				   le32_to_cpu(el->l_recs[i].e_cpos)) -
1840ccd979bdSMark Fasheh 				  target_i_clusters;
1841ccd979bdSMark Fasheh 
1842ccd979bdSMark Fasheh 	mlog(0, "clusters_to_del = %u in this pass\n", clusters_to_del);
1843ccd979bdSMark Fasheh 
18441b1dcc1bSJes Sorensen 	mutex_lock(&tl_inode->i_mutex);
1845ccd979bdSMark Fasheh 	tl_sem = 1;
1846ccd979bdSMark Fasheh 	/* ocfs2_truncate_log_needs_flush guarantees us at least one
1847ccd979bdSMark Fasheh 	 * record is free for use. If there isn't any, we flush to get
1848ccd979bdSMark Fasheh 	 * an empty truncate log.  */
1849ccd979bdSMark Fasheh 	if (ocfs2_truncate_log_needs_flush(osb)) {
1850ccd979bdSMark Fasheh 		status = __ocfs2_flush_truncate_log(osb);
1851ccd979bdSMark Fasheh 		if (status < 0) {
1852ccd979bdSMark Fasheh 			mlog_errno(status);
1853ccd979bdSMark Fasheh 			goto bail;
1854ccd979bdSMark Fasheh 		}
1855ccd979bdSMark Fasheh 	}
1856ccd979bdSMark Fasheh 
1857ccd979bdSMark Fasheh 	credits = ocfs2_calc_tree_trunc_credits(osb->sb, clusters_to_del,
1858ccd979bdSMark Fasheh 						fe, el);
1859ccd979bdSMark Fasheh 	handle = ocfs2_start_trans(osb, NULL, credits);
1860ccd979bdSMark Fasheh 	if (IS_ERR(handle)) {
1861ccd979bdSMark Fasheh 		status = PTR_ERR(handle);
1862ccd979bdSMark Fasheh 		handle = NULL;
1863ccd979bdSMark Fasheh 		mlog_errno(status);
1864ccd979bdSMark Fasheh 		goto bail;
1865ccd979bdSMark Fasheh 	}
1866ccd979bdSMark Fasheh 
1867ccd979bdSMark Fasheh 	inode->i_ctime = inode->i_mtime = CURRENT_TIME;
1868ccd979bdSMark Fasheh 	status = ocfs2_mark_inode_dirty(handle, inode, fe_bh);
1869ccd979bdSMark Fasheh 	if (status < 0)
1870ccd979bdSMark Fasheh 		mlog_errno(status);
1871ccd979bdSMark Fasheh 
1872ccd979bdSMark Fasheh 	status = ocfs2_do_truncate(osb, clusters_to_del, inode, fe_bh,
1873ccd979bdSMark Fasheh 				   last_eb_bh, handle, tc);
1874ccd979bdSMark Fasheh 	if (status < 0) {
1875ccd979bdSMark Fasheh 		mlog_errno(status);
1876ccd979bdSMark Fasheh 		goto bail;
1877ccd979bdSMark Fasheh 	}
1878ccd979bdSMark Fasheh 
18791b1dcc1bSJes Sorensen 	mutex_unlock(&tl_inode->i_mutex);
1880ccd979bdSMark Fasheh 	tl_sem = 0;
1881ccd979bdSMark Fasheh 
1882ccd979bdSMark Fasheh 	ocfs2_commit_trans(handle);
1883ccd979bdSMark Fasheh 	handle = NULL;
1884ccd979bdSMark Fasheh 
1885ccd979bdSMark Fasheh 	BUG_ON(le32_to_cpu(fe->i_clusters) < target_i_clusters);
1886ccd979bdSMark Fasheh 	if (le32_to_cpu(fe->i_clusters) > target_i_clusters)
1887ccd979bdSMark Fasheh 		goto start;
1888ccd979bdSMark Fasheh bail:
1889ccd979bdSMark Fasheh 	up_write(&OCFS2_I(inode)->ip_alloc_sem);
1890ccd979bdSMark Fasheh 
1891ccd979bdSMark Fasheh 	ocfs2_schedule_truncate_log_flush(osb, 1);
1892ccd979bdSMark Fasheh 
1893ccd979bdSMark Fasheh 	if (tl_sem)
18941b1dcc1bSJes Sorensen 		mutex_unlock(&tl_inode->i_mutex);
1895ccd979bdSMark Fasheh 
1896ccd979bdSMark Fasheh 	if (handle)
1897ccd979bdSMark Fasheh 		ocfs2_commit_trans(handle);
1898ccd979bdSMark Fasheh 
1899ccd979bdSMark Fasheh 	if (last_eb_bh)
1900ccd979bdSMark Fasheh 		brelse(last_eb_bh);
1901ccd979bdSMark Fasheh 
1902ccd979bdSMark Fasheh 	/* This will drop the ext_alloc cluster lock for us */
1903ccd979bdSMark Fasheh 	ocfs2_free_truncate_context(tc);
1904ccd979bdSMark Fasheh 
1905ccd979bdSMark Fasheh 	mlog_exit(status);
1906ccd979bdSMark Fasheh 	return status;
1907ccd979bdSMark Fasheh }
1908ccd979bdSMark Fasheh 
1909ccd979bdSMark Fasheh 
1910ccd979bdSMark Fasheh /*
1911ccd979bdSMark Fasheh  * Expects the inode to already be locked. This will figure out which
1912ccd979bdSMark Fasheh  * inodes need to be locked and will put them on the returned truncate
1913ccd979bdSMark Fasheh  * context.
1914ccd979bdSMark Fasheh  */
1915ccd979bdSMark Fasheh int ocfs2_prepare_truncate(struct ocfs2_super *osb,
1916ccd979bdSMark Fasheh 			   struct inode *inode,
1917ccd979bdSMark Fasheh 			   struct buffer_head *fe_bh,
1918ccd979bdSMark Fasheh 			   struct ocfs2_truncate_context **tc)
1919ccd979bdSMark Fasheh {
1920ccd979bdSMark Fasheh 	int status, metadata_delete;
1921ccd979bdSMark Fasheh 	unsigned int new_i_clusters;
1922ccd979bdSMark Fasheh 	struct ocfs2_dinode *fe;
1923ccd979bdSMark Fasheh 	struct ocfs2_extent_block *eb;
1924ccd979bdSMark Fasheh 	struct ocfs2_extent_list *el;
1925ccd979bdSMark Fasheh 	struct buffer_head *last_eb_bh = NULL;
1926ccd979bdSMark Fasheh 	struct inode *ext_alloc_inode = NULL;
1927ccd979bdSMark Fasheh 	struct buffer_head *ext_alloc_bh = NULL;
1928ccd979bdSMark Fasheh 
1929ccd979bdSMark Fasheh 	mlog_entry_void();
1930ccd979bdSMark Fasheh 
1931ccd979bdSMark Fasheh 	*tc = NULL;
1932ccd979bdSMark Fasheh 
1933ccd979bdSMark Fasheh 	new_i_clusters = ocfs2_clusters_for_bytes(osb->sb,
1934ccd979bdSMark Fasheh 						  i_size_read(inode));
1935ccd979bdSMark Fasheh 	fe = (struct ocfs2_dinode *) fe_bh->b_data;
1936ccd979bdSMark Fasheh 
1937ccd979bdSMark Fasheh 	mlog(0, "fe->i_clusters = %u, new_i_clusters = %u, fe->i_size ="
1938b0697053SMark Fasheh 	     "%llu\n", fe->i_clusters, new_i_clusters,
1939b0697053SMark Fasheh 	     (unsigned long long)fe->i_size);
1940ccd979bdSMark Fasheh 
1941ccd979bdSMark Fasheh 	if (le32_to_cpu(fe->i_clusters) <= new_i_clusters) {
1942b0697053SMark Fasheh 		ocfs2_error(inode->i_sb, "Dinode %llu has cluster count "
1943b0697053SMark Fasheh 			    "%u and size %llu whereas struct inode has "
1944ccd979bdSMark Fasheh 			    "cluster count %u and size %llu which caused an "
1945ccd979bdSMark Fasheh 			    "invalid truncate to %u clusters.",
1946b0697053SMark Fasheh 			    (unsigned long long)le64_to_cpu(fe->i_blkno),
1947ccd979bdSMark Fasheh 			    le32_to_cpu(fe->i_clusters),
1948b0697053SMark Fasheh 			    (unsigned long long)le64_to_cpu(fe->i_size),
1949ccd979bdSMark Fasheh 			    OCFS2_I(inode)->ip_clusters, i_size_read(inode),
1950ccd979bdSMark Fasheh 			    new_i_clusters);
1951ccd979bdSMark Fasheh 		mlog_meta_lvb(ML_ERROR, &OCFS2_I(inode)->ip_meta_lockres);
1952ccd979bdSMark Fasheh 		status = -EIO;
1953ccd979bdSMark Fasheh 		goto bail;
1954ccd979bdSMark Fasheh 	}
1955ccd979bdSMark Fasheh 
1956ccd979bdSMark Fasheh 	*tc = kcalloc(1, sizeof(struct ocfs2_truncate_context), GFP_KERNEL);
1957ccd979bdSMark Fasheh 	if (!(*tc)) {
1958ccd979bdSMark Fasheh 		status = -ENOMEM;
1959ccd979bdSMark Fasheh 		mlog_errno(status);
1960ccd979bdSMark Fasheh 		goto bail;
1961ccd979bdSMark Fasheh 	}
1962ccd979bdSMark Fasheh 
1963ccd979bdSMark Fasheh 	metadata_delete = 0;
1964ccd979bdSMark Fasheh 	if (fe->id2.i_list.l_tree_depth) {
1965ccd979bdSMark Fasheh 		/* If we have a tree, then the truncate may result in
1966ccd979bdSMark Fasheh 		 * metadata deletes. Figure this out from the
1967ccd979bdSMark Fasheh 		 * rightmost leaf block.*/
1968ccd979bdSMark Fasheh 		status = ocfs2_read_block(osb, le64_to_cpu(fe->i_last_eb_blk),
1969ccd979bdSMark Fasheh 					  &last_eb_bh, OCFS2_BH_CACHED, inode);
1970ccd979bdSMark Fasheh 		if (status < 0) {
1971ccd979bdSMark Fasheh 			mlog_errno(status);
1972ccd979bdSMark Fasheh 			goto bail;
1973ccd979bdSMark Fasheh 		}
1974ccd979bdSMark Fasheh 		eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
1975ccd979bdSMark Fasheh 		if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
1976ccd979bdSMark Fasheh 			OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
1977ccd979bdSMark Fasheh 
1978ccd979bdSMark Fasheh 			brelse(last_eb_bh);
1979ccd979bdSMark Fasheh 			status = -EIO;
1980ccd979bdSMark Fasheh 			goto bail;
1981ccd979bdSMark Fasheh 		}
1982ccd979bdSMark Fasheh 		el = &(eb->h_list);
1983ccd979bdSMark Fasheh 		if (le32_to_cpu(el->l_recs[0].e_cpos) >= new_i_clusters)
1984ccd979bdSMark Fasheh 			metadata_delete = 1;
1985ccd979bdSMark Fasheh 	}
1986ccd979bdSMark Fasheh 
1987ccd979bdSMark Fasheh 	(*tc)->tc_last_eb_bh = last_eb_bh;
1988ccd979bdSMark Fasheh 
1989ccd979bdSMark Fasheh 	if (metadata_delete) {
1990ccd979bdSMark Fasheh 		mlog(0, "Will have to delete metadata for this trunc. "
1991ccd979bdSMark Fasheh 		     "locking allocator.\n");
1992ccd979bdSMark Fasheh 		ext_alloc_inode = ocfs2_get_system_file_inode(osb, EXTENT_ALLOC_SYSTEM_INODE, 0);
1993ccd979bdSMark Fasheh 		if (!ext_alloc_inode) {
1994ccd979bdSMark Fasheh 			status = -ENOMEM;
1995ccd979bdSMark Fasheh 			mlog_errno(status);
1996ccd979bdSMark Fasheh 			goto bail;
1997ccd979bdSMark Fasheh 		}
1998ccd979bdSMark Fasheh 
19991b1dcc1bSJes Sorensen 		mutex_lock(&ext_alloc_inode->i_mutex);
2000ccd979bdSMark Fasheh 		(*tc)->tc_ext_alloc_inode = ext_alloc_inode;
2001ccd979bdSMark Fasheh 
2002ccd979bdSMark Fasheh 		status = ocfs2_meta_lock(ext_alloc_inode,
2003ccd979bdSMark Fasheh 					 NULL,
2004ccd979bdSMark Fasheh 					 &ext_alloc_bh,
2005ccd979bdSMark Fasheh 					 1);
2006ccd979bdSMark Fasheh 		if (status < 0) {
2007ccd979bdSMark Fasheh 			mlog_errno(status);
2008ccd979bdSMark Fasheh 			goto bail;
2009ccd979bdSMark Fasheh 		}
2010ccd979bdSMark Fasheh 		(*tc)->tc_ext_alloc_bh = ext_alloc_bh;
2011ccd979bdSMark Fasheh 		(*tc)->tc_ext_alloc_locked = 1;
2012ccd979bdSMark Fasheh 	}
2013ccd979bdSMark Fasheh 
2014ccd979bdSMark Fasheh 	status = 0;
2015ccd979bdSMark Fasheh bail:
2016ccd979bdSMark Fasheh 	if (status < 0) {
2017ccd979bdSMark Fasheh 		if (*tc)
2018ccd979bdSMark Fasheh 			ocfs2_free_truncate_context(*tc);
2019ccd979bdSMark Fasheh 		*tc = NULL;
2020ccd979bdSMark Fasheh 	}
2021ccd979bdSMark Fasheh 	mlog_exit_void();
2022ccd979bdSMark Fasheh 	return status;
2023ccd979bdSMark Fasheh }
2024ccd979bdSMark Fasheh 
2025ccd979bdSMark Fasheh static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc)
2026ccd979bdSMark Fasheh {
2027ccd979bdSMark Fasheh 	if (tc->tc_ext_alloc_inode) {
2028ccd979bdSMark Fasheh 		if (tc->tc_ext_alloc_locked)
2029ccd979bdSMark Fasheh 			ocfs2_meta_unlock(tc->tc_ext_alloc_inode, 1);
2030ccd979bdSMark Fasheh 
20311b1dcc1bSJes Sorensen 		mutex_unlock(&tc->tc_ext_alloc_inode->i_mutex);
2032ccd979bdSMark Fasheh 		iput(tc->tc_ext_alloc_inode);
2033ccd979bdSMark Fasheh 	}
2034ccd979bdSMark Fasheh 
2035ccd979bdSMark Fasheh 	if (tc->tc_ext_alloc_bh)
2036ccd979bdSMark Fasheh 		brelse(tc->tc_ext_alloc_bh);
2037ccd979bdSMark Fasheh 
2038ccd979bdSMark Fasheh 	if (tc->tc_last_eb_bh)
2039ccd979bdSMark Fasheh 		brelse(tc->tc_last_eb_bh);
2040ccd979bdSMark Fasheh 
2041ccd979bdSMark Fasheh 	kfree(tc);
2042ccd979bdSMark Fasheh }
2043