xref: /openbmc/linux/fs/ocfs2/alloc.c (revision ccd979bdbce9fba8412beb3f1de68a9d0171b12c)
1*ccd979bdSMark Fasheh /* -*- mode: c; c-basic-offset: 8; -*-
2*ccd979bdSMark Fasheh  * vim: noexpandtab sw=8 ts=8 sts=0:
3*ccd979bdSMark Fasheh  *
4*ccd979bdSMark Fasheh  * alloc.c
5*ccd979bdSMark Fasheh  *
6*ccd979bdSMark Fasheh  * Extent allocs and frees
7*ccd979bdSMark Fasheh  *
8*ccd979bdSMark Fasheh  * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
9*ccd979bdSMark Fasheh  *
10*ccd979bdSMark Fasheh  * This program is free software; you can redistribute it and/or
11*ccd979bdSMark Fasheh  * modify it under the terms of the GNU General Public
12*ccd979bdSMark Fasheh  * License as published by the Free Software Foundation; either
13*ccd979bdSMark Fasheh  * version 2 of the License, or (at your option) any later version.
14*ccd979bdSMark Fasheh  *
15*ccd979bdSMark Fasheh  * This program is distributed in the hope that it will be useful,
16*ccd979bdSMark Fasheh  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17*ccd979bdSMark Fasheh  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18*ccd979bdSMark Fasheh  * General Public License for more details.
19*ccd979bdSMark Fasheh  *
20*ccd979bdSMark Fasheh  * You should have received a copy of the GNU General Public
21*ccd979bdSMark Fasheh  * License along with this program; if not, write to the
22*ccd979bdSMark Fasheh  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23*ccd979bdSMark Fasheh  * Boston, MA 021110-1307, USA.
24*ccd979bdSMark Fasheh  */
25*ccd979bdSMark Fasheh 
26*ccd979bdSMark Fasheh #include <linux/fs.h>
27*ccd979bdSMark Fasheh #include <linux/types.h>
28*ccd979bdSMark Fasheh #include <linux/slab.h>
29*ccd979bdSMark Fasheh #include <linux/highmem.h>
30*ccd979bdSMark Fasheh 
31*ccd979bdSMark Fasheh #define MLOG_MASK_PREFIX ML_DISK_ALLOC
32*ccd979bdSMark Fasheh #include <cluster/masklog.h>
33*ccd979bdSMark Fasheh 
34*ccd979bdSMark Fasheh #include "ocfs2.h"
35*ccd979bdSMark Fasheh 
36*ccd979bdSMark Fasheh #include "alloc.h"
37*ccd979bdSMark Fasheh #include "dlmglue.h"
38*ccd979bdSMark Fasheh #include "extent_map.h"
39*ccd979bdSMark Fasheh #include "inode.h"
40*ccd979bdSMark Fasheh #include "journal.h"
41*ccd979bdSMark Fasheh #include "localalloc.h"
42*ccd979bdSMark Fasheh #include "suballoc.h"
43*ccd979bdSMark Fasheh #include "sysfile.h"
44*ccd979bdSMark Fasheh #include "file.h"
45*ccd979bdSMark Fasheh #include "super.h"
46*ccd979bdSMark Fasheh #include "uptodate.h"
47*ccd979bdSMark Fasheh 
48*ccd979bdSMark Fasheh #include "buffer_head_io.h"
49*ccd979bdSMark Fasheh 
50*ccd979bdSMark Fasheh static int ocfs2_extent_contig(struct inode *inode,
51*ccd979bdSMark Fasheh 			       struct ocfs2_extent_rec *ext,
52*ccd979bdSMark Fasheh 			       u64 blkno);
53*ccd979bdSMark Fasheh 
54*ccd979bdSMark Fasheh static int ocfs2_create_new_meta_bhs(struct ocfs2_super *osb,
55*ccd979bdSMark Fasheh 				     struct ocfs2_journal_handle *handle,
56*ccd979bdSMark Fasheh 				     struct inode *inode,
57*ccd979bdSMark Fasheh 				     int wanted,
58*ccd979bdSMark Fasheh 				     struct ocfs2_alloc_context *meta_ac,
59*ccd979bdSMark Fasheh 				     struct buffer_head *bhs[]);
60*ccd979bdSMark Fasheh 
61*ccd979bdSMark Fasheh static int ocfs2_add_branch(struct ocfs2_super *osb,
62*ccd979bdSMark Fasheh 			    struct ocfs2_journal_handle *handle,
63*ccd979bdSMark Fasheh 			    struct inode *inode,
64*ccd979bdSMark Fasheh 			    struct buffer_head *fe_bh,
65*ccd979bdSMark Fasheh 			    struct buffer_head *eb_bh,
66*ccd979bdSMark Fasheh 			    struct buffer_head *last_eb_bh,
67*ccd979bdSMark Fasheh 			    struct ocfs2_alloc_context *meta_ac);
68*ccd979bdSMark Fasheh 
69*ccd979bdSMark Fasheh static int ocfs2_shift_tree_depth(struct ocfs2_super *osb,
70*ccd979bdSMark Fasheh 				  struct ocfs2_journal_handle *handle,
71*ccd979bdSMark Fasheh 				  struct inode *inode,
72*ccd979bdSMark Fasheh 				  struct buffer_head *fe_bh,
73*ccd979bdSMark Fasheh 				  struct ocfs2_alloc_context *meta_ac,
74*ccd979bdSMark Fasheh 				  struct buffer_head **ret_new_eb_bh);
75*ccd979bdSMark Fasheh 
76*ccd979bdSMark Fasheh static int ocfs2_do_insert_extent(struct ocfs2_super *osb,
77*ccd979bdSMark Fasheh 				  struct ocfs2_journal_handle *handle,
78*ccd979bdSMark Fasheh 				  struct inode *inode,
79*ccd979bdSMark Fasheh 				  struct buffer_head *fe_bh,
80*ccd979bdSMark Fasheh 				  u64 blkno,
81*ccd979bdSMark Fasheh 				  u32 new_clusters);
82*ccd979bdSMark Fasheh 
83*ccd979bdSMark Fasheh static int ocfs2_find_branch_target(struct ocfs2_super *osb,
84*ccd979bdSMark Fasheh 				    struct inode *inode,
85*ccd979bdSMark Fasheh 				    struct buffer_head *fe_bh,
86*ccd979bdSMark Fasheh 				    struct buffer_head **target_bh);
87*ccd979bdSMark Fasheh 
88*ccd979bdSMark Fasheh static int ocfs2_find_new_last_ext_blk(struct ocfs2_super *osb,
89*ccd979bdSMark Fasheh 				       struct inode *inode,
90*ccd979bdSMark Fasheh 				       struct ocfs2_dinode *fe,
91*ccd979bdSMark Fasheh 				       unsigned int new_i_clusters,
92*ccd979bdSMark Fasheh 				       struct buffer_head *old_last_eb,
93*ccd979bdSMark Fasheh 				       struct buffer_head **new_last_eb);
94*ccd979bdSMark Fasheh 
95*ccd979bdSMark Fasheh static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc);
96*ccd979bdSMark Fasheh 
97*ccd979bdSMark Fasheh static int ocfs2_extent_contig(struct inode *inode,
98*ccd979bdSMark Fasheh 			       struct ocfs2_extent_rec *ext,
99*ccd979bdSMark Fasheh 			       u64 blkno)
100*ccd979bdSMark Fasheh {
101*ccd979bdSMark Fasheh 	return blkno == (le64_to_cpu(ext->e_blkno) +
102*ccd979bdSMark Fasheh 			 ocfs2_clusters_to_blocks(inode->i_sb,
103*ccd979bdSMark Fasheh 						  le32_to_cpu(ext->e_clusters)));
104*ccd979bdSMark Fasheh }
105*ccd979bdSMark Fasheh 
106*ccd979bdSMark Fasheh /*
107*ccd979bdSMark Fasheh  * How many free extents have we got before we need more meta data?
108*ccd979bdSMark Fasheh  */
109*ccd979bdSMark Fasheh int ocfs2_num_free_extents(struct ocfs2_super *osb,
110*ccd979bdSMark Fasheh 			   struct inode *inode,
111*ccd979bdSMark Fasheh 			   struct ocfs2_dinode *fe)
112*ccd979bdSMark Fasheh {
113*ccd979bdSMark Fasheh 	int retval;
114*ccd979bdSMark Fasheh 	struct ocfs2_extent_list *el;
115*ccd979bdSMark Fasheh 	struct ocfs2_extent_block *eb;
116*ccd979bdSMark Fasheh 	struct buffer_head *eb_bh = NULL;
117*ccd979bdSMark Fasheh 
118*ccd979bdSMark Fasheh 	mlog_entry_void();
119*ccd979bdSMark Fasheh 
120*ccd979bdSMark Fasheh 	if (!OCFS2_IS_VALID_DINODE(fe)) {
121*ccd979bdSMark Fasheh 		OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe);
122*ccd979bdSMark Fasheh 		retval = -EIO;
123*ccd979bdSMark Fasheh 		goto bail;
124*ccd979bdSMark Fasheh 	}
125*ccd979bdSMark Fasheh 
126*ccd979bdSMark Fasheh 	if (fe->i_last_eb_blk) {
127*ccd979bdSMark Fasheh 		retval = ocfs2_read_block(osb, le64_to_cpu(fe->i_last_eb_blk),
128*ccd979bdSMark Fasheh 					  &eb_bh, OCFS2_BH_CACHED, inode);
129*ccd979bdSMark Fasheh 		if (retval < 0) {
130*ccd979bdSMark Fasheh 			mlog_errno(retval);
131*ccd979bdSMark Fasheh 			goto bail;
132*ccd979bdSMark Fasheh 		}
133*ccd979bdSMark Fasheh 		eb = (struct ocfs2_extent_block *) eb_bh->b_data;
134*ccd979bdSMark Fasheh 		el = &eb->h_list;
135*ccd979bdSMark Fasheh 	} else
136*ccd979bdSMark Fasheh 		el = &fe->id2.i_list;
137*ccd979bdSMark Fasheh 
138*ccd979bdSMark Fasheh 	BUG_ON(el->l_tree_depth != 0);
139*ccd979bdSMark Fasheh 
140*ccd979bdSMark Fasheh 	retval = le16_to_cpu(el->l_count) - le16_to_cpu(el->l_next_free_rec);
141*ccd979bdSMark Fasheh bail:
142*ccd979bdSMark Fasheh 	if (eb_bh)
143*ccd979bdSMark Fasheh 		brelse(eb_bh);
144*ccd979bdSMark Fasheh 
145*ccd979bdSMark Fasheh 	mlog_exit(retval);
146*ccd979bdSMark Fasheh 	return retval;
147*ccd979bdSMark Fasheh }
148*ccd979bdSMark Fasheh 
149*ccd979bdSMark Fasheh /* expects array to already be allocated
150*ccd979bdSMark Fasheh  *
151*ccd979bdSMark Fasheh  * sets h_signature, h_blkno, h_suballoc_bit, h_suballoc_slot, and
152*ccd979bdSMark Fasheh  * l_count for you
153*ccd979bdSMark Fasheh  */
154*ccd979bdSMark Fasheh static int ocfs2_create_new_meta_bhs(struct ocfs2_super *osb,
155*ccd979bdSMark Fasheh 				     struct ocfs2_journal_handle *handle,
156*ccd979bdSMark Fasheh 				     struct inode *inode,
157*ccd979bdSMark Fasheh 				     int wanted,
158*ccd979bdSMark Fasheh 				     struct ocfs2_alloc_context *meta_ac,
159*ccd979bdSMark Fasheh 				     struct buffer_head *bhs[])
160*ccd979bdSMark Fasheh {
161*ccd979bdSMark Fasheh 	int count, status, i;
162*ccd979bdSMark Fasheh 	u16 suballoc_bit_start;
163*ccd979bdSMark Fasheh 	u32 num_got;
164*ccd979bdSMark Fasheh 	u64 first_blkno;
165*ccd979bdSMark Fasheh 	struct ocfs2_extent_block *eb;
166*ccd979bdSMark Fasheh 
167*ccd979bdSMark Fasheh 	mlog_entry_void();
168*ccd979bdSMark Fasheh 
169*ccd979bdSMark Fasheh 	count = 0;
170*ccd979bdSMark Fasheh 	while (count < wanted) {
171*ccd979bdSMark Fasheh 		status = ocfs2_claim_metadata(osb,
172*ccd979bdSMark Fasheh 					      handle,
173*ccd979bdSMark Fasheh 					      meta_ac,
174*ccd979bdSMark Fasheh 					      wanted - count,
175*ccd979bdSMark Fasheh 					      &suballoc_bit_start,
176*ccd979bdSMark Fasheh 					      &num_got,
177*ccd979bdSMark Fasheh 					      &first_blkno);
178*ccd979bdSMark Fasheh 		if (status < 0) {
179*ccd979bdSMark Fasheh 			mlog_errno(status);
180*ccd979bdSMark Fasheh 			goto bail;
181*ccd979bdSMark Fasheh 		}
182*ccd979bdSMark Fasheh 
183*ccd979bdSMark Fasheh 		for(i = count;  i < (num_got + count); i++) {
184*ccd979bdSMark Fasheh 			bhs[i] = sb_getblk(osb->sb, first_blkno);
185*ccd979bdSMark Fasheh 			if (bhs[i] == NULL) {
186*ccd979bdSMark Fasheh 				status = -EIO;
187*ccd979bdSMark Fasheh 				mlog_errno(status);
188*ccd979bdSMark Fasheh 				goto bail;
189*ccd979bdSMark Fasheh 			}
190*ccd979bdSMark Fasheh 			ocfs2_set_new_buffer_uptodate(inode, bhs[i]);
191*ccd979bdSMark Fasheh 
192*ccd979bdSMark Fasheh 			status = ocfs2_journal_access(handle, inode, bhs[i],
193*ccd979bdSMark Fasheh 						      OCFS2_JOURNAL_ACCESS_CREATE);
194*ccd979bdSMark Fasheh 			if (status < 0) {
195*ccd979bdSMark Fasheh 				mlog_errno(status);
196*ccd979bdSMark Fasheh 				goto bail;
197*ccd979bdSMark Fasheh 			}
198*ccd979bdSMark Fasheh 
199*ccd979bdSMark Fasheh 			memset(bhs[i]->b_data, 0, osb->sb->s_blocksize);
200*ccd979bdSMark Fasheh 			eb = (struct ocfs2_extent_block *) bhs[i]->b_data;
201*ccd979bdSMark Fasheh 			/* Ok, setup the minimal stuff here. */
202*ccd979bdSMark Fasheh 			strcpy(eb->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE);
203*ccd979bdSMark Fasheh 			eb->h_blkno = cpu_to_le64(first_blkno);
204*ccd979bdSMark Fasheh 			eb->h_fs_generation = cpu_to_le32(osb->fs_generation);
205*ccd979bdSMark Fasheh 
206*ccd979bdSMark Fasheh #ifndef OCFS2_USE_ALL_METADATA_SUBALLOCATORS
207*ccd979bdSMark Fasheh 			/* we always use slot zero's suballocator */
208*ccd979bdSMark Fasheh 			eb->h_suballoc_slot = 0;
209*ccd979bdSMark Fasheh #else
210*ccd979bdSMark Fasheh 			eb->h_suballoc_slot = cpu_to_le16(osb->slot_num);
211*ccd979bdSMark Fasheh #endif
212*ccd979bdSMark Fasheh 			eb->h_suballoc_bit = cpu_to_le16(suballoc_bit_start);
213*ccd979bdSMark Fasheh 			eb->h_list.l_count =
214*ccd979bdSMark Fasheh 				cpu_to_le16(ocfs2_extent_recs_per_eb(osb->sb));
215*ccd979bdSMark Fasheh 
216*ccd979bdSMark Fasheh 			suballoc_bit_start++;
217*ccd979bdSMark Fasheh 			first_blkno++;
218*ccd979bdSMark Fasheh 
219*ccd979bdSMark Fasheh 			/* We'll also be dirtied by the caller, so
220*ccd979bdSMark Fasheh 			 * this isn't absolutely necessary. */
221*ccd979bdSMark Fasheh 			status = ocfs2_journal_dirty(handle, bhs[i]);
222*ccd979bdSMark Fasheh 			if (status < 0) {
223*ccd979bdSMark Fasheh 				mlog_errno(status);
224*ccd979bdSMark Fasheh 				goto bail;
225*ccd979bdSMark Fasheh 			}
226*ccd979bdSMark Fasheh 		}
227*ccd979bdSMark Fasheh 
228*ccd979bdSMark Fasheh 		count += num_got;
229*ccd979bdSMark Fasheh 	}
230*ccd979bdSMark Fasheh 
231*ccd979bdSMark Fasheh 	status = 0;
232*ccd979bdSMark Fasheh bail:
233*ccd979bdSMark Fasheh 	if (status < 0) {
234*ccd979bdSMark Fasheh 		for(i = 0; i < wanted; i++) {
235*ccd979bdSMark Fasheh 			if (bhs[i])
236*ccd979bdSMark Fasheh 				brelse(bhs[i]);
237*ccd979bdSMark Fasheh 			bhs[i] = NULL;
238*ccd979bdSMark Fasheh 		}
239*ccd979bdSMark Fasheh 	}
240*ccd979bdSMark Fasheh 	mlog_exit(status);
241*ccd979bdSMark Fasheh 	return status;
242*ccd979bdSMark Fasheh }
243*ccd979bdSMark Fasheh 
244*ccd979bdSMark Fasheh /*
245*ccd979bdSMark Fasheh  * Add an entire tree branch to our inode. eb_bh is the extent block
246*ccd979bdSMark Fasheh  * to start at, if we don't want to start the branch at the dinode
247*ccd979bdSMark Fasheh  * structure.
248*ccd979bdSMark Fasheh  *
249*ccd979bdSMark Fasheh  * last_eb_bh is required as we have to update it's next_leaf pointer
250*ccd979bdSMark Fasheh  * for the new last extent block.
251*ccd979bdSMark Fasheh  *
252*ccd979bdSMark Fasheh  * the new branch will be 'empty' in the sense that every block will
253*ccd979bdSMark Fasheh  * contain a single record with e_clusters == 0.
254*ccd979bdSMark Fasheh  */
255*ccd979bdSMark Fasheh static int ocfs2_add_branch(struct ocfs2_super *osb,
256*ccd979bdSMark Fasheh 			    struct ocfs2_journal_handle *handle,
257*ccd979bdSMark Fasheh 			    struct inode *inode,
258*ccd979bdSMark Fasheh 			    struct buffer_head *fe_bh,
259*ccd979bdSMark Fasheh 			    struct buffer_head *eb_bh,
260*ccd979bdSMark Fasheh 			    struct buffer_head *last_eb_bh,
261*ccd979bdSMark Fasheh 			    struct ocfs2_alloc_context *meta_ac)
262*ccd979bdSMark Fasheh {
263*ccd979bdSMark Fasheh 	int status, new_blocks, i;
264*ccd979bdSMark Fasheh 	u64 next_blkno, new_last_eb_blk;
265*ccd979bdSMark Fasheh 	struct buffer_head *bh;
266*ccd979bdSMark Fasheh 	struct buffer_head **new_eb_bhs = NULL;
267*ccd979bdSMark Fasheh 	struct ocfs2_dinode *fe;
268*ccd979bdSMark Fasheh 	struct ocfs2_extent_block *eb;
269*ccd979bdSMark Fasheh 	struct ocfs2_extent_list  *eb_el;
270*ccd979bdSMark Fasheh 	struct ocfs2_extent_list  *el;
271*ccd979bdSMark Fasheh 
272*ccd979bdSMark Fasheh 	mlog_entry_void();
273*ccd979bdSMark Fasheh 
274*ccd979bdSMark Fasheh 	BUG_ON(!last_eb_bh);
275*ccd979bdSMark Fasheh 
276*ccd979bdSMark Fasheh 	fe = (struct ocfs2_dinode *) fe_bh->b_data;
277*ccd979bdSMark Fasheh 
278*ccd979bdSMark Fasheh 	if (eb_bh) {
279*ccd979bdSMark Fasheh 		eb = (struct ocfs2_extent_block *) eb_bh->b_data;
280*ccd979bdSMark Fasheh 		el = &eb->h_list;
281*ccd979bdSMark Fasheh 	} else
282*ccd979bdSMark Fasheh 		el = &fe->id2.i_list;
283*ccd979bdSMark Fasheh 
284*ccd979bdSMark Fasheh 	/* we never add a branch to a leaf. */
285*ccd979bdSMark Fasheh 	BUG_ON(!el->l_tree_depth);
286*ccd979bdSMark Fasheh 
287*ccd979bdSMark Fasheh 	new_blocks = le16_to_cpu(el->l_tree_depth);
288*ccd979bdSMark Fasheh 
289*ccd979bdSMark Fasheh 	/* allocate the number of new eb blocks we need */
290*ccd979bdSMark Fasheh 	new_eb_bhs = kcalloc(new_blocks, sizeof(struct buffer_head *),
291*ccd979bdSMark Fasheh 			     GFP_KERNEL);
292*ccd979bdSMark Fasheh 	if (!new_eb_bhs) {
293*ccd979bdSMark Fasheh 		status = -ENOMEM;
294*ccd979bdSMark Fasheh 		mlog_errno(status);
295*ccd979bdSMark Fasheh 		goto bail;
296*ccd979bdSMark Fasheh 	}
297*ccd979bdSMark Fasheh 
298*ccd979bdSMark Fasheh 	status = ocfs2_create_new_meta_bhs(osb, handle, inode, new_blocks,
299*ccd979bdSMark Fasheh 					   meta_ac, new_eb_bhs);
300*ccd979bdSMark Fasheh 	if (status < 0) {
301*ccd979bdSMark Fasheh 		mlog_errno(status);
302*ccd979bdSMark Fasheh 		goto bail;
303*ccd979bdSMark Fasheh 	}
304*ccd979bdSMark Fasheh 
305*ccd979bdSMark Fasheh 	/* Note: new_eb_bhs[new_blocks - 1] is the guy which will be
306*ccd979bdSMark Fasheh 	 * linked with the rest of the tree.
307*ccd979bdSMark Fasheh 	 * conversly, new_eb_bhs[0] is the new bottommost leaf.
308*ccd979bdSMark Fasheh 	 *
309*ccd979bdSMark Fasheh 	 * when we leave the loop, new_last_eb_blk will point to the
310*ccd979bdSMark Fasheh 	 * newest leaf, and next_blkno will point to the topmost extent
311*ccd979bdSMark Fasheh 	 * block. */
312*ccd979bdSMark Fasheh 	next_blkno = new_last_eb_blk = 0;
313*ccd979bdSMark Fasheh 	for(i = 0; i < new_blocks; i++) {
314*ccd979bdSMark Fasheh 		bh = new_eb_bhs[i];
315*ccd979bdSMark Fasheh 		eb = (struct ocfs2_extent_block *) bh->b_data;
316*ccd979bdSMark Fasheh 		if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
317*ccd979bdSMark Fasheh 			OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
318*ccd979bdSMark Fasheh 			status = -EIO;
319*ccd979bdSMark Fasheh 			goto bail;
320*ccd979bdSMark Fasheh 		}
321*ccd979bdSMark Fasheh 		eb_el = &eb->h_list;
322*ccd979bdSMark Fasheh 
323*ccd979bdSMark Fasheh 		status = ocfs2_journal_access(handle, inode, bh,
324*ccd979bdSMark Fasheh 					      OCFS2_JOURNAL_ACCESS_CREATE);
325*ccd979bdSMark Fasheh 		if (status < 0) {
326*ccd979bdSMark Fasheh 			mlog_errno(status);
327*ccd979bdSMark Fasheh 			goto bail;
328*ccd979bdSMark Fasheh 		}
329*ccd979bdSMark Fasheh 
330*ccd979bdSMark Fasheh 		eb->h_next_leaf_blk = 0;
331*ccd979bdSMark Fasheh 		eb_el->l_tree_depth = cpu_to_le16(i);
332*ccd979bdSMark Fasheh 		eb_el->l_next_free_rec = cpu_to_le16(1);
333*ccd979bdSMark Fasheh 		eb_el->l_recs[0].e_cpos = fe->i_clusters;
334*ccd979bdSMark Fasheh 		eb_el->l_recs[0].e_blkno = cpu_to_le64(next_blkno);
335*ccd979bdSMark Fasheh 		eb_el->l_recs[0].e_clusters = cpu_to_le32(0);
336*ccd979bdSMark Fasheh 		if (!eb_el->l_tree_depth)
337*ccd979bdSMark Fasheh 			new_last_eb_blk = le64_to_cpu(eb->h_blkno);
338*ccd979bdSMark Fasheh 
339*ccd979bdSMark Fasheh 		status = ocfs2_journal_dirty(handle, bh);
340*ccd979bdSMark Fasheh 		if (status < 0) {
341*ccd979bdSMark Fasheh 			mlog_errno(status);
342*ccd979bdSMark Fasheh 			goto bail;
343*ccd979bdSMark Fasheh 		}
344*ccd979bdSMark Fasheh 
345*ccd979bdSMark Fasheh 		next_blkno = le64_to_cpu(eb->h_blkno);
346*ccd979bdSMark Fasheh 	}
347*ccd979bdSMark Fasheh 
348*ccd979bdSMark Fasheh 	/* This is a bit hairy. We want to update up to three blocks
349*ccd979bdSMark Fasheh 	 * here without leaving any of them in an inconsistent state
350*ccd979bdSMark Fasheh 	 * in case of error. We don't have to worry about
351*ccd979bdSMark Fasheh 	 * journal_dirty erroring as it won't unless we've aborted the
352*ccd979bdSMark Fasheh 	 * handle (in which case we would never be here) so reserving
353*ccd979bdSMark Fasheh 	 * the write with journal_access is all we need to do. */
354*ccd979bdSMark Fasheh 	status = ocfs2_journal_access(handle, inode, last_eb_bh,
355*ccd979bdSMark Fasheh 				      OCFS2_JOURNAL_ACCESS_WRITE);
356*ccd979bdSMark Fasheh 	if (status < 0) {
357*ccd979bdSMark Fasheh 		mlog_errno(status);
358*ccd979bdSMark Fasheh 		goto bail;
359*ccd979bdSMark Fasheh 	}
360*ccd979bdSMark Fasheh 	status = ocfs2_journal_access(handle, inode, fe_bh,
361*ccd979bdSMark Fasheh 				      OCFS2_JOURNAL_ACCESS_WRITE);
362*ccd979bdSMark Fasheh 	if (status < 0) {
363*ccd979bdSMark Fasheh 		mlog_errno(status);
364*ccd979bdSMark Fasheh 		goto bail;
365*ccd979bdSMark Fasheh 	}
366*ccd979bdSMark Fasheh 	if (eb_bh) {
367*ccd979bdSMark Fasheh 		status = ocfs2_journal_access(handle, inode, eb_bh,
368*ccd979bdSMark Fasheh 					      OCFS2_JOURNAL_ACCESS_WRITE);
369*ccd979bdSMark Fasheh 		if (status < 0) {
370*ccd979bdSMark Fasheh 			mlog_errno(status);
371*ccd979bdSMark Fasheh 			goto bail;
372*ccd979bdSMark Fasheh 		}
373*ccd979bdSMark Fasheh 	}
374*ccd979bdSMark Fasheh 
375*ccd979bdSMark Fasheh 	/* Link the new branch into the rest of the tree (el will
376*ccd979bdSMark Fasheh 	 * either be on the fe, or the extent block passed in. */
377*ccd979bdSMark Fasheh 	i = le16_to_cpu(el->l_next_free_rec);
378*ccd979bdSMark Fasheh 	el->l_recs[i].e_blkno = cpu_to_le64(next_blkno);
379*ccd979bdSMark Fasheh 	el->l_recs[i].e_cpos = fe->i_clusters;
380*ccd979bdSMark Fasheh 	el->l_recs[i].e_clusters = 0;
381*ccd979bdSMark Fasheh 	le16_add_cpu(&el->l_next_free_rec, 1);
382*ccd979bdSMark Fasheh 
383*ccd979bdSMark Fasheh 	/* fe needs a new last extent block pointer, as does the
384*ccd979bdSMark Fasheh 	 * next_leaf on the previously last-extent-block. */
385*ccd979bdSMark Fasheh 	fe->i_last_eb_blk = cpu_to_le64(new_last_eb_blk);
386*ccd979bdSMark Fasheh 
387*ccd979bdSMark Fasheh 	eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
388*ccd979bdSMark Fasheh 	eb->h_next_leaf_blk = cpu_to_le64(new_last_eb_blk);
389*ccd979bdSMark Fasheh 
390*ccd979bdSMark Fasheh 	status = ocfs2_journal_dirty(handle, last_eb_bh);
391*ccd979bdSMark Fasheh 	if (status < 0)
392*ccd979bdSMark Fasheh 		mlog_errno(status);
393*ccd979bdSMark Fasheh 	status = ocfs2_journal_dirty(handle, fe_bh);
394*ccd979bdSMark Fasheh 	if (status < 0)
395*ccd979bdSMark Fasheh 		mlog_errno(status);
396*ccd979bdSMark Fasheh 	if (eb_bh) {
397*ccd979bdSMark Fasheh 		status = ocfs2_journal_dirty(handle, eb_bh);
398*ccd979bdSMark Fasheh 		if (status < 0)
399*ccd979bdSMark Fasheh 			mlog_errno(status);
400*ccd979bdSMark Fasheh 	}
401*ccd979bdSMark Fasheh 
402*ccd979bdSMark Fasheh 	status = 0;
403*ccd979bdSMark Fasheh bail:
404*ccd979bdSMark Fasheh 	if (new_eb_bhs) {
405*ccd979bdSMark Fasheh 		for (i = 0; i < new_blocks; i++)
406*ccd979bdSMark Fasheh 			if (new_eb_bhs[i])
407*ccd979bdSMark Fasheh 				brelse(new_eb_bhs[i]);
408*ccd979bdSMark Fasheh 		kfree(new_eb_bhs);
409*ccd979bdSMark Fasheh 	}
410*ccd979bdSMark Fasheh 
411*ccd979bdSMark Fasheh 	mlog_exit(status);
412*ccd979bdSMark Fasheh 	return status;
413*ccd979bdSMark Fasheh }
414*ccd979bdSMark Fasheh 
415*ccd979bdSMark Fasheh /*
416*ccd979bdSMark Fasheh  * adds another level to the allocation tree.
417*ccd979bdSMark Fasheh  * returns back the new extent block so you can add a branch to it
418*ccd979bdSMark Fasheh  * after this call.
419*ccd979bdSMark Fasheh  */
420*ccd979bdSMark Fasheh static int ocfs2_shift_tree_depth(struct ocfs2_super *osb,
421*ccd979bdSMark Fasheh 				  struct ocfs2_journal_handle *handle,
422*ccd979bdSMark Fasheh 				  struct inode *inode,
423*ccd979bdSMark Fasheh 				  struct buffer_head *fe_bh,
424*ccd979bdSMark Fasheh 				  struct ocfs2_alloc_context *meta_ac,
425*ccd979bdSMark Fasheh 				  struct buffer_head **ret_new_eb_bh)
426*ccd979bdSMark Fasheh {
427*ccd979bdSMark Fasheh 	int status, i;
428*ccd979bdSMark Fasheh 	struct buffer_head *new_eb_bh = NULL;
429*ccd979bdSMark Fasheh 	struct ocfs2_dinode *fe;
430*ccd979bdSMark Fasheh 	struct ocfs2_extent_block *eb;
431*ccd979bdSMark Fasheh 	struct ocfs2_extent_list  *fe_el;
432*ccd979bdSMark Fasheh 	struct ocfs2_extent_list  *eb_el;
433*ccd979bdSMark Fasheh 
434*ccd979bdSMark Fasheh 	mlog_entry_void();
435*ccd979bdSMark Fasheh 
436*ccd979bdSMark Fasheh 	status = ocfs2_create_new_meta_bhs(osb, handle, inode, 1, meta_ac,
437*ccd979bdSMark Fasheh 					   &new_eb_bh);
438*ccd979bdSMark Fasheh 	if (status < 0) {
439*ccd979bdSMark Fasheh 		mlog_errno(status);
440*ccd979bdSMark Fasheh 		goto bail;
441*ccd979bdSMark Fasheh 	}
442*ccd979bdSMark Fasheh 
443*ccd979bdSMark Fasheh 	eb = (struct ocfs2_extent_block *) new_eb_bh->b_data;
444*ccd979bdSMark Fasheh 	if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
445*ccd979bdSMark Fasheh 		OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
446*ccd979bdSMark Fasheh 		status = -EIO;
447*ccd979bdSMark Fasheh 		goto bail;
448*ccd979bdSMark Fasheh 	}
449*ccd979bdSMark Fasheh 
450*ccd979bdSMark Fasheh 	eb_el = &eb->h_list;
451*ccd979bdSMark Fasheh 	fe = (struct ocfs2_dinode *) fe_bh->b_data;
452*ccd979bdSMark Fasheh 	fe_el = &fe->id2.i_list;
453*ccd979bdSMark Fasheh 
454*ccd979bdSMark Fasheh 	status = ocfs2_journal_access(handle, inode, new_eb_bh,
455*ccd979bdSMark Fasheh 				      OCFS2_JOURNAL_ACCESS_CREATE);
456*ccd979bdSMark Fasheh 	if (status < 0) {
457*ccd979bdSMark Fasheh 		mlog_errno(status);
458*ccd979bdSMark Fasheh 		goto bail;
459*ccd979bdSMark Fasheh 	}
460*ccd979bdSMark Fasheh 
461*ccd979bdSMark Fasheh 	/* copy the fe data into the new extent block */
462*ccd979bdSMark Fasheh 	eb_el->l_tree_depth = fe_el->l_tree_depth;
463*ccd979bdSMark Fasheh 	eb_el->l_next_free_rec = fe_el->l_next_free_rec;
464*ccd979bdSMark Fasheh 	for(i = 0; i < le16_to_cpu(fe_el->l_next_free_rec); i++) {
465*ccd979bdSMark Fasheh 		eb_el->l_recs[i].e_cpos = fe_el->l_recs[i].e_cpos;
466*ccd979bdSMark Fasheh 		eb_el->l_recs[i].e_clusters = fe_el->l_recs[i].e_clusters;
467*ccd979bdSMark Fasheh 		eb_el->l_recs[i].e_blkno = fe_el->l_recs[i].e_blkno;
468*ccd979bdSMark Fasheh 	}
469*ccd979bdSMark Fasheh 
470*ccd979bdSMark Fasheh 	status = ocfs2_journal_dirty(handle, new_eb_bh);
471*ccd979bdSMark Fasheh 	if (status < 0) {
472*ccd979bdSMark Fasheh 		mlog_errno(status);
473*ccd979bdSMark Fasheh 		goto bail;
474*ccd979bdSMark Fasheh 	}
475*ccd979bdSMark Fasheh 
476*ccd979bdSMark Fasheh 	status = ocfs2_journal_access(handle, inode, fe_bh,
477*ccd979bdSMark Fasheh 				      OCFS2_JOURNAL_ACCESS_WRITE);
478*ccd979bdSMark Fasheh 	if (status < 0) {
479*ccd979bdSMark Fasheh 		mlog_errno(status);
480*ccd979bdSMark Fasheh 		goto bail;
481*ccd979bdSMark Fasheh 	}
482*ccd979bdSMark Fasheh 
483*ccd979bdSMark Fasheh 	/* update fe now */
484*ccd979bdSMark Fasheh 	le16_add_cpu(&fe_el->l_tree_depth, 1);
485*ccd979bdSMark Fasheh 	fe_el->l_recs[0].e_cpos = 0;
486*ccd979bdSMark Fasheh 	fe_el->l_recs[0].e_blkno = eb->h_blkno;
487*ccd979bdSMark Fasheh 	fe_el->l_recs[0].e_clusters = fe->i_clusters;
488*ccd979bdSMark Fasheh 	for(i = 1; i < le16_to_cpu(fe_el->l_next_free_rec); i++) {
489*ccd979bdSMark Fasheh 		fe_el->l_recs[i].e_cpos = 0;
490*ccd979bdSMark Fasheh 		fe_el->l_recs[i].e_clusters = 0;
491*ccd979bdSMark Fasheh 		fe_el->l_recs[i].e_blkno = 0;
492*ccd979bdSMark Fasheh 	}
493*ccd979bdSMark Fasheh 	fe_el->l_next_free_rec = cpu_to_le16(1);
494*ccd979bdSMark Fasheh 
495*ccd979bdSMark Fasheh 	/* If this is our 1st tree depth shift, then last_eb_blk
496*ccd979bdSMark Fasheh 	 * becomes the allocated extent block */
497*ccd979bdSMark Fasheh 	if (fe_el->l_tree_depth == cpu_to_le16(1))
498*ccd979bdSMark Fasheh 		fe->i_last_eb_blk = eb->h_blkno;
499*ccd979bdSMark Fasheh 
500*ccd979bdSMark Fasheh 	status = ocfs2_journal_dirty(handle, fe_bh);
501*ccd979bdSMark Fasheh 	if (status < 0) {
502*ccd979bdSMark Fasheh 		mlog_errno(status);
503*ccd979bdSMark Fasheh 		goto bail;
504*ccd979bdSMark Fasheh 	}
505*ccd979bdSMark Fasheh 
506*ccd979bdSMark Fasheh 	*ret_new_eb_bh = new_eb_bh;
507*ccd979bdSMark Fasheh 	new_eb_bh = NULL;
508*ccd979bdSMark Fasheh 	status = 0;
509*ccd979bdSMark Fasheh bail:
510*ccd979bdSMark Fasheh 	if (new_eb_bh)
511*ccd979bdSMark Fasheh 		brelse(new_eb_bh);
512*ccd979bdSMark Fasheh 
513*ccd979bdSMark Fasheh 	mlog_exit(status);
514*ccd979bdSMark Fasheh 	return status;
515*ccd979bdSMark Fasheh }
516*ccd979bdSMark Fasheh 
517*ccd979bdSMark Fasheh /*
518*ccd979bdSMark Fasheh  * Expects the tree to already have room in the rightmost leaf for the
519*ccd979bdSMark Fasheh  * extent.  Updates all the extent blocks (and the dinode) on the way
520*ccd979bdSMark Fasheh  * down.
521*ccd979bdSMark Fasheh  */
522*ccd979bdSMark Fasheh static int ocfs2_do_insert_extent(struct ocfs2_super *osb,
523*ccd979bdSMark Fasheh 				  struct ocfs2_journal_handle *handle,
524*ccd979bdSMark Fasheh 				  struct inode *inode,
525*ccd979bdSMark Fasheh 				  struct buffer_head *fe_bh,
526*ccd979bdSMark Fasheh 				  u64 start_blk,
527*ccd979bdSMark Fasheh 				  u32 new_clusters)
528*ccd979bdSMark Fasheh {
529*ccd979bdSMark Fasheh 	int status, i, num_bhs = 0;
530*ccd979bdSMark Fasheh 	u64 next_blkno;
531*ccd979bdSMark Fasheh 	u16 next_free;
532*ccd979bdSMark Fasheh 	struct buffer_head **eb_bhs = NULL;
533*ccd979bdSMark Fasheh 	struct ocfs2_dinode *fe;
534*ccd979bdSMark Fasheh 	struct ocfs2_extent_block *eb;
535*ccd979bdSMark Fasheh 	struct ocfs2_extent_list  *el;
536*ccd979bdSMark Fasheh 
537*ccd979bdSMark Fasheh 	mlog_entry_void();
538*ccd979bdSMark Fasheh 
539*ccd979bdSMark Fasheh 	status = ocfs2_journal_access(handle, inode, fe_bh,
540*ccd979bdSMark Fasheh 				      OCFS2_JOURNAL_ACCESS_WRITE);
541*ccd979bdSMark Fasheh 	if (status < 0) {
542*ccd979bdSMark Fasheh 		mlog_errno(status);
543*ccd979bdSMark Fasheh 		goto bail;
544*ccd979bdSMark Fasheh 	}
545*ccd979bdSMark Fasheh 
546*ccd979bdSMark Fasheh 	fe = (struct ocfs2_dinode *) fe_bh->b_data;
547*ccd979bdSMark Fasheh 	el = &fe->id2.i_list;
548*ccd979bdSMark Fasheh 	if (el->l_tree_depth) {
549*ccd979bdSMark Fasheh 		/* This is another operation where we want to be
550*ccd979bdSMark Fasheh 		 * careful about our tree updates. An error here means
551*ccd979bdSMark Fasheh 		 * none of the previous changes we made should roll
552*ccd979bdSMark Fasheh 		 * forward. As a result, we have to record the buffers
553*ccd979bdSMark Fasheh 		 * for this part of the tree in an array and reserve a
554*ccd979bdSMark Fasheh 		 * journal write to them before making any changes. */
555*ccd979bdSMark Fasheh 		num_bhs = le16_to_cpu(fe->id2.i_list.l_tree_depth);
556*ccd979bdSMark Fasheh 		eb_bhs = kcalloc(num_bhs, sizeof(struct buffer_head *),
557*ccd979bdSMark Fasheh 				 GFP_KERNEL);
558*ccd979bdSMark Fasheh 		if (!eb_bhs) {
559*ccd979bdSMark Fasheh 			status = -ENOMEM;
560*ccd979bdSMark Fasheh 			mlog_errno(status);
561*ccd979bdSMark Fasheh 			goto bail;
562*ccd979bdSMark Fasheh 		}
563*ccd979bdSMark Fasheh 
564*ccd979bdSMark Fasheh 		i = 0;
565*ccd979bdSMark Fasheh 		while(el->l_tree_depth) {
566*ccd979bdSMark Fasheh 			next_free = le16_to_cpu(el->l_next_free_rec);
567*ccd979bdSMark Fasheh 			if (next_free == 0) {
568*ccd979bdSMark Fasheh 				ocfs2_error(inode->i_sb,
569*ccd979bdSMark Fasheh 					    "Dinode %"MLFu64" has a bad "
570*ccd979bdSMark Fasheh 					    "extent list",
571*ccd979bdSMark Fasheh 					    OCFS2_I(inode)->ip_blkno);
572*ccd979bdSMark Fasheh 				status = -EIO;
573*ccd979bdSMark Fasheh 				goto bail;
574*ccd979bdSMark Fasheh 			}
575*ccd979bdSMark Fasheh 			next_blkno = le64_to_cpu(el->l_recs[next_free - 1].e_blkno);
576*ccd979bdSMark Fasheh 
577*ccd979bdSMark Fasheh 			BUG_ON(i >= num_bhs);
578*ccd979bdSMark Fasheh 			status = ocfs2_read_block(osb, next_blkno, &eb_bhs[i],
579*ccd979bdSMark Fasheh 						  OCFS2_BH_CACHED, inode);
580*ccd979bdSMark Fasheh 			if (status < 0) {
581*ccd979bdSMark Fasheh 				mlog_errno(status);
582*ccd979bdSMark Fasheh 				goto bail;
583*ccd979bdSMark Fasheh 			}
584*ccd979bdSMark Fasheh 			eb = (struct ocfs2_extent_block *) eb_bhs[i]->b_data;
585*ccd979bdSMark Fasheh 			if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
586*ccd979bdSMark Fasheh 				OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb,
587*ccd979bdSMark Fasheh 								 eb);
588*ccd979bdSMark Fasheh 				status = -EIO;
589*ccd979bdSMark Fasheh 				goto bail;
590*ccd979bdSMark Fasheh 			}
591*ccd979bdSMark Fasheh 
592*ccd979bdSMark Fasheh 			status = ocfs2_journal_access(handle, inode, eb_bhs[i],
593*ccd979bdSMark Fasheh 						      OCFS2_JOURNAL_ACCESS_WRITE);
594*ccd979bdSMark Fasheh 			if (status < 0) {
595*ccd979bdSMark Fasheh 				mlog_errno(status);
596*ccd979bdSMark Fasheh 				goto bail;
597*ccd979bdSMark Fasheh 			}
598*ccd979bdSMark Fasheh 
599*ccd979bdSMark Fasheh 			el = &eb->h_list;
600*ccd979bdSMark Fasheh 			i++;
601*ccd979bdSMark Fasheh 			/* When we leave this loop, eb_bhs[num_bhs - 1] will
602*ccd979bdSMark Fasheh 			 * hold the bottom-most leaf extent block. */
603*ccd979bdSMark Fasheh 		}
604*ccd979bdSMark Fasheh 		BUG_ON(el->l_tree_depth);
605*ccd979bdSMark Fasheh 
606*ccd979bdSMark Fasheh 		el = &fe->id2.i_list;
607*ccd979bdSMark Fasheh 		/* If we have tree depth, then the fe update is
608*ccd979bdSMark Fasheh 		 * trivial, and we want to switch el out for the
609*ccd979bdSMark Fasheh 		 * bottom-most leaf in order to update it with the
610*ccd979bdSMark Fasheh 		 * actual extent data below. */
611*ccd979bdSMark Fasheh 		next_free = le16_to_cpu(el->l_next_free_rec);
612*ccd979bdSMark Fasheh 		if (next_free == 0) {
613*ccd979bdSMark Fasheh 			ocfs2_error(inode->i_sb,
614*ccd979bdSMark Fasheh 				    "Dinode %"MLFu64" has a bad "
615*ccd979bdSMark Fasheh 				    "extent list",
616*ccd979bdSMark Fasheh 				    OCFS2_I(inode)->ip_blkno);
617*ccd979bdSMark Fasheh 			status = -EIO;
618*ccd979bdSMark Fasheh 			goto bail;
619*ccd979bdSMark Fasheh 		}
620*ccd979bdSMark Fasheh 		le32_add_cpu(&el->l_recs[next_free - 1].e_clusters,
621*ccd979bdSMark Fasheh 			     new_clusters);
622*ccd979bdSMark Fasheh 		/* (num_bhs - 1) to avoid the leaf */
623*ccd979bdSMark Fasheh 		for(i = 0; i < (num_bhs - 1); i++) {
624*ccd979bdSMark Fasheh 			eb = (struct ocfs2_extent_block *) eb_bhs[i]->b_data;
625*ccd979bdSMark Fasheh 			el = &eb->h_list;
626*ccd979bdSMark Fasheh 
627*ccd979bdSMark Fasheh 			/* finally, make our actual change to the
628*ccd979bdSMark Fasheh 			 * intermediate extent blocks. */
629*ccd979bdSMark Fasheh 			next_free = le16_to_cpu(el->l_next_free_rec);
630*ccd979bdSMark Fasheh 			le32_add_cpu(&el->l_recs[next_free - 1].e_clusters,
631*ccd979bdSMark Fasheh 				     new_clusters);
632*ccd979bdSMark Fasheh 
633*ccd979bdSMark Fasheh 			status = ocfs2_journal_dirty(handle, eb_bhs[i]);
634*ccd979bdSMark Fasheh 			if (status < 0)
635*ccd979bdSMark Fasheh 				mlog_errno(status);
636*ccd979bdSMark Fasheh 		}
637*ccd979bdSMark Fasheh 		BUG_ON(i != (num_bhs - 1));
638*ccd979bdSMark Fasheh 		/* note that the leaf block wasn't touched in
639*ccd979bdSMark Fasheh 		 * the loop above */
640*ccd979bdSMark Fasheh 		eb = (struct ocfs2_extent_block *) eb_bhs[num_bhs - 1]->b_data;
641*ccd979bdSMark Fasheh 		el = &eb->h_list;
642*ccd979bdSMark Fasheh 		BUG_ON(el->l_tree_depth);
643*ccd979bdSMark Fasheh 	}
644*ccd979bdSMark Fasheh 
645*ccd979bdSMark Fasheh 	/* yay, we can finally add the actual extent now! */
646*ccd979bdSMark Fasheh 	i = le16_to_cpu(el->l_next_free_rec) - 1;
647*ccd979bdSMark Fasheh 	if (le16_to_cpu(el->l_next_free_rec) &&
648*ccd979bdSMark Fasheh 	    ocfs2_extent_contig(inode, &el->l_recs[i], start_blk)) {
649*ccd979bdSMark Fasheh 		le32_add_cpu(&el->l_recs[i].e_clusters, new_clusters);
650*ccd979bdSMark Fasheh 	} else if (le16_to_cpu(el->l_next_free_rec) &&
651*ccd979bdSMark Fasheh 		   (le32_to_cpu(el->l_recs[i].e_clusters) == 0)) {
652*ccd979bdSMark Fasheh 		/* having an empty extent at eof is legal. */
653*ccd979bdSMark Fasheh 		if (el->l_recs[i].e_cpos != fe->i_clusters) {
654*ccd979bdSMark Fasheh 			ocfs2_error(inode->i_sb,
655*ccd979bdSMark Fasheh 				    "Dinode %"MLFu64" trailing extent is bad: "
656*ccd979bdSMark Fasheh 				    "cpos (%u) != number of clusters (%u)",
657*ccd979bdSMark Fasheh 				    le32_to_cpu(el->l_recs[i].e_cpos),
658*ccd979bdSMark Fasheh 				    le32_to_cpu(fe->i_clusters));
659*ccd979bdSMark Fasheh 			status = -EIO;
660*ccd979bdSMark Fasheh 			goto bail;
661*ccd979bdSMark Fasheh 		}
662*ccd979bdSMark Fasheh 		el->l_recs[i].e_blkno = cpu_to_le64(start_blk);
663*ccd979bdSMark Fasheh 		el->l_recs[i].e_clusters = cpu_to_le32(new_clusters);
664*ccd979bdSMark Fasheh 	} else {
665*ccd979bdSMark Fasheh 		/* No contiguous record, or no empty record at eof, so
666*ccd979bdSMark Fasheh 		 * we add a new one. */
667*ccd979bdSMark Fasheh 
668*ccd979bdSMark Fasheh 		BUG_ON(le16_to_cpu(el->l_next_free_rec) >=
669*ccd979bdSMark Fasheh 		       le16_to_cpu(el->l_count));
670*ccd979bdSMark Fasheh 		i = le16_to_cpu(el->l_next_free_rec);
671*ccd979bdSMark Fasheh 
672*ccd979bdSMark Fasheh 		el->l_recs[i].e_blkno = cpu_to_le64(start_blk);
673*ccd979bdSMark Fasheh 		el->l_recs[i].e_clusters = cpu_to_le32(new_clusters);
674*ccd979bdSMark Fasheh 		el->l_recs[i].e_cpos = fe->i_clusters;
675*ccd979bdSMark Fasheh 		le16_add_cpu(&el->l_next_free_rec, 1);
676*ccd979bdSMark Fasheh 	}
677*ccd979bdSMark Fasheh 
678*ccd979bdSMark Fasheh 	/*
679*ccd979bdSMark Fasheh 	 * extent_map errors are not fatal, so they are ignored outside
680*ccd979bdSMark Fasheh 	 * of flushing the thing.
681*ccd979bdSMark Fasheh 	 */
682*ccd979bdSMark Fasheh 	status = ocfs2_extent_map_append(inode, &el->l_recs[i],
683*ccd979bdSMark Fasheh 					 new_clusters);
684*ccd979bdSMark Fasheh 	if (status) {
685*ccd979bdSMark Fasheh 		mlog_errno(status);
686*ccd979bdSMark Fasheh 		ocfs2_extent_map_drop(inode, le32_to_cpu(fe->i_clusters));
687*ccd979bdSMark Fasheh 	}
688*ccd979bdSMark Fasheh 
689*ccd979bdSMark Fasheh 	status = ocfs2_journal_dirty(handle, fe_bh);
690*ccd979bdSMark Fasheh 	if (status < 0)
691*ccd979bdSMark Fasheh 		mlog_errno(status);
692*ccd979bdSMark Fasheh 	if (fe->id2.i_list.l_tree_depth) {
693*ccd979bdSMark Fasheh 		status = ocfs2_journal_dirty(handle, eb_bhs[num_bhs - 1]);
694*ccd979bdSMark Fasheh 		if (status < 0)
695*ccd979bdSMark Fasheh 			mlog_errno(status);
696*ccd979bdSMark Fasheh 	}
697*ccd979bdSMark Fasheh 
698*ccd979bdSMark Fasheh 	status = 0;
699*ccd979bdSMark Fasheh bail:
700*ccd979bdSMark Fasheh 	if (eb_bhs) {
701*ccd979bdSMark Fasheh 		for (i = 0; i < num_bhs; i++)
702*ccd979bdSMark Fasheh 			if (eb_bhs[i])
703*ccd979bdSMark Fasheh 				brelse(eb_bhs[i]);
704*ccd979bdSMark Fasheh 		kfree(eb_bhs);
705*ccd979bdSMark Fasheh 	}
706*ccd979bdSMark Fasheh 
707*ccd979bdSMark Fasheh 	mlog_exit(status);
708*ccd979bdSMark Fasheh 	return status;
709*ccd979bdSMark Fasheh }
710*ccd979bdSMark Fasheh 
711*ccd979bdSMark Fasheh /*
712*ccd979bdSMark Fasheh  * Should only be called when there is no space left in any of the
713*ccd979bdSMark Fasheh  * leaf nodes. What we want to do is find the lowest tree depth
714*ccd979bdSMark Fasheh  * non-leaf extent block with room for new records. There are three
715*ccd979bdSMark Fasheh  * valid results of this search:
716*ccd979bdSMark Fasheh  *
717*ccd979bdSMark Fasheh  * 1) a lowest extent block is found, then we pass it back in
718*ccd979bdSMark Fasheh  *    *lowest_eb_bh and return '0'
719*ccd979bdSMark Fasheh  *
720*ccd979bdSMark Fasheh  * 2) the search fails to find anything, but the dinode has room. We
721*ccd979bdSMark Fasheh  *    pass NULL back in *lowest_eb_bh, but still return '0'
722*ccd979bdSMark Fasheh  *
723*ccd979bdSMark Fasheh  * 3) the search fails to find anything AND the dinode is full, in
724*ccd979bdSMark Fasheh  *    which case we return > 0
725*ccd979bdSMark Fasheh  *
726*ccd979bdSMark Fasheh  * return status < 0 indicates an error.
727*ccd979bdSMark Fasheh  */
728*ccd979bdSMark Fasheh static int ocfs2_find_branch_target(struct ocfs2_super *osb,
729*ccd979bdSMark Fasheh 				    struct inode *inode,
730*ccd979bdSMark Fasheh 				    struct buffer_head *fe_bh,
731*ccd979bdSMark Fasheh 				    struct buffer_head **target_bh)
732*ccd979bdSMark Fasheh {
733*ccd979bdSMark Fasheh 	int status = 0, i;
734*ccd979bdSMark Fasheh 	u64 blkno;
735*ccd979bdSMark Fasheh 	struct ocfs2_dinode *fe;
736*ccd979bdSMark Fasheh 	struct ocfs2_extent_block *eb;
737*ccd979bdSMark Fasheh 	struct ocfs2_extent_list  *el;
738*ccd979bdSMark Fasheh 	struct buffer_head *bh = NULL;
739*ccd979bdSMark Fasheh 	struct buffer_head *lowest_bh = NULL;
740*ccd979bdSMark Fasheh 
741*ccd979bdSMark Fasheh 	mlog_entry_void();
742*ccd979bdSMark Fasheh 
743*ccd979bdSMark Fasheh 	*target_bh = NULL;
744*ccd979bdSMark Fasheh 
745*ccd979bdSMark Fasheh 	fe = (struct ocfs2_dinode *) fe_bh->b_data;
746*ccd979bdSMark Fasheh 	el = &fe->id2.i_list;
747*ccd979bdSMark Fasheh 
748*ccd979bdSMark Fasheh 	while(le16_to_cpu(el->l_tree_depth) > 1) {
749*ccd979bdSMark Fasheh 		if (le16_to_cpu(el->l_next_free_rec) == 0) {
750*ccd979bdSMark Fasheh 			ocfs2_error(inode->i_sb, "Dinode %"MLFu64" has empty "
751*ccd979bdSMark Fasheh 				    "extent list (next_free_rec == 0)",
752*ccd979bdSMark Fasheh 				    OCFS2_I(inode)->ip_blkno);
753*ccd979bdSMark Fasheh 			status = -EIO;
754*ccd979bdSMark Fasheh 			goto bail;
755*ccd979bdSMark Fasheh 		}
756*ccd979bdSMark Fasheh 		i = le16_to_cpu(el->l_next_free_rec) - 1;
757*ccd979bdSMark Fasheh 		blkno = le64_to_cpu(el->l_recs[i].e_blkno);
758*ccd979bdSMark Fasheh 		if (!blkno) {
759*ccd979bdSMark Fasheh 			ocfs2_error(inode->i_sb, "Dinode %"MLFu64" has extent "
760*ccd979bdSMark Fasheh 				    "list where extent # %d has no physical "
761*ccd979bdSMark Fasheh 				    "block start",
762*ccd979bdSMark Fasheh 				    OCFS2_I(inode)->ip_blkno, i);
763*ccd979bdSMark Fasheh 			status = -EIO;
764*ccd979bdSMark Fasheh 			goto bail;
765*ccd979bdSMark Fasheh 		}
766*ccd979bdSMark Fasheh 
767*ccd979bdSMark Fasheh 		if (bh) {
768*ccd979bdSMark Fasheh 			brelse(bh);
769*ccd979bdSMark Fasheh 			bh = NULL;
770*ccd979bdSMark Fasheh 		}
771*ccd979bdSMark Fasheh 
772*ccd979bdSMark Fasheh 		status = ocfs2_read_block(osb, blkno, &bh, OCFS2_BH_CACHED,
773*ccd979bdSMark Fasheh 					  inode);
774*ccd979bdSMark Fasheh 		if (status < 0) {
775*ccd979bdSMark Fasheh 			mlog_errno(status);
776*ccd979bdSMark Fasheh 			goto bail;
777*ccd979bdSMark Fasheh 		}
778*ccd979bdSMark Fasheh 
779*ccd979bdSMark Fasheh 		eb = (struct ocfs2_extent_block *) bh->b_data;
780*ccd979bdSMark Fasheh 		if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
781*ccd979bdSMark Fasheh 			OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
782*ccd979bdSMark Fasheh 			status = -EIO;
783*ccd979bdSMark Fasheh 			goto bail;
784*ccd979bdSMark Fasheh 		}
785*ccd979bdSMark Fasheh 		el = &eb->h_list;
786*ccd979bdSMark Fasheh 
787*ccd979bdSMark Fasheh 		if (le16_to_cpu(el->l_next_free_rec) <
788*ccd979bdSMark Fasheh 		    le16_to_cpu(el->l_count)) {
789*ccd979bdSMark Fasheh 			if (lowest_bh)
790*ccd979bdSMark Fasheh 				brelse(lowest_bh);
791*ccd979bdSMark Fasheh 			lowest_bh = bh;
792*ccd979bdSMark Fasheh 			get_bh(lowest_bh);
793*ccd979bdSMark Fasheh 		}
794*ccd979bdSMark Fasheh 	}
795*ccd979bdSMark Fasheh 
796*ccd979bdSMark Fasheh 	/* If we didn't find one and the fe doesn't have any room,
797*ccd979bdSMark Fasheh 	 * then return '1' */
798*ccd979bdSMark Fasheh 	if (!lowest_bh
799*ccd979bdSMark Fasheh 	    && (fe->id2.i_list.l_next_free_rec == fe->id2.i_list.l_count))
800*ccd979bdSMark Fasheh 		status = 1;
801*ccd979bdSMark Fasheh 
802*ccd979bdSMark Fasheh 	*target_bh = lowest_bh;
803*ccd979bdSMark Fasheh bail:
804*ccd979bdSMark Fasheh 	if (bh)
805*ccd979bdSMark Fasheh 		brelse(bh);
806*ccd979bdSMark Fasheh 
807*ccd979bdSMark Fasheh 	mlog_exit(status);
808*ccd979bdSMark Fasheh 	return status;
809*ccd979bdSMark Fasheh }
810*ccd979bdSMark Fasheh 
811*ccd979bdSMark Fasheh /* the caller needs to update fe->i_clusters */
812*ccd979bdSMark Fasheh int ocfs2_insert_extent(struct ocfs2_super *osb,
813*ccd979bdSMark Fasheh 			struct ocfs2_journal_handle *handle,
814*ccd979bdSMark Fasheh 			struct inode *inode,
815*ccd979bdSMark Fasheh 			struct buffer_head *fe_bh,
816*ccd979bdSMark Fasheh 			u64 start_blk,
817*ccd979bdSMark Fasheh 			u32 new_clusters,
818*ccd979bdSMark Fasheh 			struct ocfs2_alloc_context *meta_ac)
819*ccd979bdSMark Fasheh {
820*ccd979bdSMark Fasheh 	int status, i, shift;
821*ccd979bdSMark Fasheh 	struct buffer_head *last_eb_bh = NULL;
822*ccd979bdSMark Fasheh 	struct buffer_head *bh = NULL;
823*ccd979bdSMark Fasheh 	struct ocfs2_dinode *fe;
824*ccd979bdSMark Fasheh 	struct ocfs2_extent_block *eb;
825*ccd979bdSMark Fasheh 	struct ocfs2_extent_list  *el;
826*ccd979bdSMark Fasheh 
827*ccd979bdSMark Fasheh 	mlog_entry_void();
828*ccd979bdSMark Fasheh 
829*ccd979bdSMark Fasheh 	mlog(0, "add %u clusters starting at block %"MLFu64" to "
830*ccd979bdSMark Fasheh 		"inode %"MLFu64"\n",
831*ccd979bdSMark Fasheh 	     new_clusters, start_blk, OCFS2_I(inode)->ip_blkno);
832*ccd979bdSMark Fasheh 
833*ccd979bdSMark Fasheh 	fe = (struct ocfs2_dinode *) fe_bh->b_data;
834*ccd979bdSMark Fasheh 	el = &fe->id2.i_list;
835*ccd979bdSMark Fasheh 
836*ccd979bdSMark Fasheh 	if (el->l_tree_depth) {
837*ccd979bdSMark Fasheh 		/* jump to end of tree */
838*ccd979bdSMark Fasheh 		status = ocfs2_read_block(osb, le64_to_cpu(fe->i_last_eb_blk),
839*ccd979bdSMark Fasheh 					  &last_eb_bh, OCFS2_BH_CACHED, inode);
840*ccd979bdSMark Fasheh 		if (status < 0) {
841*ccd979bdSMark Fasheh 			mlog_exit(status);
842*ccd979bdSMark Fasheh 			goto bail;
843*ccd979bdSMark Fasheh 		}
844*ccd979bdSMark Fasheh 		eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
845*ccd979bdSMark Fasheh 		el = &eb->h_list;
846*ccd979bdSMark Fasheh 	}
847*ccd979bdSMark Fasheh 
848*ccd979bdSMark Fasheh 	/* Can we allocate without adding/shifting tree bits? */
849*ccd979bdSMark Fasheh 	i = le16_to_cpu(el->l_next_free_rec) - 1;
850*ccd979bdSMark Fasheh 	if (le16_to_cpu(el->l_next_free_rec) == 0
851*ccd979bdSMark Fasheh 	    || (le16_to_cpu(el->l_next_free_rec) < le16_to_cpu(el->l_count))
852*ccd979bdSMark Fasheh 	    || le32_to_cpu(el->l_recs[i].e_clusters) == 0
853*ccd979bdSMark Fasheh 	    || ocfs2_extent_contig(inode, &el->l_recs[i], start_blk))
854*ccd979bdSMark Fasheh 		goto out_add;
855*ccd979bdSMark Fasheh 
856*ccd979bdSMark Fasheh 	mlog(0, "ocfs2_allocate_extent: couldn't do a simple add, traversing "
857*ccd979bdSMark Fasheh 	     "tree now.\n");
858*ccd979bdSMark Fasheh 
859*ccd979bdSMark Fasheh 	shift = ocfs2_find_branch_target(osb, inode, fe_bh, &bh);
860*ccd979bdSMark Fasheh 	if (shift < 0) {
861*ccd979bdSMark Fasheh 		status = shift;
862*ccd979bdSMark Fasheh 		mlog_errno(status);
863*ccd979bdSMark Fasheh 		goto bail;
864*ccd979bdSMark Fasheh 	}
865*ccd979bdSMark Fasheh 
866*ccd979bdSMark Fasheh 	/* We traveled all the way to the bottom of the allocation tree
867*ccd979bdSMark Fasheh 	 * and didn't find room for any more extents - we need to add
868*ccd979bdSMark Fasheh 	 * another tree level */
869*ccd979bdSMark Fasheh 	if (shift) {
870*ccd979bdSMark Fasheh 		/* if we hit a leaf, we'd better be empty :) */
871*ccd979bdSMark Fasheh 		BUG_ON(le16_to_cpu(el->l_next_free_rec) !=
872*ccd979bdSMark Fasheh 		       le16_to_cpu(el->l_count));
873*ccd979bdSMark Fasheh 		BUG_ON(bh);
874*ccd979bdSMark Fasheh 		mlog(0, "ocfs2_allocate_extent: need to shift tree depth "
875*ccd979bdSMark Fasheh 		     "(current = %u)\n",
876*ccd979bdSMark Fasheh 		     le16_to_cpu(fe->id2.i_list.l_tree_depth));
877*ccd979bdSMark Fasheh 
878*ccd979bdSMark Fasheh 		/* ocfs2_shift_tree_depth will return us a buffer with
879*ccd979bdSMark Fasheh 		 * the new extent block (so we can pass that to
880*ccd979bdSMark Fasheh 		 * ocfs2_add_branch). */
881*ccd979bdSMark Fasheh 		status = ocfs2_shift_tree_depth(osb, handle, inode, fe_bh,
882*ccd979bdSMark Fasheh 						meta_ac, &bh);
883*ccd979bdSMark Fasheh 		if (status < 0) {
884*ccd979bdSMark Fasheh 			mlog_errno(status);
885*ccd979bdSMark Fasheh 			goto bail;
886*ccd979bdSMark Fasheh 		}
887*ccd979bdSMark Fasheh 		/* Special case: we have room now if we shifted from
888*ccd979bdSMark Fasheh 		 * tree_depth 0 */
889*ccd979bdSMark Fasheh 		if (fe->id2.i_list.l_tree_depth == cpu_to_le16(1))
890*ccd979bdSMark Fasheh 			goto out_add;
891*ccd979bdSMark Fasheh 	}
892*ccd979bdSMark Fasheh 
893*ccd979bdSMark Fasheh 	/* call ocfs2_add_branch to add the final part of the tree with
894*ccd979bdSMark Fasheh 	 * the new data. */
895*ccd979bdSMark Fasheh 	mlog(0, "ocfs2_allocate_extent: add branch. bh = %p\n", bh);
896*ccd979bdSMark Fasheh 	status = ocfs2_add_branch(osb, handle, inode, fe_bh, bh, last_eb_bh,
897*ccd979bdSMark Fasheh 				  meta_ac);
898*ccd979bdSMark Fasheh 	if (status < 0) {
899*ccd979bdSMark Fasheh 		mlog_errno(status);
900*ccd979bdSMark Fasheh 		goto bail;
901*ccd979bdSMark Fasheh 	}
902*ccd979bdSMark Fasheh 
903*ccd979bdSMark Fasheh out_add:
904*ccd979bdSMark Fasheh 	/* Finally, we can add clusters. */
905*ccd979bdSMark Fasheh 	status = ocfs2_do_insert_extent(osb, handle, inode, fe_bh,
906*ccd979bdSMark Fasheh 					start_blk, new_clusters);
907*ccd979bdSMark Fasheh 	if (status < 0)
908*ccd979bdSMark Fasheh 		mlog_errno(status);
909*ccd979bdSMark Fasheh 
910*ccd979bdSMark Fasheh bail:
911*ccd979bdSMark Fasheh 	if (bh)
912*ccd979bdSMark Fasheh 		brelse(bh);
913*ccd979bdSMark Fasheh 
914*ccd979bdSMark Fasheh 	if (last_eb_bh)
915*ccd979bdSMark Fasheh 		brelse(last_eb_bh);
916*ccd979bdSMark Fasheh 
917*ccd979bdSMark Fasheh 	mlog_exit(status);
918*ccd979bdSMark Fasheh 	return status;
919*ccd979bdSMark Fasheh }
920*ccd979bdSMark Fasheh 
921*ccd979bdSMark Fasheh static inline int ocfs2_truncate_log_needs_flush(struct ocfs2_super *osb)
922*ccd979bdSMark Fasheh {
923*ccd979bdSMark Fasheh 	struct buffer_head *tl_bh = osb->osb_tl_bh;
924*ccd979bdSMark Fasheh 	struct ocfs2_dinode *di;
925*ccd979bdSMark Fasheh 	struct ocfs2_truncate_log *tl;
926*ccd979bdSMark Fasheh 
927*ccd979bdSMark Fasheh 	di = (struct ocfs2_dinode *) tl_bh->b_data;
928*ccd979bdSMark Fasheh 	tl = &di->id2.i_dealloc;
929*ccd979bdSMark Fasheh 
930*ccd979bdSMark Fasheh 	mlog_bug_on_msg(le16_to_cpu(tl->tl_used) > le16_to_cpu(tl->tl_count),
931*ccd979bdSMark Fasheh 			"slot %d, invalid truncate log parameters: used = "
932*ccd979bdSMark Fasheh 			"%u, count = %u\n", osb->slot_num,
933*ccd979bdSMark Fasheh 			le16_to_cpu(tl->tl_used), le16_to_cpu(tl->tl_count));
934*ccd979bdSMark Fasheh 	return le16_to_cpu(tl->tl_used) == le16_to_cpu(tl->tl_count);
935*ccd979bdSMark Fasheh }
936*ccd979bdSMark Fasheh 
937*ccd979bdSMark Fasheh static int ocfs2_truncate_log_can_coalesce(struct ocfs2_truncate_log *tl,
938*ccd979bdSMark Fasheh 					   unsigned int new_start)
939*ccd979bdSMark Fasheh {
940*ccd979bdSMark Fasheh 	unsigned int tail_index;
941*ccd979bdSMark Fasheh 	unsigned int current_tail;
942*ccd979bdSMark Fasheh 
943*ccd979bdSMark Fasheh 	/* No records, nothing to coalesce */
944*ccd979bdSMark Fasheh 	if (!le16_to_cpu(tl->tl_used))
945*ccd979bdSMark Fasheh 		return 0;
946*ccd979bdSMark Fasheh 
947*ccd979bdSMark Fasheh 	tail_index = le16_to_cpu(tl->tl_used) - 1;
948*ccd979bdSMark Fasheh 	current_tail = le32_to_cpu(tl->tl_recs[tail_index].t_start);
949*ccd979bdSMark Fasheh 	current_tail += le32_to_cpu(tl->tl_recs[tail_index].t_clusters);
950*ccd979bdSMark Fasheh 
951*ccd979bdSMark Fasheh 	return current_tail == new_start;
952*ccd979bdSMark Fasheh }
953*ccd979bdSMark Fasheh 
954*ccd979bdSMark Fasheh static int ocfs2_truncate_log_append(struct ocfs2_super *osb,
955*ccd979bdSMark Fasheh 				     struct ocfs2_journal_handle *handle,
956*ccd979bdSMark Fasheh 				     u64 start_blk,
957*ccd979bdSMark Fasheh 				     unsigned int num_clusters)
958*ccd979bdSMark Fasheh {
959*ccd979bdSMark Fasheh 	int status, index;
960*ccd979bdSMark Fasheh 	unsigned int start_cluster, tl_count;
961*ccd979bdSMark Fasheh 	struct inode *tl_inode = osb->osb_tl_inode;
962*ccd979bdSMark Fasheh 	struct buffer_head *tl_bh = osb->osb_tl_bh;
963*ccd979bdSMark Fasheh 	struct ocfs2_dinode *di;
964*ccd979bdSMark Fasheh 	struct ocfs2_truncate_log *tl;
965*ccd979bdSMark Fasheh 
966*ccd979bdSMark Fasheh 	mlog_entry("start_blk = %"MLFu64", num_clusters = %u\n", start_blk,
967*ccd979bdSMark Fasheh 		   num_clusters);
968*ccd979bdSMark Fasheh 
969*ccd979bdSMark Fasheh 	BUG_ON(!down_trylock(&tl_inode->i_sem));
970*ccd979bdSMark Fasheh 
971*ccd979bdSMark Fasheh 	start_cluster = ocfs2_blocks_to_clusters(osb->sb, start_blk);
972*ccd979bdSMark Fasheh 
973*ccd979bdSMark Fasheh 	di = (struct ocfs2_dinode *) tl_bh->b_data;
974*ccd979bdSMark Fasheh 	tl = &di->id2.i_dealloc;
975*ccd979bdSMark Fasheh 	if (!OCFS2_IS_VALID_DINODE(di)) {
976*ccd979bdSMark Fasheh 		OCFS2_RO_ON_INVALID_DINODE(osb->sb, di);
977*ccd979bdSMark Fasheh 		status = -EIO;
978*ccd979bdSMark Fasheh 		goto bail;
979*ccd979bdSMark Fasheh 	}
980*ccd979bdSMark Fasheh 
981*ccd979bdSMark Fasheh 	tl_count = le16_to_cpu(tl->tl_count);
982*ccd979bdSMark Fasheh 	mlog_bug_on_msg(tl_count > ocfs2_truncate_recs_per_inode(osb->sb) ||
983*ccd979bdSMark Fasheh 			tl_count == 0,
984*ccd979bdSMark Fasheh 			"Truncate record count on #%"MLFu64" invalid ("
985*ccd979bdSMark Fasheh 			"wanted %u, actual %u\n", OCFS2_I(tl_inode)->ip_blkno,
986*ccd979bdSMark Fasheh 			ocfs2_truncate_recs_per_inode(osb->sb),
987*ccd979bdSMark Fasheh 			le16_to_cpu(tl->tl_count));
988*ccd979bdSMark Fasheh 
989*ccd979bdSMark Fasheh 	/* Caller should have known to flush before calling us. */
990*ccd979bdSMark Fasheh 	index = le16_to_cpu(tl->tl_used);
991*ccd979bdSMark Fasheh 	if (index >= tl_count) {
992*ccd979bdSMark Fasheh 		status = -ENOSPC;
993*ccd979bdSMark Fasheh 		mlog_errno(status);
994*ccd979bdSMark Fasheh 		goto bail;
995*ccd979bdSMark Fasheh 	}
996*ccd979bdSMark Fasheh 
997*ccd979bdSMark Fasheh 	status = ocfs2_journal_access(handle, tl_inode, tl_bh,
998*ccd979bdSMark Fasheh 				      OCFS2_JOURNAL_ACCESS_WRITE);
999*ccd979bdSMark Fasheh 	if (status < 0) {
1000*ccd979bdSMark Fasheh 		mlog_errno(status);
1001*ccd979bdSMark Fasheh 		goto bail;
1002*ccd979bdSMark Fasheh 	}
1003*ccd979bdSMark Fasheh 
1004*ccd979bdSMark Fasheh 	mlog(0, "Log truncate of %u clusters starting at cluster %u to "
1005*ccd979bdSMark Fasheh 	     "%"MLFu64" (index = %d)\n", num_clusters, start_cluster,
1006*ccd979bdSMark Fasheh 	     OCFS2_I(tl_inode)->ip_blkno, index);
1007*ccd979bdSMark Fasheh 
1008*ccd979bdSMark Fasheh 	if (ocfs2_truncate_log_can_coalesce(tl, start_cluster)) {
1009*ccd979bdSMark Fasheh 		/*
1010*ccd979bdSMark Fasheh 		 * Move index back to the record we are coalescing with.
1011*ccd979bdSMark Fasheh 		 * ocfs2_truncate_log_can_coalesce() guarantees nonzero
1012*ccd979bdSMark Fasheh 		 */
1013*ccd979bdSMark Fasheh 		index--;
1014*ccd979bdSMark Fasheh 
1015*ccd979bdSMark Fasheh 		num_clusters += le32_to_cpu(tl->tl_recs[index].t_clusters);
1016*ccd979bdSMark Fasheh 		mlog(0, "Coalesce with index %u (start = %u, clusters = %u)\n",
1017*ccd979bdSMark Fasheh 		     index, le32_to_cpu(tl->tl_recs[index].t_start),
1018*ccd979bdSMark Fasheh 		     num_clusters);
1019*ccd979bdSMark Fasheh 	} else {
1020*ccd979bdSMark Fasheh 		tl->tl_recs[index].t_start = cpu_to_le32(start_cluster);
1021*ccd979bdSMark Fasheh 		tl->tl_used = cpu_to_le16(index + 1);
1022*ccd979bdSMark Fasheh 	}
1023*ccd979bdSMark Fasheh 	tl->tl_recs[index].t_clusters = cpu_to_le32(num_clusters);
1024*ccd979bdSMark Fasheh 
1025*ccd979bdSMark Fasheh 	status = ocfs2_journal_dirty(handle, tl_bh);
1026*ccd979bdSMark Fasheh 	if (status < 0) {
1027*ccd979bdSMark Fasheh 		mlog_errno(status);
1028*ccd979bdSMark Fasheh 		goto bail;
1029*ccd979bdSMark Fasheh 	}
1030*ccd979bdSMark Fasheh 
1031*ccd979bdSMark Fasheh bail:
1032*ccd979bdSMark Fasheh 	mlog_exit(status);
1033*ccd979bdSMark Fasheh 	return status;
1034*ccd979bdSMark Fasheh }
1035*ccd979bdSMark Fasheh 
1036*ccd979bdSMark Fasheh static int ocfs2_replay_truncate_records(struct ocfs2_super *osb,
1037*ccd979bdSMark Fasheh 					 struct ocfs2_journal_handle *handle,
1038*ccd979bdSMark Fasheh 					 struct inode *data_alloc_inode,
1039*ccd979bdSMark Fasheh 					 struct buffer_head *data_alloc_bh)
1040*ccd979bdSMark Fasheh {
1041*ccd979bdSMark Fasheh 	int status = 0;
1042*ccd979bdSMark Fasheh 	int i;
1043*ccd979bdSMark Fasheh 	unsigned int num_clusters;
1044*ccd979bdSMark Fasheh 	u64 start_blk;
1045*ccd979bdSMark Fasheh 	struct ocfs2_truncate_rec rec;
1046*ccd979bdSMark Fasheh 	struct ocfs2_dinode *di;
1047*ccd979bdSMark Fasheh 	struct ocfs2_truncate_log *tl;
1048*ccd979bdSMark Fasheh 	struct inode *tl_inode = osb->osb_tl_inode;
1049*ccd979bdSMark Fasheh 	struct buffer_head *tl_bh = osb->osb_tl_bh;
1050*ccd979bdSMark Fasheh 
1051*ccd979bdSMark Fasheh 	mlog_entry_void();
1052*ccd979bdSMark Fasheh 
1053*ccd979bdSMark Fasheh 	di = (struct ocfs2_dinode *) tl_bh->b_data;
1054*ccd979bdSMark Fasheh 	tl = &di->id2.i_dealloc;
1055*ccd979bdSMark Fasheh 	i = le16_to_cpu(tl->tl_used) - 1;
1056*ccd979bdSMark Fasheh 	while (i >= 0) {
1057*ccd979bdSMark Fasheh 		/* Caller has given us at least enough credits to
1058*ccd979bdSMark Fasheh 		 * update the truncate log dinode */
1059*ccd979bdSMark Fasheh 		status = ocfs2_journal_access(handle, tl_inode, tl_bh,
1060*ccd979bdSMark Fasheh 					      OCFS2_JOURNAL_ACCESS_WRITE);
1061*ccd979bdSMark Fasheh 		if (status < 0) {
1062*ccd979bdSMark Fasheh 			mlog_errno(status);
1063*ccd979bdSMark Fasheh 			goto bail;
1064*ccd979bdSMark Fasheh 		}
1065*ccd979bdSMark Fasheh 
1066*ccd979bdSMark Fasheh 		tl->tl_used = cpu_to_le16(i);
1067*ccd979bdSMark Fasheh 
1068*ccd979bdSMark Fasheh 		status = ocfs2_journal_dirty(handle, tl_bh);
1069*ccd979bdSMark Fasheh 		if (status < 0) {
1070*ccd979bdSMark Fasheh 			mlog_errno(status);
1071*ccd979bdSMark Fasheh 			goto bail;
1072*ccd979bdSMark Fasheh 		}
1073*ccd979bdSMark Fasheh 
1074*ccd979bdSMark Fasheh 		/* TODO: Perhaps we can calculate the bulk of the
1075*ccd979bdSMark Fasheh 		 * credits up front rather than extending like
1076*ccd979bdSMark Fasheh 		 * this. */
1077*ccd979bdSMark Fasheh 		status = ocfs2_extend_trans(handle,
1078*ccd979bdSMark Fasheh 					    OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC);
1079*ccd979bdSMark Fasheh 		if (status < 0) {
1080*ccd979bdSMark Fasheh 			mlog_errno(status);
1081*ccd979bdSMark Fasheh 			goto bail;
1082*ccd979bdSMark Fasheh 		}
1083*ccd979bdSMark Fasheh 
1084*ccd979bdSMark Fasheh 		rec = tl->tl_recs[i];
1085*ccd979bdSMark Fasheh 		start_blk = ocfs2_clusters_to_blocks(data_alloc_inode->i_sb,
1086*ccd979bdSMark Fasheh 						    le32_to_cpu(rec.t_start));
1087*ccd979bdSMark Fasheh 		num_clusters = le32_to_cpu(rec.t_clusters);
1088*ccd979bdSMark Fasheh 
1089*ccd979bdSMark Fasheh 		/* if start_blk is not set, we ignore the record as
1090*ccd979bdSMark Fasheh 		 * invalid. */
1091*ccd979bdSMark Fasheh 		if (start_blk) {
1092*ccd979bdSMark Fasheh 			mlog(0, "free record %d, start = %u, clusters = %u\n",
1093*ccd979bdSMark Fasheh 			     i, le32_to_cpu(rec.t_start), num_clusters);
1094*ccd979bdSMark Fasheh 
1095*ccd979bdSMark Fasheh 			status = ocfs2_free_clusters(handle, data_alloc_inode,
1096*ccd979bdSMark Fasheh 						     data_alloc_bh, start_blk,
1097*ccd979bdSMark Fasheh 						     num_clusters);
1098*ccd979bdSMark Fasheh 			if (status < 0) {
1099*ccd979bdSMark Fasheh 				mlog_errno(status);
1100*ccd979bdSMark Fasheh 				goto bail;
1101*ccd979bdSMark Fasheh 			}
1102*ccd979bdSMark Fasheh 		}
1103*ccd979bdSMark Fasheh 		i--;
1104*ccd979bdSMark Fasheh 	}
1105*ccd979bdSMark Fasheh 
1106*ccd979bdSMark Fasheh bail:
1107*ccd979bdSMark Fasheh 	mlog_exit(status);
1108*ccd979bdSMark Fasheh 	return status;
1109*ccd979bdSMark Fasheh }
1110*ccd979bdSMark Fasheh 
1111*ccd979bdSMark Fasheh /* Expects you to already be holding tl_inode->i_sem */
1112*ccd979bdSMark Fasheh static int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
1113*ccd979bdSMark Fasheh {
1114*ccd979bdSMark Fasheh 	int status;
1115*ccd979bdSMark Fasheh 	unsigned int num_to_flush;
1116*ccd979bdSMark Fasheh 	struct ocfs2_journal_handle *handle = NULL;
1117*ccd979bdSMark Fasheh 	struct inode *tl_inode = osb->osb_tl_inode;
1118*ccd979bdSMark Fasheh 	struct inode *data_alloc_inode = NULL;
1119*ccd979bdSMark Fasheh 	struct buffer_head *tl_bh = osb->osb_tl_bh;
1120*ccd979bdSMark Fasheh 	struct buffer_head *data_alloc_bh = NULL;
1121*ccd979bdSMark Fasheh 	struct ocfs2_dinode *di;
1122*ccd979bdSMark Fasheh 	struct ocfs2_truncate_log *tl;
1123*ccd979bdSMark Fasheh 
1124*ccd979bdSMark Fasheh 	mlog_entry_void();
1125*ccd979bdSMark Fasheh 
1126*ccd979bdSMark Fasheh 	BUG_ON(!down_trylock(&tl_inode->i_sem));
1127*ccd979bdSMark Fasheh 
1128*ccd979bdSMark Fasheh 	di = (struct ocfs2_dinode *) tl_bh->b_data;
1129*ccd979bdSMark Fasheh 	tl = &di->id2.i_dealloc;
1130*ccd979bdSMark Fasheh 	if (!OCFS2_IS_VALID_DINODE(di)) {
1131*ccd979bdSMark Fasheh 		OCFS2_RO_ON_INVALID_DINODE(osb->sb, di);
1132*ccd979bdSMark Fasheh 		status = -EIO;
1133*ccd979bdSMark Fasheh 		goto bail;
1134*ccd979bdSMark Fasheh 	}
1135*ccd979bdSMark Fasheh 
1136*ccd979bdSMark Fasheh 	num_to_flush = le16_to_cpu(tl->tl_used);
1137*ccd979bdSMark Fasheh 	mlog(0, "Flush %u records from truncate log #%"MLFu64"\n",
1138*ccd979bdSMark Fasheh 	     num_to_flush, OCFS2_I(tl_inode)->ip_blkno);
1139*ccd979bdSMark Fasheh 	if (!num_to_flush) {
1140*ccd979bdSMark Fasheh 		status = 0;
1141*ccd979bdSMark Fasheh 		goto bail;
1142*ccd979bdSMark Fasheh 	}
1143*ccd979bdSMark Fasheh 
1144*ccd979bdSMark Fasheh 	handle = ocfs2_alloc_handle(osb);
1145*ccd979bdSMark Fasheh 	if (!handle) {
1146*ccd979bdSMark Fasheh 		status = -ENOMEM;
1147*ccd979bdSMark Fasheh 		mlog_errno(status);
1148*ccd979bdSMark Fasheh 		goto bail;
1149*ccd979bdSMark Fasheh 	}
1150*ccd979bdSMark Fasheh 
1151*ccd979bdSMark Fasheh 	data_alloc_inode = ocfs2_get_system_file_inode(osb,
1152*ccd979bdSMark Fasheh 						       GLOBAL_BITMAP_SYSTEM_INODE,
1153*ccd979bdSMark Fasheh 						       OCFS2_INVALID_SLOT);
1154*ccd979bdSMark Fasheh 	if (!data_alloc_inode) {
1155*ccd979bdSMark Fasheh 		status = -EINVAL;
1156*ccd979bdSMark Fasheh 		mlog(ML_ERROR, "Could not get bitmap inode!\n");
1157*ccd979bdSMark Fasheh 		goto bail;
1158*ccd979bdSMark Fasheh 	}
1159*ccd979bdSMark Fasheh 
1160*ccd979bdSMark Fasheh 	ocfs2_handle_add_inode(handle, data_alloc_inode);
1161*ccd979bdSMark Fasheh 	status = ocfs2_meta_lock(data_alloc_inode, handle, &data_alloc_bh, 1);
1162*ccd979bdSMark Fasheh 	if (status < 0) {
1163*ccd979bdSMark Fasheh 		mlog_errno(status);
1164*ccd979bdSMark Fasheh 		goto bail;
1165*ccd979bdSMark Fasheh 	}
1166*ccd979bdSMark Fasheh 
1167*ccd979bdSMark Fasheh 	handle = ocfs2_start_trans(osb, handle, OCFS2_TRUNCATE_LOG_UPDATE);
1168*ccd979bdSMark Fasheh 	if (IS_ERR(handle)) {
1169*ccd979bdSMark Fasheh 		status = PTR_ERR(handle);
1170*ccd979bdSMark Fasheh 		handle = NULL;
1171*ccd979bdSMark Fasheh 		mlog_errno(status);
1172*ccd979bdSMark Fasheh 		goto bail;
1173*ccd979bdSMark Fasheh 	}
1174*ccd979bdSMark Fasheh 
1175*ccd979bdSMark Fasheh 	status = ocfs2_replay_truncate_records(osb, handle, data_alloc_inode,
1176*ccd979bdSMark Fasheh 					       data_alloc_bh);
1177*ccd979bdSMark Fasheh 	if (status < 0) {
1178*ccd979bdSMark Fasheh 		mlog_errno(status);
1179*ccd979bdSMark Fasheh 		goto bail;
1180*ccd979bdSMark Fasheh 	}
1181*ccd979bdSMark Fasheh 
1182*ccd979bdSMark Fasheh bail:
1183*ccd979bdSMark Fasheh 	if (handle)
1184*ccd979bdSMark Fasheh 		ocfs2_commit_trans(handle);
1185*ccd979bdSMark Fasheh 
1186*ccd979bdSMark Fasheh 	if (data_alloc_inode)
1187*ccd979bdSMark Fasheh 		iput(data_alloc_inode);
1188*ccd979bdSMark Fasheh 
1189*ccd979bdSMark Fasheh 	if (data_alloc_bh)
1190*ccd979bdSMark Fasheh 		brelse(data_alloc_bh);
1191*ccd979bdSMark Fasheh 
1192*ccd979bdSMark Fasheh 	mlog_exit(status);
1193*ccd979bdSMark Fasheh 	return status;
1194*ccd979bdSMark Fasheh }
1195*ccd979bdSMark Fasheh 
1196*ccd979bdSMark Fasheh int ocfs2_flush_truncate_log(struct ocfs2_super *osb)
1197*ccd979bdSMark Fasheh {
1198*ccd979bdSMark Fasheh 	int status;
1199*ccd979bdSMark Fasheh 	struct inode *tl_inode = osb->osb_tl_inode;
1200*ccd979bdSMark Fasheh 
1201*ccd979bdSMark Fasheh 	down(&tl_inode->i_sem);
1202*ccd979bdSMark Fasheh 	status = __ocfs2_flush_truncate_log(osb);
1203*ccd979bdSMark Fasheh 	up(&tl_inode->i_sem);
1204*ccd979bdSMark Fasheh 
1205*ccd979bdSMark Fasheh 	return status;
1206*ccd979bdSMark Fasheh }
1207*ccd979bdSMark Fasheh 
/*
 * Work function for the truncate log flush: data is the
 * struct ocfs2_super whose truncate log is flushed. A failure is
 * logged; there is no caller to return it to.
 */
static void ocfs2_truncate_log_worker(void *data)
{
	struct ocfs2_super *osb = data;
	int ret;

	mlog_entry_void();

	ret = ocfs2_flush_truncate_log(osb);
	if (ret < 0)
		mlog_errno(ret);

	mlog_exit(ret);
}
1221*ccd979bdSMark Fasheh 
1222*ccd979bdSMark Fasheh #define OCFS2_TRUNCATE_LOG_FLUSH_INTERVAL (2 * HZ)
1223*ccd979bdSMark Fasheh void ocfs2_schedule_truncate_log_flush(struct ocfs2_super *osb,
1224*ccd979bdSMark Fasheh 				       int cancel)
1225*ccd979bdSMark Fasheh {
1226*ccd979bdSMark Fasheh 	if (osb->osb_tl_inode) {
1227*ccd979bdSMark Fasheh 		/* We want to push off log flushes while truncates are
1228*ccd979bdSMark Fasheh 		 * still running. */
1229*ccd979bdSMark Fasheh 		if (cancel)
1230*ccd979bdSMark Fasheh 			cancel_delayed_work(&osb->osb_truncate_log_wq);
1231*ccd979bdSMark Fasheh 
1232*ccd979bdSMark Fasheh 		queue_delayed_work(ocfs2_wq, &osb->osb_truncate_log_wq,
1233*ccd979bdSMark Fasheh 				   OCFS2_TRUNCATE_LOG_FLUSH_INTERVAL);
1234*ccd979bdSMark Fasheh 	}
1235*ccd979bdSMark Fasheh }
1236*ccd979bdSMark Fasheh 
1237*ccd979bdSMark Fasheh static int ocfs2_get_truncate_log_info(struct ocfs2_super *osb,
1238*ccd979bdSMark Fasheh 				       int slot_num,
1239*ccd979bdSMark Fasheh 				       struct inode **tl_inode,
1240*ccd979bdSMark Fasheh 				       struct buffer_head **tl_bh)
1241*ccd979bdSMark Fasheh {
1242*ccd979bdSMark Fasheh 	int status;
1243*ccd979bdSMark Fasheh 	struct inode *inode = NULL;
1244*ccd979bdSMark Fasheh 	struct buffer_head *bh = NULL;
1245*ccd979bdSMark Fasheh 
1246*ccd979bdSMark Fasheh 	inode = ocfs2_get_system_file_inode(osb,
1247*ccd979bdSMark Fasheh 					   TRUNCATE_LOG_SYSTEM_INODE,
1248*ccd979bdSMark Fasheh 					   slot_num);
1249*ccd979bdSMark Fasheh 	if (!inode) {
1250*ccd979bdSMark Fasheh 		status = -EINVAL;
1251*ccd979bdSMark Fasheh 		mlog(ML_ERROR, "Could not get load truncate log inode!\n");
1252*ccd979bdSMark Fasheh 		goto bail;
1253*ccd979bdSMark Fasheh 	}
1254*ccd979bdSMark Fasheh 
1255*ccd979bdSMark Fasheh 	status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &bh,
1256*ccd979bdSMark Fasheh 				  OCFS2_BH_CACHED, inode);
1257*ccd979bdSMark Fasheh 	if (status < 0) {
1258*ccd979bdSMark Fasheh 		iput(inode);
1259*ccd979bdSMark Fasheh 		mlog_errno(status);
1260*ccd979bdSMark Fasheh 		goto bail;
1261*ccd979bdSMark Fasheh 	}
1262*ccd979bdSMark Fasheh 
1263*ccd979bdSMark Fasheh 	*tl_inode = inode;
1264*ccd979bdSMark Fasheh 	*tl_bh    = bh;
1265*ccd979bdSMark Fasheh bail:
1266*ccd979bdSMark Fasheh 	mlog_exit(status);
1267*ccd979bdSMark Fasheh 	return status;
1268*ccd979bdSMark Fasheh }
1269*ccd979bdSMark Fasheh 
1270*ccd979bdSMark Fasheh /* called during the 1st stage of node recovery. we stamp a clean
1271*ccd979bdSMark Fasheh  * truncate log and pass back a copy for processing later. if the
1272*ccd979bdSMark Fasheh  * truncate log does not require processing, a *tl_copy is set to
1273*ccd979bdSMark Fasheh  * NULL. */
1274*ccd979bdSMark Fasheh int ocfs2_begin_truncate_log_recovery(struct ocfs2_super *osb,
1275*ccd979bdSMark Fasheh 				      int slot_num,
1276*ccd979bdSMark Fasheh 				      struct ocfs2_dinode **tl_copy)
1277*ccd979bdSMark Fasheh {
1278*ccd979bdSMark Fasheh 	int status;
1279*ccd979bdSMark Fasheh 	struct inode *tl_inode = NULL;
1280*ccd979bdSMark Fasheh 	struct buffer_head *tl_bh = NULL;
1281*ccd979bdSMark Fasheh 	struct ocfs2_dinode *di;
1282*ccd979bdSMark Fasheh 	struct ocfs2_truncate_log *tl;
1283*ccd979bdSMark Fasheh 
1284*ccd979bdSMark Fasheh 	*tl_copy = NULL;
1285*ccd979bdSMark Fasheh 
1286*ccd979bdSMark Fasheh 	mlog(0, "recover truncate log from slot %d\n", slot_num);
1287*ccd979bdSMark Fasheh 
1288*ccd979bdSMark Fasheh 	status = ocfs2_get_truncate_log_info(osb, slot_num, &tl_inode, &tl_bh);
1289*ccd979bdSMark Fasheh 	if (status < 0) {
1290*ccd979bdSMark Fasheh 		mlog_errno(status);
1291*ccd979bdSMark Fasheh 		goto bail;
1292*ccd979bdSMark Fasheh 	}
1293*ccd979bdSMark Fasheh 
1294*ccd979bdSMark Fasheh 	di = (struct ocfs2_dinode *) tl_bh->b_data;
1295*ccd979bdSMark Fasheh 	tl = &di->id2.i_dealloc;
1296*ccd979bdSMark Fasheh 	if (!OCFS2_IS_VALID_DINODE(di)) {
1297*ccd979bdSMark Fasheh 		OCFS2_RO_ON_INVALID_DINODE(tl_inode->i_sb, di);
1298*ccd979bdSMark Fasheh 		status = -EIO;
1299*ccd979bdSMark Fasheh 		goto bail;
1300*ccd979bdSMark Fasheh 	}
1301*ccd979bdSMark Fasheh 
1302*ccd979bdSMark Fasheh 	if (le16_to_cpu(tl->tl_used)) {
1303*ccd979bdSMark Fasheh 		mlog(0, "We'll have %u logs to recover\n",
1304*ccd979bdSMark Fasheh 		     le16_to_cpu(tl->tl_used));
1305*ccd979bdSMark Fasheh 
1306*ccd979bdSMark Fasheh 		*tl_copy = kmalloc(tl_bh->b_size, GFP_KERNEL);
1307*ccd979bdSMark Fasheh 		if (!(*tl_copy)) {
1308*ccd979bdSMark Fasheh 			status = -ENOMEM;
1309*ccd979bdSMark Fasheh 			mlog_errno(status);
1310*ccd979bdSMark Fasheh 			goto bail;
1311*ccd979bdSMark Fasheh 		}
1312*ccd979bdSMark Fasheh 
1313*ccd979bdSMark Fasheh 		/* Assuming the write-out below goes well, this copy
1314*ccd979bdSMark Fasheh 		 * will be passed back to recovery for processing. */
1315*ccd979bdSMark Fasheh 		memcpy(*tl_copy, tl_bh->b_data, tl_bh->b_size);
1316*ccd979bdSMark Fasheh 
1317*ccd979bdSMark Fasheh 		/* All we need to do to clear the truncate log is set
1318*ccd979bdSMark Fasheh 		 * tl_used. */
1319*ccd979bdSMark Fasheh 		tl->tl_used = 0;
1320*ccd979bdSMark Fasheh 
1321*ccd979bdSMark Fasheh 		status = ocfs2_write_block(osb, tl_bh, tl_inode);
1322*ccd979bdSMark Fasheh 		if (status < 0) {
1323*ccd979bdSMark Fasheh 			mlog_errno(status);
1324*ccd979bdSMark Fasheh 			goto bail;
1325*ccd979bdSMark Fasheh 		}
1326*ccd979bdSMark Fasheh 	}
1327*ccd979bdSMark Fasheh 
1328*ccd979bdSMark Fasheh bail:
1329*ccd979bdSMark Fasheh 	if (tl_inode)
1330*ccd979bdSMark Fasheh 		iput(tl_inode);
1331*ccd979bdSMark Fasheh 	if (tl_bh)
1332*ccd979bdSMark Fasheh 		brelse(tl_bh);
1333*ccd979bdSMark Fasheh 
1334*ccd979bdSMark Fasheh 	if (status < 0 && (*tl_copy)) {
1335*ccd979bdSMark Fasheh 		kfree(*tl_copy);
1336*ccd979bdSMark Fasheh 		*tl_copy = NULL;
1337*ccd979bdSMark Fasheh 	}
1338*ccd979bdSMark Fasheh 
1339*ccd979bdSMark Fasheh 	mlog_exit(status);
1340*ccd979bdSMark Fasheh 	return status;
1341*ccd979bdSMark Fasheh }
1342*ccd979bdSMark Fasheh 
1343*ccd979bdSMark Fasheh int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb,
1344*ccd979bdSMark Fasheh 					 struct ocfs2_dinode *tl_copy)
1345*ccd979bdSMark Fasheh {
1346*ccd979bdSMark Fasheh 	int status = 0;
1347*ccd979bdSMark Fasheh 	int i;
1348*ccd979bdSMark Fasheh 	unsigned int clusters, num_recs, start_cluster;
1349*ccd979bdSMark Fasheh 	u64 start_blk;
1350*ccd979bdSMark Fasheh 	struct ocfs2_journal_handle *handle;
1351*ccd979bdSMark Fasheh 	struct inode *tl_inode = osb->osb_tl_inode;
1352*ccd979bdSMark Fasheh 	struct ocfs2_truncate_log *tl;
1353*ccd979bdSMark Fasheh 
1354*ccd979bdSMark Fasheh 	mlog_entry_void();
1355*ccd979bdSMark Fasheh 
1356*ccd979bdSMark Fasheh 	if (OCFS2_I(tl_inode)->ip_blkno == le64_to_cpu(tl_copy->i_blkno)) {
1357*ccd979bdSMark Fasheh 		mlog(ML_ERROR, "Asked to recover my own truncate log!\n");
1358*ccd979bdSMark Fasheh 		return -EINVAL;
1359*ccd979bdSMark Fasheh 	}
1360*ccd979bdSMark Fasheh 
1361*ccd979bdSMark Fasheh 	tl = &tl_copy->id2.i_dealloc;
1362*ccd979bdSMark Fasheh 	num_recs = le16_to_cpu(tl->tl_used);
1363*ccd979bdSMark Fasheh 	mlog(0, "cleanup %u records from %"MLFu64"\n", num_recs,
1364*ccd979bdSMark Fasheh 	     tl_copy->i_blkno);
1365*ccd979bdSMark Fasheh 
1366*ccd979bdSMark Fasheh 	down(&tl_inode->i_sem);
1367*ccd979bdSMark Fasheh 	for(i = 0; i < num_recs; i++) {
1368*ccd979bdSMark Fasheh 		if (ocfs2_truncate_log_needs_flush(osb)) {
1369*ccd979bdSMark Fasheh 			status = __ocfs2_flush_truncate_log(osb);
1370*ccd979bdSMark Fasheh 			if (status < 0) {
1371*ccd979bdSMark Fasheh 				mlog_errno(status);
1372*ccd979bdSMark Fasheh 				goto bail_up;
1373*ccd979bdSMark Fasheh 			}
1374*ccd979bdSMark Fasheh 		}
1375*ccd979bdSMark Fasheh 
1376*ccd979bdSMark Fasheh 		handle = ocfs2_start_trans(osb, NULL,
1377*ccd979bdSMark Fasheh 					   OCFS2_TRUNCATE_LOG_UPDATE);
1378*ccd979bdSMark Fasheh 		if (IS_ERR(handle)) {
1379*ccd979bdSMark Fasheh 			status = PTR_ERR(handle);
1380*ccd979bdSMark Fasheh 			mlog_errno(status);
1381*ccd979bdSMark Fasheh 			goto bail_up;
1382*ccd979bdSMark Fasheh 		}
1383*ccd979bdSMark Fasheh 
1384*ccd979bdSMark Fasheh 		clusters = le32_to_cpu(tl->tl_recs[i].t_clusters);
1385*ccd979bdSMark Fasheh 		start_cluster = le32_to_cpu(tl->tl_recs[i].t_start);
1386*ccd979bdSMark Fasheh 		start_blk = ocfs2_clusters_to_blocks(osb->sb, start_cluster);
1387*ccd979bdSMark Fasheh 
1388*ccd979bdSMark Fasheh 		status = ocfs2_truncate_log_append(osb, handle,
1389*ccd979bdSMark Fasheh 						   start_blk, clusters);
1390*ccd979bdSMark Fasheh 		ocfs2_commit_trans(handle);
1391*ccd979bdSMark Fasheh 		if (status < 0) {
1392*ccd979bdSMark Fasheh 			mlog_errno(status);
1393*ccd979bdSMark Fasheh 			goto bail_up;
1394*ccd979bdSMark Fasheh 		}
1395*ccd979bdSMark Fasheh 	}
1396*ccd979bdSMark Fasheh 
1397*ccd979bdSMark Fasheh bail_up:
1398*ccd979bdSMark Fasheh 	up(&tl_inode->i_sem);
1399*ccd979bdSMark Fasheh 
1400*ccd979bdSMark Fasheh 	mlog_exit(status);
1401*ccd979bdSMark Fasheh 	return status;
1402*ccd979bdSMark Fasheh }
1403*ccd979bdSMark Fasheh 
1404*ccd979bdSMark Fasheh void ocfs2_truncate_log_shutdown(struct ocfs2_super *osb)
1405*ccd979bdSMark Fasheh {
1406*ccd979bdSMark Fasheh 	int status;
1407*ccd979bdSMark Fasheh 	struct inode *tl_inode = osb->osb_tl_inode;
1408*ccd979bdSMark Fasheh 
1409*ccd979bdSMark Fasheh 	mlog_entry_void();
1410*ccd979bdSMark Fasheh 
1411*ccd979bdSMark Fasheh 	if (tl_inode) {
1412*ccd979bdSMark Fasheh 		cancel_delayed_work(&osb->osb_truncate_log_wq);
1413*ccd979bdSMark Fasheh 		flush_workqueue(ocfs2_wq);
1414*ccd979bdSMark Fasheh 
1415*ccd979bdSMark Fasheh 		status = ocfs2_flush_truncate_log(osb);
1416*ccd979bdSMark Fasheh 		if (status < 0)
1417*ccd979bdSMark Fasheh 			mlog_errno(status);
1418*ccd979bdSMark Fasheh 
1419*ccd979bdSMark Fasheh 		brelse(osb->osb_tl_bh);
1420*ccd979bdSMark Fasheh 		iput(osb->osb_tl_inode);
1421*ccd979bdSMark Fasheh 	}
1422*ccd979bdSMark Fasheh 
1423*ccd979bdSMark Fasheh 	mlog_exit_void();
1424*ccd979bdSMark Fasheh }
1425*ccd979bdSMark Fasheh 
1426*ccd979bdSMark Fasheh int ocfs2_truncate_log_init(struct ocfs2_super *osb)
1427*ccd979bdSMark Fasheh {
1428*ccd979bdSMark Fasheh 	int status;
1429*ccd979bdSMark Fasheh 	struct inode *tl_inode = NULL;
1430*ccd979bdSMark Fasheh 	struct buffer_head *tl_bh = NULL;
1431*ccd979bdSMark Fasheh 
1432*ccd979bdSMark Fasheh 	mlog_entry_void();
1433*ccd979bdSMark Fasheh 
1434*ccd979bdSMark Fasheh 	status = ocfs2_get_truncate_log_info(osb,
1435*ccd979bdSMark Fasheh 					     osb->slot_num,
1436*ccd979bdSMark Fasheh 					     &tl_inode,
1437*ccd979bdSMark Fasheh 					     &tl_bh);
1438*ccd979bdSMark Fasheh 	if (status < 0)
1439*ccd979bdSMark Fasheh 		mlog_errno(status);
1440*ccd979bdSMark Fasheh 
1441*ccd979bdSMark Fasheh 	/* ocfs2_truncate_log_shutdown keys on the existence of
1442*ccd979bdSMark Fasheh 	 * osb->osb_tl_inode so we don't set any of the osb variables
1443*ccd979bdSMark Fasheh 	 * until we're sure all is well. */
1444*ccd979bdSMark Fasheh 	INIT_WORK(&osb->osb_truncate_log_wq, ocfs2_truncate_log_worker, osb);
1445*ccd979bdSMark Fasheh 	osb->osb_tl_bh    = tl_bh;
1446*ccd979bdSMark Fasheh 	osb->osb_tl_inode = tl_inode;
1447*ccd979bdSMark Fasheh 
1448*ccd979bdSMark Fasheh 	mlog_exit(status);
1449*ccd979bdSMark Fasheh 	return status;
1450*ccd979bdSMark Fasheh }
1451*ccd979bdSMark Fasheh 
1452*ccd979bdSMark Fasheh /* This function will figure out whether the currently last extent
1453*ccd979bdSMark Fasheh  * block will be deleted, and if it will, what the new last extent
1454*ccd979bdSMark Fasheh  * block will be so we can update his h_next_leaf_blk field, as well
1455*ccd979bdSMark Fasheh  * as the dinodes i_last_eb_blk */
1456*ccd979bdSMark Fasheh static int ocfs2_find_new_last_ext_blk(struct ocfs2_super *osb,
1457*ccd979bdSMark Fasheh 				       struct inode *inode,
1458*ccd979bdSMark Fasheh 				       struct ocfs2_dinode *fe,
1459*ccd979bdSMark Fasheh 				       u32 new_i_clusters,
1460*ccd979bdSMark Fasheh 				       struct buffer_head *old_last_eb,
1461*ccd979bdSMark Fasheh 				       struct buffer_head **new_last_eb)
1462*ccd979bdSMark Fasheh {
1463*ccd979bdSMark Fasheh 	int i, status = 0;
1464*ccd979bdSMark Fasheh 	u64 block = 0;
1465*ccd979bdSMark Fasheh 	struct ocfs2_extent_block *eb;
1466*ccd979bdSMark Fasheh 	struct ocfs2_extent_list *el;
1467*ccd979bdSMark Fasheh 	struct buffer_head *bh = NULL;
1468*ccd979bdSMark Fasheh 
1469*ccd979bdSMark Fasheh 	*new_last_eb = NULL;
1470*ccd979bdSMark Fasheh 
1471*ccd979bdSMark Fasheh 	if (!OCFS2_IS_VALID_DINODE(fe)) {
1472*ccd979bdSMark Fasheh 		OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe);
1473*ccd979bdSMark Fasheh 		status = -EIO;
1474*ccd979bdSMark Fasheh 		goto bail;
1475*ccd979bdSMark Fasheh 	}
1476*ccd979bdSMark Fasheh 
1477*ccd979bdSMark Fasheh 	/* we have no tree, so of course, no last_eb. */
1478*ccd979bdSMark Fasheh 	if (!fe->id2.i_list.l_tree_depth)
1479*ccd979bdSMark Fasheh 		goto bail;
1480*ccd979bdSMark Fasheh 
1481*ccd979bdSMark Fasheh 	/* trunc to zero special case - this makes tree_depth = 0
1482*ccd979bdSMark Fasheh 	 * regardless of what it is.  */
1483*ccd979bdSMark Fasheh 	if (!new_i_clusters)
1484*ccd979bdSMark Fasheh 		goto bail;
1485*ccd979bdSMark Fasheh 
1486*ccd979bdSMark Fasheh 	eb = (struct ocfs2_extent_block *) old_last_eb->b_data;
1487*ccd979bdSMark Fasheh 	el = &(eb->h_list);
1488*ccd979bdSMark Fasheh 	BUG_ON(!el->l_next_free_rec);
1489*ccd979bdSMark Fasheh 
1490*ccd979bdSMark Fasheh 	/* Make sure that this guy will actually be empty after we
1491*ccd979bdSMark Fasheh 	 * clear away the data. */
1492*ccd979bdSMark Fasheh 	if (le32_to_cpu(el->l_recs[0].e_cpos) < new_i_clusters)
1493*ccd979bdSMark Fasheh 		goto bail;
1494*ccd979bdSMark Fasheh 
1495*ccd979bdSMark Fasheh 	/* Ok, at this point, we know that last_eb will definitely
1496*ccd979bdSMark Fasheh 	 * change, so lets traverse the tree and find the second to
1497*ccd979bdSMark Fasheh 	 * last extent block. */
1498*ccd979bdSMark Fasheh 	el = &(fe->id2.i_list);
1499*ccd979bdSMark Fasheh 	/* go down the tree, */
1500*ccd979bdSMark Fasheh 	do {
1501*ccd979bdSMark Fasheh 		for(i = (le16_to_cpu(el->l_next_free_rec) - 1); i >= 0; i--) {
1502*ccd979bdSMark Fasheh 			if (le32_to_cpu(el->l_recs[i].e_cpos) <
1503*ccd979bdSMark Fasheh 			    new_i_clusters) {
1504*ccd979bdSMark Fasheh 				block = le64_to_cpu(el->l_recs[i].e_blkno);
1505*ccd979bdSMark Fasheh 				break;
1506*ccd979bdSMark Fasheh 			}
1507*ccd979bdSMark Fasheh 		}
1508*ccd979bdSMark Fasheh 		BUG_ON(i < 0);
1509*ccd979bdSMark Fasheh 
1510*ccd979bdSMark Fasheh 		if (bh) {
1511*ccd979bdSMark Fasheh 			brelse(bh);
1512*ccd979bdSMark Fasheh 			bh = NULL;
1513*ccd979bdSMark Fasheh 		}
1514*ccd979bdSMark Fasheh 
1515*ccd979bdSMark Fasheh 		status = ocfs2_read_block(osb, block, &bh, OCFS2_BH_CACHED,
1516*ccd979bdSMark Fasheh 					 inode);
1517*ccd979bdSMark Fasheh 		if (status < 0) {
1518*ccd979bdSMark Fasheh 			mlog_errno(status);
1519*ccd979bdSMark Fasheh 			goto bail;
1520*ccd979bdSMark Fasheh 		}
1521*ccd979bdSMark Fasheh 		eb = (struct ocfs2_extent_block *) bh->b_data;
1522*ccd979bdSMark Fasheh 		el = &eb->h_list;
1523*ccd979bdSMark Fasheh 		if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
1524*ccd979bdSMark Fasheh 			OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
1525*ccd979bdSMark Fasheh 			status = -EIO;
1526*ccd979bdSMark Fasheh 			goto bail;
1527*ccd979bdSMark Fasheh 		}
1528*ccd979bdSMark Fasheh 	} while (el->l_tree_depth);
1529*ccd979bdSMark Fasheh 
1530*ccd979bdSMark Fasheh 	*new_last_eb = bh;
1531*ccd979bdSMark Fasheh 	get_bh(*new_last_eb);
1532*ccd979bdSMark Fasheh 	mlog(0, "returning block %"MLFu64"\n", le64_to_cpu(eb->h_blkno));
1533*ccd979bdSMark Fasheh bail:
1534*ccd979bdSMark Fasheh 	if (bh)
1535*ccd979bdSMark Fasheh 		brelse(bh);
1536*ccd979bdSMark Fasheh 
1537*ccd979bdSMark Fasheh 	return status;
1538*ccd979bdSMark Fasheh }
1539*ccd979bdSMark Fasheh 
1540*ccd979bdSMark Fasheh static int ocfs2_do_truncate(struct ocfs2_super *osb,
1541*ccd979bdSMark Fasheh 			     unsigned int clusters_to_del,
1542*ccd979bdSMark Fasheh 			     struct inode *inode,
1543*ccd979bdSMark Fasheh 			     struct buffer_head *fe_bh,
1544*ccd979bdSMark Fasheh 			     struct buffer_head *old_last_eb_bh,
1545*ccd979bdSMark Fasheh 			     struct ocfs2_journal_handle *handle,
1546*ccd979bdSMark Fasheh 			     struct ocfs2_truncate_context *tc)
1547*ccd979bdSMark Fasheh {
1548*ccd979bdSMark Fasheh 	int status, i, depth;
1549*ccd979bdSMark Fasheh 	struct ocfs2_dinode *fe;
1550*ccd979bdSMark Fasheh 	struct ocfs2_extent_block *eb;
1551*ccd979bdSMark Fasheh 	struct ocfs2_extent_block *last_eb = NULL;
1552*ccd979bdSMark Fasheh 	struct ocfs2_extent_list *el;
1553*ccd979bdSMark Fasheh 	struct buffer_head *eb_bh = NULL;
1554*ccd979bdSMark Fasheh 	struct buffer_head *last_eb_bh = NULL;
1555*ccd979bdSMark Fasheh 	u64 next_eb = 0;
1556*ccd979bdSMark Fasheh 	u64 delete_blk = 0;
1557*ccd979bdSMark Fasheh 
1558*ccd979bdSMark Fasheh 	fe = (struct ocfs2_dinode *) fe_bh->b_data;
1559*ccd979bdSMark Fasheh 
1560*ccd979bdSMark Fasheh 	status = ocfs2_find_new_last_ext_blk(osb,
1561*ccd979bdSMark Fasheh 					     inode,
1562*ccd979bdSMark Fasheh 					     fe,
1563*ccd979bdSMark Fasheh 					     le32_to_cpu(fe->i_clusters) -
1564*ccd979bdSMark Fasheh 					     		clusters_to_del,
1565*ccd979bdSMark Fasheh 					     old_last_eb_bh,
1566*ccd979bdSMark Fasheh 					     &last_eb_bh);
1567*ccd979bdSMark Fasheh 	if (status < 0) {
1568*ccd979bdSMark Fasheh 		mlog_errno(status);
1569*ccd979bdSMark Fasheh 		goto bail;
1570*ccd979bdSMark Fasheh 	}
1571*ccd979bdSMark Fasheh 	if (last_eb_bh)
1572*ccd979bdSMark Fasheh 		last_eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
1573*ccd979bdSMark Fasheh 
1574*ccd979bdSMark Fasheh 	status = ocfs2_journal_access(handle, inode, fe_bh,
1575*ccd979bdSMark Fasheh 				      OCFS2_JOURNAL_ACCESS_WRITE);
1576*ccd979bdSMark Fasheh 	if (status < 0) {
1577*ccd979bdSMark Fasheh 		mlog_errno(status);
1578*ccd979bdSMark Fasheh 		goto bail;
1579*ccd979bdSMark Fasheh 	}
1580*ccd979bdSMark Fasheh 	el = &(fe->id2.i_list);
1581*ccd979bdSMark Fasheh 
1582*ccd979bdSMark Fasheh 	spin_lock(&OCFS2_I(inode)->ip_lock);
1583*ccd979bdSMark Fasheh 	OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters) -
1584*ccd979bdSMark Fasheh 				      clusters_to_del;
1585*ccd979bdSMark Fasheh 	spin_unlock(&OCFS2_I(inode)->ip_lock);
1586*ccd979bdSMark Fasheh 	le32_add_cpu(&fe->i_clusters, -clusters_to_del);
1587*ccd979bdSMark Fasheh 	fe->i_mtime = cpu_to_le64(CURRENT_TIME.tv_sec);
1588*ccd979bdSMark Fasheh 	fe->i_mtime_nsec = cpu_to_le32(CURRENT_TIME.tv_nsec);
1589*ccd979bdSMark Fasheh 
1590*ccd979bdSMark Fasheh 	i = le16_to_cpu(el->l_next_free_rec) - 1;
1591*ccd979bdSMark Fasheh 
1592*ccd979bdSMark Fasheh 	BUG_ON(le32_to_cpu(el->l_recs[i].e_clusters) < clusters_to_del);
1593*ccd979bdSMark Fasheh 	le32_add_cpu(&el->l_recs[i].e_clusters, -clusters_to_del);
1594*ccd979bdSMark Fasheh 	/* tree depth zero, we can just delete the clusters, otherwise
1595*ccd979bdSMark Fasheh 	 * we need to record the offset of the next level extent block
1596*ccd979bdSMark Fasheh 	 * as we may overwrite it. */
1597*ccd979bdSMark Fasheh 	if (!el->l_tree_depth)
1598*ccd979bdSMark Fasheh 		delete_blk = le64_to_cpu(el->l_recs[i].e_blkno)
1599*ccd979bdSMark Fasheh 			+ ocfs2_clusters_to_blocks(osb->sb,
1600*ccd979bdSMark Fasheh 					le32_to_cpu(el->l_recs[i].e_clusters));
1601*ccd979bdSMark Fasheh 	else
1602*ccd979bdSMark Fasheh 		next_eb = le64_to_cpu(el->l_recs[i].e_blkno);
1603*ccd979bdSMark Fasheh 
1604*ccd979bdSMark Fasheh 	if (!el->l_recs[i].e_clusters) {
1605*ccd979bdSMark Fasheh 		/* if we deleted the whole extent record, then clear
1606*ccd979bdSMark Fasheh 		 * out the other fields and update the extent
1607*ccd979bdSMark Fasheh 		 * list. For depth > 0 trees, we've already recorded
1608*ccd979bdSMark Fasheh 		 * the extent block in 'next_eb' */
1609*ccd979bdSMark Fasheh 		el->l_recs[i].e_cpos = 0;
1610*ccd979bdSMark Fasheh 		el->l_recs[i].e_blkno = 0;
1611*ccd979bdSMark Fasheh 		BUG_ON(!el->l_next_free_rec);
1612*ccd979bdSMark Fasheh 		le16_add_cpu(&el->l_next_free_rec, -1);
1613*ccd979bdSMark Fasheh 	}
1614*ccd979bdSMark Fasheh 
1615*ccd979bdSMark Fasheh 	depth = le16_to_cpu(el->l_tree_depth);
1616*ccd979bdSMark Fasheh 	if (!fe->i_clusters) {
1617*ccd979bdSMark Fasheh 		/* trunc to zero is a special case. */
1618*ccd979bdSMark Fasheh 		el->l_tree_depth = 0;
1619*ccd979bdSMark Fasheh 		fe->i_last_eb_blk = 0;
1620*ccd979bdSMark Fasheh 	} else if (last_eb)
1621*ccd979bdSMark Fasheh 		fe->i_last_eb_blk = last_eb->h_blkno;
1622*ccd979bdSMark Fasheh 
1623*ccd979bdSMark Fasheh 	status = ocfs2_journal_dirty(handle, fe_bh);
1624*ccd979bdSMark Fasheh 	if (status < 0) {
1625*ccd979bdSMark Fasheh 		mlog_errno(status);
1626*ccd979bdSMark Fasheh 		goto bail;
1627*ccd979bdSMark Fasheh 	}
1628*ccd979bdSMark Fasheh 
1629*ccd979bdSMark Fasheh 	if (last_eb) {
1630*ccd979bdSMark Fasheh 		/* If there will be a new last extent block, then by
1631*ccd979bdSMark Fasheh 		 * definition, there cannot be any leaves to the right of
1632*ccd979bdSMark Fasheh 		 * him. */
1633*ccd979bdSMark Fasheh 		status = ocfs2_journal_access(handle, inode, last_eb_bh,
1634*ccd979bdSMark Fasheh 					      OCFS2_JOURNAL_ACCESS_WRITE);
1635*ccd979bdSMark Fasheh 		if (status < 0) {
1636*ccd979bdSMark Fasheh 			mlog_errno(status);
1637*ccd979bdSMark Fasheh 			goto bail;
1638*ccd979bdSMark Fasheh 		}
1639*ccd979bdSMark Fasheh 		last_eb->h_next_leaf_blk = 0;
1640*ccd979bdSMark Fasheh 		status = ocfs2_journal_dirty(handle, last_eb_bh);
1641*ccd979bdSMark Fasheh 		if (status < 0) {
1642*ccd979bdSMark Fasheh 			mlog_errno(status);
1643*ccd979bdSMark Fasheh 			goto bail;
1644*ccd979bdSMark Fasheh 		}
1645*ccd979bdSMark Fasheh 	}
1646*ccd979bdSMark Fasheh 
1647*ccd979bdSMark Fasheh 	/* if our tree depth > 0, update all the tree blocks below us. */
1648*ccd979bdSMark Fasheh 	while (depth) {
1649*ccd979bdSMark Fasheh 		mlog(0, "traveling tree (depth = %d, next_eb = %"MLFu64")\n",
1650*ccd979bdSMark Fasheh 		     depth,  next_eb);
1651*ccd979bdSMark Fasheh 		status = ocfs2_read_block(osb, next_eb, &eb_bh,
1652*ccd979bdSMark Fasheh 					  OCFS2_BH_CACHED, inode);
1653*ccd979bdSMark Fasheh 		if (status < 0) {
1654*ccd979bdSMark Fasheh 			mlog_errno(status);
1655*ccd979bdSMark Fasheh 			goto bail;
1656*ccd979bdSMark Fasheh 		}
1657*ccd979bdSMark Fasheh 		eb = (struct ocfs2_extent_block *)eb_bh->b_data;
1658*ccd979bdSMark Fasheh 		if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
1659*ccd979bdSMark Fasheh 			OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
1660*ccd979bdSMark Fasheh 			status = -EIO;
1661*ccd979bdSMark Fasheh 			goto bail;
1662*ccd979bdSMark Fasheh 		}
1663*ccd979bdSMark Fasheh 		el = &(eb->h_list);
1664*ccd979bdSMark Fasheh 
1665*ccd979bdSMark Fasheh 		status = ocfs2_journal_access(handle, inode, eb_bh,
1666*ccd979bdSMark Fasheh 					      OCFS2_JOURNAL_ACCESS_WRITE);
1667*ccd979bdSMark Fasheh 		if (status < 0) {
1668*ccd979bdSMark Fasheh 			mlog_errno(status);
1669*ccd979bdSMark Fasheh 			goto bail;
1670*ccd979bdSMark Fasheh 		}
1671*ccd979bdSMark Fasheh 
1672*ccd979bdSMark Fasheh 		BUG_ON(le16_to_cpu(el->l_next_free_rec) == 0);
1673*ccd979bdSMark Fasheh 		BUG_ON(depth != (le16_to_cpu(el->l_tree_depth) + 1));
1674*ccd979bdSMark Fasheh 
1675*ccd979bdSMark Fasheh 		i = le16_to_cpu(el->l_next_free_rec) - 1;
1676*ccd979bdSMark Fasheh 
1677*ccd979bdSMark Fasheh 		mlog(0, "extent block %"MLFu64", before: record %d: "
1678*ccd979bdSMark Fasheh 		     "(%u, %u, %"MLFu64"), next = %u\n",
1679*ccd979bdSMark Fasheh 		     le64_to_cpu(eb->h_blkno), i,
1680*ccd979bdSMark Fasheh 		     le32_to_cpu(el->l_recs[i].e_cpos),
1681*ccd979bdSMark Fasheh 		     le32_to_cpu(el->l_recs[i].e_clusters),
1682*ccd979bdSMark Fasheh 		     le64_to_cpu(el->l_recs[i].e_blkno),
1683*ccd979bdSMark Fasheh 		     le16_to_cpu(el->l_next_free_rec));
1684*ccd979bdSMark Fasheh 
1685*ccd979bdSMark Fasheh 		BUG_ON(le32_to_cpu(el->l_recs[i].e_clusters) < clusters_to_del);
1686*ccd979bdSMark Fasheh 		le32_add_cpu(&el->l_recs[i].e_clusters, -clusters_to_del);
1687*ccd979bdSMark Fasheh 
1688*ccd979bdSMark Fasheh 		next_eb = le64_to_cpu(el->l_recs[i].e_blkno);
1689*ccd979bdSMark Fasheh 		/* bottom-most block requires us to delete data.*/
1690*ccd979bdSMark Fasheh 		if (!el->l_tree_depth)
1691*ccd979bdSMark Fasheh 			delete_blk = le64_to_cpu(el->l_recs[i].e_blkno)
1692*ccd979bdSMark Fasheh 				+ ocfs2_clusters_to_blocks(osb->sb,
1693*ccd979bdSMark Fasheh 					le32_to_cpu(el->l_recs[i].e_clusters));
1694*ccd979bdSMark Fasheh 		if (!el->l_recs[i].e_clusters) {
1695*ccd979bdSMark Fasheh 			el->l_recs[i].e_cpos = 0;
1696*ccd979bdSMark Fasheh 			el->l_recs[i].e_blkno = 0;
1697*ccd979bdSMark Fasheh 			BUG_ON(!el->l_next_free_rec);
1698*ccd979bdSMark Fasheh 			le16_add_cpu(&el->l_next_free_rec, -1);
1699*ccd979bdSMark Fasheh 		}
1700*ccd979bdSMark Fasheh 		mlog(0, "extent block %"MLFu64", after: record %d: "
1701*ccd979bdSMark Fasheh 		     "(%u, %u, %"MLFu64"), next = %u\n",
1702*ccd979bdSMark Fasheh 		     le64_to_cpu(eb->h_blkno), i,
1703*ccd979bdSMark Fasheh 		     le32_to_cpu(el->l_recs[i].e_cpos),
1704*ccd979bdSMark Fasheh 		     le32_to_cpu(el->l_recs[i].e_clusters),
1705*ccd979bdSMark Fasheh 		     le64_to_cpu(el->l_recs[i].e_blkno),
1706*ccd979bdSMark Fasheh 		     le16_to_cpu(el->l_next_free_rec));
1707*ccd979bdSMark Fasheh 
1708*ccd979bdSMark Fasheh 		status = ocfs2_journal_dirty(handle, eb_bh);
1709*ccd979bdSMark Fasheh 		if (status < 0) {
1710*ccd979bdSMark Fasheh 			mlog_errno(status);
1711*ccd979bdSMark Fasheh 			goto bail;
1712*ccd979bdSMark Fasheh 		}
1713*ccd979bdSMark Fasheh 
1714*ccd979bdSMark Fasheh 		if (!el->l_next_free_rec) {
1715*ccd979bdSMark Fasheh 			mlog(0, "deleting this extent block.\n");
1716*ccd979bdSMark Fasheh 
1717*ccd979bdSMark Fasheh 			ocfs2_remove_from_cache(inode, eb_bh);
1718*ccd979bdSMark Fasheh 
1719*ccd979bdSMark Fasheh 			BUG_ON(eb->h_suballoc_slot);
1720*ccd979bdSMark Fasheh 			BUG_ON(el->l_recs[0].e_clusters);
1721*ccd979bdSMark Fasheh 			BUG_ON(el->l_recs[0].e_cpos);
1722*ccd979bdSMark Fasheh 			BUG_ON(el->l_recs[0].e_blkno);
1723*ccd979bdSMark Fasheh 			status = ocfs2_free_extent_block(handle,
1724*ccd979bdSMark Fasheh 							 tc->tc_ext_alloc_inode,
1725*ccd979bdSMark Fasheh 							 tc->tc_ext_alloc_bh,
1726*ccd979bdSMark Fasheh 							 eb);
1727*ccd979bdSMark Fasheh 			if (status < 0) {
1728*ccd979bdSMark Fasheh 				mlog_errno(status);
1729*ccd979bdSMark Fasheh 				goto bail;
1730*ccd979bdSMark Fasheh 			}
1731*ccd979bdSMark Fasheh 		}
1732*ccd979bdSMark Fasheh 		brelse(eb_bh);
1733*ccd979bdSMark Fasheh 		eb_bh = NULL;
1734*ccd979bdSMark Fasheh 		depth--;
1735*ccd979bdSMark Fasheh 	}
1736*ccd979bdSMark Fasheh 
1737*ccd979bdSMark Fasheh 	BUG_ON(!delete_blk);
1738*ccd979bdSMark Fasheh 	status = ocfs2_truncate_log_append(osb, handle, delete_blk,
1739*ccd979bdSMark Fasheh 					   clusters_to_del);
1740*ccd979bdSMark Fasheh 	if (status < 0) {
1741*ccd979bdSMark Fasheh 		mlog_errno(status);
1742*ccd979bdSMark Fasheh 		goto bail;
1743*ccd979bdSMark Fasheh 	}
1744*ccd979bdSMark Fasheh 	status = 0;
1745*ccd979bdSMark Fasheh bail:
1746*ccd979bdSMark Fasheh 	if (!status)
1747*ccd979bdSMark Fasheh 		ocfs2_extent_map_trunc(inode, le32_to_cpu(fe->i_clusters));
1748*ccd979bdSMark Fasheh 	else
1749*ccd979bdSMark Fasheh 		ocfs2_extent_map_drop(inode, 0);
1750*ccd979bdSMark Fasheh 	mlog_exit(status);
1751*ccd979bdSMark Fasheh 	return status;
1752*ccd979bdSMark Fasheh }
1753*ccd979bdSMark Fasheh 
1754*ccd979bdSMark Fasheh /*
1755*ccd979bdSMark Fasheh  * It is expected, that by the time you call this function,
1756*ccd979bdSMark Fasheh  * inode->i_size and fe->i_size have been adjusted.
1757*ccd979bdSMark Fasheh  *
1758*ccd979bdSMark Fasheh  * WARNING: This will kfree the truncate context
1759*ccd979bdSMark Fasheh  */
1760*ccd979bdSMark Fasheh int ocfs2_commit_truncate(struct ocfs2_super *osb,
1761*ccd979bdSMark Fasheh 			  struct inode *inode,
1762*ccd979bdSMark Fasheh 			  struct buffer_head *fe_bh,
1763*ccd979bdSMark Fasheh 			  struct ocfs2_truncate_context *tc)
1764*ccd979bdSMark Fasheh {
1765*ccd979bdSMark Fasheh 	int status, i, credits, tl_sem = 0;
1766*ccd979bdSMark Fasheh 	u32 clusters_to_del, target_i_clusters;
1767*ccd979bdSMark Fasheh 	u64 last_eb = 0;
1768*ccd979bdSMark Fasheh 	struct ocfs2_dinode *fe;
1769*ccd979bdSMark Fasheh 	struct ocfs2_extent_block *eb;
1770*ccd979bdSMark Fasheh 	struct ocfs2_extent_list *el;
1771*ccd979bdSMark Fasheh 	struct buffer_head *last_eb_bh;
1772*ccd979bdSMark Fasheh 	struct ocfs2_journal_handle *handle = NULL;
1773*ccd979bdSMark Fasheh 	struct inode *tl_inode = osb->osb_tl_inode;
1774*ccd979bdSMark Fasheh 
1775*ccd979bdSMark Fasheh 	mlog_entry_void();
1776*ccd979bdSMark Fasheh 
1777*ccd979bdSMark Fasheh 	down_write(&OCFS2_I(inode)->ip_alloc_sem);
1778*ccd979bdSMark Fasheh 
1779*ccd979bdSMark Fasheh 	target_i_clusters = ocfs2_clusters_for_bytes(osb->sb,
1780*ccd979bdSMark Fasheh 						     i_size_read(inode));
1781*ccd979bdSMark Fasheh 
1782*ccd979bdSMark Fasheh 	last_eb_bh = tc->tc_last_eb_bh;
1783*ccd979bdSMark Fasheh 	tc->tc_last_eb_bh = NULL;
1784*ccd979bdSMark Fasheh 
1785*ccd979bdSMark Fasheh 	fe = (struct ocfs2_dinode *) fe_bh->b_data;
1786*ccd979bdSMark Fasheh 
1787*ccd979bdSMark Fasheh 	if (fe->id2.i_list.l_tree_depth) {
1788*ccd979bdSMark Fasheh 		eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
1789*ccd979bdSMark Fasheh 		el = &eb->h_list;
1790*ccd979bdSMark Fasheh 	} else
1791*ccd979bdSMark Fasheh 		el = &fe->id2.i_list;
1792*ccd979bdSMark Fasheh 	last_eb = le64_to_cpu(fe->i_last_eb_blk);
1793*ccd979bdSMark Fasheh start:
1794*ccd979bdSMark Fasheh 	mlog(0, "ocfs2_commit_truncate: fe->i_clusters = %u, "
1795*ccd979bdSMark Fasheh 	     "last_eb = %"MLFu64", fe->i_last_eb_blk = %"MLFu64", "
1796*ccd979bdSMark Fasheh 	     "fe->id2.i_list.l_tree_depth = %u last_eb_bh = %p\n",
1797*ccd979bdSMark Fasheh 	     le32_to_cpu(fe->i_clusters), last_eb,
1798*ccd979bdSMark Fasheh 	     le64_to_cpu(fe->i_last_eb_blk),
1799*ccd979bdSMark Fasheh 	     le16_to_cpu(fe->id2.i_list.l_tree_depth), last_eb_bh);
1800*ccd979bdSMark Fasheh 
1801*ccd979bdSMark Fasheh 	if (last_eb != le64_to_cpu(fe->i_last_eb_blk)) {
1802*ccd979bdSMark Fasheh 		mlog(0, "last_eb changed!\n");
1803*ccd979bdSMark Fasheh 		BUG_ON(!fe->id2.i_list.l_tree_depth);
1804*ccd979bdSMark Fasheh 		last_eb = le64_to_cpu(fe->i_last_eb_blk);
1805*ccd979bdSMark Fasheh 		/* i_last_eb_blk may have changed, read it if
1806*ccd979bdSMark Fasheh 		 * necessary. We don't have to worry about the
1807*ccd979bdSMark Fasheh 		 * truncate to zero case here (where there becomes no
1808*ccd979bdSMark Fasheh 		 * last_eb) because we never loop back after our work
1809*ccd979bdSMark Fasheh 		 * is done. */
1810*ccd979bdSMark Fasheh 		if (last_eb_bh) {
1811*ccd979bdSMark Fasheh 			brelse(last_eb_bh);
1812*ccd979bdSMark Fasheh 			last_eb_bh = NULL;
1813*ccd979bdSMark Fasheh 		}
1814*ccd979bdSMark Fasheh 
1815*ccd979bdSMark Fasheh 		status = ocfs2_read_block(osb, last_eb,
1816*ccd979bdSMark Fasheh 					  &last_eb_bh, OCFS2_BH_CACHED,
1817*ccd979bdSMark Fasheh 					  inode);
1818*ccd979bdSMark Fasheh 		if (status < 0) {
1819*ccd979bdSMark Fasheh 			mlog_errno(status);
1820*ccd979bdSMark Fasheh 			goto bail;
1821*ccd979bdSMark Fasheh 		}
1822*ccd979bdSMark Fasheh 		eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
1823*ccd979bdSMark Fasheh 		if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
1824*ccd979bdSMark Fasheh 			OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
1825*ccd979bdSMark Fasheh 			status = -EIO;
1826*ccd979bdSMark Fasheh 			goto bail;
1827*ccd979bdSMark Fasheh 		}
1828*ccd979bdSMark Fasheh 		el = &(eb->h_list);
1829*ccd979bdSMark Fasheh 	}
1830*ccd979bdSMark Fasheh 
1831*ccd979bdSMark Fasheh 	/* by now, el will point to the extent list on the bottom most
1832*ccd979bdSMark Fasheh 	 * portion of this tree. */
1833*ccd979bdSMark Fasheh 	i = le16_to_cpu(el->l_next_free_rec) - 1;
1834*ccd979bdSMark Fasheh 	if (le32_to_cpu(el->l_recs[i].e_cpos) >= target_i_clusters)
1835*ccd979bdSMark Fasheh 		clusters_to_del = le32_to_cpu(el->l_recs[i].e_clusters);
1836*ccd979bdSMark Fasheh 	else
1837*ccd979bdSMark Fasheh 		clusters_to_del = (le32_to_cpu(el->l_recs[i].e_clusters) +
1838*ccd979bdSMark Fasheh 				   le32_to_cpu(el->l_recs[i].e_cpos)) -
1839*ccd979bdSMark Fasheh 				  target_i_clusters;
1840*ccd979bdSMark Fasheh 
1841*ccd979bdSMark Fasheh 	mlog(0, "clusters_to_del = %u in this pass\n", clusters_to_del);
1842*ccd979bdSMark Fasheh 
1843*ccd979bdSMark Fasheh 	down(&tl_inode->i_sem);
1844*ccd979bdSMark Fasheh 	tl_sem = 1;
1845*ccd979bdSMark Fasheh 	/* ocfs2_truncate_log_needs_flush guarantees us at least one
1846*ccd979bdSMark Fasheh 	 * record is free for use. If there isn't any, we flush to get
1847*ccd979bdSMark Fasheh 	 * an empty truncate log.  */
1848*ccd979bdSMark Fasheh 	if (ocfs2_truncate_log_needs_flush(osb)) {
1849*ccd979bdSMark Fasheh 		status = __ocfs2_flush_truncate_log(osb);
1850*ccd979bdSMark Fasheh 		if (status < 0) {
1851*ccd979bdSMark Fasheh 			mlog_errno(status);
1852*ccd979bdSMark Fasheh 			goto bail;
1853*ccd979bdSMark Fasheh 		}
1854*ccd979bdSMark Fasheh 	}
1855*ccd979bdSMark Fasheh 
1856*ccd979bdSMark Fasheh 	credits = ocfs2_calc_tree_trunc_credits(osb->sb, clusters_to_del,
1857*ccd979bdSMark Fasheh 						fe, el);
1858*ccd979bdSMark Fasheh 	handle = ocfs2_start_trans(osb, NULL, credits);
1859*ccd979bdSMark Fasheh 	if (IS_ERR(handle)) {
1860*ccd979bdSMark Fasheh 		status = PTR_ERR(handle);
1861*ccd979bdSMark Fasheh 		handle = NULL;
1862*ccd979bdSMark Fasheh 		mlog_errno(status);
1863*ccd979bdSMark Fasheh 		goto bail;
1864*ccd979bdSMark Fasheh 	}
1865*ccd979bdSMark Fasheh 
1866*ccd979bdSMark Fasheh 	inode->i_ctime = inode->i_mtime = CURRENT_TIME;
1867*ccd979bdSMark Fasheh 	status = ocfs2_mark_inode_dirty(handle, inode, fe_bh);
1868*ccd979bdSMark Fasheh 	if (status < 0)
1869*ccd979bdSMark Fasheh 		mlog_errno(status);
1870*ccd979bdSMark Fasheh 
1871*ccd979bdSMark Fasheh 	status = ocfs2_do_truncate(osb, clusters_to_del, inode, fe_bh,
1872*ccd979bdSMark Fasheh 				   last_eb_bh, handle, tc);
1873*ccd979bdSMark Fasheh 	if (status < 0) {
1874*ccd979bdSMark Fasheh 		mlog_errno(status);
1875*ccd979bdSMark Fasheh 		goto bail;
1876*ccd979bdSMark Fasheh 	}
1877*ccd979bdSMark Fasheh 
1878*ccd979bdSMark Fasheh 	up(&tl_inode->i_sem);
1879*ccd979bdSMark Fasheh 	tl_sem = 0;
1880*ccd979bdSMark Fasheh 
1881*ccd979bdSMark Fasheh 	ocfs2_commit_trans(handle);
1882*ccd979bdSMark Fasheh 	handle = NULL;
1883*ccd979bdSMark Fasheh 
1884*ccd979bdSMark Fasheh 	BUG_ON(le32_to_cpu(fe->i_clusters) < target_i_clusters);
1885*ccd979bdSMark Fasheh 	if (le32_to_cpu(fe->i_clusters) > target_i_clusters)
1886*ccd979bdSMark Fasheh 		goto start;
1887*ccd979bdSMark Fasheh bail:
1888*ccd979bdSMark Fasheh 	up_write(&OCFS2_I(inode)->ip_alloc_sem);
1889*ccd979bdSMark Fasheh 
1890*ccd979bdSMark Fasheh 	ocfs2_schedule_truncate_log_flush(osb, 1);
1891*ccd979bdSMark Fasheh 
1892*ccd979bdSMark Fasheh 	if (tl_sem)
1893*ccd979bdSMark Fasheh 		up(&tl_inode->i_sem);
1894*ccd979bdSMark Fasheh 
1895*ccd979bdSMark Fasheh 	if (handle)
1896*ccd979bdSMark Fasheh 		ocfs2_commit_trans(handle);
1897*ccd979bdSMark Fasheh 
1898*ccd979bdSMark Fasheh 	if (last_eb_bh)
1899*ccd979bdSMark Fasheh 		brelse(last_eb_bh);
1900*ccd979bdSMark Fasheh 
1901*ccd979bdSMark Fasheh 	/* This will drop the ext_alloc cluster lock for us */
1902*ccd979bdSMark Fasheh 	ocfs2_free_truncate_context(tc);
1903*ccd979bdSMark Fasheh 
1904*ccd979bdSMark Fasheh 	mlog_exit(status);
1905*ccd979bdSMark Fasheh 	return status;
1906*ccd979bdSMark Fasheh }
1907*ccd979bdSMark Fasheh 
1908*ccd979bdSMark Fasheh 
1909*ccd979bdSMark Fasheh /*
1910*ccd979bdSMark Fasheh  * Expects the inode to already be locked. This will figure out which
1911*ccd979bdSMark Fasheh  * inodes need to be locked and will put them on the returned truncate
1912*ccd979bdSMark Fasheh  * context.
1913*ccd979bdSMark Fasheh  */
1914*ccd979bdSMark Fasheh int ocfs2_prepare_truncate(struct ocfs2_super *osb,
1915*ccd979bdSMark Fasheh 			   struct inode *inode,
1916*ccd979bdSMark Fasheh 			   struct buffer_head *fe_bh,
1917*ccd979bdSMark Fasheh 			   struct ocfs2_truncate_context **tc)
1918*ccd979bdSMark Fasheh {
1919*ccd979bdSMark Fasheh 	int status, metadata_delete;
1920*ccd979bdSMark Fasheh 	unsigned int new_i_clusters;
1921*ccd979bdSMark Fasheh 	struct ocfs2_dinode *fe;
1922*ccd979bdSMark Fasheh 	struct ocfs2_extent_block *eb;
1923*ccd979bdSMark Fasheh 	struct ocfs2_extent_list *el;
1924*ccd979bdSMark Fasheh 	struct buffer_head *last_eb_bh = NULL;
1925*ccd979bdSMark Fasheh 	struct inode *ext_alloc_inode = NULL;
1926*ccd979bdSMark Fasheh 	struct buffer_head *ext_alloc_bh = NULL;
1927*ccd979bdSMark Fasheh 
1928*ccd979bdSMark Fasheh 	mlog_entry_void();
1929*ccd979bdSMark Fasheh 
1930*ccd979bdSMark Fasheh 	*tc = NULL;
1931*ccd979bdSMark Fasheh 
1932*ccd979bdSMark Fasheh 	new_i_clusters = ocfs2_clusters_for_bytes(osb->sb,
1933*ccd979bdSMark Fasheh 						  i_size_read(inode));
1934*ccd979bdSMark Fasheh 	fe = (struct ocfs2_dinode *) fe_bh->b_data;
1935*ccd979bdSMark Fasheh 
1936*ccd979bdSMark Fasheh 	mlog(0, "fe->i_clusters = %u, new_i_clusters = %u, fe->i_size ="
1937*ccd979bdSMark Fasheh 	     "%"MLFu64"\n", fe->i_clusters, new_i_clusters, fe->i_size);
1938*ccd979bdSMark Fasheh 
1939*ccd979bdSMark Fasheh 	if (le32_to_cpu(fe->i_clusters) <= new_i_clusters) {
1940*ccd979bdSMark Fasheh 		ocfs2_error(inode->i_sb, "Dinode %"MLFu64" has cluster count "
1941*ccd979bdSMark Fasheh 			    "%u and size %"MLFu64" whereas struct inode has "
1942*ccd979bdSMark Fasheh 			    "cluster count %u and size %llu which caused an "
1943*ccd979bdSMark Fasheh 			    "invalid truncate to %u clusters.",
1944*ccd979bdSMark Fasheh 			    le64_to_cpu(fe->i_blkno),
1945*ccd979bdSMark Fasheh 			    le32_to_cpu(fe->i_clusters),
1946*ccd979bdSMark Fasheh 			    le64_to_cpu(fe->i_size),
1947*ccd979bdSMark Fasheh 			    OCFS2_I(inode)->ip_clusters, i_size_read(inode),
1948*ccd979bdSMark Fasheh 			    new_i_clusters);
1949*ccd979bdSMark Fasheh 		mlog_meta_lvb(ML_ERROR, &OCFS2_I(inode)->ip_meta_lockres);
1950*ccd979bdSMark Fasheh 		status = -EIO;
1951*ccd979bdSMark Fasheh 		goto bail;
1952*ccd979bdSMark Fasheh 	}
1953*ccd979bdSMark Fasheh 
1954*ccd979bdSMark Fasheh 	*tc = kcalloc(1, sizeof(struct ocfs2_truncate_context), GFP_KERNEL);
1955*ccd979bdSMark Fasheh 	if (!(*tc)) {
1956*ccd979bdSMark Fasheh 		status = -ENOMEM;
1957*ccd979bdSMark Fasheh 		mlog_errno(status);
1958*ccd979bdSMark Fasheh 		goto bail;
1959*ccd979bdSMark Fasheh 	}
1960*ccd979bdSMark Fasheh 
1961*ccd979bdSMark Fasheh 	metadata_delete = 0;
1962*ccd979bdSMark Fasheh 	if (fe->id2.i_list.l_tree_depth) {
1963*ccd979bdSMark Fasheh 		/* If we have a tree, then the truncate may result in
1964*ccd979bdSMark Fasheh 		 * metadata deletes. Figure this out from the
1965*ccd979bdSMark Fasheh 		 * rightmost leaf block.*/
1966*ccd979bdSMark Fasheh 		status = ocfs2_read_block(osb, le64_to_cpu(fe->i_last_eb_blk),
1967*ccd979bdSMark Fasheh 					  &last_eb_bh, OCFS2_BH_CACHED, inode);
1968*ccd979bdSMark Fasheh 		if (status < 0) {
1969*ccd979bdSMark Fasheh 			mlog_errno(status);
1970*ccd979bdSMark Fasheh 			goto bail;
1971*ccd979bdSMark Fasheh 		}
1972*ccd979bdSMark Fasheh 		eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
1973*ccd979bdSMark Fasheh 		if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
1974*ccd979bdSMark Fasheh 			OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
1975*ccd979bdSMark Fasheh 
1976*ccd979bdSMark Fasheh 			brelse(last_eb_bh);
1977*ccd979bdSMark Fasheh 			status = -EIO;
1978*ccd979bdSMark Fasheh 			goto bail;
1979*ccd979bdSMark Fasheh 		}
1980*ccd979bdSMark Fasheh 		el = &(eb->h_list);
1981*ccd979bdSMark Fasheh 		if (le32_to_cpu(el->l_recs[0].e_cpos) >= new_i_clusters)
1982*ccd979bdSMark Fasheh 			metadata_delete = 1;
1983*ccd979bdSMark Fasheh 	}
1984*ccd979bdSMark Fasheh 
1985*ccd979bdSMark Fasheh 	(*tc)->tc_last_eb_bh = last_eb_bh;
1986*ccd979bdSMark Fasheh 
1987*ccd979bdSMark Fasheh 	if (metadata_delete) {
1988*ccd979bdSMark Fasheh 		mlog(0, "Will have to delete metadata for this trunc. "
1989*ccd979bdSMark Fasheh 		     "locking allocator.\n");
1990*ccd979bdSMark Fasheh 		ext_alloc_inode = ocfs2_get_system_file_inode(osb, EXTENT_ALLOC_SYSTEM_INODE, 0);
1991*ccd979bdSMark Fasheh 		if (!ext_alloc_inode) {
1992*ccd979bdSMark Fasheh 			status = -ENOMEM;
1993*ccd979bdSMark Fasheh 			mlog_errno(status);
1994*ccd979bdSMark Fasheh 			goto bail;
1995*ccd979bdSMark Fasheh 		}
1996*ccd979bdSMark Fasheh 
1997*ccd979bdSMark Fasheh 		down(&ext_alloc_inode->i_sem);
1998*ccd979bdSMark Fasheh 		(*tc)->tc_ext_alloc_inode = ext_alloc_inode;
1999*ccd979bdSMark Fasheh 
2000*ccd979bdSMark Fasheh 		status = ocfs2_meta_lock(ext_alloc_inode,
2001*ccd979bdSMark Fasheh 					 NULL,
2002*ccd979bdSMark Fasheh 					 &ext_alloc_bh,
2003*ccd979bdSMark Fasheh 					 1);
2004*ccd979bdSMark Fasheh 		if (status < 0) {
2005*ccd979bdSMark Fasheh 			mlog_errno(status);
2006*ccd979bdSMark Fasheh 			goto bail;
2007*ccd979bdSMark Fasheh 		}
2008*ccd979bdSMark Fasheh 		(*tc)->tc_ext_alloc_bh = ext_alloc_bh;
2009*ccd979bdSMark Fasheh 		(*tc)->tc_ext_alloc_locked = 1;
2010*ccd979bdSMark Fasheh 	}
2011*ccd979bdSMark Fasheh 
2012*ccd979bdSMark Fasheh 	status = 0;
2013*ccd979bdSMark Fasheh bail:
2014*ccd979bdSMark Fasheh 	if (status < 0) {
2015*ccd979bdSMark Fasheh 		if (*tc)
2016*ccd979bdSMark Fasheh 			ocfs2_free_truncate_context(*tc);
2017*ccd979bdSMark Fasheh 		*tc = NULL;
2018*ccd979bdSMark Fasheh 	}
2019*ccd979bdSMark Fasheh 	mlog_exit_void();
2020*ccd979bdSMark Fasheh 	return status;
2021*ccd979bdSMark Fasheh }
2022*ccd979bdSMark Fasheh 
2023*ccd979bdSMark Fasheh static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc)
2024*ccd979bdSMark Fasheh {
2025*ccd979bdSMark Fasheh 	if (tc->tc_ext_alloc_inode) {
2026*ccd979bdSMark Fasheh 		if (tc->tc_ext_alloc_locked)
2027*ccd979bdSMark Fasheh 			ocfs2_meta_unlock(tc->tc_ext_alloc_inode, 1);
2028*ccd979bdSMark Fasheh 
2029*ccd979bdSMark Fasheh 		up(&tc->tc_ext_alloc_inode->i_sem);
2030*ccd979bdSMark Fasheh 		iput(tc->tc_ext_alloc_inode);
2031*ccd979bdSMark Fasheh 	}
2032*ccd979bdSMark Fasheh 
2033*ccd979bdSMark Fasheh 	if (tc->tc_ext_alloc_bh)
2034*ccd979bdSMark Fasheh 		brelse(tc->tc_ext_alloc_bh);
2035*ccd979bdSMark Fasheh 
2036*ccd979bdSMark Fasheh 	if (tc->tc_last_eb_bh)
2037*ccd979bdSMark Fasheh 		brelse(tc->tc_last_eb_bh);
2038*ccd979bdSMark Fasheh 
2039*ccd979bdSMark Fasheh 	kfree(tc);
2040*ccd979bdSMark Fasheh }
2041