xref: /openbmc/linux/fs/ext4/extents.c (revision 4d33b1ef10995d7ba6191d67456202c697a92a32)
1a86c6181SAlex Tomas /*
2a86c6181SAlex Tomas  * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com
3a86c6181SAlex Tomas  * Written by Alex Tomas <alex@clusterfs.com>
4a86c6181SAlex Tomas  *
5a86c6181SAlex Tomas  * Architecture independence:
6a86c6181SAlex Tomas  *   Copyright (c) 2005, Bull S.A.
7a86c6181SAlex Tomas  *   Written by Pierre Peiffer <pierre.peiffer@bull.net>
8a86c6181SAlex Tomas  *
9a86c6181SAlex Tomas  * This program is free software; you can redistribute it and/or modify
10a86c6181SAlex Tomas  * it under the terms of the GNU General Public License version 2 as
11a86c6181SAlex Tomas  * published by the Free Software Foundation.
12a86c6181SAlex Tomas  *
13a86c6181SAlex Tomas  * This program is distributed in the hope that it will be useful,
14a86c6181SAlex Tomas  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15a86c6181SAlex Tomas  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16a86c6181SAlex Tomas  * GNU General Public License for more details.
17a86c6181SAlex Tomas  *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307, USA
21a86c6181SAlex Tomas  */
22a86c6181SAlex Tomas 
23a86c6181SAlex Tomas /*
24a86c6181SAlex Tomas  * Extents support for EXT4
25a86c6181SAlex Tomas  *
26a86c6181SAlex Tomas  * TODO:
27a86c6181SAlex Tomas  *   - ext4*_error() should be used in some situations
28a86c6181SAlex Tomas  *   - analyze all BUG()/BUG_ON(), use -EIO where appropriate
29a86c6181SAlex Tomas  *   - smart tree reduction
30a86c6181SAlex Tomas  */
31a86c6181SAlex Tomas 
32a86c6181SAlex Tomas #include <linux/module.h>
33a86c6181SAlex Tomas #include <linux/fs.h>
34a86c6181SAlex Tomas #include <linux/time.h>
35cd02ff0bSMingming Cao #include <linux/jbd2.h>
36a86c6181SAlex Tomas #include <linux/highuid.h>
37a86c6181SAlex Tomas #include <linux/pagemap.h>
38a86c6181SAlex Tomas #include <linux/quotaops.h>
39a86c6181SAlex Tomas #include <linux/string.h>
40a86c6181SAlex Tomas #include <linux/slab.h>
41a2df2a63SAmit Arora #include <linux/falloc.h>
42a86c6181SAlex Tomas #include <asm/uaccess.h>
436873fa0dSEric Sandeen #include <linux/fiemap.h>
443dcf5451SChristoph Hellwig #include "ext4_jbd2.h"
453dcf5451SChristoph Hellwig #include "ext4_extents.h"
46a86c6181SAlex Tomas 
470562e0baSJiaying Zhang #include <trace/events/ext4.h>
480562e0baSJiaying Zhang 
49d583fb87SAllison Henderson static int ext4_split_extent(handle_t *handle,
50d583fb87SAllison Henderson 				struct inode *inode,
51d583fb87SAllison Henderson 				struct ext4_ext_path *path,
52d583fb87SAllison Henderson 				struct ext4_map_blocks *map,
53d583fb87SAllison Henderson 				int split_flag,
54d583fb87SAllison Henderson 				int flags);
55d583fb87SAllison Henderson 
56487caeefSJan Kara static int ext4_ext_truncate_extend_restart(handle_t *handle,
57487caeefSJan Kara 					    struct inode *inode,
58487caeefSJan Kara 					    int needed)
59a86c6181SAlex Tomas {
60a86c6181SAlex Tomas 	int err;
61a86c6181SAlex Tomas 
620390131bSFrank Mayhar 	if (!ext4_handle_valid(handle))
630390131bSFrank Mayhar 		return 0;
64a86c6181SAlex Tomas 	if (handle->h_buffer_credits > needed)
659102e4faSShen Feng 		return 0;
669102e4faSShen Feng 	err = ext4_journal_extend(handle, needed);
670123c939STheodore Ts'o 	if (err <= 0)
689102e4faSShen Feng 		return err;
69487caeefSJan Kara 	err = ext4_truncate_restart_trans(handle, inode, needed);
700617b83fSDmitry Monakhov 	if (err == 0)
710617b83fSDmitry Monakhov 		err = -EAGAIN;
72487caeefSJan Kara 
73487caeefSJan Kara 	return err;
74a86c6181SAlex Tomas }
75a86c6181SAlex Tomas 
76a86c6181SAlex Tomas /*
77a86c6181SAlex Tomas  * could return:
78a86c6181SAlex Tomas  *  - EROFS
79a86c6181SAlex Tomas  *  - ENOMEM
80a86c6181SAlex Tomas  */
81a86c6181SAlex Tomas static int ext4_ext_get_access(handle_t *handle, struct inode *inode,
82a86c6181SAlex Tomas 				struct ext4_ext_path *path)
83a86c6181SAlex Tomas {
84a86c6181SAlex Tomas 	if (path->p_bh) {
85a86c6181SAlex Tomas 		/* path points to block */
86a86c6181SAlex Tomas 		return ext4_journal_get_write_access(handle, path->p_bh);
87a86c6181SAlex Tomas 	}
88a86c6181SAlex Tomas 	/* path points to leaf/index in inode body */
89a86c6181SAlex Tomas 	/* we use in-core data, no need to protect them */
90a86c6181SAlex Tomas 	return 0;
91a86c6181SAlex Tomas }
92a86c6181SAlex Tomas 
93a86c6181SAlex Tomas /*
94a86c6181SAlex Tomas  * could return:
95a86c6181SAlex Tomas  *  - EROFS
96a86c6181SAlex Tomas  *  - ENOMEM
97a86c6181SAlex Tomas  *  - EIO
98a86c6181SAlex Tomas  */
999ea7a0dfSTheodore Ts'o #define ext4_ext_dirty(handle, inode, path) \
1009ea7a0dfSTheodore Ts'o 		__ext4_ext_dirty(__func__, __LINE__, (handle), (inode), (path))
1019ea7a0dfSTheodore Ts'o static int __ext4_ext_dirty(const char *where, unsigned int line,
1029ea7a0dfSTheodore Ts'o 			    handle_t *handle, struct inode *inode,
103a86c6181SAlex Tomas 			    struct ext4_ext_path *path)
104a86c6181SAlex Tomas {
105a86c6181SAlex Tomas 	int err;
106a86c6181SAlex Tomas 	if (path->p_bh) {
107a86c6181SAlex Tomas 		/* path points to block */
1089ea7a0dfSTheodore Ts'o 		err = __ext4_handle_dirty_metadata(where, line, handle,
1099ea7a0dfSTheodore Ts'o 						   inode, path->p_bh);
110a86c6181SAlex Tomas 	} else {
111a86c6181SAlex Tomas 		/* path points to leaf/index in inode body */
112a86c6181SAlex Tomas 		err = ext4_mark_inode_dirty(handle, inode);
113a86c6181SAlex Tomas 	}
114a86c6181SAlex Tomas 	return err;
115a86c6181SAlex Tomas }
116a86c6181SAlex Tomas 
117f65e6fbaSAlex Tomas static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
118a86c6181SAlex Tomas 			      struct ext4_ext_path *path,
119725d26d3SAneesh Kumar K.V 			      ext4_lblk_t block)
120a86c6181SAlex Tomas {
121a86c6181SAlex Tomas 	int depth;
122a86c6181SAlex Tomas 
123a86c6181SAlex Tomas 	if (path) {
124a86c6181SAlex Tomas 		struct ext4_extent *ex;
125a86c6181SAlex Tomas 		depth = path->p_depth;
126a86c6181SAlex Tomas 
127ad4fb9caSKazuya Mio 		/*
128ad4fb9caSKazuya Mio 		 * Try to predict block placement assuming that we are
129ad4fb9caSKazuya Mio 		 * filling in a file which will eventually be
130ad4fb9caSKazuya Mio 		 * non-sparse --- i.e., in the case of libbfd writing
131ad4fb9caSKazuya Mio 		 * an ELF object sections out-of-order but in a way
132ad4fb9caSKazuya Mio 		 * the eventually results in a contiguous object or
133ad4fb9caSKazuya Mio 		 * executable file, or some database extending a table
134ad4fb9caSKazuya Mio 		 * space file.  However, this is actually somewhat
135ad4fb9caSKazuya Mio 		 * non-ideal if we are writing a sparse file such as
136ad4fb9caSKazuya Mio 		 * qemu or KVM writing a raw image file that is going
137ad4fb9caSKazuya Mio 		 * to stay fairly sparse, since it will end up
138ad4fb9caSKazuya Mio 		 * fragmenting the file system's free space.  Maybe we
139ad4fb9caSKazuya Mio 		 * should have some hueristics or some way to allow
140ad4fb9caSKazuya Mio 		 * userspace to pass a hint to file system,
141b8d6568aSTao Ma 		 * especially if the latter case turns out to be
142ad4fb9caSKazuya Mio 		 * common.
143ad4fb9caSKazuya Mio 		 */
1447e028976SAvantika Mathur 		ex = path[depth].p_ext;
145ad4fb9caSKazuya Mio 		if (ex) {
146ad4fb9caSKazuya Mio 			ext4_fsblk_t ext_pblk = ext4_ext_pblock(ex);
147ad4fb9caSKazuya Mio 			ext4_lblk_t ext_block = le32_to_cpu(ex->ee_block);
148ad4fb9caSKazuya Mio 
149ad4fb9caSKazuya Mio 			if (block > ext_block)
150ad4fb9caSKazuya Mio 				return ext_pblk + (block - ext_block);
151ad4fb9caSKazuya Mio 			else
152ad4fb9caSKazuya Mio 				return ext_pblk - (ext_block - block);
153ad4fb9caSKazuya Mio 		}
154a86c6181SAlex Tomas 
155d0d856e8SRandy Dunlap 		/* it looks like index is empty;
156d0d856e8SRandy Dunlap 		 * try to find starting block from index itself */
157a86c6181SAlex Tomas 		if (path[depth].p_bh)
158a86c6181SAlex Tomas 			return path[depth].p_bh->b_blocknr;
159a86c6181SAlex Tomas 	}
160a86c6181SAlex Tomas 
161a86c6181SAlex Tomas 	/* OK. use inode's group */
162f86186b4SEric Sandeen 	return ext4_inode_to_goal_block(inode);
163a86c6181SAlex Tomas }
164a86c6181SAlex Tomas 
165654b4908SAneesh Kumar K.V /*
166654b4908SAneesh Kumar K.V  * Allocation for a meta data block
167654b4908SAneesh Kumar K.V  */
168f65e6fbaSAlex Tomas static ext4_fsblk_t
169654b4908SAneesh Kumar K.V ext4_ext_new_meta_block(handle_t *handle, struct inode *inode,
170a86c6181SAlex Tomas 			struct ext4_ext_path *path,
17155f020dbSAllison Henderson 			struct ext4_extent *ex, int *err, unsigned int flags)
172a86c6181SAlex Tomas {
173f65e6fbaSAlex Tomas 	ext4_fsblk_t goal, newblock;
174a86c6181SAlex Tomas 
175a86c6181SAlex Tomas 	goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block));
17655f020dbSAllison Henderson 	newblock = ext4_new_meta_blocks(handle, inode, goal, flags,
17755f020dbSAllison Henderson 					NULL, err);
178a86c6181SAlex Tomas 	return newblock;
179a86c6181SAlex Tomas }
180a86c6181SAlex Tomas 
18155ad63bfSTheodore Ts'o static inline int ext4_ext_space_block(struct inode *inode, int check)
182a86c6181SAlex Tomas {
183a86c6181SAlex Tomas 	int size;
184a86c6181SAlex Tomas 
185a86c6181SAlex Tomas 	size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
186a86c6181SAlex Tomas 			/ sizeof(struct ext4_extent);
18755ad63bfSTheodore Ts'o 	if (!check) {
188bbf2f9fbSRobert P. J. Day #ifdef AGGRESSIVE_TEST
189a86c6181SAlex Tomas 		if (size > 6)
190a86c6181SAlex Tomas 			size = 6;
191a86c6181SAlex Tomas #endif
19255ad63bfSTheodore Ts'o 	}
193a86c6181SAlex Tomas 	return size;
194a86c6181SAlex Tomas }
195a86c6181SAlex Tomas 
19655ad63bfSTheodore Ts'o static inline int ext4_ext_space_block_idx(struct inode *inode, int check)
197a86c6181SAlex Tomas {
198a86c6181SAlex Tomas 	int size;
199a86c6181SAlex Tomas 
200a86c6181SAlex Tomas 	size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
201a86c6181SAlex Tomas 			/ sizeof(struct ext4_extent_idx);
20255ad63bfSTheodore Ts'o 	if (!check) {
203bbf2f9fbSRobert P. J. Day #ifdef AGGRESSIVE_TEST
204a86c6181SAlex Tomas 		if (size > 5)
205a86c6181SAlex Tomas 			size = 5;
206a86c6181SAlex Tomas #endif
20755ad63bfSTheodore Ts'o 	}
208a86c6181SAlex Tomas 	return size;
209a86c6181SAlex Tomas }
210a86c6181SAlex Tomas 
21155ad63bfSTheodore Ts'o static inline int ext4_ext_space_root(struct inode *inode, int check)
212a86c6181SAlex Tomas {
213a86c6181SAlex Tomas 	int size;
214a86c6181SAlex Tomas 
215a86c6181SAlex Tomas 	size = sizeof(EXT4_I(inode)->i_data);
216a86c6181SAlex Tomas 	size -= sizeof(struct ext4_extent_header);
217a86c6181SAlex Tomas 	size /= sizeof(struct ext4_extent);
21855ad63bfSTheodore Ts'o 	if (!check) {
219bbf2f9fbSRobert P. J. Day #ifdef AGGRESSIVE_TEST
220a86c6181SAlex Tomas 		if (size > 3)
221a86c6181SAlex Tomas 			size = 3;
222a86c6181SAlex Tomas #endif
22355ad63bfSTheodore Ts'o 	}
224a86c6181SAlex Tomas 	return size;
225a86c6181SAlex Tomas }
226a86c6181SAlex Tomas 
22755ad63bfSTheodore Ts'o static inline int ext4_ext_space_root_idx(struct inode *inode, int check)
228a86c6181SAlex Tomas {
229a86c6181SAlex Tomas 	int size;
230a86c6181SAlex Tomas 
231a86c6181SAlex Tomas 	size = sizeof(EXT4_I(inode)->i_data);
232a86c6181SAlex Tomas 	size -= sizeof(struct ext4_extent_header);
233a86c6181SAlex Tomas 	size /= sizeof(struct ext4_extent_idx);
23455ad63bfSTheodore Ts'o 	if (!check) {
235bbf2f9fbSRobert P. J. Day #ifdef AGGRESSIVE_TEST
236a86c6181SAlex Tomas 		if (size > 4)
237a86c6181SAlex Tomas 			size = 4;
238a86c6181SAlex Tomas #endif
23955ad63bfSTheodore Ts'o 	}
240a86c6181SAlex Tomas 	return size;
241a86c6181SAlex Tomas }
242a86c6181SAlex Tomas 
/*
 * Calculate the number of metadata blocks needed
 * to allocate @blocks
 * Worse case is one block per extent
 */
int ext4_ext_calc_metadata_amount(struct inode *inode, ext4_lblk_t lblock)
{
	struct ext4_inode_info *ei = EXT4_I(inode);
	int idxs, num = 0;

	/* Index entries per tree block. */
	idxs = ((inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
		/ sizeof(struct ext4_extent_idx));

	/*
	 * If the new delayed allocation block is contiguous with the
	 * previous da block, it can share index blocks with the
	 * previous block, so we only need to allocate a new index
	 * block every idxs leaf blocks.  At idxs**2 blocks, we need
	 * an additional index block, and at idxs**3 blocks, yet
	 * another index blocks.
	 */
	if (ei->i_da_metadata_calc_len &&
	    ei->i_da_metadata_calc_last_lblock+1 == lblock) {
		/* One new leaf block every idxs blocks... */
		if ((ei->i_da_metadata_calc_len % idxs) == 0)
			num++;
		/* ...a second-level index every idxs**2 blocks... */
		if ((ei->i_da_metadata_calc_len % (idxs*idxs)) == 0)
			num++;
		/* ...and a third-level index every idxs**3 blocks. */
		if ((ei->i_da_metadata_calc_len % (idxs*idxs*idxs)) == 0) {
			num++;
			/* Full cycle: restart the running count. */
			ei->i_da_metadata_calc_len = 0;
		} else
			ei->i_da_metadata_calc_len++;
		ei->i_da_metadata_calc_last_lblock++;
		return num;
	}

	/*
	 * In the worst case we need a new set of index blocks at
	 * every level of the inode's extent tree.
	 */
	ei->i_da_metadata_calc_len = 1;
	ei->i_da_metadata_calc_last_lblock = lblock;
	return ext_depth(inode) + 1;
}
2879d0be502STheodore Ts'o 
/*
 * Maximum number of entries a node at @depth may legally hold,
 * distinguishing the in-inode root from on-disk tree blocks and
 * leaf (extent) from interior (index) nodes.
 */
static int
ext4_ext_max_entries(struct inode *inode, int depth)
{
	int is_root = (depth == ext_depth(inode));

	if (depth == 0)
		return is_root ? ext4_ext_space_root(inode, 1)
			       : ext4_ext_space_block(inode, 1);
	return is_root ? ext4_ext_space_root_idx(inode, 1)
		       : ext4_ext_space_block_idx(inode, 1);
}
307c29c0ae7SAlex Tomas 
30856b19868SAneesh Kumar K.V static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
30956b19868SAneesh Kumar K.V {
310bf89d16fSTheodore Ts'o 	ext4_fsblk_t block = ext4_ext_pblock(ext);
31156b19868SAneesh Kumar K.V 	int len = ext4_ext_get_actual_len(ext);
312e84a26ceSTheodore Ts'o 
3136fd058f7STheodore Ts'o 	return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len);
31456b19868SAneesh Kumar K.V }
31556b19868SAneesh Kumar K.V 
31656b19868SAneesh Kumar K.V static int ext4_valid_extent_idx(struct inode *inode,
31756b19868SAneesh Kumar K.V 				struct ext4_extent_idx *ext_idx)
31856b19868SAneesh Kumar K.V {
319bf89d16fSTheodore Ts'o 	ext4_fsblk_t block = ext4_idx_pblock(ext_idx);
320e84a26ceSTheodore Ts'o 
3216fd058f7STheodore Ts'o 	return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, 1);
32256b19868SAneesh Kumar K.V }
32356b19868SAneesh Kumar K.V 
32456b19868SAneesh Kumar K.V static int ext4_valid_extent_entries(struct inode *inode,
32556b19868SAneesh Kumar K.V 				struct ext4_extent_header *eh,
32656b19868SAneesh Kumar K.V 				int depth)
32756b19868SAneesh Kumar K.V {
32856b19868SAneesh Kumar K.V 	struct ext4_extent *ext;
32956b19868SAneesh Kumar K.V 	struct ext4_extent_idx *ext_idx;
33056b19868SAneesh Kumar K.V 	unsigned short entries;
33156b19868SAneesh Kumar K.V 	if (eh->eh_entries == 0)
33256b19868SAneesh Kumar K.V 		return 1;
33356b19868SAneesh Kumar K.V 
33456b19868SAneesh Kumar K.V 	entries = le16_to_cpu(eh->eh_entries);
33556b19868SAneesh Kumar K.V 
33656b19868SAneesh Kumar K.V 	if (depth == 0) {
33756b19868SAneesh Kumar K.V 		/* leaf entries */
33856b19868SAneesh Kumar K.V 		ext = EXT_FIRST_EXTENT(eh);
33956b19868SAneesh Kumar K.V 		while (entries) {
34056b19868SAneesh Kumar K.V 			if (!ext4_valid_extent(inode, ext))
34156b19868SAneesh Kumar K.V 				return 0;
34256b19868SAneesh Kumar K.V 			ext++;
34356b19868SAneesh Kumar K.V 			entries--;
34456b19868SAneesh Kumar K.V 		}
34556b19868SAneesh Kumar K.V 	} else {
34656b19868SAneesh Kumar K.V 		ext_idx = EXT_FIRST_INDEX(eh);
34756b19868SAneesh Kumar K.V 		while (entries) {
34856b19868SAneesh Kumar K.V 			if (!ext4_valid_extent_idx(inode, ext_idx))
34956b19868SAneesh Kumar K.V 				return 0;
35056b19868SAneesh Kumar K.V 			ext_idx++;
35156b19868SAneesh Kumar K.V 			entries--;
35256b19868SAneesh Kumar K.V 		}
35356b19868SAneesh Kumar K.V 	}
35456b19868SAneesh Kumar K.V 	return 1;
35556b19868SAneesh Kumar K.V }
35656b19868SAneesh Kumar K.V 
/*
 * Sanity-check an extent tree node header before it is trusted.
 * Returns 0 if the header looks valid, -EIO after reporting the
 * corruption via ext4_error_inode() (@function/@line identify the
 * caller).
 */
static int __ext4_ext_check(const char *function, unsigned int line,
			    struct inode *inode, struct ext4_extent_header *eh,
			    int depth)
{
	const char *error_msg;
	int max = 0;	/* stays 0 in the error report for early checks */

	if (unlikely(eh->eh_magic != EXT4_EXT_MAGIC)) {
		error_msg = "invalid magic";
		goto corrupted;
	}
	if (unlikely(le16_to_cpu(eh->eh_depth) != depth)) {
		error_msg = "unexpected eh_depth";
		goto corrupted;
	}
	if (unlikely(eh->eh_max == 0)) {
		error_msg = "invalid eh_max";
		goto corrupted;
	}
	/* eh_max may not exceed what physically fits at this level. */
	max = ext4_ext_max_entries(inode, depth);
	if (unlikely(le16_to_cpu(eh->eh_max) > max)) {
		error_msg = "too large eh_max";
		goto corrupted;
	}
	if (unlikely(le16_to_cpu(eh->eh_entries) > le16_to_cpu(eh->eh_max))) {
		error_msg = "invalid eh_entries";
		goto corrupted;
	}
	/* Finally validate each entry's physical block range. */
	if (!ext4_valid_extent_entries(inode, eh, depth)) {
		error_msg = "invalid extent entries";
		goto corrupted;
	}
	return 0;

corrupted:
	ext4_error_inode(inode, function, line, 0,
			"bad header/extent: %s - magic %x, "
			"entries %u, max %u(%u), depth %u(%u)",
			error_msg, le16_to_cpu(eh->eh_magic),
			le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max),
			max, le16_to_cpu(eh->eh_depth), depth);

	return -EIO;
}
401c29c0ae7SAlex Tomas 
/* Check @eh, recording the caller's location for error reports. */
#define ext4_ext_check(inode, eh, depth)	\
	__ext4_ext_check(__func__, __LINE__, inode, eh, depth)

/* Validate the extent tree root stored in the inode body. */
int ext4_ext_check_inode(struct inode *inode)
{
	return ext4_ext_check(inode, ext_inode_hdr(inode), ext_depth(inode));
}
4097a262f7cSAneesh Kumar K.V 
#ifdef EXT_DEBUG
/* Dump each level of @path (index, extent, or empty) for debugging. */
static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path)
{
	int k, l = path->p_depth;

	ext_debug("path:");
	for (k = 0; k <= l; k++, path++) {
		if (path->p_idx) {
		  ext_debug("  %d->%llu", le32_to_cpu(path->p_idx->ei_block),
			    ext4_idx_pblock(path->p_idx));
		} else if (path->p_ext) {
			ext_debug("  %d:[%d]%d:%llu ",
				  le32_to_cpu(path->p_ext->ee_block),
				  ext4_ext_is_uninitialized(path->p_ext),
				  ext4_ext_get_actual_len(path->p_ext),
				  ext4_ext_pblock(path->p_ext));
		} else
			ext_debug("  []");
	}
	ext_debug("\n");
}

/* Dump every extent in the leaf node that @path points to. */
static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path)
{
	int depth = ext_depth(inode);
	struct ext4_extent_header *eh;
	struct ext4_extent *ex;
	int i;

	if (!path)
		return;

	eh = path[depth].p_hdr;
	ex = EXT_FIRST_EXTENT(eh);

	ext_debug("Displaying leaf extents for inode %lu\n", inode->i_ino);

	for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) {
		ext_debug("%d:[%d]%d:%llu ", le32_to_cpu(ex->ee_block),
			  ext4_ext_is_uninitialized(ex),
			  ext4_ext_get_actual_len(ex), ext4_ext_pblock(ex));
	}
	ext_debug("\n");
}

/* Trace the entries that are being moved to @newblock at @level. */
static void ext4_ext_show_move(struct inode *inode, struct ext4_ext_path *path,
			ext4_fsblk_t newblock, int level)
{
	int depth = ext_depth(inode);
	struct ext4_extent *ex;

	if (depth != level) {
		/* interior level: the moved entries are index entries */
		struct ext4_extent_idx *idx;
		idx = path[level].p_idx;
		while (idx <= EXT_MAX_INDEX(path[level].p_hdr)) {
			ext_debug("%d: move %d:%llu in new index %llu\n", level,
					le32_to_cpu(idx->ei_block),
					ext4_idx_pblock(idx),
					newblock);
			idx++;
		}

		return;
	}

	/* leaf level: the moved entries are extents */
	ex = path[depth].p_ext;
	while (ex <= EXT_MAX_EXTENT(path[depth].p_hdr)) {
		ext_debug("move %d:%llu:[%d]%d in new leaf %llu\n",
				le32_to_cpu(ex->ee_block),
				ext4_ext_pblock(ex),
				ext4_ext_is_uninitialized(ex),
				ext4_ext_get_actual_len(ex),
				newblock);
		ex++;
	}
}

#else
/* Debug dumps compile away entirely without EXT_DEBUG. */
#define ext4_ext_show_path(inode, path)
#define ext4_ext_show_leaf(inode, path)
#define ext4_ext_show_move(inode, path, newblock, level)
#endif
492a86c6181SAlex Tomas 
493b35905c1SAneesh Kumar K.V void ext4_ext_drop_refs(struct ext4_ext_path *path)
494a86c6181SAlex Tomas {
495a86c6181SAlex Tomas 	int depth = path->p_depth;
496a86c6181SAlex Tomas 	int i;
497a86c6181SAlex Tomas 
498a86c6181SAlex Tomas 	for (i = 0; i <= depth; i++, path++)
499a86c6181SAlex Tomas 		if (path->p_bh) {
500a86c6181SAlex Tomas 			brelse(path->p_bh);
501a86c6181SAlex Tomas 			path->p_bh = NULL;
502a86c6181SAlex Tomas 		}
503a86c6181SAlex Tomas }
504a86c6181SAlex Tomas 
/*
 * ext4_ext_binsearch_idx:
 * binary search for the closest index of the given block
 * the header must be checked before calling this
 */
static void
ext4_ext_binsearch_idx(struct inode *inode,
			struct ext4_ext_path *path, ext4_lblk_t block)
{
	struct ext4_extent_header *eh = path->p_hdr;
	struct ext4_extent_idx *r, *l, *m;


	ext_debug("binsearch for %u(idx):  ", block);

	/*
	 * The first index is always a candidate, so the search starts
	 * one past it; on exit l is the first index beyond @block.
	 */
	l = EXT_FIRST_INDEX(eh) + 1;
	r = EXT_LAST_INDEX(eh);
	while (l <= r) {
		m = l + (r - l) / 2;
		if (block < le32_to_cpu(m->ei_block))
			r = m - 1;
		else
			l = m + 1;
		ext_debug("%p(%u):%p(%u):%p(%u) ", l, le32_to_cpu(l->ei_block),
				m, le32_to_cpu(m->ei_block),
				r, le32_to_cpu(r->ei_block));
	}

	/* The hit is the entry just before l. */
	path->p_idx = l - 1;
	ext_debug("  -> %d->%lld ", le32_to_cpu(path->p_idx->ei_block),
		  ext4_idx_pblock(path->p_idx));

#ifdef CHECK_BINSEARCH
	/* Cross-check the result against a linear scan. */
	{
		struct ext4_extent_idx *chix, *ix;
		int k;

		chix = ix = EXT_FIRST_INDEX(eh);
		for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ix++) {
		  if (k != 0 &&
		      le32_to_cpu(ix->ei_block) <= le32_to_cpu(ix[-1].ei_block)) {
				printk(KERN_DEBUG "k=%d, ix=0x%p, "
				       "first=0x%p\n", k,
				       ix, EXT_FIRST_INDEX(eh));
				printk(KERN_DEBUG "%u <= %u\n",
				       le32_to_cpu(ix->ei_block),
				       le32_to_cpu(ix[-1].ei_block));
			}
			BUG_ON(k && le32_to_cpu(ix->ei_block)
					   <= le32_to_cpu(ix[-1].ei_block));
			if (block < le32_to_cpu(ix->ei_block))
				break;
			chix = ix;
		}
		BUG_ON(chix != path->p_idx);
	}
#endif

}
564a86c6181SAlex Tomas 
/*
 * ext4_ext_binsearch:
 * binary search for closest extent of the given block
 * the header must be checked before calling this
 */
static void
ext4_ext_binsearch(struct inode *inode,
		struct ext4_ext_path *path, ext4_lblk_t block)
{
	struct ext4_extent_header *eh = path->p_hdr;
	struct ext4_extent *r, *l, *m;

	if (eh->eh_entries == 0) {
		/*
		 * this leaf is empty:
		 * we get such a leaf in split/add case
		 */
		return;
	}

	ext_debug("binsearch for %u:  ", block);

	/*
	 * The first extent is always a candidate, so the search starts
	 * one past it; on exit l is the first extent beyond @block.
	 */
	l = EXT_FIRST_EXTENT(eh) + 1;
	r = EXT_LAST_EXTENT(eh);

	while (l <= r) {
		m = l + (r - l) / 2;
		if (block < le32_to_cpu(m->ee_block))
			r = m - 1;
		else
			l = m + 1;
		ext_debug("%p(%u):%p(%u):%p(%u) ", l, le32_to_cpu(l->ee_block),
				m, le32_to_cpu(m->ee_block),
				r, le32_to_cpu(r->ee_block));
	}

	/* The hit is the entry just before l. */
	path->p_ext = l - 1;
	ext_debug("  -> %d:%llu:[%d]%d ",
			le32_to_cpu(path->p_ext->ee_block),
			ext4_ext_pblock(path->p_ext),
			ext4_ext_is_uninitialized(path->p_ext),
			ext4_ext_get_actual_len(path->p_ext));

#ifdef CHECK_BINSEARCH
	/* Cross-check the result against a linear scan. */
	{
		struct ext4_extent *chex, *ex;
		int k;

		chex = ex = EXT_FIRST_EXTENT(eh);
		for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ex++) {
			BUG_ON(k && le32_to_cpu(ex->ee_block)
					  <= le32_to_cpu(ex[-1].ee_block));
			if (block < le32_to_cpu(ex->ee_block))
				break;
			chex = ex;
		}
		BUG_ON(chex != path->p_ext);
	}
#endif

}
626a86c6181SAlex Tomas 
627a86c6181SAlex Tomas int ext4_ext_tree_init(handle_t *handle, struct inode *inode)
628a86c6181SAlex Tomas {
629a86c6181SAlex Tomas 	struct ext4_extent_header *eh;
630a86c6181SAlex Tomas 
631a86c6181SAlex Tomas 	eh = ext_inode_hdr(inode);
632a86c6181SAlex Tomas 	eh->eh_depth = 0;
633a86c6181SAlex Tomas 	eh->eh_entries = 0;
634a86c6181SAlex Tomas 	eh->eh_magic = EXT4_EXT_MAGIC;
63555ad63bfSTheodore Ts'o 	eh->eh_max = cpu_to_le16(ext4_ext_space_root(inode, 0));
636a86c6181SAlex Tomas 	ext4_mark_inode_dirty(handle, inode);
637a86c6181SAlex Tomas 	ext4_ext_invalidate_cache(inode);
638a86c6181SAlex Tomas 	return 0;
639a86c6181SAlex Tomas }
640a86c6181SAlex Tomas 
/*
 * ext4_ext_find_extent:
 * Walk the extent tree of @inode from the root down to the leaf that
 * covers logical block @block, filling in one ext4_ext_path entry per
 * level.  If @path is NULL a fresh array is allocated (depth + 2 slots,
 * leaving room for a later tree growth); otherwise the caller's array
 * is reused.  Returns the path array on success, or an ERR_PTR on
 * failure; on failure any buffer references taken are dropped and a
 * locally allocated array is freed.
 */
struct ext4_ext_path *
ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
					struct ext4_ext_path *path)
{
	struct ext4_extent_header *eh;
	struct buffer_head *bh;
	/* alloc == 1 means we own @path and must free it on error */
	short int depth, i, ppos = 0, alloc = 0;

	eh = ext_inode_hdr(inode);	/* root header is inline in i_data */
	depth = ext_depth(inode);

	/* account possible depth increase */
	if (!path) {
		path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 2),
				GFP_NOFS);
		if (!path)
			return ERR_PTR(-ENOMEM);
		alloc = 1;
	}
	path[0].p_hdr = eh;
	path[0].p_bh = NULL;	/* root level has no backing buffer */

	i = depth;
	/* walk through the tree */
	while (i) {
		/* only blocks freshly read from disk need re-checking */
		int need_to_validate = 0;

		ext_debug("depth %d: num %d, max %d\n",
			  ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));

		/* pick the index entry covering @block at this level */
		ext4_ext_binsearch_idx(inode, path + ppos, block);
		path[ppos].p_block = ext4_idx_pblock(path[ppos].p_idx);
		path[ppos].p_depth = i;
		path[ppos].p_ext = NULL;

		bh = sb_getblk(inode->i_sb, path[ppos].p_block);
		if (unlikely(!bh))
			goto err;
		if (!bh_uptodate_or_lock(bh)) {
			trace_ext4_ext_load_extent(inode, block,
						path[ppos].p_block);
			if (bh_submit_read(bh) < 0) {
				put_bh(bh);
				goto err;
			}
			/* validate the extent entries */
			need_to_validate = 1;
		}
		eh = ext_block_hdr(bh);
		ppos++;
		if (unlikely(ppos > depth)) {
			/* tree is deeper than the root claims: corruption */
			put_bh(bh);
			EXT4_ERROR_INODE(inode,
					 "ppos %d > depth %d", ppos, depth);
			goto err;
		}
		/* the path now owns this buffer reference; it is released
		 * by ext4_ext_drop_refs() */
		path[ppos].p_bh = bh;
		path[ppos].p_hdr = eh;
		i--;

		if (need_to_validate && ext4_ext_check(inode, eh, i))
			goto err;
	}

	/* leaf level: no further index to descend */
	path[ppos].p_depth = i;
	path[ppos].p_ext = NULL;
	path[ppos].p_idx = NULL;

	/* find extent */
	ext4_ext_binsearch(inode, path + ppos, block);
	/* if not an empty leaf */
	if (path[ppos].p_ext)
		path[ppos].p_block = ext4_ext_pblock(path[ppos].p_ext);

	ext4_ext_show_path(inode, path);

	return path;

err:
	ext4_ext_drop_refs(path);	/* release every bh taken above */
	if (alloc)
		kfree(path);
	return ERR_PTR(-EIO);
}
725a86c6181SAlex Tomas 
726a86c6181SAlex Tomas /*
727d0d856e8SRandy Dunlap  * ext4_ext_insert_index:
728d0d856e8SRandy Dunlap  * insert new index [@logical;@ptr] into the block at @curp;
729d0d856e8SRandy Dunlap  * check where to insert: before @curp or after @curp
730a86c6181SAlex Tomas  */
static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
				 struct ext4_ext_path *curp,
				 int logical, ext4_fsblk_t ptr)
{
	struct ext4_extent_idx *ix;
	int len, err;

	/* join the running transaction before touching the index block */
	err = ext4_ext_get_access(handle, inode, curp);
	if (err)
		return err;

	/* a duplicate logical start would break the index ordering */
	if (unlikely(logical == le32_to_cpu(curp->p_idx->ei_block))) {
		EXT4_ERROR_INODE(inode,
				 "logical %d == ei_block %d!",
				 logical, le32_to_cpu(curp->p_idx->ei_block));
		return -EIO;
	}

	/* the caller must have guaranteed a free slot in this block */
	if (unlikely(le16_to_cpu(curp->p_hdr->eh_entries)
			     >= le16_to_cpu(curp->p_hdr->eh_max))) {
		EXT4_ERROR_INODE(inode,
				 "eh_entries %d >= eh_max %d!",
				 le16_to_cpu(curp->p_hdr->eh_entries),
				 le16_to_cpu(curp->p_hdr->eh_max));
		return -EIO;
	}

	/* slots from p_idx through the end of the block's capacity */
	len = EXT_MAX_INDEX(curp->p_hdr) - curp->p_idx;
	if (logical > le32_to_cpu(curp->p_idx->ei_block)) {
		/* insert after */
		if (curp->p_idx != EXT_LAST_INDEX(curp->p_hdr)) {
			/* shift the tail right by one slot to open a gap */
			len = (len - 1) * sizeof(struct ext4_extent_idx);
			len = len < 0 ? 0 : len;
			ext_debug("insert new index %d after: %llu. "
					"move %d from 0x%p to 0x%p\n",
					logical, ptr, len,
					(curp->p_idx + 1), (curp->p_idx + 2));
			memmove(curp->p_idx + 2, curp->p_idx + 1, len);
		}
		ix = curp->p_idx + 1;
	} else {
		/* insert before */
		len = len * sizeof(struct ext4_extent_idx);
		len = len < 0 ? 0 : len;
		ext_debug("insert new index %d before: %llu. "
				"move %d from 0x%p to 0x%p\n",
				logical, ptr, len,
				curp->p_idx, (curp->p_idx + 1));
		memmove(curp->p_idx + 1, curp->p_idx, len);
		ix = curp->p_idx;
	}

	/* fill the opened slot and account for it */
	ix->ei_block = cpu_to_le32(logical);
	ext4_idx_store_pblock(ix, ptr);
	le16_add_cpu(&curp->p_hdr->eh_entries, 1);

	/*
	 * NOTE(review): this bounds check runs after the entry has already
	 * been written and eh_entries bumped, so it can only report an
	 * overrun, not prevent it — confirm the earlier eh_max check makes
	 * this unreachable in practice.
	 */
	if (unlikely(ix > EXT_LAST_INDEX(curp->p_hdr))) {
		EXT4_ERROR_INODE(inode, "ix > EXT_LAST_INDEX!");
		return -EIO;
	}

	err = ext4_ext_dirty(handle, inode, curp);
	ext4_std_error(inode->i_sb, err);

	return err;
}
797a86c6181SAlex Tomas 
798a86c6181SAlex Tomas /*
799d0d856e8SRandy Dunlap  * ext4_ext_split:
800d0d856e8SRandy Dunlap  * inserts new subtree into the path, using free index entry
801d0d856e8SRandy Dunlap  * at depth @at:
802a86c6181SAlex Tomas  * - allocates all needed blocks (new leaf and all intermediate index blocks)
803a86c6181SAlex Tomas  * - makes decision where to split
804d0d856e8SRandy Dunlap  * - moves remaining extents and index entries (right to the split point)
805a86c6181SAlex Tomas  *   into the newly allocated blocks
806d0d856e8SRandy Dunlap  * - initializes subtree
807a86c6181SAlex Tomas  */
static int ext4_ext_split(handle_t *handle, struct inode *inode,
			  unsigned int flags,
			  struct ext4_ext_path *path,
			  struct ext4_extent *newext, int at)
{
	struct buffer_head *bh = NULL;
	int depth = ext_depth(inode);
	struct ext4_extent_header *neh;
	struct ext4_extent_idx *fidx;
	int i = at, k, m, a;
	ext4_fsblk_t newblock, oldblock;
	__le32 border;	/* first logical block covered by the new subtree */
	ext4_fsblk_t *ablocks = NULL; /* array of allocated blocks */
	int err = 0;

	/* make decision: where to split? */
	/* FIXME: now decision is simplest: at current extent */

	/* if current leaf will be split, then we should use
	 * border from split point */
	if (unlikely(path[depth].p_ext > EXT_MAX_EXTENT(path[depth].p_hdr))) {
		EXT4_ERROR_INODE(inode, "p_ext > EXT_MAX_EXTENT!");
		return -EIO;
	}
	if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) {
		border = path[depth].p_ext[1].ee_block;
		ext_debug("leaf will be split."
				" next leaf starts at %d\n",
				  le32_to_cpu(border));
	} else {
		border = newext->ee_block;
		ext_debug("leaf will be added."
				" next leaf starts at %d\n",
				le32_to_cpu(border));
	}

	/*
	 * If error occurs, then we break processing
	 * and mark filesystem read-only. index won't
	 * be inserted and tree will be in consistent
	 * state. Next mount will repair buffers too.
	 */

	/*
	 * Get array to track all allocated blocks.
	 * We need this to handle errors and free blocks
	 * upon them.
	 */
	ablocks = kzalloc(sizeof(ext4_fsblk_t) * depth, GFP_NOFS);
	if (!ablocks)
		return -ENOMEM;

	/* allocate all needed blocks */
	/* one new block per level below the split point: a leaf plus
	 * (depth - at - 1) intermediate index blocks */
	ext_debug("allocate %d blocks for indexes/leaf\n", depth - at);
	for (a = 0; a < depth - at; a++) {
		newblock = ext4_ext_new_meta_block(handle, inode, path,
						   newext, &err, flags);
		if (newblock == 0)
			goto cleanup;
		ablocks[a] = newblock;
	}

	/* initialize new leaf */
	newblock = ablocks[--a];
	if (unlikely(newblock == 0)) {
		EXT4_ERROR_INODE(inode, "newblock == 0!");
		err = -EIO;
		goto cleanup;
	}
	bh = sb_getblk(inode->i_sb, newblock);
	if (!bh) {
		err = -EIO;
		goto cleanup;
	}
	lock_buffer(bh);

	/* journal the brand-new buffer before writing to it */
	err = ext4_journal_get_create_access(handle, bh);
	if (err)
		goto cleanup;

	neh = ext_block_hdr(bh);
	neh->eh_entries = 0;
	neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0));
	neh->eh_magic = EXT4_EXT_MAGIC;
	neh->eh_depth = 0;

	/* move remainder of path[depth] to the new leaf */
	if (unlikely(path[depth].p_hdr->eh_entries !=
		     path[depth].p_hdr->eh_max)) {
		EXT4_ERROR_INODE(inode, "eh_entries %d != eh_max %d!",
				 path[depth].p_hdr->eh_entries,
				 path[depth].p_hdr->eh_max);
		err = -EIO;
		goto cleanup;
	}
	/* start copy from next extent */
	/* m = number of extents to the right of the split point */
	m = EXT_MAX_EXTENT(path[depth].p_hdr) - path[depth].p_ext++;
	ext4_ext_show_move(inode, path, newblock, depth);
	if (m) {
		struct ext4_extent *ex;
		ex = EXT_FIRST_EXTENT(neh);
		memmove(ex, path[depth].p_ext, sizeof(struct ext4_extent) * m);
		le16_add_cpu(&neh->eh_entries, m);
	}

	set_buffer_uptodate(bh);
	unlock_buffer(bh);

	err = ext4_handle_dirty_metadata(handle, inode, bh);
	if (err)
		goto cleanup;
	brelse(bh);
	bh = NULL;

	/* correct old leaf */
	/* the moved extents no longer live here, drop them from the count */
	if (m) {
		err = ext4_ext_get_access(handle, inode, path + depth);
		if (err)
			goto cleanup;
		le16_add_cpu(&path[depth].p_hdr->eh_entries, -m);
		err = ext4_ext_dirty(handle, inode, path + depth);
		if (err)
			goto cleanup;

	}

	/* create intermediate indexes */
	k = depth - at - 1;
	if (unlikely(k < 0)) {
		EXT4_ERROR_INODE(inode, "k %d < 0!", k);
		err = -EIO;
		goto cleanup;
	}
	if (k)
		ext_debug("create %d intermediate indices\n", k);
	/* insert new index into current index block */
	/* current depth stored in i var */
	i = depth - 1;
	/* build the chain of index blocks bottom-up; each new index
	 * block's first entry points at the block created just below it */
	while (k--) {
		oldblock = newblock;
		newblock = ablocks[--a];
		bh = sb_getblk(inode->i_sb, newblock);
		if (!bh) {
			err = -EIO;
			goto cleanup;
		}
		lock_buffer(bh);

		err = ext4_journal_get_create_access(handle, bh);
		if (err)
			goto cleanup;

		neh = ext_block_hdr(bh);
		neh->eh_entries = cpu_to_le16(1);
		neh->eh_magic = EXT4_EXT_MAGIC;
		neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode, 0));
		neh->eh_depth = cpu_to_le16(depth - i);
		fidx = EXT_FIRST_INDEX(neh);
		fidx->ei_block = border;
		ext4_idx_store_pblock(fidx, oldblock);

		ext_debug("int.index at %d (block %llu): %u -> %llu\n",
				i, newblock, le32_to_cpu(border), oldblock);

		/* move remainder of path[i] to the new index block */
		if (unlikely(EXT_MAX_INDEX(path[i].p_hdr) !=
					EXT_LAST_INDEX(path[i].p_hdr))) {
			EXT4_ERROR_INODE(inode,
					 "EXT_MAX_INDEX != EXT_LAST_INDEX ee_block %d!",
					 le32_to_cpu(path[i].p_ext->ee_block));
			err = -EIO;
			goto cleanup;
		}
		/* start copy indexes */
		m = EXT_MAX_INDEX(path[i].p_hdr) - path[i].p_idx++;
		ext_debug("cur 0x%p, last 0x%p\n", path[i].p_idx,
				EXT_MAX_INDEX(path[i].p_hdr));
		ext4_ext_show_move(inode, path, newblock, i);
		if (m) {
			memmove(++fidx, path[i].p_idx,
				sizeof(struct ext4_extent_idx) * m);
			le16_add_cpu(&neh->eh_entries, m);
		}
		set_buffer_uptodate(bh);
		unlock_buffer(bh);

		err = ext4_handle_dirty_metadata(handle, inode, bh);
		if (err)
			goto cleanup;
		brelse(bh);
		bh = NULL;

		/* correct old index */
		if (m) {
			err = ext4_ext_get_access(handle, inode, path + i);
			if (err)
				goto cleanup;
			le16_add_cpu(&path[i].p_hdr->eh_entries, -m);
			err = ext4_ext_dirty(handle, inode, path + i);
			if (err)
				goto cleanup;
		}

		i--;
	}

	/* insert new index */
	/* finally hook the new subtree into the index block at level @at */
	err = ext4_ext_insert_index(handle, inode, path + at,
				    le32_to_cpu(border), newblock);

cleanup:
	if (bh) {
		if (buffer_locked(bh))
			unlock_buffer(bh);
		brelse(bh);
	}

	if (err) {
		/* free all allocated blocks in error case */
		for (i = 0; i < depth; i++) {
			if (!ablocks[i])
				continue;
			ext4_free_blocks(handle, inode, NULL, ablocks[i], 1,
					 EXT4_FREE_BLOCKS_METADATA);
		}
	}
	kfree(ablocks);

	return err;
}
1038a86c6181SAlex Tomas 
1039a86c6181SAlex Tomas /*
1040d0d856e8SRandy Dunlap  * ext4_ext_grow_indepth:
1041d0d856e8SRandy Dunlap  * implements tree growing procedure:
1042a86c6181SAlex Tomas  * - allocates new block
1043a86c6181SAlex Tomas  * - moves top-level data (index block or leaf) into the new block
1044d0d856e8SRandy Dunlap  * - initializes new top-level, creating index that points to the
1045a86c6181SAlex Tomas  *   just created block
1046a86c6181SAlex Tomas  */
static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
				 unsigned int flags,
				 struct ext4_ext_path *path,
				 struct ext4_extent *newext)
{
	struct ext4_ext_path *curp = path;	/* root level of the path */
	struct ext4_extent_header *neh;
	struct buffer_head *bh;
	ext4_fsblk_t newblock;
	int err = 0;

	newblock = ext4_ext_new_meta_block(handle, inode, path,
		newext, &err, flags);
	if (newblock == 0)
		return err;

	bh = sb_getblk(inode->i_sb, newblock);
	if (!bh) {
		err = -EIO;
		ext4_std_error(inode->i_sb, err);
		return err;
	}
	lock_buffer(bh);

	/* journal the new buffer before copying data into it */
	err = ext4_journal_get_create_access(handle, bh);
	if (err) {
		unlock_buffer(bh);
		goto out;
	}

	/* move top-level index/leaf into new block */
	memmove(bh->b_data, curp->p_hdr, sizeof(EXT4_I(inode)->i_data));

	/* set size of new block */
	neh = ext_block_hdr(bh);
	/* old root could have indexes or leaves
	 * so calculate e_max right way */
	if (ext_depth(inode))
		neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode, 0));
	else
		neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0));
	neh->eh_magic = EXT4_EXT_MAGIC;
	set_buffer_uptodate(bh);
	unlock_buffer(bh);

	err = ext4_handle_dirty_metadata(handle, inode, bh);
	if (err)
		goto out;

	/* create index in new top-level index: num,max,pointer */
	err = ext4_ext_get_access(handle, inode, curp);
	if (err)
		goto out;

	/* turn the inode-resident root into a one-entry index block */
	curp->p_hdr->eh_magic = EXT4_EXT_MAGIC;
	curp->p_hdr->eh_max = cpu_to_le16(ext4_ext_space_root_idx(inode, 0));
	curp->p_hdr->eh_entries = cpu_to_le16(1);
	curp->p_idx = EXT_FIRST_INDEX(curp->p_hdr);

	/* the single index entry starts where the old root's first
	 * index/extent started, depending on what the old root held */
	if (path[0].p_hdr->eh_depth)
		curp->p_idx->ei_block =
			EXT_FIRST_INDEX(path[0].p_hdr)->ei_block;
	else
		curp->p_idx->ei_block =
			EXT_FIRST_EXTENT(path[0].p_hdr)->ee_block;
	ext4_idx_store_pblock(curp->p_idx, newblock);

	neh = ext_inode_hdr(inode);
	ext_debug("new root: num %d(%d), lblock %d, ptr %llu\n",
		  le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max),
		  le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block),
		  ext4_idx_pblock(EXT_FIRST_INDEX(neh)));

	/* tree is now one level deeper */
	neh->eh_depth = cpu_to_le16(path->p_depth + 1);
	err = ext4_ext_dirty(handle, inode, curp);
out:
	brelse(bh);

	return err;
}
1127a86c6181SAlex Tomas 
1128a86c6181SAlex Tomas /*
1129d0d856e8SRandy Dunlap  * ext4_ext_create_new_leaf:
1130d0d856e8SRandy Dunlap  * finds empty index and adds new leaf.
1131d0d856e8SRandy Dunlap  * if no free index is found, then it requests in-depth growing.
1132a86c6181SAlex Tomas  */
static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode,
				    unsigned int flags,
				    struct ext4_ext_path *path,
				    struct ext4_extent *newext)
{
	struct ext4_ext_path *curp;
	int depth, i, err = 0;

repeat:
	i = depth = ext_depth(inode);

	/* walk up to the tree and look for free index entry */
	curp = path + depth;
	while (i > 0 && !EXT_HAS_FREE_INDEX(curp)) {
		i--;
		curp--;
	}

	/* we use already allocated block for index block,
	 * so subsequent data blocks should be contiguous */
	if (EXT_HAS_FREE_INDEX(curp)) {
		/* if we found index with free entry, then use that
		 * entry: create all needed subtree and add new leaf */
		err = ext4_ext_split(handle, inode, flags, path, newext, i);
		if (err)
			goto out;

		/* refill path */
		/* the split moved entries around; re-walk the tree so
		 * @path reflects the new layout */
		ext4_ext_drop_refs(path);
		path = ext4_ext_find_extent(inode,
				    (ext4_lblk_t)le32_to_cpu(newext->ee_block),
				    path);
		if (IS_ERR(path))
			err = PTR_ERR(path);
	} else {
		/* tree is full, time to grow in depth */
		err = ext4_ext_grow_indepth(handle, inode, flags,
					    path, newext);
		if (err)
			goto out;

		/* refill path */
		ext4_ext_drop_refs(path);
		path = ext4_ext_find_extent(inode,
				   (ext4_lblk_t)le32_to_cpu(newext->ee_block),
				    path);
		if (IS_ERR(path)) {
			err = PTR_ERR(path);
			goto out;
		}

		/*
		 * only first (depth 0 -> 1) produces free space;
		 * in all other cases we have to split the grown tree
		 */
		depth = ext_depth(inode);
		if (path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max) {
			/* now we need to split */
			goto repeat;
		}
	}

out:
	return err;
}
1198a86c6181SAlex Tomas 
/*
 * Search the closest allocated block to the left of *logical
 * and return it at @logical, with its physical address at @phys.
 * If *logical is the smallest allocated block, the function
 * returns 0 at @phys.
 * The return value contains 0 (success) or an error code.
 */
static int ext4_ext_search_left(struct inode *inode,
				struct ext4_ext_path *path,
				ext4_lblk_t *logical, ext4_fsblk_t *phys)
{
	struct ext4_extent_idx *ix;
	struct ext4_extent *ex;
	int depth, ee_len;

	if (unlikely(path == NULL)) {
		EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical);
		return -EIO;
	}
	depth = path->p_depth;
	*phys = 0;

	/* empty tree: nothing to the left */
	if (depth == 0 && path->p_ext == NULL)
		return 0;

	/* usually extent in the path covers blocks smaller
	 * then *logical, but it can be that extent is the
	 * first one in the file */

	ex = path[depth].p_ext;
	ee_len = ext4_ext_get_actual_len(ex);
	if (*logical < le32_to_cpu(ex->ee_block)) {
		/* @ex starts after *logical, so @ex must be the very first
		 * extent -- otherwise the tree is corrupted.  Verify that
		 * claim at the leaf and at every index level. */
		if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) {
			EXT4_ERROR_INODE(inode,
					 "EXT_FIRST_EXTENT != ex *logical %d ee_block %d!",
					 *logical, le32_to_cpu(ex->ee_block));
			return -EIO;
		}
		while (--depth >= 0) {
			ix = path[depth].p_idx;
			if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) {
				EXT4_ERROR_INODE(inode,
				  "ix (%d) != EXT_FIRST_INDEX (%d) (depth %d)!",
				  ix != NULL ? ix->ei_block : 0,
				  EXT_FIRST_INDEX(path[depth].p_hdr) != NULL ?
				    EXT_FIRST_INDEX(path[depth].p_hdr)->ei_block : 0,
				  depth);
				return -EIO;
			}
		}
		/* no allocated block to the left; *phys stays 0 */
		return 0;
	}

	/* sanity: *logical must lie at or past the end of @ex */
	if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) {
		EXT4_ERROR_INODE(inode,
				 "logical %d < ee_block %d + ee_len %d!",
				 *logical, le32_to_cpu(ex->ee_block), ee_len);
		return -EIO;
	}

	/* the closest allocated block is the last block of @ex */
	*logical = le32_to_cpu(ex->ee_block) + ee_len - 1;
	*phys = ext4_ext_pblock(ex) + ee_len - 1;
	return 0;
}
12631988b51eSAlex Tomas 
/*
 * Search the closest allocated block to the right of *logical
 * and return it at @logical, with its physical address at @phys.
 * If *logical is the largest allocated block, the function
 * returns 0 at @phys.
 * The return value contains 0 (success) or an error code.
 */
12711f109d5aSTheodore Ts'o static int ext4_ext_search_right(struct inode *inode,
12721f109d5aSTheodore Ts'o 				 struct ext4_ext_path *path,
1273*4d33b1efSTheodore Ts'o 				 ext4_lblk_t *logical, ext4_fsblk_t *phys,
1274*4d33b1efSTheodore Ts'o 				 struct ext4_extent **ret_ex)
12751988b51eSAlex Tomas {
12761988b51eSAlex Tomas 	struct buffer_head *bh = NULL;
12771988b51eSAlex Tomas 	struct ext4_extent_header *eh;
12781988b51eSAlex Tomas 	struct ext4_extent_idx *ix;
12791988b51eSAlex Tomas 	struct ext4_extent *ex;
12801988b51eSAlex Tomas 	ext4_fsblk_t block;
1281395a87bfSEric Sandeen 	int depth;	/* Note, NOT eh_depth; depth from top of tree */
1282395a87bfSEric Sandeen 	int ee_len;
12831988b51eSAlex Tomas 
1284273df556SFrank Mayhar 	if (unlikely(path == NULL)) {
1285273df556SFrank Mayhar 		EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical);
1286273df556SFrank Mayhar 		return -EIO;
1287273df556SFrank Mayhar 	}
12881988b51eSAlex Tomas 	depth = path->p_depth;
12891988b51eSAlex Tomas 	*phys = 0;
12901988b51eSAlex Tomas 
12911988b51eSAlex Tomas 	if (depth == 0 && path->p_ext == NULL)
12921988b51eSAlex Tomas 		return 0;
12931988b51eSAlex Tomas 
12941988b51eSAlex Tomas 	/* usually extent in the path covers blocks smaller
12951988b51eSAlex Tomas 	 * then *logical, but it can be that extent is the
12961988b51eSAlex Tomas 	 * first one in the file */
12971988b51eSAlex Tomas 
12981988b51eSAlex Tomas 	ex = path[depth].p_ext;
1299b939e376SAneesh Kumar K.V 	ee_len = ext4_ext_get_actual_len(ex);
13001988b51eSAlex Tomas 	if (*logical < le32_to_cpu(ex->ee_block)) {
1301273df556SFrank Mayhar 		if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) {
1302273df556SFrank Mayhar 			EXT4_ERROR_INODE(inode,
1303273df556SFrank Mayhar 					 "first_extent(path[%d].p_hdr) != ex",
1304273df556SFrank Mayhar 					 depth);
1305273df556SFrank Mayhar 			return -EIO;
1306273df556SFrank Mayhar 		}
13071988b51eSAlex Tomas 		while (--depth >= 0) {
13081988b51eSAlex Tomas 			ix = path[depth].p_idx;
1309273df556SFrank Mayhar 			if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) {
1310273df556SFrank Mayhar 				EXT4_ERROR_INODE(inode,
1311273df556SFrank Mayhar 						 "ix != EXT_FIRST_INDEX *logical %d!",
1312273df556SFrank Mayhar 						 *logical);
1313273df556SFrank Mayhar 				return -EIO;
1314273df556SFrank Mayhar 			}
13151988b51eSAlex Tomas 		}
1316*4d33b1efSTheodore Ts'o 		goto found_extent;
13171988b51eSAlex Tomas 	}
13181988b51eSAlex Tomas 
1319273df556SFrank Mayhar 	if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) {
1320273df556SFrank Mayhar 		EXT4_ERROR_INODE(inode,
1321273df556SFrank Mayhar 				 "logical %d < ee_block %d + ee_len %d!",
1322273df556SFrank Mayhar 				 *logical, le32_to_cpu(ex->ee_block), ee_len);
1323273df556SFrank Mayhar 		return -EIO;
1324273df556SFrank Mayhar 	}
13251988b51eSAlex Tomas 
13261988b51eSAlex Tomas 	if (ex != EXT_LAST_EXTENT(path[depth].p_hdr)) {
13271988b51eSAlex Tomas 		/* next allocated block in this leaf */
13281988b51eSAlex Tomas 		ex++;
1329*4d33b1efSTheodore Ts'o 		goto found_extent;
13301988b51eSAlex Tomas 	}
13311988b51eSAlex Tomas 
13321988b51eSAlex Tomas 	/* go up and search for index to the right */
13331988b51eSAlex Tomas 	while (--depth >= 0) {
13341988b51eSAlex Tomas 		ix = path[depth].p_idx;
13351988b51eSAlex Tomas 		if (ix != EXT_LAST_INDEX(path[depth].p_hdr))
133625f1ee3aSWu Fengguang 			goto got_index;
13371988b51eSAlex Tomas 	}
13381988b51eSAlex Tomas 
133925f1ee3aSWu Fengguang 	/* we've gone up to the root and found no index to the right */
13401988b51eSAlex Tomas 	return 0;
13411988b51eSAlex Tomas 
134225f1ee3aSWu Fengguang got_index:
13431988b51eSAlex Tomas 	/* we've found index to the right, let's
13441988b51eSAlex Tomas 	 * follow it and find the closest allocated
13451988b51eSAlex Tomas 	 * block to the right */
13461988b51eSAlex Tomas 	ix++;
1347bf89d16fSTheodore Ts'o 	block = ext4_idx_pblock(ix);
13481988b51eSAlex Tomas 	while (++depth < path->p_depth) {
13491988b51eSAlex Tomas 		bh = sb_bread(inode->i_sb, block);
13501988b51eSAlex Tomas 		if (bh == NULL)
13511988b51eSAlex Tomas 			return -EIO;
13521988b51eSAlex Tomas 		eh = ext_block_hdr(bh);
1353395a87bfSEric Sandeen 		/* subtract from p_depth to get proper eh_depth */
135456b19868SAneesh Kumar K.V 		if (ext4_ext_check(inode, eh, path->p_depth - depth)) {
13551988b51eSAlex Tomas 			put_bh(bh);
13561988b51eSAlex Tomas 			return -EIO;
13571988b51eSAlex Tomas 		}
13581988b51eSAlex Tomas 		ix = EXT_FIRST_INDEX(eh);
1359bf89d16fSTheodore Ts'o 		block = ext4_idx_pblock(ix);
13601988b51eSAlex Tomas 		put_bh(bh);
13611988b51eSAlex Tomas 	}
13621988b51eSAlex Tomas 
13631988b51eSAlex Tomas 	bh = sb_bread(inode->i_sb, block);
13641988b51eSAlex Tomas 	if (bh == NULL)
13651988b51eSAlex Tomas 		return -EIO;
13661988b51eSAlex Tomas 	eh = ext_block_hdr(bh);
136756b19868SAneesh Kumar K.V 	if (ext4_ext_check(inode, eh, path->p_depth - depth)) {
13681988b51eSAlex Tomas 		put_bh(bh);
13691988b51eSAlex Tomas 		return -EIO;
13701988b51eSAlex Tomas 	}
13711988b51eSAlex Tomas 	ex = EXT_FIRST_EXTENT(eh);
1372*4d33b1efSTheodore Ts'o found_extent:
13731988b51eSAlex Tomas 	*logical = le32_to_cpu(ex->ee_block);
1374bf89d16fSTheodore Ts'o 	*phys = ext4_ext_pblock(ex);
1375*4d33b1efSTheodore Ts'o 	*ret_ex = ex;
1376*4d33b1efSTheodore Ts'o 	if (bh)
13771988b51eSAlex Tomas 		put_bh(bh);
13781988b51eSAlex Tomas 	return 0;
13791988b51eSAlex Tomas }
13801988b51eSAlex Tomas 
13811988b51eSAlex Tomas /*
1382d0d856e8SRandy Dunlap  * ext4_ext_next_allocated_block:
1383f17722f9SLukas Czerner  * returns allocated block in subsequent extent or EXT_MAX_BLOCKS.
1384d0d856e8SRandy Dunlap  * NOTE: it considers block number from index entry as
1385d0d856e8SRandy Dunlap  * allocated block. Thus, index entries have to be consistent
1386d0d856e8SRandy Dunlap  * with leaves.
1387a86c6181SAlex Tomas  */
1388725d26d3SAneesh Kumar K.V static ext4_lblk_t
1389a86c6181SAlex Tomas ext4_ext_next_allocated_block(struct ext4_ext_path *path)
1390a86c6181SAlex Tomas {
1391a86c6181SAlex Tomas 	int depth;
1392a86c6181SAlex Tomas 
1393a86c6181SAlex Tomas 	BUG_ON(path == NULL);
1394a86c6181SAlex Tomas 	depth = path->p_depth;
1395a86c6181SAlex Tomas 
1396a86c6181SAlex Tomas 	if (depth == 0 && path->p_ext == NULL)
1397f17722f9SLukas Czerner 		return EXT_MAX_BLOCKS;
1398a86c6181SAlex Tomas 
1399a86c6181SAlex Tomas 	while (depth >= 0) {
1400a86c6181SAlex Tomas 		if (depth == path->p_depth) {
1401a86c6181SAlex Tomas 			/* leaf */
1402a86c6181SAlex Tomas 			if (path[depth].p_ext !=
1403a86c6181SAlex Tomas 					EXT_LAST_EXTENT(path[depth].p_hdr))
1404a86c6181SAlex Tomas 			  return le32_to_cpu(path[depth].p_ext[1].ee_block);
1405a86c6181SAlex Tomas 		} else {
1406a86c6181SAlex Tomas 			/* index */
1407a86c6181SAlex Tomas 			if (path[depth].p_idx !=
1408a86c6181SAlex Tomas 					EXT_LAST_INDEX(path[depth].p_hdr))
1409a86c6181SAlex Tomas 			  return le32_to_cpu(path[depth].p_idx[1].ei_block);
1410a86c6181SAlex Tomas 		}
1411a86c6181SAlex Tomas 		depth--;
1412a86c6181SAlex Tomas 	}
1413a86c6181SAlex Tomas 
1414f17722f9SLukas Czerner 	return EXT_MAX_BLOCKS;
1415a86c6181SAlex Tomas }
1416a86c6181SAlex Tomas 
1417a86c6181SAlex Tomas /*
1418d0d856e8SRandy Dunlap  * ext4_ext_next_leaf_block:
1419f17722f9SLukas Czerner  * returns first allocated block from next leaf or EXT_MAX_BLOCKS
1420a86c6181SAlex Tomas  */
14215718789dSRobin Dong static ext4_lblk_t ext4_ext_next_leaf_block(struct ext4_ext_path *path)
1422a86c6181SAlex Tomas {
1423a86c6181SAlex Tomas 	int depth;
1424a86c6181SAlex Tomas 
1425a86c6181SAlex Tomas 	BUG_ON(path == NULL);
1426a86c6181SAlex Tomas 	depth = path->p_depth;
1427a86c6181SAlex Tomas 
1428a86c6181SAlex Tomas 	/* zero-tree has no leaf blocks at all */
1429a86c6181SAlex Tomas 	if (depth == 0)
1430f17722f9SLukas Czerner 		return EXT_MAX_BLOCKS;
1431a86c6181SAlex Tomas 
1432a86c6181SAlex Tomas 	/* go to index block */
1433a86c6181SAlex Tomas 	depth--;
1434a86c6181SAlex Tomas 
1435a86c6181SAlex Tomas 	while (depth >= 0) {
1436a86c6181SAlex Tomas 		if (path[depth].p_idx !=
1437a86c6181SAlex Tomas 				EXT_LAST_INDEX(path[depth].p_hdr))
1438725d26d3SAneesh Kumar K.V 			return (ext4_lblk_t)
1439725d26d3SAneesh Kumar K.V 				le32_to_cpu(path[depth].p_idx[1].ei_block);
1440a86c6181SAlex Tomas 		depth--;
1441a86c6181SAlex Tomas 	}
1442a86c6181SAlex Tomas 
1443f17722f9SLukas Czerner 	return EXT_MAX_BLOCKS;
1444a86c6181SAlex Tomas }
1445a86c6181SAlex Tomas 
/*
 * ext4_ext_correct_indexes:
 * if leaf gets modified and modified extent is first in the leaf,
 * then we have to correct all indexes above.
 * TODO: do we need to correct tree in all cases?
 */
static int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode,
				struct ext4_ext_path *path)
{
	struct ext4_extent_header *eh;
	int depth = ext_depth(inode);
	struct ext4_extent *ex;
	__le32 border;	/* new logical start propagated up the index chain */
	int k, err = 0;

	eh = path[depth].p_hdr;
	ex = path[depth].p_ext;

	if (unlikely(ex == NULL || eh == NULL)) {
		EXT4_ERROR_INODE(inode,
				 "ex %p == NULL or eh %p == NULL", ex, eh);
		return -EIO;
	}

	if (depth == 0) {
		/* there is no tree at all */
		return 0;
	}

	if (ex != EXT_FIRST_EXTENT(eh)) {
		/* we correct tree if first leaf got modified only */
		return 0;
	}

	/*
	 * TODO: we need correction if border is smaller than current one
	 */
	k = depth - 1;
	border = path[depth].p_ext->ee_block;
	/* journal, update and dirty the index directly above the leaf */
	err = ext4_ext_get_access(handle, inode, path + k);
	if (err)
		return err;
	path[k].p_idx->ei_block = border;
	err = ext4_ext_dirty(handle, inode, path + k);
	if (err)
		return err;

	while (k--) {
		/* change all left-side indexes */
		/* stop as soon as the level below was not the first entry —
		 * higher-level indexes then still cover the right range */
		if (path[k+1].p_idx != EXT_FIRST_INDEX(path[k+1].p_hdr))
			break;
		err = ext4_ext_get_access(handle, inode, path + k);
		if (err)
			break;
		path[k].p_idx->ei_block = border;
		err = ext4_ext_dirty(handle, inode, path + k);
		if (err)
			break;
	}

	return err;
}
1508a86c6181SAlex Tomas 
1509748de673SAkira Fujita int
1510a86c6181SAlex Tomas ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
1511a86c6181SAlex Tomas 				struct ext4_extent *ex2)
1512a86c6181SAlex Tomas {
1513749269faSAmit Arora 	unsigned short ext1_ee_len, ext2_ee_len, max_len;
1514a2df2a63SAmit Arora 
1515a2df2a63SAmit Arora 	/*
1516a2df2a63SAmit Arora 	 * Make sure that either both extents are uninitialized, or
1517a2df2a63SAmit Arora 	 * both are _not_.
1518a2df2a63SAmit Arora 	 */
1519a2df2a63SAmit Arora 	if (ext4_ext_is_uninitialized(ex1) ^ ext4_ext_is_uninitialized(ex2))
1520a2df2a63SAmit Arora 		return 0;
1521a2df2a63SAmit Arora 
1522749269faSAmit Arora 	if (ext4_ext_is_uninitialized(ex1))
1523749269faSAmit Arora 		max_len = EXT_UNINIT_MAX_LEN;
1524749269faSAmit Arora 	else
1525749269faSAmit Arora 		max_len = EXT_INIT_MAX_LEN;
1526749269faSAmit Arora 
1527a2df2a63SAmit Arora 	ext1_ee_len = ext4_ext_get_actual_len(ex1);
1528a2df2a63SAmit Arora 	ext2_ee_len = ext4_ext_get_actual_len(ex2);
1529a2df2a63SAmit Arora 
1530a2df2a63SAmit Arora 	if (le32_to_cpu(ex1->ee_block) + ext1_ee_len !=
153163f57933SAndrew Morton 			le32_to_cpu(ex2->ee_block))
1532a86c6181SAlex Tomas 		return 0;
1533a86c6181SAlex Tomas 
1534471d4011SSuparna Bhattacharya 	/*
1535471d4011SSuparna Bhattacharya 	 * To allow future support for preallocated extents to be added
1536471d4011SSuparna Bhattacharya 	 * as an RO_COMPAT feature, refuse to merge to extents if
1537d0d856e8SRandy Dunlap 	 * this can result in the top bit of ee_len being set.
1538471d4011SSuparna Bhattacharya 	 */
1539749269faSAmit Arora 	if (ext1_ee_len + ext2_ee_len > max_len)
1540471d4011SSuparna Bhattacharya 		return 0;
1541bbf2f9fbSRobert P. J. Day #ifdef AGGRESSIVE_TEST
1542b939e376SAneesh Kumar K.V 	if (ext1_ee_len >= 4)
1543a86c6181SAlex Tomas 		return 0;
1544a86c6181SAlex Tomas #endif
1545a86c6181SAlex Tomas 
1546bf89d16fSTheodore Ts'o 	if (ext4_ext_pblock(ex1) + ext1_ee_len == ext4_ext_pblock(ex2))
1547a86c6181SAlex Tomas 		return 1;
1548a86c6181SAlex Tomas 	return 0;
1549a86c6181SAlex Tomas }
1550a86c6181SAlex Tomas 
/*
 * This function tries to merge the "ex" extent to the next extent in the tree.
 * It always tries to merge towards right. If you want to merge towards
 * left, pass "ex - 1" as argument instead of "ex".
 * Returns 0 if the extents (ex and ex+1) were _not_ merged and returns
 * 1 if they got merged.
 */
static int ext4_ext_try_to_merge_right(struct inode *inode,
				 struct ext4_ext_path *path,
				 struct ext4_extent *ex)
{
	struct ext4_extent_header *eh;
	unsigned int depth, len;
	int merge_done = 0;
	int uninitialized = 0;

	depth = ext_depth(inode);
	BUG_ON(path[depth].p_hdr == NULL);
	eh = path[depth].p_hdr;

	/* keep absorbing successors for as long as they stay mergeable */
	while (ex < EXT_LAST_EXTENT(eh)) {
		if (!ext4_can_extents_be_merged(inode, ex, ex + 1))
			break;
		/* merge with next extent! */
		if (ext4_ext_is_uninitialized(ex))
			uninitialized = 1;
		/* ee_len is rewritten with the plain combined length, so
		 * the uninitialized marking is restored just below */
		ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
				+ ext4_ext_get_actual_len(ex + 1));
		if (uninitialized)
			ext4_ext_mark_uninitialized(ex);

		/* slide the remaining extents left to close the gap left
		 * by the swallowed entry (only needed if any follow it) */
		if (ex + 1 < EXT_LAST_EXTENT(eh)) {
			len = (EXT_LAST_EXTENT(eh) - ex - 1)
				* sizeof(struct ext4_extent);
			memmove(ex + 1, ex + 2, len);
		}
		le16_add_cpu(&eh->eh_entries, -1);
		merge_done = 1;
		/* a leaf can never end up empty after a merge */
		WARN_ON(eh->eh_entries == 0);
		if (!eh->eh_entries)
			EXT4_ERROR_INODE(inode, "eh->eh_entries = 0!");
	}

	return merge_done;
}
159656055d3aSAmit Arora 
159756055d3aSAmit Arora /*
1598197217a5SYongqiang Yang  * This function tries to merge the @ex extent to neighbours in the tree.
1599197217a5SYongqiang Yang  * return 1 if merge left else 0.
1600197217a5SYongqiang Yang  */
1601197217a5SYongqiang Yang static int ext4_ext_try_to_merge(struct inode *inode,
1602197217a5SYongqiang Yang 				  struct ext4_ext_path *path,
1603197217a5SYongqiang Yang 				  struct ext4_extent *ex) {
1604197217a5SYongqiang Yang 	struct ext4_extent_header *eh;
1605197217a5SYongqiang Yang 	unsigned int depth;
1606197217a5SYongqiang Yang 	int merge_done = 0;
1607197217a5SYongqiang Yang 	int ret = 0;
1608197217a5SYongqiang Yang 
1609197217a5SYongqiang Yang 	depth = ext_depth(inode);
1610197217a5SYongqiang Yang 	BUG_ON(path[depth].p_hdr == NULL);
1611197217a5SYongqiang Yang 	eh = path[depth].p_hdr;
1612197217a5SYongqiang Yang 
1613197217a5SYongqiang Yang 	if (ex > EXT_FIRST_EXTENT(eh))
1614197217a5SYongqiang Yang 		merge_done = ext4_ext_try_to_merge_right(inode, path, ex - 1);
1615197217a5SYongqiang Yang 
1616197217a5SYongqiang Yang 	if (!merge_done)
1617197217a5SYongqiang Yang 		ret = ext4_ext_try_to_merge_right(inode, path, ex);
1618197217a5SYongqiang Yang 
1619197217a5SYongqiang Yang 	return ret;
1620197217a5SYongqiang Yang }
1621197217a5SYongqiang Yang 
/*
 * check if a portion of the "newext" extent overlaps with an
 * existing extent.
 *
 * If there is an overlap discovered, it updates the length of the newext
 * such that there will be no overlap, and then returns 1.
 * If there is no overlap found, it returns 0.
 */
static unsigned int ext4_ext_check_overlap(struct ext4_sb_info *sbi,
					   struct inode *inode,
					   struct ext4_extent *newext,
					   struct ext4_ext_path *path)
{
	ext4_lblk_t b1, b2;	/* b1: start of newext; b2: neighbour start */
	unsigned int depth, len1;
	unsigned int ret = 0;

	b1 = le32_to_cpu(newext->ee_block);
	len1 = ext4_ext_get_actual_len(newext);
	depth = ext_depth(inode);
	if (!path[depth].p_ext)
		goto out;
	b2 = le32_to_cpu(path[depth].p_ext->ee_block);
	/* round b2 down to its cluster boundary; the mask form assumes
	 * s_cluster_ratio is a power of two */
	b2 &= ~(sbi->s_cluster_ratio - 1);

	/*
	 * get the next allocated block if the extent in the path
	 * is before the requested block(s)
	 */
	if (b2 < b1) {
		b2 = ext4_ext_next_allocated_block(path);
		if (b2 == EXT_MAX_BLOCKS)
			goto out;
		b2 &= ~(sbi->s_cluster_ratio - 1);
	}

	/* check for wrap through zero on extent logical start block*/
	if (b1 + len1 < b1) {
		/* clamp the length so the extent ends at the address limit */
		len1 = EXT_MAX_BLOCKS - b1;
		newext->ee_len = cpu_to_le16(len1);
		ret = 1;
	}

	/* check for overlap */
	if (b1 + len1 > b2) {
		/* shrink newext so it stops right where the neighbour starts */
		newext->ee_len = cpu_to_le16(b2 - b1);
		ret = 1;
	}
out:
	return ret;
}
167325d14f98SAmit Arora 
/*
 * ext4_ext_insert_extent:
 * tries to merge requsted extent into the existing extent or
 * inserts requested extent as new one into the tree,
 * creating new leaf in the no-space case.
 */
int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
				struct ext4_ext_path *path,
				struct ext4_extent *newext, int flag)
{
	struct ext4_extent_header *eh;
	struct ext4_extent *ex, *fex;
	struct ext4_extent *nearex; /* nearest extent */
	struct ext4_ext_path *npath = NULL;	/* path into the next leaf */
	int depth, len, err;
	ext4_lblk_t next;
	unsigned uninitialized = 0;
	int flags = 0;

	/* a zero-length extent is never legal input */
	if (unlikely(ext4_ext_get_actual_len(newext) == 0)) {
		EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0");
		return -EIO;
	}
	depth = ext_depth(inode);
	ex = path[depth].p_ext;
	if (unlikely(path[depth].p_hdr == NULL)) {
		EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth);
		return -EIO;
	}

	/* try to insert block into found extent and return */
	/* (skipped for PRE_IO: unwritten-conversion must not merge here) */
	if (ex && !(flag & EXT4_GET_BLOCKS_PRE_IO)
		&& ext4_can_extents_be_merged(inode, ex, newext)) {
		ext_debug("append [%d]%d block to %d:[%d]%d (from %llu)\n",
			  ext4_ext_is_uninitialized(newext),
			  ext4_ext_get_actual_len(newext),
			  le32_to_cpu(ex->ee_block),
			  ext4_ext_is_uninitialized(ex),
			  ext4_ext_get_actual_len(ex),
			  ext4_ext_pblock(ex));
		err = ext4_ext_get_access(handle, inode, path + depth);
		if (err)
			return err;

		/*
		 * ext4_can_extents_be_merged should have checked that either
		 * both extents are uninitialized, or both aren't. Thus we
		 * need to check only one of them here.
		 */
		if (ext4_ext_is_uninitialized(ex))
			uninitialized = 1;
		ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
					+ ext4_ext_get_actual_len(newext));
		if (uninitialized)
			ext4_ext_mark_uninitialized(ex);
		eh = path[depth].p_hdr;
		nearex = ex;
		goto merge;
	}

	depth = ext_depth(inode);
	eh = path[depth].p_hdr;
	if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max))
		goto has_space;

	/* probably next leaf has space for us? */
	/* only worth looking there if newext sorts after this leaf's last */
	fex = EXT_LAST_EXTENT(eh);
	next = EXT_MAX_BLOCKS;
	if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block))
		next = ext4_ext_next_leaf_block(path);
	if (next != EXT_MAX_BLOCKS) {
		ext_debug("next leaf block - %d\n", next);
		BUG_ON(npath != NULL);
		npath = ext4_ext_find_extent(inode, next, NULL);
		if (IS_ERR(npath))
			return PTR_ERR(npath);
		BUG_ON(npath->p_depth != path->p_depth);
		eh = npath[depth].p_hdr;
		if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max)) {
			ext_debug("next leaf isn't full(%d)\n",
				  le16_to_cpu(eh->eh_entries));
			/* insert into the next leaf instead; npath is
			 * still freed through @npath at cleanup */
			path = npath;
			goto has_space;
		}
		ext_debug("next leaf has no free space(%d,%d)\n",
			  le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));
	}

	/*
	 * There is no free space in the found leaf.
	 * We're gonna add a new leaf in the tree.
	 */
	if (flag & EXT4_GET_BLOCKS_PUNCH_OUT_EXT)
		flags = EXT4_MB_USE_ROOT_BLOCKS;
	err = ext4_ext_create_new_leaf(handle, inode, flags, path, newext);
	if (err)
		goto cleanup;
	/* the tree may have grown; refresh depth and leaf header */
	depth = ext_depth(inode);
	eh = path[depth].p_hdr;

has_space:
	nearex = path[depth].p_ext;

	err = ext4_ext_get_access(handle, inode, path + depth);
	if (err)
		goto cleanup;

	if (!nearex) {
		/* there is no extent in this leaf, create first one */
		ext_debug("first extent in the leaf: %d:%llu:[%d]%d\n",
				le32_to_cpu(newext->ee_block),
				ext4_ext_pblock(newext),
				ext4_ext_is_uninitialized(newext),
				ext4_ext_get_actual_len(newext));
		path[depth].p_ext = EXT_FIRST_EXTENT(eh);
	} else if (le32_to_cpu(newext->ee_block)
			   > le32_to_cpu(nearex->ee_block)) {
/*		BUG_ON(newext->ee_block == nearex->ee_block); */
		/* newext goes after nearex: shift everything past nearex
		 * one slot right to open a hole at nearex + 1 */
		if (nearex != EXT_LAST_EXTENT(eh)) {
			len = EXT_MAX_EXTENT(eh) - nearex;
			len = (len - 1) * sizeof(struct ext4_extent);
			len = len < 0 ? 0 : len;
			ext_debug("insert %d:%llu:[%d]%d after: nearest 0x%p, "
					"move %d from 0x%p to 0x%p\n",
					le32_to_cpu(newext->ee_block),
					ext4_ext_pblock(newext),
					ext4_ext_is_uninitialized(newext),
					ext4_ext_get_actual_len(newext),
					nearex, len, nearex + 1, nearex + 2);
			memmove(nearex + 2, nearex + 1, len);
		}
		path[depth].p_ext = nearex + 1;
	} else {
		BUG_ON(newext->ee_block == nearex->ee_block);
		/* newext goes before nearex: shift nearex and everything
		 * after it one slot right, then take nearex's old slot */
		len = (EXT_MAX_EXTENT(eh) - nearex) * sizeof(struct ext4_extent);
		len = len < 0 ? 0 : len;
		ext_debug("insert %d:%llu:[%d]%d before: nearest 0x%p, "
				"move %d from 0x%p to 0x%p\n",
				le32_to_cpu(newext->ee_block),
				ext4_ext_pblock(newext),
				ext4_ext_is_uninitialized(newext),
				ext4_ext_get_actual_len(newext),
				nearex, len, nearex, nearex + 1);
		memmove(nearex + 1, nearex, len);
		path[depth].p_ext = nearex;
	}

	/* fill the reserved slot with newext's contents */
	le16_add_cpu(&eh->eh_entries, 1);
	nearex = path[depth].p_ext;
	nearex->ee_block = newext->ee_block;
	ext4_ext_store_pblock(nearex, ext4_ext_pblock(newext));
	nearex->ee_len = newext->ee_len;

merge:
	/* try to merge extents to the right */
	if (!(flag & EXT4_GET_BLOCKS_PRE_IO))
		ext4_ext_try_to_merge(inode, path, nearex);

	/* try to merge extents to the left */

	/* time to correct all indexes above */
	err = ext4_ext_correct_indexes(handle, inode, path);
	if (err)
		goto cleanup;

	err = ext4_ext_dirty(handle, inode, path + depth);

cleanup:
	if (npath) {
		ext4_ext_drop_refs(npath);
		kfree(npath);
	}
	ext4_ext_invalidate_cache(inode);
	return err;
}
1849a86c6181SAlex Tomas 
18501f109d5aSTheodore Ts'o static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
18516873fa0dSEric Sandeen 			       ext4_lblk_t num, ext_prepare_callback func,
18526873fa0dSEric Sandeen 			       void *cbdata)
18536873fa0dSEric Sandeen {
18546873fa0dSEric Sandeen 	struct ext4_ext_path *path = NULL;
18556873fa0dSEric Sandeen 	struct ext4_ext_cache cbex;
18566873fa0dSEric Sandeen 	struct ext4_extent *ex;
18576873fa0dSEric Sandeen 	ext4_lblk_t next, start = 0, end = 0;
18586873fa0dSEric Sandeen 	ext4_lblk_t last = block + num;
18596873fa0dSEric Sandeen 	int depth, exists, err = 0;
18606873fa0dSEric Sandeen 
18616873fa0dSEric Sandeen 	BUG_ON(func == NULL);
18626873fa0dSEric Sandeen 	BUG_ON(inode == NULL);
18636873fa0dSEric Sandeen 
1864f17722f9SLukas Czerner 	while (block < last && block != EXT_MAX_BLOCKS) {
18656873fa0dSEric Sandeen 		num = last - block;
18666873fa0dSEric Sandeen 		/* find extent for this block */
1867fab3a549STheodore Ts'o 		down_read(&EXT4_I(inode)->i_data_sem);
18686873fa0dSEric Sandeen 		path = ext4_ext_find_extent(inode, block, path);
1869fab3a549STheodore Ts'o 		up_read(&EXT4_I(inode)->i_data_sem);
18706873fa0dSEric Sandeen 		if (IS_ERR(path)) {
18716873fa0dSEric Sandeen 			err = PTR_ERR(path);
18726873fa0dSEric Sandeen 			path = NULL;
18736873fa0dSEric Sandeen 			break;
18746873fa0dSEric Sandeen 		}
18756873fa0dSEric Sandeen 
18766873fa0dSEric Sandeen 		depth = ext_depth(inode);
1877273df556SFrank Mayhar 		if (unlikely(path[depth].p_hdr == NULL)) {
1878273df556SFrank Mayhar 			EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth);
1879273df556SFrank Mayhar 			err = -EIO;
1880273df556SFrank Mayhar 			break;
1881273df556SFrank Mayhar 		}
18826873fa0dSEric Sandeen 		ex = path[depth].p_ext;
18836873fa0dSEric Sandeen 		next = ext4_ext_next_allocated_block(path);
18846873fa0dSEric Sandeen 
18856873fa0dSEric Sandeen 		exists = 0;
18866873fa0dSEric Sandeen 		if (!ex) {
18876873fa0dSEric Sandeen 			/* there is no extent yet, so try to allocate
18886873fa0dSEric Sandeen 			 * all requested space */
18896873fa0dSEric Sandeen 			start = block;
18906873fa0dSEric Sandeen 			end = block + num;
18916873fa0dSEric Sandeen 		} else if (le32_to_cpu(ex->ee_block) > block) {
18926873fa0dSEric Sandeen 			/* need to allocate space before found extent */
18936873fa0dSEric Sandeen 			start = block;
18946873fa0dSEric Sandeen 			end = le32_to_cpu(ex->ee_block);
18956873fa0dSEric Sandeen 			if (block + num < end)
18966873fa0dSEric Sandeen 				end = block + num;
18976873fa0dSEric Sandeen 		} else if (block >= le32_to_cpu(ex->ee_block)
18986873fa0dSEric Sandeen 					+ ext4_ext_get_actual_len(ex)) {
18996873fa0dSEric Sandeen 			/* need to allocate space after found extent */
19006873fa0dSEric Sandeen 			start = block;
19016873fa0dSEric Sandeen 			end = block + num;
19026873fa0dSEric Sandeen 			if (end >= next)
19036873fa0dSEric Sandeen 				end = next;
19046873fa0dSEric Sandeen 		} else if (block >= le32_to_cpu(ex->ee_block)) {
19056873fa0dSEric Sandeen 			/*
19066873fa0dSEric Sandeen 			 * some part of requested space is covered
19076873fa0dSEric Sandeen 			 * by found extent
19086873fa0dSEric Sandeen 			 */
19096873fa0dSEric Sandeen 			start = block;
19106873fa0dSEric Sandeen 			end = le32_to_cpu(ex->ee_block)
19116873fa0dSEric Sandeen 				+ ext4_ext_get_actual_len(ex);
19126873fa0dSEric Sandeen 			if (block + num < end)
19136873fa0dSEric Sandeen 				end = block + num;
19146873fa0dSEric Sandeen 			exists = 1;
19156873fa0dSEric Sandeen 		} else {
19166873fa0dSEric Sandeen 			BUG();
19176873fa0dSEric Sandeen 		}
19186873fa0dSEric Sandeen 		BUG_ON(end <= start);
19196873fa0dSEric Sandeen 
19206873fa0dSEric Sandeen 		if (!exists) {
19216873fa0dSEric Sandeen 			cbex.ec_block = start;
19226873fa0dSEric Sandeen 			cbex.ec_len = end - start;
19236873fa0dSEric Sandeen 			cbex.ec_start = 0;
19246873fa0dSEric Sandeen 		} else {
19256873fa0dSEric Sandeen 			cbex.ec_block = le32_to_cpu(ex->ee_block);
19266873fa0dSEric Sandeen 			cbex.ec_len = ext4_ext_get_actual_len(ex);
1927bf89d16fSTheodore Ts'o 			cbex.ec_start = ext4_ext_pblock(ex);
19286873fa0dSEric Sandeen 		}
19296873fa0dSEric Sandeen 
1930273df556SFrank Mayhar 		if (unlikely(cbex.ec_len == 0)) {
1931273df556SFrank Mayhar 			EXT4_ERROR_INODE(inode, "cbex.ec_len == 0");
1932273df556SFrank Mayhar 			err = -EIO;
1933273df556SFrank Mayhar 			break;
1934273df556SFrank Mayhar 		}
1935c03f8aa9SLukas Czerner 		err = func(inode, next, &cbex, ex, cbdata);
19366873fa0dSEric Sandeen 		ext4_ext_drop_refs(path);
19376873fa0dSEric Sandeen 
19386873fa0dSEric Sandeen 		if (err < 0)
19396873fa0dSEric Sandeen 			break;
19406873fa0dSEric Sandeen 
19416873fa0dSEric Sandeen 		if (err == EXT_REPEAT)
19426873fa0dSEric Sandeen 			continue;
19436873fa0dSEric Sandeen 		else if (err == EXT_BREAK) {
19446873fa0dSEric Sandeen 			err = 0;
19456873fa0dSEric Sandeen 			break;
19466873fa0dSEric Sandeen 		}
19476873fa0dSEric Sandeen 
19486873fa0dSEric Sandeen 		if (ext_depth(inode) != depth) {
19496873fa0dSEric Sandeen 			/* depth was changed. we have to realloc path */
19506873fa0dSEric Sandeen 			kfree(path);
19516873fa0dSEric Sandeen 			path = NULL;
19526873fa0dSEric Sandeen 		}
19536873fa0dSEric Sandeen 
19546873fa0dSEric Sandeen 		block = cbex.ec_block + cbex.ec_len;
19556873fa0dSEric Sandeen 	}
19566873fa0dSEric Sandeen 
19576873fa0dSEric Sandeen 	if (path) {
19586873fa0dSEric Sandeen 		ext4_ext_drop_refs(path);
19596873fa0dSEric Sandeen 		kfree(path);
19606873fa0dSEric Sandeen 	}
19616873fa0dSEric Sandeen 
19626873fa0dSEric Sandeen 	return err;
19636873fa0dSEric Sandeen }
19646873fa0dSEric Sandeen 
196509b88252SAvantika Mathur static void
1966725d26d3SAneesh Kumar K.V ext4_ext_put_in_cache(struct inode *inode, ext4_lblk_t block,
1967b05e6ae5STheodore Ts'o 			__u32 len, ext4_fsblk_t start)
1968a86c6181SAlex Tomas {
1969a86c6181SAlex Tomas 	struct ext4_ext_cache *cex;
1970a86c6181SAlex Tomas 	BUG_ON(len == 0);
19712ec0ae3aSTheodore Ts'o 	spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
1972a86c6181SAlex Tomas 	cex = &EXT4_I(inode)->i_cached_extent;
1973a86c6181SAlex Tomas 	cex->ec_block = block;
1974a86c6181SAlex Tomas 	cex->ec_len = len;
1975a86c6181SAlex Tomas 	cex->ec_start = start;
19762ec0ae3aSTheodore Ts'o 	spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1977a86c6181SAlex Tomas }
1978a86c6181SAlex Tomas 
/*
 * ext4_ext_put_gap_in_cache:
 * calculate boundaries of the gap that the requested block fits into
 * and cache this gap
 */
static void
ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
				ext4_lblk_t block)
{
	int depth = ext_depth(inode);
	unsigned long len;
	ext4_lblk_t lblock;
	struct ext4_extent *ex;

	/* the extent the lookup for @block stopped at (may be NULL) */
	ex = path[depth].p_ext;
	if (ex == NULL) {
		/* there is no extent yet, so gap is [0;-] */
		lblock = 0;
		len = EXT_MAX_BLOCKS;
		ext_debug("cache gap(whole file):");
	} else if (block < le32_to_cpu(ex->ee_block)) {
		/* gap runs from @block up to the start of the found extent */
		lblock = block;
		len = le32_to_cpu(ex->ee_block) - block;
		ext_debug("cache gap(before): %u [%u:%u]",
				block,
				le32_to_cpu(ex->ee_block),
				 ext4_ext_get_actual_len(ex));
	} else if (block >= le32_to_cpu(ex->ee_block)
			+ ext4_ext_get_actual_len(ex)) {
		/* gap runs from the end of the found extent up to the next
		 * allocated block */
		ext4_lblk_t next;
		lblock = le32_to_cpu(ex->ee_block)
			+ ext4_ext_get_actual_len(ex);

		next = ext4_ext_next_allocated_block(path);
		ext_debug("cache gap(after): [%u:%u] %u",
				le32_to_cpu(ex->ee_block),
				ext4_ext_get_actual_len(ex),
				block);
		/* the next allocated block must lie beyond the gap start,
		 * otherwise there would be no gap at all */
		BUG_ON(next == lblock);
		len = next - lblock;
	} else {
		/* @block falls inside @ex: the caller should not have asked
		 * to cache a gap here */
		lblock = len = 0;
		BUG();
	}

	ext_debug(" -> %u:%lu\n", lblock, len);
	/* ec_start == 0 marks the cached range as a hole */
	ext4_ext_put_in_cache(inode, lblock, len, 0);
}
2027a86c6181SAlex Tomas 
/*
 * ext4_ext_check_cache()
 * Checks to see if the given block is in the cache.
 * If it is, the cached extent is stored in the given
 * cache extent pointer.  If the cached extent is a hole,
 * this routine should be used instead of
 * ext4_ext_in_cache if the calling function needs to
 * know the size of the hole.
 *
 * @inode: The files inode
 * @block: The block to look for in the cache
 * @ex:    Pointer where the cached extent will be stored
 *         if it contains block
 *
 * Return 0 if cache is invalid; 1 if the cache is valid
 */
static int ext4_ext_check_cache(struct inode *inode, ext4_lblk_t block,
	struct ext4_ext_cache *ex){
	struct ext4_ext_cache *cex;
	struct ext4_sb_info *sbi;
	int ret = 0;

	/*
	 * We borrow i_block_reservation_lock to protect i_cached_extent
	 */
	spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
	cex = &EXT4_I(inode)->i_cached_extent;
	sbi = EXT4_SB(inode->i_sb);

	/* has cache valid data? (ec_len == 0 means "no cached extent") */
	if (cex->ec_len == 0)
		goto errout;

	if (in_range(block, cex->ec_block, cex->ec_len)) {
		/* hit: hand the whole cached entry back to the caller */
		memcpy(ex, cex, sizeof(struct ext4_ext_cache));
		ext_debug("%u cached by %u:%u:%llu\n",
				block,
				cex->ec_block, cex->ec_len, cex->ec_start);
		ret = 1;
	}
errout:
	/* hit/miss statistics are updated under the same lock */
	if (!ret)
		sbi->extent_cache_misses++;
	else
		sbi->extent_cache_hits++;
	spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
	return ret;
}
2076a86c6181SAlex Tomas 
2077a86c6181SAlex Tomas /*
2078a4bb6b64SAllison Henderson  * ext4_ext_in_cache()
2079a4bb6b64SAllison Henderson  * Checks to see if the given block is in the cache.
2080a4bb6b64SAllison Henderson  * If it is, the cached extent is stored in the given
2081a4bb6b64SAllison Henderson  * extent pointer.
2082a4bb6b64SAllison Henderson  *
2083a4bb6b64SAllison Henderson  * @inode: The files inode
2084a4bb6b64SAllison Henderson  * @block: The block to look for in the cache
2085a4bb6b64SAllison Henderson  * @ex:    Pointer where the cached extent will be stored
2086a4bb6b64SAllison Henderson  *         if it contains block
2087a4bb6b64SAllison Henderson  *
2088a4bb6b64SAllison Henderson  * Return 0 if cache is invalid; 1 if the cache is valid
2089a4bb6b64SAllison Henderson  */
2090a4bb6b64SAllison Henderson static int
2091a4bb6b64SAllison Henderson ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
2092a4bb6b64SAllison Henderson 			struct ext4_extent *ex)
2093a4bb6b64SAllison Henderson {
2094a4bb6b64SAllison Henderson 	struct ext4_ext_cache cex;
2095a4bb6b64SAllison Henderson 	int ret = 0;
2096a4bb6b64SAllison Henderson 
2097a4bb6b64SAllison Henderson 	if (ext4_ext_check_cache(inode, block, &cex)) {
2098a4bb6b64SAllison Henderson 		ex->ee_block = cpu_to_le32(cex.ec_block);
2099a4bb6b64SAllison Henderson 		ext4_ext_store_pblock(ex, cex.ec_start);
2100a4bb6b64SAllison Henderson 		ex->ee_len = cpu_to_le16(cex.ec_len);
2101a4bb6b64SAllison Henderson 		ret = 1;
2102a4bb6b64SAllison Henderson 	}
2103a4bb6b64SAllison Henderson 
2104a4bb6b64SAllison Henderson 	return ret;
2105a4bb6b64SAllison Henderson }
2106a4bb6b64SAllison Henderson 
2107a4bb6b64SAllison Henderson 
/*
 * ext4_ext_rm_idx:
 * removes index from the index block.
 */
static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
			struct ext4_ext_path *path)
{
	int err;
	ext4_fsblk_t leaf;

	/* free index block */
	path--;		/* step up to the parent index level */
	leaf = ext4_idx_pblock(path->p_idx);
	if (unlikely(path->p_hdr->eh_entries == 0)) {
		EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0");
		return -EIO;
	}
	/* get journal write access before modifying the index block */
	err = ext4_ext_get_access(handle, inode, path);
	if (err)
		return err;

	/* compact the index array when we are not removing the last entry */
	if (path->p_idx != EXT_LAST_INDEX(path->p_hdr)) {
		int len = EXT_LAST_INDEX(path->p_hdr) - path->p_idx;
		len *= sizeof(struct ext4_extent_idx);
		memmove(path->p_idx, path->p_idx + 1, len);
	}

	le16_add_cpu(&path->p_hdr->eh_entries, -1);
	err = ext4_ext_dirty(handle, inode, path);
	if (err)
		return err;
	ext_debug("index is empty, remove it, free block %llu\n", leaf);
	/* the freed block is tree metadata: journal it and forget its bh */
	ext4_free_blocks(handle, inode, NULL, leaf, 1,
			 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
	return err;
}
2144a86c6181SAlex Tomas 
2145a86c6181SAlex Tomas /*
2146ee12b630SMingming Cao  * ext4_ext_calc_credits_for_single_extent:
2147ee12b630SMingming Cao  * This routine returns max. credits that needed to insert an extent
2148ee12b630SMingming Cao  * to the extent tree.
2149ee12b630SMingming Cao  * When pass the actual path, the caller should calculate credits
2150ee12b630SMingming Cao  * under i_data_sem.
2151a86c6181SAlex Tomas  */
2152525f4ed8SMingming Cao int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int nrblocks,
2153a86c6181SAlex Tomas 						struct ext4_ext_path *path)
2154a86c6181SAlex Tomas {
2155a86c6181SAlex Tomas 	if (path) {
2156ee12b630SMingming Cao 		int depth = ext_depth(inode);
2157f3bd1f3fSMingming Cao 		int ret = 0;
2158ee12b630SMingming Cao 
2159a86c6181SAlex Tomas 		/* probably there is space in leaf? */
2160a86c6181SAlex Tomas 		if (le16_to_cpu(path[depth].p_hdr->eh_entries)
2161ee12b630SMingming Cao 				< le16_to_cpu(path[depth].p_hdr->eh_max)) {
2162ee12b630SMingming Cao 
2163ee12b630SMingming Cao 			/*
2164ee12b630SMingming Cao 			 *  There are some space in the leaf tree, no
2165ee12b630SMingming Cao 			 *  need to account for leaf block credit
2166ee12b630SMingming Cao 			 *
2167ee12b630SMingming Cao 			 *  bitmaps and block group descriptor blocks
2168ee12b630SMingming Cao 			 *  and other metadat blocks still need to be
2169ee12b630SMingming Cao 			 *  accounted.
2170ee12b630SMingming Cao 			 */
2171525f4ed8SMingming Cao 			/* 1 bitmap, 1 block group descriptor */
2172ee12b630SMingming Cao 			ret = 2 + EXT4_META_TRANS_BLOCKS(inode->i_sb);
21735887e98bSAneesh Kumar K.V 			return ret;
2174ee12b630SMingming Cao 		}
2175ee12b630SMingming Cao 	}
2176ee12b630SMingming Cao 
2177525f4ed8SMingming Cao 	return ext4_chunk_trans_blocks(inode, nrblocks);
2178a86c6181SAlex Tomas }
2179a86c6181SAlex Tomas 
/*
 * How many index/leaf blocks need to change/allocate to modify nrblocks?
 *
 * If nrblocks fit in a single extent (chunk flag is 1), then in the
 * worst case each tree level's index/leaf must be changed, and if the
 * tree splits on insert, the old index/leaf must be updated as well.
 *
 * If the nrblocks are discontiguous, they could cause the whole tree
 * to split more than once, but this is really rare.
 */
int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
{
	int depth = ext_depth(inode);

	/* two blocks per level for a contiguous chunk, three otherwise */
	return chunk ? depth * 2 : depth * 3;
}
2203a86c6181SAlex Tomas 
2204a86c6181SAlex Tomas static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
2205a86c6181SAlex Tomas 				struct ext4_extent *ex,
2206725d26d3SAneesh Kumar K.V 				ext4_lblk_t from, ext4_lblk_t to)
2207a86c6181SAlex Tomas {
2208a2df2a63SAmit Arora 	unsigned short ee_len =  ext4_ext_get_actual_len(ex);
2209e6362609STheodore Ts'o 	int flags = EXT4_FREE_BLOCKS_FORGET;
2210a86c6181SAlex Tomas 
2211c9de560dSAlex Tomas 	if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
2212e6362609STheodore Ts'o 		flags |= EXT4_FREE_BLOCKS_METADATA;
2213a86c6181SAlex Tomas #ifdef EXTENTS_STATS
2214a86c6181SAlex Tomas 	{
2215a86c6181SAlex Tomas 		struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
2216a86c6181SAlex Tomas 		spin_lock(&sbi->s_ext_stats_lock);
2217a86c6181SAlex Tomas 		sbi->s_ext_blocks += ee_len;
2218a86c6181SAlex Tomas 		sbi->s_ext_extents++;
2219a86c6181SAlex Tomas 		if (ee_len < sbi->s_ext_min)
2220a86c6181SAlex Tomas 			sbi->s_ext_min = ee_len;
2221a86c6181SAlex Tomas 		if (ee_len > sbi->s_ext_max)
2222a86c6181SAlex Tomas 			sbi->s_ext_max = ee_len;
2223a86c6181SAlex Tomas 		if (ext_depth(inode) > sbi->s_depth_max)
2224a86c6181SAlex Tomas 			sbi->s_depth_max = ext_depth(inode);
2225a86c6181SAlex Tomas 		spin_unlock(&sbi->s_ext_stats_lock);
2226a86c6181SAlex Tomas 	}
2227a86c6181SAlex Tomas #endif
2228a86c6181SAlex Tomas 	if (from >= le32_to_cpu(ex->ee_block)
2229a2df2a63SAmit Arora 	    && to == le32_to_cpu(ex->ee_block) + ee_len - 1) {
2230a86c6181SAlex Tomas 		/* tail removal */
2231725d26d3SAneesh Kumar K.V 		ext4_lblk_t num;
2232f65e6fbaSAlex Tomas 		ext4_fsblk_t start;
2233725d26d3SAneesh Kumar K.V 
2234a2df2a63SAmit Arora 		num = le32_to_cpu(ex->ee_block) + ee_len - from;
2235bf89d16fSTheodore Ts'o 		start = ext4_ext_pblock(ex) + ee_len - num;
2236725d26d3SAneesh Kumar K.V 		ext_debug("free last %u blocks starting %llu\n", num, start);
22377dc57615SPeter Huewe 		ext4_free_blocks(handle, inode, NULL, start, num, flags);
2238a86c6181SAlex Tomas 	} else if (from == le32_to_cpu(ex->ee_block)
2239a2df2a63SAmit Arora 		   && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) {
2240d583fb87SAllison Henderson 		/* head removal */
2241d583fb87SAllison Henderson 		ext4_lblk_t num;
2242d583fb87SAllison Henderson 		ext4_fsblk_t start;
2243d583fb87SAllison Henderson 
2244d583fb87SAllison Henderson 		num = to - from;
2245d583fb87SAllison Henderson 		start = ext4_ext_pblock(ex);
2246d583fb87SAllison Henderson 
2247d583fb87SAllison Henderson 		ext_debug("free first %u blocks starting %llu\n", num, start);
2248d583fb87SAllison Henderson 		ext4_free_blocks(handle, inode, 0, start, num, flags);
2249d583fb87SAllison Henderson 
2250a86c6181SAlex Tomas 	} else {
2251725d26d3SAneesh Kumar K.V 		printk(KERN_INFO "strange request: removal(2) "
2252725d26d3SAneesh Kumar K.V 				"%u-%u from %u:%u\n",
2253a2df2a63SAmit Arora 				from, to, le32_to_cpu(ex->ee_block), ee_len);
2254a86c6181SAlex Tomas 	}
2255a86c6181SAlex Tomas 	return 0;
2256a86c6181SAlex Tomas }
2257a86c6181SAlex Tomas 

/*
 * ext4_ext_rm_leaf() Removes the extents associated with the
 * blocks appearing between "start" and "end", and splits the extents
 * if "start" and "end" appear in the same extent
 *
 * @handle: The journal handle
 * @inode:  The files inode
 * @path:   The path to the leaf
 * @start:  The first block to remove
 * @end:   The last block to remove
 */
static int
ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
		struct ext4_ext_path *path, ext4_lblk_t start,
		ext4_lblk_t end)
{
	int err = 0, correct_index = 0;
	int depth = ext_depth(inode), credits;
	struct ext4_extent_header *eh;
	ext4_lblk_t a, b, block;
	unsigned num;
	ext4_lblk_t ex_ee_block;
	unsigned short ex_ee_len;
	unsigned uninitialized = 0;
	struct ext4_extent *ex;
	struct ext4_map_blocks map;

	/* the header must be checked already in ext4_ext_remove_space() */
	ext_debug("truncate since %u in leaf\n", start);
	if (!path[depth].p_hdr)
		path[depth].p_hdr = ext_block_hdr(path[depth].p_bh);
	eh = path[depth].p_hdr;
	if (unlikely(path[depth].p_hdr == NULL)) {
		EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth);
		return -EIO;
	}
	/* find where to start removing */
	ex = EXT_LAST_EXTENT(eh);

	ex_ee_block = le32_to_cpu(ex->ee_block);
	ex_ee_len = ext4_ext_get_actual_len(ex);

	/* walk the leaf backwards over every extent overlapping [start, end] */
	while (ex >= EXT_FIRST_EXTENT(eh) &&
			ex_ee_block + ex_ee_len > start) {

		if (ext4_ext_is_uninitialized(ex))
			uninitialized = 1;
		else
			uninitialized = 0;

		ext_debug("remove ext %u:[%d]%d\n", ex_ee_block,
			 uninitialized, ex_ee_len);
		path[depth].p_ext = ex;

		/* [a, b] is the intersection of this extent with the
		 * removal range */
		a = ex_ee_block > start ? ex_ee_block : start;
		b = ex_ee_block+ex_ee_len - 1 < end ?
			ex_ee_block+ex_ee_len - 1 : end;

		ext_debug("  border %u:%u\n", a, b);

		/* If this extent is beyond the end of the hole, skip it */
		if (end <= ex_ee_block) {
			ex--;
			ex_ee_block = le32_to_cpu(ex->ee_block);
			ex_ee_len = ext4_ext_get_actual_len(ex);
			continue;
		} else if (a != ex_ee_block &&
			b != ex_ee_block + ex_ee_len - 1) {
			/*
			 * If this is a truncate, then this condition should
			 * never happen because at least one of the end points
			 * needs to be on the edge of the extent.
			 */
			if (end == EXT_MAX_BLOCKS - 1) {
				ext_debug("  bad truncate %u:%u\n",
						start, end);
				block = 0;
				num = 0;
				err = -EIO;
				goto out;
			}
			/*
			 * else this is a hole punch, so the extent needs to
			 * be split since neither edge of the hole is on the
			 * extent edge
			 */
			else{
				map.m_pblk = ext4_ext_pblock(ex);
				map.m_lblk = ex_ee_block;
				map.m_len = b - ex_ee_block;

				err = ext4_split_extent(handle,
					inode, path, &map, 0,
					EXT4_GET_BLOCKS_PUNCH_OUT_EXT |
					EXT4_GET_BLOCKS_PRE_IO);

				if (err < 0)
					goto out;

				/* the split may have shrunk *ex: refresh */
				ex_ee_len = ext4_ext_get_actual_len(ex);

				b = ex_ee_block+ex_ee_len - 1 < end ?
					ex_ee_block+ex_ee_len - 1 : end;

				/* Then remove tail of this extent */
				block = ex_ee_block;
				num = a - block;
			}
		} else if (a != ex_ee_block) {
			/* remove tail of the extent */
			block = ex_ee_block;
			num = a - block;
		} else if (b != ex_ee_block + ex_ee_len - 1) {
			/* remove head of the extent */
			block = b;
			num =  ex_ee_block + ex_ee_len - b;

			/*
			 * If this is a truncate, this condition
			 * should never happen
			 */
			if (end == EXT_MAX_BLOCKS - 1) {
				ext_debug("  bad truncate %u:%u\n",
					start, end);
				err = -EIO;
				goto out;
			}
		} else {
			/* remove whole extent: excellent! */
			block = ex_ee_block;
			num = 0;
			/* sanity: whole-extent removal requires both edges
			 * of [a, b] to coincide with the extent edges */
			if (a != ex_ee_block) {
				ext_debug("  bad truncate %u:%u\n",
					start, end);
				err = -EIO;
				goto out;
			}

			if (b != ex_ee_block + ex_ee_len - 1) {
				ext_debug("  bad truncate %u:%u\n",
					start, end);
				err = -EIO;
				goto out;
			}
		}

		/*
		 * 3 for leaf, sb, and inode plus 2 (bmap and group
		 * descriptor) for each block group; assume two block
		 * groups plus ex_ee_len/blocks_per_block_group for
		 * the worst case
		 */
		credits = 7 + 2*(ex_ee_len/EXT4_BLOCKS_PER_GROUP(inode->i_sb));
		if (ex == EXT_FIRST_EXTENT(eh)) {
			correct_index = 1;
			credits += (ext_depth(inode)) + 1;
		}
		credits += EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb);

		/* may restart the transaction if credits run low */
		err = ext4_ext_truncate_extend_restart(handle, inode, credits);
		if (err)
			goto out;

		err = ext4_ext_get_access(handle, inode, path + depth);
		if (err)
			goto out;

		/* free the physical blocks backing [a, b] */
		err = ext4_remove_blocks(handle, inode, ex, a, b);
		if (err)
			goto out;

		if (num == 0) {
			/* this extent is removed; mark slot entirely unused */
			ext4_ext_store_pblock(ex, 0);
		} else if (block != ex_ee_block) {
			/*
			 * If this was a head removal, then we need to update
			 * the physical block since it is now at a different
			 * location
			 */
			ext4_ext_store_pblock(ex, ext4_ext_pblock(ex) + (b-a));
		}

		ex->ee_block = cpu_to_le32(block);
		ex->ee_len = cpu_to_le16(num);
		/*
		 * Do not mark uninitialized if all the blocks in the
		 * extent have been removed.
		 */
		if (uninitialized && num)
			ext4_ext_mark_uninitialized(ex);

		err = ext4_ext_dirty(handle, inode, path + depth);
		if (err)
			goto out;

		/*
		 * If the extent was completely released,
		 * we need to remove it from the leaf
		 */
		if (num == 0) {
			if (end != EXT_MAX_BLOCKS - 1) {
				/*
				 * For hole punching, we need to scoot all the
				 * extents up when an extent is removed so that
				 * we dont have blank extents in the middle
				 */
				memmove(ex, ex+1, (EXT_LAST_EXTENT(eh) - ex) *
					sizeof(struct ext4_extent));

				/* Now get rid of the one at the end */
				memset(EXT_LAST_EXTENT(eh), 0,
					sizeof(struct ext4_extent));
			}
			le16_add_cpu(&eh->eh_entries, -1);
		}

		ext_debug("new extent: %u:%u:%llu\n", block, num,
				ext4_ext_pblock(ex));
		ex--;
		ex_ee_block = le32_to_cpu(ex->ee_block);
		ex_ee_len = ext4_ext_get_actual_len(ex);
	}

	if (correct_index && eh->eh_entries)
		err = ext4_ext_correct_indexes(handle, inode, path);

	/* if this leaf is free, then we should
	 * remove it from index block above */
	if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
		err = ext4_ext_rm_idx(handle, inode, path + depth);

out:
	return err;
}
2494a86c6181SAlex Tomas 
2495a86c6181SAlex Tomas /*
2496d0d856e8SRandy Dunlap  * ext4_ext_more_to_rm:
2497d0d856e8SRandy Dunlap  * returns 1 if current index has to be freed (even partial)
2498a86c6181SAlex Tomas  */
249909b88252SAvantika Mathur static int
2500a86c6181SAlex Tomas ext4_ext_more_to_rm(struct ext4_ext_path *path)
2501a86c6181SAlex Tomas {
2502a86c6181SAlex Tomas 	BUG_ON(path->p_idx == NULL);
2503a86c6181SAlex Tomas 
2504a86c6181SAlex Tomas 	if (path->p_idx < EXT_FIRST_INDEX(path->p_hdr))
2505a86c6181SAlex Tomas 		return 0;
2506a86c6181SAlex Tomas 
2507a86c6181SAlex Tomas 	/*
2508d0d856e8SRandy Dunlap 	 * if truncate on deeper level happened, it wasn't partial,
2509a86c6181SAlex Tomas 	 * so we have to consider current index for truncation
2510a86c6181SAlex Tomas 	 */
2511a86c6181SAlex Tomas 	if (le16_to_cpu(path->p_hdr->eh_entries) == path->p_block)
2512a86c6181SAlex Tomas 		return 0;
2513a86c6181SAlex Tomas 	return 1;
2514a86c6181SAlex Tomas }
2515a86c6181SAlex Tomas 
/*
 * ext4_ext_remove_space:
 * Free every extent covering logical blocks [@start, EXT_MAX_BLOCKS - 1]
 * and prune the extent tree accordingly.  The tree is walked depth-first
 * from the rightmost index, so blocks past @start are released before
 * their parent index entries are removed.
 *
 * Returns 0 on success or a negative error code.  A leaf removal that
 * needs the transaction restarted surfaces as -EAGAIN and causes the
 * whole walk to be retried from scratch (see the "again" label).
 */
static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start)
{
	struct super_block *sb = inode->i_sb;
	int depth = ext_depth(inode);
	struct ext4_ext_path *path;
	handle_t *handle;
	int i, err;

	ext_debug("truncate since %u\n", start);

	/* probably first extent we're gonna free will be last in block */
	handle = ext4_journal_start(inode, depth + 1);
	if (IS_ERR(handle))
		return PTR_ERR(handle);

again:
	/* drop the extent cache: it may describe extents we are freeing */
	ext4_ext_invalidate_cache(inode);

	/*
	 * We start scanning from right side, freeing all the blocks
	 * after i_size and walking into the tree depth-wise.
	 */
	/* re-read depth: a retried pass may see a shrunken tree */
	depth = ext_depth(inode);
	path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_NOFS);
	if (path == NULL) {
		ext4_journal_stop(handle);
		return -ENOMEM;
	}
	path[0].p_depth = depth;
	path[0].p_hdr = ext_inode_hdr(inode);
	if (ext4_ext_check(inode, path[0].p_hdr, depth)) {
		err = -EIO;
		goto out;
	}
	i = err = 0;

	/* i is the current tree level; i == depth means we are at a leaf */
	while (i >= 0 && err == 0) {
		if (i == depth) {
			/* this is leaf block */
			err = ext4_ext_rm_leaf(handle, inode, path,
					start, EXT_MAX_BLOCKS - 1);
			/* root level has p_bh == NULL, brelse() eats this */
			brelse(path[i].p_bh);
			path[i].p_bh = NULL;
			i--;
			continue;
		}

		/* this is index block */
		if (!path[i].p_hdr) {
			ext_debug("initialize header\n");
			path[i].p_hdr = ext_block_hdr(path[i].p_bh);
		}

		if (!path[i].p_idx) {
			/* this level hasn't been touched yet */
			path[i].p_idx = EXT_LAST_INDEX(path[i].p_hdr);
			path[i].p_block = le16_to_cpu(path[i].p_hdr->eh_entries)+1;
			ext_debug("init index ptr: hdr 0x%p, num %d\n",
				  path[i].p_hdr,
				  le16_to_cpu(path[i].p_hdr->eh_entries));
		} else {
			/* we were already here, see at next index */
			path[i].p_idx--;
		}

		ext_debug("level %d - index, first 0x%p, cur 0x%p\n",
				i, EXT_FIRST_INDEX(path[i].p_hdr),
				path[i].p_idx);
		if (ext4_ext_more_to_rm(path + i)) {
			struct buffer_head *bh;
			/* go to the next level */
			ext_debug("move to level %d (block %llu)\n",
				  i + 1, ext4_idx_pblock(path[i].p_idx));
			memset(path + i + 1, 0, sizeof(*path));
			bh = sb_bread(sb, ext4_idx_pblock(path[i].p_idx));
			if (!bh) {
				/* should we reset i_size? */
				err = -EIO;
				break;
			}
			if (WARN_ON(i + 1 > depth)) {
				err = -EIO;
				break;
			}
			/* validate the child block before descending into it */
			if (ext4_ext_check(inode, ext_block_hdr(bh),
							depth - i - 1)) {
				err = -EIO;
				break;
			}
			path[i + 1].p_bh = bh;

			/* save actual number of indexes since this
			 * number is changed at the next iteration */
			path[i].p_block = le16_to_cpu(path[i].p_hdr->eh_entries);
			i++;
		} else {
			/* we finished processing this index, go up */
			if (path[i].p_hdr->eh_entries == 0 && i > 0) {
				/* index is empty, remove it;
				 * handle must be already prepared by
				 * ext4_ext_rm_leaf() */
				err = ext4_ext_rm_idx(handle, inode, path + i);
			}
			/* root level has p_bh == NULL, brelse() eats this */
			brelse(path[i].p_bh);
			path[i].p_bh = NULL;
			i--;
			ext_debug("return to level %d\n", i);
		}
	}

	/* TODO: flexible tree reduction should be here */
	if (path->p_hdr->eh_entries == 0) {
		/*
		 * truncate to zero freed all the tree,
		 * so we need to correct eh_depth
		 */
		err = ext4_ext_get_access(handle, inode, path);
		if (err == 0) {
			ext_inode_hdr(inode)->eh_depth = 0;
			ext_inode_hdr(inode)->eh_max =
				cpu_to_le16(ext4_ext_space_root(inode, 0));
			err = ext4_ext_dirty(handle, inode, path);
		}
	}
out:
	ext4_ext_drop_refs(path);
	kfree(path);
	if (err == -EAGAIN)
		goto again;
	ext4_journal_stop(handle);

	return err;
}
2651a86c6181SAlex Tomas 
2652a86c6181SAlex Tomas /*
2653a86c6181SAlex Tomas  * called at mount time
2654a86c6181SAlex Tomas  */
2655a86c6181SAlex Tomas void ext4_ext_init(struct super_block *sb)
2656a86c6181SAlex Tomas {
2657a86c6181SAlex Tomas 	/*
2658a86c6181SAlex Tomas 	 * possible initialization would be here
2659a86c6181SAlex Tomas 	 */
2660a86c6181SAlex Tomas 
266183982b6fSTheodore Ts'o 	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) {
266290576c0bSTheodore Ts'o #if defined(AGGRESSIVE_TEST) || defined(CHECK_BINSEARCH) || defined(EXTENTS_STATS)
26634776004fSTheodore Ts'o 		printk(KERN_INFO "EXT4-fs: file extents enabled");
2664bbf2f9fbSRobert P. J. Day #ifdef AGGRESSIVE_TEST
2665bbf2f9fbSRobert P. J. Day 		printk(", aggressive tests");
2666a86c6181SAlex Tomas #endif
2667a86c6181SAlex Tomas #ifdef CHECK_BINSEARCH
2668a86c6181SAlex Tomas 		printk(", check binsearch");
2669a86c6181SAlex Tomas #endif
2670a86c6181SAlex Tomas #ifdef EXTENTS_STATS
2671a86c6181SAlex Tomas 		printk(", stats");
2672a86c6181SAlex Tomas #endif
2673a86c6181SAlex Tomas 		printk("\n");
267490576c0bSTheodore Ts'o #endif
2675a86c6181SAlex Tomas #ifdef EXTENTS_STATS
2676a86c6181SAlex Tomas 		spin_lock_init(&EXT4_SB(sb)->s_ext_stats_lock);
2677a86c6181SAlex Tomas 		EXT4_SB(sb)->s_ext_min = 1 << 30;
2678a86c6181SAlex Tomas 		EXT4_SB(sb)->s_ext_max = 0;
2679a86c6181SAlex Tomas #endif
2680a86c6181SAlex Tomas 	}
2681a86c6181SAlex Tomas }
2682a86c6181SAlex Tomas 
2683a86c6181SAlex Tomas /*
2684a86c6181SAlex Tomas  * called at umount time
2685a86c6181SAlex Tomas  */
2686a86c6181SAlex Tomas void ext4_ext_release(struct super_block *sb)
2687a86c6181SAlex Tomas {
268883982b6fSTheodore Ts'o 	if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS))
2689a86c6181SAlex Tomas 		return;
2690a86c6181SAlex Tomas 
2691a86c6181SAlex Tomas #ifdef EXTENTS_STATS
2692a86c6181SAlex Tomas 	if (EXT4_SB(sb)->s_ext_blocks && EXT4_SB(sb)->s_ext_extents) {
2693a86c6181SAlex Tomas 		struct ext4_sb_info *sbi = EXT4_SB(sb);
2694a86c6181SAlex Tomas 		printk(KERN_ERR "EXT4-fs: %lu blocks in %lu extents (%lu ave)\n",
2695a86c6181SAlex Tomas 			sbi->s_ext_blocks, sbi->s_ext_extents,
2696a86c6181SAlex Tomas 			sbi->s_ext_blocks / sbi->s_ext_extents);
2697a86c6181SAlex Tomas 		printk(KERN_ERR "EXT4-fs: extents: %lu min, %lu max, max depth %lu\n",
2698a86c6181SAlex Tomas 			sbi->s_ext_min, sbi->s_ext_max, sbi->s_depth_max);
2699a86c6181SAlex Tomas 	}
2700a86c6181SAlex Tomas #endif
2701a86c6181SAlex Tomas }
2702a86c6181SAlex Tomas 
2703093a088bSAneesh Kumar K.V /* FIXME!! we need to try to merge to left or right after zero-out  */
2704093a088bSAneesh Kumar K.V static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
2705093a088bSAneesh Kumar K.V {
27062407518dSLukas Czerner 	ext4_fsblk_t ee_pblock;
27072407518dSLukas Czerner 	unsigned int ee_len;
2708b720303dSJing Zhang 	int ret;
2709093a088bSAneesh Kumar K.V 
2710093a088bSAneesh Kumar K.V 	ee_len    = ext4_ext_get_actual_len(ex);
2711bf89d16fSTheodore Ts'o 	ee_pblock = ext4_ext_pblock(ex);
2712093a088bSAneesh Kumar K.V 
2713a107e5a3STheodore Ts'o 	ret = sb_issue_zeroout(inode->i_sb, ee_pblock, ee_len, GFP_NOFS);
27142407518dSLukas Czerner 	if (ret > 0)
27152407518dSLukas Czerner 		ret = 0;
2716093a088bSAneesh Kumar K.V 
27172407518dSLukas Czerner 	return ret;
2718093a088bSAneesh Kumar K.V }
2719093a088bSAneesh Kumar K.V 
272047ea3bb5SYongqiang Yang /*
272147ea3bb5SYongqiang Yang  * used by extent splitting.
272247ea3bb5SYongqiang Yang  */
272347ea3bb5SYongqiang Yang #define EXT4_EXT_MAY_ZEROOUT	0x1  /* safe to zeroout if split fails \
272447ea3bb5SYongqiang Yang 					due to ENOSPC */
272547ea3bb5SYongqiang Yang #define EXT4_EXT_MARK_UNINIT1	0x2  /* mark first half uninitialized */
272647ea3bb5SYongqiang Yang #define EXT4_EXT_MARK_UNINIT2	0x4  /* mark second half uninitialized */
272747ea3bb5SYongqiang Yang 
272847ea3bb5SYongqiang Yang /*
272947ea3bb5SYongqiang Yang  * ext4_split_extent_at() splits an extent at given block.
273047ea3bb5SYongqiang Yang  *
273147ea3bb5SYongqiang Yang  * @handle: the journal handle
273247ea3bb5SYongqiang Yang  * @inode: the file inode
273347ea3bb5SYongqiang Yang  * @path: the path to the extent
273447ea3bb5SYongqiang Yang  * @split: the logical block where the extent is splitted.
273547ea3bb5SYongqiang Yang  * @split_flags: indicates if the extent could be zeroout if split fails, and
273647ea3bb5SYongqiang Yang  *		 the states(init or uninit) of new extents.
273747ea3bb5SYongqiang Yang  * @flags: flags used to insert new extent to extent tree.
273847ea3bb5SYongqiang Yang  *
273947ea3bb5SYongqiang Yang  *
274047ea3bb5SYongqiang Yang  * Splits extent [a, b] into two extents [a, @split) and [@split, b], states
274147ea3bb5SYongqiang Yang  * of which are deterimined by split_flag.
274247ea3bb5SYongqiang Yang  *
274347ea3bb5SYongqiang Yang  * There are two cases:
274447ea3bb5SYongqiang Yang  *  a> the extent are splitted into two extent.
274547ea3bb5SYongqiang Yang  *  b> split is not needed, and just mark the extent.
274647ea3bb5SYongqiang Yang  *
274747ea3bb5SYongqiang Yang  * return 0 on success.
274847ea3bb5SYongqiang Yang  */
274947ea3bb5SYongqiang Yang static int ext4_split_extent_at(handle_t *handle,
275047ea3bb5SYongqiang Yang 			     struct inode *inode,
275147ea3bb5SYongqiang Yang 			     struct ext4_ext_path *path,
275247ea3bb5SYongqiang Yang 			     ext4_lblk_t split,
275347ea3bb5SYongqiang Yang 			     int split_flag,
275447ea3bb5SYongqiang Yang 			     int flags)
275547ea3bb5SYongqiang Yang {
275647ea3bb5SYongqiang Yang 	ext4_fsblk_t newblock;
275747ea3bb5SYongqiang Yang 	ext4_lblk_t ee_block;
275847ea3bb5SYongqiang Yang 	struct ext4_extent *ex, newex, orig_ex;
275947ea3bb5SYongqiang Yang 	struct ext4_extent *ex2 = NULL;
276047ea3bb5SYongqiang Yang 	unsigned int ee_len, depth;
276147ea3bb5SYongqiang Yang 	int err = 0;
276247ea3bb5SYongqiang Yang 
276347ea3bb5SYongqiang Yang 	ext_debug("ext4_split_extents_at: inode %lu, logical"
276447ea3bb5SYongqiang Yang 		"block %llu\n", inode->i_ino, (unsigned long long)split);
276547ea3bb5SYongqiang Yang 
276647ea3bb5SYongqiang Yang 	ext4_ext_show_leaf(inode, path);
276747ea3bb5SYongqiang Yang 
276847ea3bb5SYongqiang Yang 	depth = ext_depth(inode);
276947ea3bb5SYongqiang Yang 	ex = path[depth].p_ext;
277047ea3bb5SYongqiang Yang 	ee_block = le32_to_cpu(ex->ee_block);
277147ea3bb5SYongqiang Yang 	ee_len = ext4_ext_get_actual_len(ex);
277247ea3bb5SYongqiang Yang 	newblock = split - ee_block + ext4_ext_pblock(ex);
277347ea3bb5SYongqiang Yang 
277447ea3bb5SYongqiang Yang 	BUG_ON(split < ee_block || split >= (ee_block + ee_len));
277547ea3bb5SYongqiang Yang 
277647ea3bb5SYongqiang Yang 	err = ext4_ext_get_access(handle, inode, path + depth);
277747ea3bb5SYongqiang Yang 	if (err)
277847ea3bb5SYongqiang Yang 		goto out;
277947ea3bb5SYongqiang Yang 
278047ea3bb5SYongqiang Yang 	if (split == ee_block) {
278147ea3bb5SYongqiang Yang 		/*
278247ea3bb5SYongqiang Yang 		 * case b: block @split is the block that the extent begins with
278347ea3bb5SYongqiang Yang 		 * then we just change the state of the extent, and splitting
278447ea3bb5SYongqiang Yang 		 * is not needed.
278547ea3bb5SYongqiang Yang 		 */
278647ea3bb5SYongqiang Yang 		if (split_flag & EXT4_EXT_MARK_UNINIT2)
278747ea3bb5SYongqiang Yang 			ext4_ext_mark_uninitialized(ex);
278847ea3bb5SYongqiang Yang 		else
278947ea3bb5SYongqiang Yang 			ext4_ext_mark_initialized(ex);
279047ea3bb5SYongqiang Yang 
279147ea3bb5SYongqiang Yang 		if (!(flags & EXT4_GET_BLOCKS_PRE_IO))
279247ea3bb5SYongqiang Yang 			ext4_ext_try_to_merge(inode, path, ex);
279347ea3bb5SYongqiang Yang 
279447ea3bb5SYongqiang Yang 		err = ext4_ext_dirty(handle, inode, path + depth);
279547ea3bb5SYongqiang Yang 		goto out;
279647ea3bb5SYongqiang Yang 	}
279747ea3bb5SYongqiang Yang 
279847ea3bb5SYongqiang Yang 	/* case a */
279947ea3bb5SYongqiang Yang 	memcpy(&orig_ex, ex, sizeof(orig_ex));
280047ea3bb5SYongqiang Yang 	ex->ee_len = cpu_to_le16(split - ee_block);
280147ea3bb5SYongqiang Yang 	if (split_flag & EXT4_EXT_MARK_UNINIT1)
280247ea3bb5SYongqiang Yang 		ext4_ext_mark_uninitialized(ex);
280347ea3bb5SYongqiang Yang 
280447ea3bb5SYongqiang Yang 	/*
280547ea3bb5SYongqiang Yang 	 * path may lead to new leaf, not to original leaf any more
280647ea3bb5SYongqiang Yang 	 * after ext4_ext_insert_extent() returns,
280747ea3bb5SYongqiang Yang 	 */
280847ea3bb5SYongqiang Yang 	err = ext4_ext_dirty(handle, inode, path + depth);
280947ea3bb5SYongqiang Yang 	if (err)
281047ea3bb5SYongqiang Yang 		goto fix_extent_len;
281147ea3bb5SYongqiang Yang 
281247ea3bb5SYongqiang Yang 	ex2 = &newex;
281347ea3bb5SYongqiang Yang 	ex2->ee_block = cpu_to_le32(split);
281447ea3bb5SYongqiang Yang 	ex2->ee_len   = cpu_to_le16(ee_len - (split - ee_block));
281547ea3bb5SYongqiang Yang 	ext4_ext_store_pblock(ex2, newblock);
281647ea3bb5SYongqiang Yang 	if (split_flag & EXT4_EXT_MARK_UNINIT2)
281747ea3bb5SYongqiang Yang 		ext4_ext_mark_uninitialized(ex2);
281847ea3bb5SYongqiang Yang 
281947ea3bb5SYongqiang Yang 	err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
282047ea3bb5SYongqiang Yang 	if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
282147ea3bb5SYongqiang Yang 		err = ext4_ext_zeroout(inode, &orig_ex);
282247ea3bb5SYongqiang Yang 		if (err)
282347ea3bb5SYongqiang Yang 			goto fix_extent_len;
282447ea3bb5SYongqiang Yang 		/* update the extent length and mark as initialized */
282547ea3bb5SYongqiang Yang 		ex->ee_len = cpu_to_le32(ee_len);
282647ea3bb5SYongqiang Yang 		ext4_ext_try_to_merge(inode, path, ex);
282747ea3bb5SYongqiang Yang 		err = ext4_ext_dirty(handle, inode, path + depth);
282847ea3bb5SYongqiang Yang 		goto out;
282947ea3bb5SYongqiang Yang 	} else if (err)
283047ea3bb5SYongqiang Yang 		goto fix_extent_len;
283147ea3bb5SYongqiang Yang 
283247ea3bb5SYongqiang Yang out:
283347ea3bb5SYongqiang Yang 	ext4_ext_show_leaf(inode, path);
283447ea3bb5SYongqiang Yang 	return err;
283547ea3bb5SYongqiang Yang 
283647ea3bb5SYongqiang Yang fix_extent_len:
283747ea3bb5SYongqiang Yang 	ex->ee_len = orig_ex.ee_len;
283847ea3bb5SYongqiang Yang 	ext4_ext_dirty(handle, inode, path + depth);
283947ea3bb5SYongqiang Yang 	return err;
284047ea3bb5SYongqiang Yang }
284147ea3bb5SYongqiang Yang 
/*
 * ext4_split_extent() splits an extent and marks the extent which is covered
 * by @map as split_flag indicates.
 *
 * It may result in splitting the extent into multiple extents (up to three)
 * There are three possibilities:
 *   a> There is no split required
 *   b> Splits in two extents: Split is happening at either end of the extent
 *   c> Splits in three extents: Someone is splitting in middle of the extent
 *
 * Returns map->m_len on success, or a negative error code.
 */
static int ext4_split_extent(handle_t *handle,
			      struct inode *inode,
			      struct ext4_ext_path *path,
			      struct ext4_map_blocks *map,
			      int split_flag,
			      int flags)
{
	ext4_lblk_t ee_block;
	struct ext4_extent *ex;
	unsigned int ee_len, depth;
	int err = 0;
	int uninitialized;
	int split_flag1, flags1;

	depth = ext_depth(inode);
	ex = path[depth].p_ext;
	ee_block = le32_to_cpu(ex->ee_block);
	ee_len = ext4_ext_get_actual_len(ex);
	uninitialized = ext4_ext_is_uninitialized(ex);

	/* first split at the right edge of @map, if it falls inside ex */
	if (map->m_lblk + map->m_len < ee_block + ee_len) {
		split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT ?
			      EXT4_EXT_MAY_ZEROOUT : 0;
		flags1 = flags | EXT4_GET_BLOCKS_PRE_IO;
		if (uninitialized)
			split_flag1 |= EXT4_EXT_MARK_UNINIT1 |
				       EXT4_EXT_MARK_UNINIT2;
		err = ext4_split_extent_at(handle, inode, path,
				map->m_lblk + map->m_len, split_flag1, flags1);
		if (err)
			goto out;
	}

	/*
	 * the first split may have reshaped the tree; drop the stale path
	 * and look the leaf for map->m_lblk up again before the second split
	 */
	ext4_ext_drop_refs(path);
	path = ext4_ext_find_extent(inode, map->m_lblk, path);
	if (IS_ERR(path))
		return PTR_ERR(path);

	/* then split at the left edge of @map, if it is not already a border */
	if (map->m_lblk >= ee_block) {
		split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT ?
			      EXT4_EXT_MAY_ZEROOUT : 0;
		if (uninitialized)
			split_flag1 |= EXT4_EXT_MARK_UNINIT1;
		if (split_flag & EXT4_EXT_MARK_UNINIT2)
			split_flag1 |= EXT4_EXT_MARK_UNINIT2;
		err = ext4_split_extent_at(handle, inode, path,
				map->m_lblk, split_flag1, flags);
		if (err)
			goto out;
	}

	ext4_ext_show_leaf(inode, path);
out:
	return err ? err : map->m_len;
}
290847ea3bb5SYongqiang Yang 
29093977c965SAneesh Kumar K.V #define EXT4_EXT_ZERO_LEN 7
291056055d3aSAmit Arora /*
2911e35fd660STheodore Ts'o  * This function is called by ext4_ext_map_blocks() if someone tries to write
291256055d3aSAmit Arora  * to an uninitialized extent. It may result in splitting the uninitialized
291356055d3aSAmit Arora  * extent into multiple extents (up to three - one initialized and two
291456055d3aSAmit Arora  * uninitialized).
291556055d3aSAmit Arora  * There are three possibilities:
291656055d3aSAmit Arora  *   a> There is no split required: Entire extent should be initialized
291756055d3aSAmit Arora  *   b> Splits in two extents: Write is happening at either end of the extent
291856055d3aSAmit Arora  *   c> Splits in three extents: Somone is writing in middle of the extent
291956055d3aSAmit Arora  */
2920725d26d3SAneesh Kumar K.V static int ext4_ext_convert_to_initialized(handle_t *handle,
2921725d26d3SAneesh Kumar K.V 					   struct inode *inode,
2922e35fd660STheodore Ts'o 					   struct ext4_map_blocks *map,
2923e35fd660STheodore Ts'o 					   struct ext4_ext_path *path)
292456055d3aSAmit Arora {
2925667eff35SYongqiang Yang 	struct ext4_map_blocks split_map;
2926667eff35SYongqiang Yang 	struct ext4_extent zero_ex;
2927667eff35SYongqiang Yang 	struct ext4_extent *ex;
292821ca087aSDmitry Monakhov 	ext4_lblk_t ee_block, eof_block;
2929725d26d3SAneesh Kumar K.V 	unsigned int allocated, ee_len, depth;
293056055d3aSAmit Arora 	int err = 0;
2931667eff35SYongqiang Yang 	int split_flag = 0;
293221ca087aSDmitry Monakhov 
293321ca087aSDmitry Monakhov 	ext_debug("ext4_ext_convert_to_initialized: inode %lu, logical"
293421ca087aSDmitry Monakhov 		"block %llu, max_blocks %u\n", inode->i_ino,
2935e35fd660STheodore Ts'o 		(unsigned long long)map->m_lblk, map->m_len);
293621ca087aSDmitry Monakhov 
293721ca087aSDmitry Monakhov 	eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
293821ca087aSDmitry Monakhov 		inode->i_sb->s_blocksize_bits;
2939e35fd660STheodore Ts'o 	if (eof_block < map->m_lblk + map->m_len)
2940e35fd660STheodore Ts'o 		eof_block = map->m_lblk + map->m_len;
294156055d3aSAmit Arora 
294256055d3aSAmit Arora 	depth = ext_depth(inode);
294356055d3aSAmit Arora 	ex = path[depth].p_ext;
294456055d3aSAmit Arora 	ee_block = le32_to_cpu(ex->ee_block);
294556055d3aSAmit Arora 	ee_len = ext4_ext_get_actual_len(ex);
2946e35fd660STheodore Ts'o 	allocated = ee_len - (map->m_lblk - ee_block);
294721ca087aSDmitry Monakhov 
2948667eff35SYongqiang Yang 	WARN_ON(map->m_lblk < ee_block);
294921ca087aSDmitry Monakhov 	/*
295021ca087aSDmitry Monakhov 	 * It is safe to convert extent to initialized via explicit
295121ca087aSDmitry Monakhov 	 * zeroout only if extent is fully insde i_size or new_size.
295221ca087aSDmitry Monakhov 	 */
2953667eff35SYongqiang Yang 	split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0;
295421ca087aSDmitry Monakhov 
29553977c965SAneesh Kumar K.V 	/* If extent has less than 2*EXT4_EXT_ZERO_LEN zerout directly */
2956667eff35SYongqiang Yang 	if (ee_len <= 2*EXT4_EXT_ZERO_LEN &&
2957667eff35SYongqiang Yang 	    (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
2958667eff35SYongqiang Yang 		err = ext4_ext_zeroout(inode, ex);
29593977c965SAneesh Kumar K.V 		if (err)
296056055d3aSAmit Arora 			goto out;
29619df5643aSAneesh Kumar K.V 
29629df5643aSAneesh Kumar K.V 		err = ext4_ext_get_access(handle, inode, path + depth);
29639df5643aSAneesh Kumar K.V 		if (err)
29649df5643aSAneesh Kumar K.V 			goto out;
2965667eff35SYongqiang Yang 		ext4_ext_mark_initialized(ex);
2966667eff35SYongqiang Yang 		ext4_ext_try_to_merge(inode, path, ex);
296756055d3aSAmit Arora 		err = ext4_ext_dirty(handle, inode, path + depth);
296856055d3aSAmit Arora 		goto out;
2969667eff35SYongqiang Yang 	}
2970093a088bSAneesh Kumar K.V 
2971667eff35SYongqiang Yang 	/*
2972667eff35SYongqiang Yang 	 * four cases:
2973667eff35SYongqiang Yang 	 * 1. split the extent into three extents.
2974667eff35SYongqiang Yang 	 * 2. split the extent into two extents, zeroout the first half.
2975667eff35SYongqiang Yang 	 * 3. split the extent into two extents, zeroout the second half.
2976667eff35SYongqiang Yang 	 * 4. split the extent into two extents with out zeroout.
2977667eff35SYongqiang Yang 	 */
2978667eff35SYongqiang Yang 	split_map.m_lblk = map->m_lblk;
2979667eff35SYongqiang Yang 	split_map.m_len = map->m_len;
2980667eff35SYongqiang Yang 
2981667eff35SYongqiang Yang 	if (allocated > map->m_len) {
2982667eff35SYongqiang Yang 		if (allocated <= EXT4_EXT_ZERO_LEN &&
2983667eff35SYongqiang Yang 		    (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
2984667eff35SYongqiang Yang 			/* case 3 */
2985667eff35SYongqiang Yang 			zero_ex.ee_block =
29869b940f8eSAllison Henderson 					 cpu_to_le32(map->m_lblk);
29879b940f8eSAllison Henderson 			zero_ex.ee_len = cpu_to_le16(allocated);
2988667eff35SYongqiang Yang 			ext4_ext_store_pblock(&zero_ex,
2989667eff35SYongqiang Yang 				ext4_ext_pblock(ex) + map->m_lblk - ee_block);
2990667eff35SYongqiang Yang 			err = ext4_ext_zeroout(inode, &zero_ex);
2991667eff35SYongqiang Yang 			if (err)
2992667eff35SYongqiang Yang 				goto out;
2993667eff35SYongqiang Yang 			split_map.m_lblk = map->m_lblk;
2994667eff35SYongqiang Yang 			split_map.m_len = allocated;
2995667eff35SYongqiang Yang 		} else if ((map->m_lblk - ee_block + map->m_len <
2996667eff35SYongqiang Yang 			   EXT4_EXT_ZERO_LEN) &&
2997667eff35SYongqiang Yang 			   (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
2998667eff35SYongqiang Yang 			/* case 2 */
2999667eff35SYongqiang Yang 			if (map->m_lblk != ee_block) {
3000667eff35SYongqiang Yang 				zero_ex.ee_block = ex->ee_block;
3001667eff35SYongqiang Yang 				zero_ex.ee_len = cpu_to_le16(map->m_lblk -
3002667eff35SYongqiang Yang 							ee_block);
3003667eff35SYongqiang Yang 				ext4_ext_store_pblock(&zero_ex,
3004667eff35SYongqiang Yang 						      ext4_ext_pblock(ex));
3005667eff35SYongqiang Yang 				err = ext4_ext_zeroout(inode, &zero_ex);
3006667eff35SYongqiang Yang 				if (err)
3007667eff35SYongqiang Yang 					goto out;
3008667eff35SYongqiang Yang 			}
3009667eff35SYongqiang Yang 
3010667eff35SYongqiang Yang 			split_map.m_lblk = ee_block;
30119b940f8eSAllison Henderson 			split_map.m_len = map->m_lblk - ee_block + map->m_len;
30129b940f8eSAllison Henderson 			allocated = map->m_len;
3013667eff35SYongqiang Yang 		}
3014667eff35SYongqiang Yang 	}
3015667eff35SYongqiang Yang 
3016667eff35SYongqiang Yang 	allocated = ext4_split_extent(handle, inode, path,
3017667eff35SYongqiang Yang 				       &split_map, split_flag, 0);
3018667eff35SYongqiang Yang 	if (allocated < 0)
3019667eff35SYongqiang Yang 		err = allocated;
3020667eff35SYongqiang Yang 
3021667eff35SYongqiang Yang out:
3022667eff35SYongqiang Yang 	return err ? err : allocated;
302356055d3aSAmit Arora }
302456055d3aSAmit Arora 
/*
 * This function is called by ext4_ext_map_blocks() from
 * ext4_get_blocks_dio_write() when DIO to write
 * to an uninitialized extent.
 *
 * Writing to an uninitialized extent may result in splitting the uninitialized
 * extent into multiple initialized/uninitialized extents (up to three)
 * There are three possibilities:
 *   a> There is no split required: Entire extent should be uninitialized
 *   b> Splits in two extents: Write is happening at either end of the extent
 *   c> Splits in three extents: Someone is writing in middle of the extent
 *
 * One or more index blocks may be needed if the extent tree grows after
 * the uninitialized extent split. To prevent ENOSPC occurring at the IO
 * complete, we need to split the uninitialized extent before DIO submits
 * the IO. The uninitialized extent called at this time will be split
 * into three uninitialized extents (at most). After IO complete, the part
 * being filled will be converted to initialized by the end_io callback
 * function via ext4_convert_unwritten_extents().
 *
 * Returns the size of the uninitialized extent to be written on success.
 */
static int ext4_split_unwritten_extents(handle_t *handle,
					struct inode *inode,
					struct ext4_map_blocks *map,
					struct ext4_ext_path *path,
					int flags)
{
	ext4_lblk_t eof_block;
	ext4_lblk_t ee_block;
	struct ext4_extent *ex;
	unsigned int ee_len;
	int split_flag = 0, depth;

	ext_debug("ext4_split_unwritten_extents: inode %lu, logical"
		"block %llu, max_blocks %u\n", inode->i_ino,
		(unsigned long long)map->m_lblk, map->m_len);

	/* round i_size up to a whole block to get the EOF block number */
	eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
		inode->i_sb->s_blocksize_bits;
	if (eof_block < map->m_lblk + map->m_len)
		eof_block = map->m_lblk + map->m_len;
	/*
	 * It is safe to convert extent to initialized via explicit
	 * zeroout only if extent is fully inside i_size or new_size.
	 */
	depth = ext_depth(inode);
	ex = path[depth].p_ext;
	ee_block = le32_to_cpu(ex->ee_block);
	ee_len = ext4_ext_get_actual_len(ex);

	split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0;
	/* both halves stay uninitialized until the IO completion converts them */
	split_flag |= EXT4_EXT_MARK_UNINIT2;

	flags |= EXT4_GET_BLOCKS_PRE_IO;
	return ext4_split_extent(handle, inode, path, map, split_flag, flags);
}
3082197217a5SYongqiang Yang 
3083c7064ef1SJiaying Zhang static int ext4_convert_unwritten_extents_endio(handle_t *handle,
30840031462bSMingming Cao 					      struct inode *inode,
30850031462bSMingming Cao 					      struct ext4_ext_path *path)
30860031462bSMingming Cao {
30870031462bSMingming Cao 	struct ext4_extent *ex;
30880031462bSMingming Cao 	int depth;
30890031462bSMingming Cao 	int err = 0;
30900031462bSMingming Cao 
30910031462bSMingming Cao 	depth = ext_depth(inode);
30920031462bSMingming Cao 	ex = path[depth].p_ext;
30930031462bSMingming Cao 
3094197217a5SYongqiang Yang 	ext_debug("ext4_convert_unwritten_extents_endio: inode %lu, logical"
3095197217a5SYongqiang Yang 		"block %llu, max_blocks %u\n", inode->i_ino,
3096197217a5SYongqiang Yang 		(unsigned long long)le32_to_cpu(ex->ee_block),
3097197217a5SYongqiang Yang 		ext4_ext_get_actual_len(ex));
3098197217a5SYongqiang Yang 
30990031462bSMingming Cao 	err = ext4_ext_get_access(handle, inode, path + depth);
31000031462bSMingming Cao 	if (err)
31010031462bSMingming Cao 		goto out;
31020031462bSMingming Cao 	/* first mark the extent as initialized */
31030031462bSMingming Cao 	ext4_ext_mark_initialized(ex);
31040031462bSMingming Cao 
3105197217a5SYongqiang Yang 	/* note: ext4_ext_correct_indexes() isn't needed here because
3106197217a5SYongqiang Yang 	 * borders are not changed
31070031462bSMingming Cao 	 */
3108197217a5SYongqiang Yang 	ext4_ext_try_to_merge(inode, path, ex);
3109197217a5SYongqiang Yang 
31100031462bSMingming Cao 	/* Mark modified extent as dirty */
31110031462bSMingming Cao 	err = ext4_ext_dirty(handle, inode, path + depth);
31120031462bSMingming Cao out:
31130031462bSMingming Cao 	ext4_ext_show_leaf(inode, path);
31140031462bSMingming Cao 	return err;
31150031462bSMingming Cao }
31160031462bSMingming Cao 
3117515f41c3SAneesh Kumar K.V static void unmap_underlying_metadata_blocks(struct block_device *bdev,
3118515f41c3SAneesh Kumar K.V 			sector_t block, int count)
3119515f41c3SAneesh Kumar K.V {
3120515f41c3SAneesh Kumar K.V 	int i;
3121515f41c3SAneesh Kumar K.V 	for (i = 0; i < count; i++)
3122515f41c3SAneesh Kumar K.V                 unmap_underlying_metadata(bdev, block + i);
3123515f41c3SAneesh Kumar K.V }
3124515f41c3SAneesh Kumar K.V 
312558590b06STheodore Ts'o /*
312658590b06STheodore Ts'o  * Handle EOFBLOCKS_FL flag, clearing it if necessary
312758590b06STheodore Ts'o  */
312858590b06STheodore Ts'o static int check_eofblocks_fl(handle_t *handle, struct inode *inode,
3129d002ebf1SEric Sandeen 			      ext4_lblk_t lblk,
313058590b06STheodore Ts'o 			      struct ext4_ext_path *path,
313158590b06STheodore Ts'o 			      unsigned int len)
313258590b06STheodore Ts'o {
313358590b06STheodore Ts'o 	int i, depth;
313458590b06STheodore Ts'o 	struct ext4_extent_header *eh;
313565922cb5SSergey Senozhatsky 	struct ext4_extent *last_ex;
313658590b06STheodore Ts'o 
313758590b06STheodore Ts'o 	if (!ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS))
313858590b06STheodore Ts'o 		return 0;
313958590b06STheodore Ts'o 
314058590b06STheodore Ts'o 	depth = ext_depth(inode);
314158590b06STheodore Ts'o 	eh = path[depth].p_hdr;
314258590b06STheodore Ts'o 
314358590b06STheodore Ts'o 	if (unlikely(!eh->eh_entries)) {
314458590b06STheodore Ts'o 		EXT4_ERROR_INODE(inode, "eh->eh_entries == 0 and "
314558590b06STheodore Ts'o 				 "EOFBLOCKS_FL set");
314658590b06STheodore Ts'o 		return -EIO;
314758590b06STheodore Ts'o 	}
314858590b06STheodore Ts'o 	last_ex = EXT_LAST_EXTENT(eh);
314958590b06STheodore Ts'o 	/*
315058590b06STheodore Ts'o 	 * We should clear the EOFBLOCKS_FL flag if we are writing the
315158590b06STheodore Ts'o 	 * last block in the last extent in the file.  We test this by
315258590b06STheodore Ts'o 	 * first checking to see if the caller to
315358590b06STheodore Ts'o 	 * ext4_ext_get_blocks() was interested in the last block (or
315458590b06STheodore Ts'o 	 * a block beyond the last block) in the current extent.  If
315558590b06STheodore Ts'o 	 * this turns out to be false, we can bail out from this
315658590b06STheodore Ts'o 	 * function immediately.
315758590b06STheodore Ts'o 	 */
3158d002ebf1SEric Sandeen 	if (lblk + len < le32_to_cpu(last_ex->ee_block) +
315958590b06STheodore Ts'o 	    ext4_ext_get_actual_len(last_ex))
316058590b06STheodore Ts'o 		return 0;
316158590b06STheodore Ts'o 	/*
316258590b06STheodore Ts'o 	 * If the caller does appear to be planning to write at or
316358590b06STheodore Ts'o 	 * beyond the end of the current extent, we then test to see
316458590b06STheodore Ts'o 	 * if the current extent is the last extent in the file, by
316558590b06STheodore Ts'o 	 * checking to make sure it was reached via the rightmost node
316658590b06STheodore Ts'o 	 * at each level of the tree.
316758590b06STheodore Ts'o 	 */
316858590b06STheodore Ts'o 	for (i = depth-1; i >= 0; i--)
316958590b06STheodore Ts'o 		if (path[i].p_idx != EXT_LAST_INDEX(path[i].p_hdr))
317058590b06STheodore Ts'o 			return 0;
317158590b06STheodore Ts'o 	ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
317258590b06STheodore Ts'o 	return ext4_mark_inode_dirty(handle, inode);
317358590b06STheodore Ts'o }
317458590b06STheodore Ts'o 
/*
 * Handle the case where ext4_ext_map_blocks() found the requested block
 * inside an uninitialized (unwritten) extent.
 *
 * @handle:	journal handle
 * @inode:	inode owning the extent tree
 * @map:	requested lblk->pblk mapping; m_pblk, m_len and m_flags
 *		are updated to describe the result
 * @path:	extent tree path to the covering extent; always dropped
 *		and freed before returning (at out2)
 * @flags:	EXT4_GET_BLOCKS_* flags selecting how to treat the
 *		uninitialized extent (split, convert, or report as-is)
 * @allocated:	number of blocks remaining in the found extent from
 *		map->m_lblk onward
 * @newblock:	physical block corresponding to map->m_lblk
 *
 * Returns the number of blocks mapped on success, or a negative error.
 */
static int
ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
			struct ext4_map_blocks *map,
			struct ext4_ext_path *path, int flags,
			unsigned int allocated, ext4_fsblk_t newblock)
{
	int ret = 0;
	int err = 0;
	/* end_io descriptor of the in-flight AIO DIO, if any */
	ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio;

	ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical"
		  "block %llu, max_blocks %u, flags %d, allocated %u",
		  inode->i_ino, (unsigned long long)map->m_lblk, map->m_len,
		  flags, allocated);
	ext4_ext_show_leaf(inode, path);

	/* get_block() called before submitting the IO: split the extent
	 * so the written part can be converted after IO completion */
	if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
		ret = ext4_split_unwritten_extents(handle, inode, map,
						   path, flags);
		/*
		 * Flag the inode (non-AIO case) or the end_io struct
		 * (AIO case) so that this IO gets converted to written
		 * once it completes.
		 */
		if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) {
			io->flag = EXT4_IO_END_UNWRITTEN;
			atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
		} else
			ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
		if (ext4_should_dioread_nolock(inode))
			map->m_flags |= EXT4_MAP_UNINIT;
		goto out;
	}
	/* IO end_io completed: convert the filled extent to written */
	if ((flags & EXT4_GET_BLOCKS_CONVERT)) {
		ret = ext4_convert_unwritten_extents_endio(handle, inode,
							path);
		if (ret >= 0) {
			ext4_update_inode_fsync_trans(handle, inode, 1);
			err = check_eofblocks_fl(handle, inode, map->m_lblk,
						 path, map->m_len);
		} else
			err = ret;
		goto out2;
	}
	/* buffered IO cases from here on */
	/*
	 * repeat fallocate creation request:
	 * we already have an unwritten extent, so just report it
	 */
	if (flags & EXT4_GET_BLOCKS_UNINIT_EXT)
		goto map_out;

	/* buffered READ or buffered write_begin() lookup */
	if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
		/*
		 * We have blocks reserved already.  We
		 * return allocated blocks so that delalloc
		 * won't do block reservation for us.  But
		 * the buffer head will be unmapped so that
		 * a read from the block returns 0s.
		 */
		map->m_flags |= EXT4_MAP_UNWRITTEN;
		goto out1;
	}

	/* buffered write, writepage time: convert to initialized */
	ret = ext4_ext_convert_to_initialized(handle, inode, map, path);
	if (ret >= 0) {
		ext4_update_inode_fsync_trans(handle, inode, 1);
		err = check_eofblocks_fl(handle, inode, map->m_lblk, path,
					 map->m_len);
		if (err < 0)
			goto out2;
	}

out:
	/* ret from split/convert: <= 0 is an error, > 0 is a count */
	if (ret <= 0) {
		err = ret;
		goto out2;
	} else
		allocated = ret;
	map->m_flags |= EXT4_MAP_NEW;
	/*
	 * if we allocated more blocks than requested
	 * we need to make sure we unmap the extra block
	 * allocated. The actual needed block will get
	 * unmapped later when we find the buffer_head marked
	 * new.
	 */
	if (allocated > map->m_len) {
		unmap_underlying_metadata_blocks(inode->i_sb->s_bdev,
					newblock + map->m_len,
					allocated - map->m_len);
		allocated = map->m_len;
	}

	/*
	 * If we have done fallocate with the offset that is already
	 * delayed allocated, we would have block reservation
	 * and quota reservation done in the delayed write path.
	 * But fallocate would have already updated quota and block
	 * count for this offset. So cancel these reservations.
	 */
	if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
		ext4_da_update_reserve_space(inode, allocated, 0);

map_out:
	map->m_flags |= EXT4_MAP_MAPPED;
out1:
	/* never report more blocks than the caller asked for */
	if (allocated > map->m_len)
		allocated = map->m_len;
	ext4_ext_show_leaf(inode, path);
	map->m_pblk = newblock;
	map->m_len = allocated;
out2:
	/* the path is consumed by this function on every exit */
	if (path) {
		ext4_ext_drop_refs(path);
		kfree(path);
	}
	return err ? err : allocated;
}
329858590b06STheodore Ts'o 
/*
 * get_implied_cluster_alloc - check to see if the requested
 * allocation (in the map structure) overlaps with a cluster already
 * allocated in an extent.
 *	@sbi	The ext4-specific superblock structure
 *	@map	The requested lblk->pblk mapping
 *	@ex	The extent structure which might contain an implied
 *			cluster allocation
 *	@path	Extent tree path, used to find the next allocated
 *			block to the right of @ex (third case below)
 *
 * This function is called by ext4_ext_map_blocks() after we failed to
 * find blocks that were already in the inode's extent tree.  Hence,
 * we know that the beginning of the requested region cannot overlap
 * the extent from the inode's extent tree.  There are three cases we
 * want to catch.  The first is this case:
 *
 *		 |--- cluster # N--|
 *    |--- extent ---|	|---- requested region ---|
 *			|==========|
 *
 * The second case that we need to test for is this one:
 *
 *   |--------- cluster # N ----------------|
 *	   |--- requested region --|   |------- extent ----|
 *	   |=======================|
 *
 * The third case is when the requested region lies between two extents
 * within the same cluster:
 *          |------------- cluster # N-------------|
 * |----- ex -----|                  |---- ex_right ----|
 *                  |------ requested region ------|
 *                  |================|
 *
 * In each of the above cases, we need to set map->m_pblk and
 * map->m_len so they correspond to the extent labelled as "|====|"
 * from cluster #N, since it is already in use for data in
 * cluster EXT4_B2C(sbi, map->m_lblk).	We will then return 1 to
 * signal to ext4_ext_map_blocks() that map->m_pblk should be treated
 * as a new "allocated" block region.  Otherwise, we will return 0 and
 * ext4_ext_map_blocks() will then allocate one or more new clusters
 * by calling ext4_mb_new_blocks().
 */
static int get_implied_cluster_alloc(struct ext4_sb_info *sbi,
				     struct ext4_map_blocks *map,
				     struct ext4_extent *ex,
				     struct ext4_ext_path *path)
{
	/* offset of the requested block within its cluster */
	ext4_lblk_t c_offset = map->m_lblk & (sbi->s_cluster_ratio-1);
	ext4_lblk_t ex_cluster_start, ex_cluster_end;
	/* NOTE(review): rr_cluster_end is computed but never read below */
	ext4_lblk_t rr_cluster_start, rr_cluster_end;
	ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
	ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
	unsigned short ee_len = ext4_ext_get_actual_len(ex);

	/* The extent passed in that we are trying to match */
	ex_cluster_start = EXT4_B2C(sbi, ee_block);
	ex_cluster_end = EXT4_B2C(sbi, ee_block + ee_len - 1);

	/* The requested region passed into ext4_map_blocks() */
	rr_cluster_start = EXT4_B2C(sbi, map->m_lblk);
	rr_cluster_end = EXT4_B2C(sbi, map->m_lblk + map->m_len - 1);

	if ((rr_cluster_start == ex_cluster_end) ||
	    (rr_cluster_start == ex_cluster_start)) {
		/*
		 * When we share the extent's *last* cluster, compute
		 * the physical cluster from the extent's last block.
		 */
		if (rr_cluster_start == ex_cluster_end)
			ee_start += ee_len - 1;
		/* cluster-aligned physical base plus in-cluster offset */
		map->m_pblk = (ee_start & ~(sbi->s_cluster_ratio - 1)) +
			c_offset;
		/* never map past the end of the shared cluster */
		map->m_len = min(map->m_len,
				 (unsigned) sbi->s_cluster_ratio - c_offset);
		/*
		 * Check for and handle this case:
		 *
		 *   |--------- cluster # N-------------|
		 *		       |------- extent ----|
		 *	   |--- requested region ---|
		 *	   |===========|
		 */

		if (map->m_lblk < ee_block)
			map->m_len = min(map->m_len, ee_block - map->m_lblk);

		/*
		 * Check for the case where there is already another allocated
		 * block to the right of 'ex' but before the end of the cluster.
		 *
		 *          |------------- cluster # N-------------|
		 * |----- ex -----|                  |---- ex_right ----|
		 *                  |------ requested region ------|
		 *                  |================|
		 */
		if (map->m_lblk > ee_block) {
			ext4_lblk_t next = ext4_ext_next_allocated_block(path);
			map->m_len = min(map->m_len, next - map->m_lblk);
		}
		return 1;
	}
	return 0;
}
3397*4d33b1efSTheodore Ts'o 
3398*4d33b1efSTheodore Ts'o 
3399*4d33b1efSTheodore Ts'o /*
3400f5ab0d1fSMingming Cao  * Block allocation/map/preallocation routine for extents based files
3401f5ab0d1fSMingming Cao  *
3402f5ab0d1fSMingming Cao  *
3403c278bfecSAneesh Kumar K.V  * Need to be called with
34040e855ac8SAneesh Kumar K.V  * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system block
34050e855ac8SAneesh Kumar K.V  * (ie, create is zero). Otherwise down_write(&EXT4_I(inode)->i_data_sem)
3406f5ab0d1fSMingming Cao  *
3407f5ab0d1fSMingming Cao  * return > 0, number of of blocks already mapped/allocated
3408f5ab0d1fSMingming Cao  *          if create == 0 and these are pre-allocated blocks
3409f5ab0d1fSMingming Cao  *          	buffer head is unmapped
3410f5ab0d1fSMingming Cao  *          otherwise blocks are mapped
3411f5ab0d1fSMingming Cao  *
3412f5ab0d1fSMingming Cao  * return = 0, if plain look up failed (blocks have not been allocated)
3413f5ab0d1fSMingming Cao  *          buffer head is unmapped
3414f5ab0d1fSMingming Cao  *
3415f5ab0d1fSMingming Cao  * return < 0, error case.
3416c278bfecSAneesh Kumar K.V  */
3417e35fd660STheodore Ts'o int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3418e35fd660STheodore Ts'o 			struct ext4_map_blocks *map, int flags)
3419a86c6181SAlex Tomas {
3420a86c6181SAlex Tomas 	struct ext4_ext_path *path = NULL;
3421*4d33b1efSTheodore Ts'o 	struct ext4_extent newex, *ex, *ex2;
3422*4d33b1efSTheodore Ts'o 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
34230562e0baSJiaying Zhang 	ext4_fsblk_t newblock = 0;
3424*4d33b1efSTheodore Ts'o 	int free_on_err = 0, err = 0, depth, ret;
3425*4d33b1efSTheodore Ts'o 	unsigned int allocated = 0, offset = 0;
3426e861304bSAllison Henderson 	unsigned int punched_out = 0;
3427e861304bSAllison Henderson 	unsigned int result = 0;
3428c9de560dSAlex Tomas 	struct ext4_allocation_request ar;
34298d5d02e6SMingming Cao 	ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio;
3430*4d33b1efSTheodore Ts'o 	ext4_lblk_t cluster_offset;
3431e861304bSAllison Henderson 	struct ext4_map_blocks punch_map;
3432a86c6181SAlex Tomas 
343384fe3befSMingming 	ext_debug("blocks %u/%u requested for inode %lu\n",
3434e35fd660STheodore Ts'o 		  map->m_lblk, map->m_len, inode->i_ino);
34350562e0baSJiaying Zhang 	trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
3436a86c6181SAlex Tomas 
3437a86c6181SAlex Tomas 	/* check in cache */
3438015861baSRobin Dong 	if (!(flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) &&
3439015861baSRobin Dong 		ext4_ext_in_cache(inode, map->m_lblk, &newex)) {
3440b05e6ae5STheodore Ts'o 		if (!newex.ee_start_lo && !newex.ee_start_hi) {
3441c2177057STheodore Ts'o 			if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
344256055d3aSAmit Arora 				/*
344356055d3aSAmit Arora 				 * block isn't allocated yet and
344456055d3aSAmit Arora 				 * user doesn't want to allocate it
344556055d3aSAmit Arora 				 */
3446a86c6181SAlex Tomas 				goto out2;
3447a86c6181SAlex Tomas 			}
3448a86c6181SAlex Tomas 			/* we should allocate requested block */
3449b05e6ae5STheodore Ts'o 		} else {
3450a86c6181SAlex Tomas 			/* block is already allocated */
3451e35fd660STheodore Ts'o 			newblock = map->m_lblk
3452a86c6181SAlex Tomas 				   - le32_to_cpu(newex.ee_block)
3453bf89d16fSTheodore Ts'o 				   + ext4_ext_pblock(&newex);
3454d0d856e8SRandy Dunlap 			/* number of remaining blocks in the extent */
3455b939e376SAneesh Kumar K.V 			allocated = ext4_ext_get_actual_len(&newex) -
3456e35fd660STheodore Ts'o 				(map->m_lblk - le32_to_cpu(newex.ee_block));
3457a86c6181SAlex Tomas 			goto out;
3458a86c6181SAlex Tomas 		}
3459a86c6181SAlex Tomas 	}
3460a86c6181SAlex Tomas 
3461a86c6181SAlex Tomas 	/* find extent for this block */
3462e35fd660STheodore Ts'o 	path = ext4_ext_find_extent(inode, map->m_lblk, NULL);
3463a86c6181SAlex Tomas 	if (IS_ERR(path)) {
3464a86c6181SAlex Tomas 		err = PTR_ERR(path);
3465a86c6181SAlex Tomas 		path = NULL;
3466a86c6181SAlex Tomas 		goto out2;
3467a86c6181SAlex Tomas 	}
3468a86c6181SAlex Tomas 
3469a86c6181SAlex Tomas 	depth = ext_depth(inode);
3470a86c6181SAlex Tomas 
3471a86c6181SAlex Tomas 	/*
3472d0d856e8SRandy Dunlap 	 * consistent leaf must not be empty;
3473d0d856e8SRandy Dunlap 	 * this situation is possible, though, _during_ tree modification;
3474a86c6181SAlex Tomas 	 * this is why assert can't be put in ext4_ext_find_extent()
3475a86c6181SAlex Tomas 	 */
3476273df556SFrank Mayhar 	if (unlikely(path[depth].p_ext == NULL && depth != 0)) {
3477273df556SFrank Mayhar 		EXT4_ERROR_INODE(inode, "bad extent address "
3478f70f362bSTheodore Ts'o 				 "lblock: %lu, depth: %d pblock %lld",
3479f70f362bSTheodore Ts'o 				 (unsigned long) map->m_lblk, depth,
3480f70f362bSTheodore Ts'o 				 path[depth].p_block);
3481034fb4c9SSurbhi Palande 		err = -EIO;
3482034fb4c9SSurbhi Palande 		goto out2;
3483034fb4c9SSurbhi Palande 	}
3484a86c6181SAlex Tomas 
34857e028976SAvantika Mathur 	ex = path[depth].p_ext;
34867e028976SAvantika Mathur 	if (ex) {
3487725d26d3SAneesh Kumar K.V 		ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
3488bf89d16fSTheodore Ts'o 		ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
3489a2df2a63SAmit Arora 		unsigned short ee_len;
3490471d4011SSuparna Bhattacharya 
3491471d4011SSuparna Bhattacharya 		/*
3492471d4011SSuparna Bhattacharya 		 * Uninitialized extents are treated as holes, except that
349356055d3aSAmit Arora 		 * we split out initialized portions during a write.
3494471d4011SSuparna Bhattacharya 		 */
3495a2df2a63SAmit Arora 		ee_len = ext4_ext_get_actual_len(ex);
3496d0d856e8SRandy Dunlap 		/* if found extent covers block, simply return it */
3497e35fd660STheodore Ts'o 		if (in_range(map->m_lblk, ee_block, ee_len)) {
3498e35fd660STheodore Ts'o 			newblock = map->m_lblk - ee_block + ee_start;
3499d0d856e8SRandy Dunlap 			/* number of remaining blocks in the extent */
3500e35fd660STheodore Ts'o 			allocated = ee_len - (map->m_lblk - ee_block);
3501e35fd660STheodore Ts'o 			ext_debug("%u fit into %u:%d -> %llu\n", map->m_lblk,
3502a86c6181SAlex Tomas 				  ee_block, ee_len, newblock);
350356055d3aSAmit Arora 
3504e861304bSAllison Henderson 			if ((flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) == 0) {
3505e861304bSAllison Henderson 				/*
3506e861304bSAllison Henderson 				 * Do not put uninitialized extent
3507e861304bSAllison Henderson 				 * in the cache
3508e861304bSAllison Henderson 				 */
350956055d3aSAmit Arora 				if (!ext4_ext_is_uninitialized(ex)) {
3510a2df2a63SAmit Arora 					ext4_ext_put_in_cache(inode, ee_block,
3511b05e6ae5STheodore Ts'o 						ee_len, ee_start);
3512a86c6181SAlex Tomas 					goto out;
3513a86c6181SAlex Tomas 				}
3514e861304bSAllison Henderson 				ret = ext4_ext_handle_uninitialized_extents(
3515e861304bSAllison Henderson 					handle, inode, map, path, flags,
3516e861304bSAllison Henderson 					allocated, newblock);
35170031462bSMingming Cao 				return ret;
351856055d3aSAmit Arora 			}
3519e861304bSAllison Henderson 
3520e861304bSAllison Henderson 			/*
3521e861304bSAllison Henderson 			 * Punch out the map length, but only to the
3522e861304bSAllison Henderson 			 * end of the extent
3523e861304bSAllison Henderson 			 */
3524e861304bSAllison Henderson 			punched_out = allocated < map->m_len ?
3525e861304bSAllison Henderson 				allocated : map->m_len;
3526e861304bSAllison Henderson 
3527e861304bSAllison Henderson 			/*
			 * Since extents need to be converted to
3529e861304bSAllison Henderson 			 * uninitialized, they must fit in an
3530e861304bSAllison Henderson 			 * uninitialized extent
3531e861304bSAllison Henderson 			 */
3532e861304bSAllison Henderson 			if (punched_out > EXT_UNINIT_MAX_LEN)
3533e861304bSAllison Henderson 				punched_out = EXT_UNINIT_MAX_LEN;
3534e861304bSAllison Henderson 
3535e861304bSAllison Henderson 			punch_map.m_lblk = map->m_lblk;
3536e861304bSAllison Henderson 			punch_map.m_pblk = newblock;
3537e861304bSAllison Henderson 			punch_map.m_len = punched_out;
3538e861304bSAllison Henderson 			punch_map.m_flags = 0;
3539e861304bSAllison Henderson 
3540e861304bSAllison Henderson 			/* Check to see if the extent needs to be split */
3541e861304bSAllison Henderson 			if (punch_map.m_len != ee_len ||
3542e861304bSAllison Henderson 				punch_map.m_lblk != ee_block) {
3543e861304bSAllison Henderson 
3544e861304bSAllison Henderson 				ret = ext4_split_extent(handle, inode,
3545e861304bSAllison Henderson 				path, &punch_map, 0,
3546e861304bSAllison Henderson 				EXT4_GET_BLOCKS_PUNCH_OUT_EXT |
3547e861304bSAllison Henderson 				EXT4_GET_BLOCKS_PRE_IO);
3548e861304bSAllison Henderson 
3549e861304bSAllison Henderson 				if (ret < 0) {
3550e861304bSAllison Henderson 					err = ret;
3551e861304bSAllison Henderson 					goto out2;
3552e861304bSAllison Henderson 				}
3553e861304bSAllison Henderson 				/*
3554e861304bSAllison Henderson 				 * find extent for the block at
3555e861304bSAllison Henderson 				 * the start of the hole
3556e861304bSAllison Henderson 				 */
3557e861304bSAllison Henderson 				ext4_ext_drop_refs(path);
3558e861304bSAllison Henderson 				kfree(path);
3559e861304bSAllison Henderson 
3560e861304bSAllison Henderson 				path = ext4_ext_find_extent(inode,
3561e861304bSAllison Henderson 				map->m_lblk, NULL);
3562e861304bSAllison Henderson 				if (IS_ERR(path)) {
3563e861304bSAllison Henderson 					err = PTR_ERR(path);
3564e861304bSAllison Henderson 					path = NULL;
3565e861304bSAllison Henderson 					goto out2;
3566e861304bSAllison Henderson 				}
3567e861304bSAllison Henderson 
3568e861304bSAllison Henderson 				depth = ext_depth(inode);
3569e861304bSAllison Henderson 				ex = path[depth].p_ext;
3570e861304bSAllison Henderson 				ee_len = ext4_ext_get_actual_len(ex);
3571e861304bSAllison Henderson 				ee_block = le32_to_cpu(ex->ee_block);
3572e861304bSAllison Henderson 				ee_start = ext4_ext_pblock(ex);
3573e861304bSAllison Henderson 
3574e861304bSAllison Henderson 			}
3575e861304bSAllison Henderson 
3576e861304bSAllison Henderson 			ext4_ext_mark_uninitialized(ex);
3577e861304bSAllison Henderson 
3578f7d0d379SAllison Henderson 			ext4_ext_invalidate_cache(inode);
3579f7d0d379SAllison Henderson 
3580f7d0d379SAllison Henderson 			err = ext4_ext_rm_leaf(handle, inode, path,
3581f7d0d379SAllison Henderson 				map->m_lblk, map->m_lblk + punched_out);
3582f7d0d379SAllison Henderson 
3583f7d0d379SAllison Henderson 			if (!err && path->p_hdr->eh_entries == 0) {
3584f7d0d379SAllison Henderson 				/*
3585f7d0d379SAllison Henderson 				 * Punch hole freed all of this sub tree,
3586f7d0d379SAllison Henderson 				 * so we need to correct eh_depth
3587f7d0d379SAllison Henderson 				 */
3588f7d0d379SAllison Henderson 				err = ext4_ext_get_access(handle, inode, path);
3589f7d0d379SAllison Henderson 				if (err == 0) {
3590f7d0d379SAllison Henderson 					ext_inode_hdr(inode)->eh_depth = 0;
3591f7d0d379SAllison Henderson 					ext_inode_hdr(inode)->eh_max =
3592f7d0d379SAllison Henderson 					cpu_to_le16(ext4_ext_space_root(
3593f7d0d379SAllison Henderson 						inode, 0));
3594f7d0d379SAllison Henderson 
3595f7d0d379SAllison Henderson 					err = ext4_ext_dirty(
3596f7d0d379SAllison Henderson 						handle, inode, path);
3597f7d0d379SAllison Henderson 				}
3598f7d0d379SAllison Henderson 			}
3599e861304bSAllison Henderson 
3600e861304bSAllison Henderson 			goto out2;
3601e861304bSAllison Henderson 		}
3602a86c6181SAlex Tomas 	}
3603a86c6181SAlex Tomas 
3604a86c6181SAlex Tomas 	/*
3605d0d856e8SRandy Dunlap 	 * requested block isn't allocated yet;
3606a86c6181SAlex Tomas 	 * we couldn't try to create block if create flag is zero
3607a86c6181SAlex Tomas 	 */
3608c2177057STheodore Ts'o 	if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
360956055d3aSAmit Arora 		/*
361056055d3aSAmit Arora 		 * put just found gap into cache to speed up
361156055d3aSAmit Arora 		 * subsequent requests
361256055d3aSAmit Arora 		 */
3613e35fd660STheodore Ts'o 		ext4_ext_put_gap_in_cache(inode, path, map->m_lblk);
3614a86c6181SAlex Tomas 		goto out2;
3615a86c6181SAlex Tomas 	}
3616*4d33b1efSTheodore Ts'o 
3617a86c6181SAlex Tomas 	/*
3618c2ea3fdeSTheodore Ts'o 	 * Okay, we need to do block allocation.
3619a86c6181SAlex Tomas 	 */
3620*4d33b1efSTheodore Ts'o 	newex.ee_block = cpu_to_le32(map->m_lblk);
3621*4d33b1efSTheodore Ts'o 	cluster_offset = map->m_lblk & (sbi->s_cluster_ratio-1);
3622*4d33b1efSTheodore Ts'o 
3623*4d33b1efSTheodore Ts'o 	/*
3624*4d33b1efSTheodore Ts'o 	 * If we are doing bigalloc, check to see if the extent returned
3625*4d33b1efSTheodore Ts'o 	 * by ext4_ext_find_extent() implies a cluster we can use.
3626*4d33b1efSTheodore Ts'o 	 */
3627*4d33b1efSTheodore Ts'o 	if (cluster_offset && ex &&
3628*4d33b1efSTheodore Ts'o 	    get_implied_cluster_alloc(sbi, map, ex, path)) {
3629*4d33b1efSTheodore Ts'o 		ar.len = allocated = map->m_len;
3630*4d33b1efSTheodore Ts'o 		newblock = map->m_pblk;
3631*4d33b1efSTheodore Ts'o 		goto got_allocated_blocks;
3632*4d33b1efSTheodore Ts'o 	}
3633a86c6181SAlex Tomas 
3634c9de560dSAlex Tomas 	/* find neighbour allocated blocks */
3635e35fd660STheodore Ts'o 	ar.lleft = map->m_lblk;
3636c9de560dSAlex Tomas 	err = ext4_ext_search_left(inode, path, &ar.lleft, &ar.pleft);
3637c9de560dSAlex Tomas 	if (err)
3638c9de560dSAlex Tomas 		goto out2;
3639e35fd660STheodore Ts'o 	ar.lright = map->m_lblk;
3640*4d33b1efSTheodore Ts'o 	ex2 = NULL;
3641*4d33b1efSTheodore Ts'o 	err = ext4_ext_search_right(inode, path, &ar.lright, &ar.pright, &ex2);
3642c9de560dSAlex Tomas 	if (err)
3643c9de560dSAlex Tomas 		goto out2;
364425d14f98SAmit Arora 
3645*4d33b1efSTheodore Ts'o 	/* Check if the extent after searching to the right implies a
3646*4d33b1efSTheodore Ts'o 	 * cluster we can use. */
3647*4d33b1efSTheodore Ts'o 	if ((sbi->s_cluster_ratio > 1) && ex2 &&
3648*4d33b1efSTheodore Ts'o 	    get_implied_cluster_alloc(sbi, map, ex2, path)) {
3649*4d33b1efSTheodore Ts'o 		ar.len = allocated = map->m_len;
3650*4d33b1efSTheodore Ts'o 		newblock = map->m_pblk;
3651*4d33b1efSTheodore Ts'o 		goto got_allocated_blocks;
3652*4d33b1efSTheodore Ts'o 	}
3653*4d33b1efSTheodore Ts'o 
3654749269faSAmit Arora 	/*
3655749269faSAmit Arora 	 * See if request is beyond maximum number of blocks we can have in
3656749269faSAmit Arora 	 * a single extent. For an initialized extent this limit is
3657749269faSAmit Arora 	 * EXT_INIT_MAX_LEN and for an uninitialized extent this limit is
3658749269faSAmit Arora 	 * EXT_UNINIT_MAX_LEN.
3659749269faSAmit Arora 	 */
3660e35fd660STheodore Ts'o 	if (map->m_len > EXT_INIT_MAX_LEN &&
3661c2177057STheodore Ts'o 	    !(flags & EXT4_GET_BLOCKS_UNINIT_EXT))
3662e35fd660STheodore Ts'o 		map->m_len = EXT_INIT_MAX_LEN;
3663e35fd660STheodore Ts'o 	else if (map->m_len > EXT_UNINIT_MAX_LEN &&
3664c2177057STheodore Ts'o 		 (flags & EXT4_GET_BLOCKS_UNINIT_EXT))
3665e35fd660STheodore Ts'o 		map->m_len = EXT_UNINIT_MAX_LEN;
3666749269faSAmit Arora 
3667e35fd660STheodore Ts'o 	/* Check if we can really insert (m_lblk)::(m_lblk + m_len) extent */
3668e35fd660STheodore Ts'o 	newex.ee_len = cpu_to_le16(map->m_len);
3669*4d33b1efSTheodore Ts'o 	err = ext4_ext_check_overlap(sbi, inode, &newex, path);
367025d14f98SAmit Arora 	if (err)
3671b939e376SAneesh Kumar K.V 		allocated = ext4_ext_get_actual_len(&newex);
367225d14f98SAmit Arora 	else
3673e35fd660STheodore Ts'o 		allocated = map->m_len;
3674c9de560dSAlex Tomas 
3675c9de560dSAlex Tomas 	/* allocate new block */
3676c9de560dSAlex Tomas 	ar.inode = inode;
3677e35fd660STheodore Ts'o 	ar.goal = ext4_ext_find_goal(inode, path, map->m_lblk);
3678e35fd660STheodore Ts'o 	ar.logical = map->m_lblk;
3679*4d33b1efSTheodore Ts'o 	/*
3680*4d33b1efSTheodore Ts'o 	 * We calculate the offset from the beginning of the cluster
3681*4d33b1efSTheodore Ts'o 	 * for the logical block number, since when we allocate a
3682*4d33b1efSTheodore Ts'o 	 * physical cluster, the physical block should start at the
3683*4d33b1efSTheodore Ts'o 	 * same offset from the beginning of the cluster.  This is
3684*4d33b1efSTheodore Ts'o 	 * needed so that future calls to get_implied_cluster_alloc()
3685*4d33b1efSTheodore Ts'o 	 * work correctly.
3686*4d33b1efSTheodore Ts'o 	 */
3687*4d33b1efSTheodore Ts'o 	offset = map->m_lblk & (sbi->s_cluster_ratio - 1);
3688*4d33b1efSTheodore Ts'o 	ar.len = EXT4_NUM_B2C(sbi, offset+allocated);
3689*4d33b1efSTheodore Ts'o 	ar.goal -= offset;
3690*4d33b1efSTheodore Ts'o 	ar.logical -= offset;
3691c9de560dSAlex Tomas 	if (S_ISREG(inode->i_mode))
3692c9de560dSAlex Tomas 		ar.flags = EXT4_MB_HINT_DATA;
3693c9de560dSAlex Tomas 	else
3694c9de560dSAlex Tomas 		/* disable in-core preallocation for non-regular files */
3695c9de560dSAlex Tomas 		ar.flags = 0;
3696556b27abSVivek Haldar 	if (flags & EXT4_GET_BLOCKS_NO_NORMALIZE)
3697556b27abSVivek Haldar 		ar.flags |= EXT4_MB_HINT_NOPREALLOC;
3698c9de560dSAlex Tomas 	newblock = ext4_mb_new_blocks(handle, &ar, &err);
3699a86c6181SAlex Tomas 	if (!newblock)
3700a86c6181SAlex Tomas 		goto out2;
370184fe3befSMingming 	ext_debug("allocate new block: goal %llu, found %llu/%u\n",
3702498e5f24STheodore Ts'o 		  ar.goal, newblock, allocated);
3703*4d33b1efSTheodore Ts'o 	free_on_err = 1;
3704*4d33b1efSTheodore Ts'o 	ar.len = EXT4_C2B(sbi, ar.len) - offset;
3705*4d33b1efSTheodore Ts'o 	if (ar.len > allocated)
3706*4d33b1efSTheodore Ts'o 		ar.len = allocated;
3707a86c6181SAlex Tomas 
3708*4d33b1efSTheodore Ts'o got_allocated_blocks:
3709a86c6181SAlex Tomas 	/* try to insert new extent into found leaf and return */
3710*4d33b1efSTheodore Ts'o 	ext4_ext_store_pblock(&newex, newblock + offset);
3711c9de560dSAlex Tomas 	newex.ee_len = cpu_to_le16(ar.len);
37128d5d02e6SMingming Cao 	/* Mark uninitialized */
37138d5d02e6SMingming Cao 	if (flags & EXT4_GET_BLOCKS_UNINIT_EXT){
3714a2df2a63SAmit Arora 		ext4_ext_mark_uninitialized(&newex);
37158d5d02e6SMingming Cao 		/*
3716744692dcSJiaying Zhang 		 * io_end structure was created for every IO write to an
371725985edcSLucas De Marchi 		 * uninitialized extent. To avoid unnecessary conversion,
3718744692dcSJiaying Zhang 		 * here we flag the IO that really needs the conversion.
37195f524950SMingming 		 * For non asycn direct IO case, flag the inode state
372025985edcSLucas De Marchi 		 * that we need to perform conversion when IO is done.
37218d5d02e6SMingming Cao 		 */
3722744692dcSJiaying Zhang 		if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
3723e9e3bcecSEric Sandeen 			if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) {
3724bd2d0210STheodore Ts'o 				io->flag = EXT4_IO_END_UNWRITTEN;
3725e9e3bcecSEric Sandeen 				atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
3726e9e3bcecSEric Sandeen 			} else
372719f5fb7aSTheodore Ts'o 				ext4_set_inode_state(inode,
372819f5fb7aSTheodore Ts'o 						     EXT4_STATE_DIO_UNWRITTEN);
37295f524950SMingming 		}
3730744692dcSJiaying Zhang 		if (ext4_should_dioread_nolock(inode))
3731e35fd660STheodore Ts'o 			map->m_flags |= EXT4_MAP_UNINIT;
37328d5d02e6SMingming Cao 	}
3733c8d46e41SJiaying Zhang 
3734d002ebf1SEric Sandeen 	err = check_eofblocks_fl(handle, inode, map->m_lblk, path, ar.len);
3735575a1d4bSJiaying Zhang 	if (!err)
3736575a1d4bSJiaying Zhang 		err = ext4_ext_insert_extent(handle, inode, path,
3737575a1d4bSJiaying Zhang 					     &newex, flags);
3738*4d33b1efSTheodore Ts'o 	if (err && free_on_err) {
37397132de74SMaxim Patlasov 		int fb_flags = flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE ?
37407132de74SMaxim Patlasov 			EXT4_FREE_BLOCKS_NO_QUOT_UPDATE : 0;
3741315054f0SAlex Tomas 		/* free data blocks we just allocated */
3742c9de560dSAlex Tomas 		/* not a good idea to call discard here directly,
3743c9de560dSAlex Tomas 		 * but otherwise we'd need to call it every free() */
3744c2ea3fdeSTheodore Ts'o 		ext4_discard_preallocations(inode);
37457dc57615SPeter Huewe 		ext4_free_blocks(handle, inode, NULL, ext4_ext_pblock(&newex),
37467132de74SMaxim Patlasov 				 ext4_ext_get_actual_len(&newex), fb_flags);
3747a86c6181SAlex Tomas 		goto out2;
3748315054f0SAlex Tomas 	}
3749a86c6181SAlex Tomas 
3750a86c6181SAlex Tomas 	/* previous routine could use block we allocated */
3751bf89d16fSTheodore Ts'o 	newblock = ext4_ext_pblock(&newex);
3752b939e376SAneesh Kumar K.V 	allocated = ext4_ext_get_actual_len(&newex);
3753e35fd660STheodore Ts'o 	if (allocated > map->m_len)
3754e35fd660STheodore Ts'o 		allocated = map->m_len;
3755e35fd660STheodore Ts'o 	map->m_flags |= EXT4_MAP_NEW;
3756a86c6181SAlex Tomas 
3757b436b9beSJan Kara 	/*
37585f634d06SAneesh Kumar K.V 	 * Update reserved blocks/metadata blocks after successful
37595f634d06SAneesh Kumar K.V 	 * block allocation which had been deferred till now.
37605f634d06SAneesh Kumar K.V 	 */
37611296cc85SAneesh Kumar K.V 	if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
37625f634d06SAneesh Kumar K.V 		ext4_da_update_reserve_space(inode, allocated, 1);
37635f634d06SAneesh Kumar K.V 
37645f634d06SAneesh Kumar K.V 	/*
3765b436b9beSJan Kara 	 * Cache the extent and update transaction to commit on fdatasync only
3766b436b9beSJan Kara 	 * when it is _not_ an uninitialized extent.
3767b436b9beSJan Kara 	 */
3768b436b9beSJan Kara 	if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) {
3769b05e6ae5STheodore Ts'o 		ext4_ext_put_in_cache(inode, map->m_lblk, allocated, newblock);
3770b436b9beSJan Kara 		ext4_update_inode_fsync_trans(handle, inode, 1);
3771b436b9beSJan Kara 	} else
3772b436b9beSJan Kara 		ext4_update_inode_fsync_trans(handle, inode, 0);
3773a86c6181SAlex Tomas out:
3774e35fd660STheodore Ts'o 	if (allocated > map->m_len)
3775e35fd660STheodore Ts'o 		allocated = map->m_len;
3776a86c6181SAlex Tomas 	ext4_ext_show_leaf(inode, path);
3777e35fd660STheodore Ts'o 	map->m_flags |= EXT4_MAP_MAPPED;
3778e35fd660STheodore Ts'o 	map->m_pblk = newblock;
3779e35fd660STheodore Ts'o 	map->m_len = allocated;
3780a86c6181SAlex Tomas out2:
3781a86c6181SAlex Tomas 	if (path) {
3782a86c6181SAlex Tomas 		ext4_ext_drop_refs(path);
3783a86c6181SAlex Tomas 		kfree(path);
3784a86c6181SAlex Tomas 	}
37850562e0baSJiaying Zhang 	trace_ext4_ext_map_blocks_exit(inode, map->m_lblk,
37860562e0baSJiaying Zhang 		newblock, map->m_len, err ? err : allocated);
3787e861304bSAllison Henderson 
3788e861304bSAllison Henderson 	result = (flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) ?
3789e861304bSAllison Henderson 			punched_out : allocated;
3790e861304bSAllison Henderson 
3791e861304bSAllison Henderson 	return err ? err : result;
3792a86c6181SAlex Tomas }
3793a86c6181SAlex Tomas 
/*
 * ext4_ext_truncate() - truncate the extent tree of an inode to i_size
 * @inode: inode being truncated; i_size holds the new (smaller) size
 *
 * Discards all extents beyond the block containing i_size, zeroing the
 * tail of the last partial page first so stale data is not exposed.
 * The inode is placed on the orphan list for the duration so that a
 * crash mid-truncate can be recovered; errors are handled by leaving
 * the orphan record in place (there is no return value).
 */
void ext4_ext_truncate(struct inode *inode)
{
	struct address_space *mapping = inode->i_mapping;
	struct super_block *sb = inode->i_sb;
	ext4_lblk_t last_block;
	handle_t *handle;
	loff_t page_len;
	int err = 0;

	/*
	 * finish any pending end_io work so we won't run the risk of
	 * converting any truncated blocks to initialized later
	 */
	ext4_flush_completed_IO(inode);

	/*
	 * probably first extent we're gonna free will be last in block
	 */
	err = ext4_writepage_trans_blocks(inode);
	handle = ext4_journal_start(inode, err);
	if (IS_ERR(handle))
		return;		/* nothing journalled yet; bail silently */

	/* Zero the part of the last page that lies beyond the new EOF. */
	if (inode->i_size % PAGE_CACHE_SIZE != 0) {
		page_len = PAGE_CACHE_SIZE -
			(inode->i_size & (PAGE_CACHE_SIZE - 1));

		err = ext4_discard_partial_page_buffers(handle,
			mapping, inode->i_size, page_len, 0);

		if (err)
			goto out_stop;
	}

	/* Orphan-list the inode so a crash mid-truncate can be replayed. */
	if (ext4_orphan_add(handle, inode))
		goto out_stop;

	down_write(&EXT4_I(inode)->i_data_sem);
	ext4_ext_invalidate_cache(inode);

	ext4_discard_preallocations(inode);

	/*
	 * TODO: optimization is possible here.
	 * Probably we need not scan at all,
	 * because page truncation is enough.
	 */

	/* we have to know where to truncate from in crash case */
	EXT4_I(inode)->i_disksize = inode->i_size;
	ext4_mark_inode_dirty(handle, inode);

	/* First block fully beyond the new size (round i_size up). */
	last_block = (inode->i_size + sb->s_blocksize - 1)
			>> EXT4_BLOCK_SIZE_BITS(sb);
	err = ext4_ext_remove_space(inode, last_block);

	/* In a multi-transaction truncate, we only make the final
	 * transaction synchronous.
	 */
	if (IS_SYNC(inode))
		ext4_handle_sync(handle);

	up_write(&EXT4_I(inode)->i_data_sem);

out_stop:
	/*
	 * If this was a simple ftruncate() and the file will remain alive,
	 * then we need to clear up the orphan record which we created above.
	 * However, if this was a real unlink then we were called by
	 * ext4_delete_inode(), and we allow that function to clean up the
	 * orphan info for us.
	 */
	if (inode->i_nlink)
		ext4_orphan_del(handle, inode);

	inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
	ext4_mark_inode_dirty(handle, inode);
	ext4_journal_stop(handle);
}
3873a86c6181SAlex Tomas 
3874fd28784aSAneesh Kumar K.V static void ext4_falloc_update_inode(struct inode *inode,
3875fd28784aSAneesh Kumar K.V 				int mode, loff_t new_size, int update_ctime)
3876fd28784aSAneesh Kumar K.V {
3877fd28784aSAneesh Kumar K.V 	struct timespec now;
3878fd28784aSAneesh Kumar K.V 
3879fd28784aSAneesh Kumar K.V 	if (update_ctime) {
3880fd28784aSAneesh Kumar K.V 		now = current_fs_time(inode->i_sb);
3881fd28784aSAneesh Kumar K.V 		if (!timespec_equal(&inode->i_ctime, &now))
3882fd28784aSAneesh Kumar K.V 			inode->i_ctime = now;
3883fd28784aSAneesh Kumar K.V 	}
3884fd28784aSAneesh Kumar K.V 	/*
3885fd28784aSAneesh Kumar K.V 	 * Update only when preallocation was requested beyond
3886fd28784aSAneesh Kumar K.V 	 * the file size.
3887fd28784aSAneesh Kumar K.V 	 */
3888cf17fea6SAneesh Kumar K.V 	if (!(mode & FALLOC_FL_KEEP_SIZE)) {
3889cf17fea6SAneesh Kumar K.V 		if (new_size > i_size_read(inode))
3890fd28784aSAneesh Kumar K.V 			i_size_write(inode, new_size);
3891cf17fea6SAneesh Kumar K.V 		if (new_size > EXT4_I(inode)->i_disksize)
3892cf17fea6SAneesh Kumar K.V 			ext4_update_i_disksize(inode, new_size);
3893c8d46e41SJiaying Zhang 	} else {
3894c8d46e41SJiaying Zhang 		/*
3895c8d46e41SJiaying Zhang 		 * Mark that we allocate beyond EOF so the subsequent truncate
3896c8d46e41SJiaying Zhang 		 * can proceed even if the new size is the same as i_size.
3897c8d46e41SJiaying Zhang 		 */
3898c8d46e41SJiaying Zhang 		if (new_size > i_size_read(inode))
389912e9b892SDmitry Monakhov 			ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
3900fd28784aSAneesh Kumar K.V 	}
3901fd28784aSAneesh Kumar K.V 
3902fd28784aSAneesh Kumar K.V }
3903fd28784aSAneesh Kumar K.V 
3904a2df2a63SAmit Arora /*
39052fe17c10SChristoph Hellwig  * preallocate space for a file. This implements ext4's fallocate file
3906a2df2a63SAmit Arora  * operation, which gets called from sys_fallocate system call.
3907a2df2a63SAmit Arora  * For block-mapped files, posix_fallocate should fall back to the method
3908a2df2a63SAmit Arora  * of writing zeroes to the required new blocks (the same behavior which is
3909a2df2a63SAmit Arora  * expected for file systems which do not support fallocate() system call).
3910a2df2a63SAmit Arora  */
39112fe17c10SChristoph Hellwig long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
3912a2df2a63SAmit Arora {
39132fe17c10SChristoph Hellwig 	struct inode *inode = file->f_path.dentry->d_inode;
3914a2df2a63SAmit Arora 	handle_t *handle;
3915fd28784aSAneesh Kumar K.V 	loff_t new_size;
3916498e5f24STheodore Ts'o 	unsigned int max_blocks;
3917a2df2a63SAmit Arora 	int ret = 0;
3918a2df2a63SAmit Arora 	int ret2 = 0;
3919a2df2a63SAmit Arora 	int retries = 0;
39202ed88685STheodore Ts'o 	struct ext4_map_blocks map;
3921a2df2a63SAmit Arora 	unsigned int credits, blkbits = inode->i_blkbits;
3922a2df2a63SAmit Arora 
3923a2df2a63SAmit Arora 	/*
3924a2df2a63SAmit Arora 	 * currently supporting (pre)allocate mode for extent-based
3925a2df2a63SAmit Arora 	 * files _only_
3926a2df2a63SAmit Arora 	 */
392712e9b892SDmitry Monakhov 	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
3928a2df2a63SAmit Arora 		return -EOPNOTSUPP;
3929a2df2a63SAmit Arora 
3930a4bb6b64SAllison Henderson 	/* Return error if mode is not supported */
3931a4bb6b64SAllison Henderson 	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
3932a4bb6b64SAllison Henderson 		return -EOPNOTSUPP;
3933a4bb6b64SAllison Henderson 
3934a4bb6b64SAllison Henderson 	if (mode & FALLOC_FL_PUNCH_HOLE)
3935a4bb6b64SAllison Henderson 		return ext4_punch_hole(file, offset, len);
3936a4bb6b64SAllison Henderson 
39370562e0baSJiaying Zhang 	trace_ext4_fallocate_enter(inode, offset, len, mode);
39382ed88685STheodore Ts'o 	map.m_lblk = offset >> blkbits;
3939fd28784aSAneesh Kumar K.V 	/*
3940fd28784aSAneesh Kumar K.V 	 * We can't just convert len to max_blocks because
3941fd28784aSAneesh Kumar K.V 	 * If blocksize = 4096 offset = 3072 and len = 2048
3942fd28784aSAneesh Kumar K.V 	 */
3943a2df2a63SAmit Arora 	max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits)
39442ed88685STheodore Ts'o 		- map.m_lblk;
3945a2df2a63SAmit Arora 	/*
3946f3bd1f3fSMingming Cao 	 * credits to insert 1 extent into extent tree
3947a2df2a63SAmit Arora 	 */
3948f3bd1f3fSMingming Cao 	credits = ext4_chunk_trans_blocks(inode, max_blocks);
394955bd725aSAneesh Kumar K.V 	mutex_lock(&inode->i_mutex);
39506d19c42bSNikanth Karthikesan 	ret = inode_newsize_ok(inode, (len + offset));
39516d19c42bSNikanth Karthikesan 	if (ret) {
39526d19c42bSNikanth Karthikesan 		mutex_unlock(&inode->i_mutex);
39530562e0baSJiaying Zhang 		trace_ext4_fallocate_exit(inode, offset, max_blocks, ret);
39546d19c42bSNikanth Karthikesan 		return ret;
39556d19c42bSNikanth Karthikesan 	}
3956a2df2a63SAmit Arora retry:
3957a2df2a63SAmit Arora 	while (ret >= 0 && ret < max_blocks) {
39582ed88685STheodore Ts'o 		map.m_lblk = map.m_lblk + ret;
39592ed88685STheodore Ts'o 		map.m_len = max_blocks = max_blocks - ret;
3960a2df2a63SAmit Arora 		handle = ext4_journal_start(inode, credits);
3961a2df2a63SAmit Arora 		if (IS_ERR(handle)) {
3962a2df2a63SAmit Arora 			ret = PTR_ERR(handle);
3963a2df2a63SAmit Arora 			break;
3964a2df2a63SAmit Arora 		}
39652ed88685STheodore Ts'o 		ret = ext4_map_blocks(handle, inode, &map,
3966556b27abSVivek Haldar 				      EXT4_GET_BLOCKS_CREATE_UNINIT_EXT |
3967556b27abSVivek Haldar 				      EXT4_GET_BLOCKS_NO_NORMALIZE);
3968221879c9SAneesh Kumar K.V 		if (ret <= 0) {
39692c98615dSAneesh Kumar K.V #ifdef EXT4FS_DEBUG
39702c98615dSAneesh Kumar K.V 			WARN_ON(ret <= 0);
3971e35fd660STheodore Ts'o 			printk(KERN_ERR "%s: ext4_ext_map_blocks "
39722c98615dSAneesh Kumar K.V 				    "returned error inode#%lu, block=%u, "
39739fd9784cSThadeu Lima de Souza Cascardo 				    "max_blocks=%u", __func__,
3974a6371b63SKazuya Mio 				    inode->i_ino, map.m_lblk, max_blocks);
39752c98615dSAneesh Kumar K.V #endif
3976a2df2a63SAmit Arora 			ext4_mark_inode_dirty(handle, inode);
3977a2df2a63SAmit Arora 			ret2 = ext4_journal_stop(handle);
3978a2df2a63SAmit Arora 			break;
3979a2df2a63SAmit Arora 		}
39802ed88685STheodore Ts'o 		if ((map.m_lblk + ret) >= (EXT4_BLOCK_ALIGN(offset + len,
3981fd28784aSAneesh Kumar K.V 						blkbits) >> blkbits))
3982fd28784aSAneesh Kumar K.V 			new_size = offset + len;
3983fd28784aSAneesh Kumar K.V 		else
398429ae07b7SUtako Kusaka 			new_size = ((loff_t) map.m_lblk + ret) << blkbits;
3985a2df2a63SAmit Arora 
3986fd28784aSAneesh Kumar K.V 		ext4_falloc_update_inode(inode, mode, new_size,
39872ed88685STheodore Ts'o 					 (map.m_flags & EXT4_MAP_NEW));
3988a2df2a63SAmit Arora 		ext4_mark_inode_dirty(handle, inode);
3989a2df2a63SAmit Arora 		ret2 = ext4_journal_stop(handle);
3990a2df2a63SAmit Arora 		if (ret2)
3991a2df2a63SAmit Arora 			break;
3992a2df2a63SAmit Arora 	}
3993fd28784aSAneesh Kumar K.V 	if (ret == -ENOSPC &&
3994fd28784aSAneesh Kumar K.V 			ext4_should_retry_alloc(inode->i_sb, &retries)) {
3995fd28784aSAneesh Kumar K.V 		ret = 0;
3996a2df2a63SAmit Arora 		goto retry;
3997a2df2a63SAmit Arora 	}
399855bd725aSAneesh Kumar K.V 	mutex_unlock(&inode->i_mutex);
39990562e0baSJiaying Zhang 	trace_ext4_fallocate_exit(inode, offset, max_blocks,
40000562e0baSJiaying Zhang 				ret > 0 ? ret2 : ret);
4001a2df2a63SAmit Arora 	return ret > 0 ? ret2 : ret;
4002a2df2a63SAmit Arora }
40036873fa0dSEric Sandeen 
40046873fa0dSEric Sandeen /*
 * This function converts a range of blocks to written extents.
 * The caller of this function will pass the start offset and the size.
 * All unwritten extents within this range will be converted to
 * written extents.
40090031462bSMingming Cao  *
40100031462bSMingming Cao  * This function is called from the direct IO end io call back
40110031462bSMingming Cao  * function, to convert the fallocated extents after IO is completed.
4012109f5565SMingming  * Returns 0 on success.
40130031462bSMingming Cao  */
40140031462bSMingming Cao int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
4015a1de02dcSEric Sandeen 				    ssize_t len)
40160031462bSMingming Cao {
40170031462bSMingming Cao 	handle_t *handle;
40180031462bSMingming Cao 	unsigned int max_blocks;
40190031462bSMingming Cao 	int ret = 0;
40200031462bSMingming Cao 	int ret2 = 0;
40212ed88685STheodore Ts'o 	struct ext4_map_blocks map;
40220031462bSMingming Cao 	unsigned int credits, blkbits = inode->i_blkbits;
40230031462bSMingming Cao 
40242ed88685STheodore Ts'o 	map.m_lblk = offset >> blkbits;
40250031462bSMingming Cao 	/*
40260031462bSMingming Cao 	 * We can't just convert len to max_blocks because
40270031462bSMingming Cao 	 * If blocksize = 4096 offset = 3072 and len = 2048
40280031462bSMingming Cao 	 */
40292ed88685STheodore Ts'o 	max_blocks = ((EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) -
40302ed88685STheodore Ts'o 		      map.m_lblk);
40310031462bSMingming Cao 	/*
40320031462bSMingming Cao 	 * credits to insert 1 extent into extent tree
40330031462bSMingming Cao 	 */
40340031462bSMingming Cao 	credits = ext4_chunk_trans_blocks(inode, max_blocks);
40350031462bSMingming Cao 	while (ret >= 0 && ret < max_blocks) {
40362ed88685STheodore Ts'o 		map.m_lblk += ret;
40372ed88685STheodore Ts'o 		map.m_len = (max_blocks -= ret);
40380031462bSMingming Cao 		handle = ext4_journal_start(inode, credits);
40390031462bSMingming Cao 		if (IS_ERR(handle)) {
40400031462bSMingming Cao 			ret = PTR_ERR(handle);
40410031462bSMingming Cao 			break;
40420031462bSMingming Cao 		}
40432ed88685STheodore Ts'o 		ret = ext4_map_blocks(handle, inode, &map,
4044c7064ef1SJiaying Zhang 				      EXT4_GET_BLOCKS_IO_CONVERT_EXT);
40450031462bSMingming Cao 		if (ret <= 0) {
40460031462bSMingming Cao 			WARN_ON(ret <= 0);
4047e35fd660STheodore Ts'o 			printk(KERN_ERR "%s: ext4_ext_map_blocks "
40480031462bSMingming Cao 				    "returned error inode#%lu, block=%u, "
40490031462bSMingming Cao 				    "max_blocks=%u", __func__,
40502ed88685STheodore Ts'o 				    inode->i_ino, map.m_lblk, map.m_len);
40510031462bSMingming Cao 		}
40520031462bSMingming Cao 		ext4_mark_inode_dirty(handle, inode);
40530031462bSMingming Cao 		ret2 = ext4_journal_stop(handle);
40540031462bSMingming Cao 		if (ret <= 0 || ret2 )
40550031462bSMingming Cao 			break;
40560031462bSMingming Cao 	}
40570031462bSMingming Cao 	return ret > 0 ? ret2 : ret;
40580031462bSMingming Cao }
40596d9c85ebSYongqiang Yang 
40600031462bSMingming Cao /*
40616873fa0dSEric Sandeen  * Callback function called for each extent to gather FIEMAP information.
40626873fa0dSEric Sandeen  */
/*
 * ext4_ext_fiemap_cb() - per-extent callback used to fill FIEMAP data
 * @inode: inode being mapped
 * @next:  logical block following this extent (EXT_MAX_BLOCKS at EOF)
 * @newex: cached extent info; ec_start == 0 means "no on-disk extent here"
 * @ex:    on-disk extent, or NULL; used only to detect unwritten extents
 * @data:  struct fiemap_extent_info * supplied by the fiemap ioctl
 *
 * Reports the extent via fiemap_fill_next_extent().  When there is no
 * on-disk extent (ec_start == 0), scans dirty pagecache pages for
 * delayed-allocation buffers so delalloc ranges are reported too.
 * Returns EXT_CONTINUE/EXT_BREAK to the extent walker, or a -ve errno.
 */
static int ext4_ext_fiemap_cb(struct inode *inode, ext4_lblk_t next,
		       struct ext4_ext_cache *newex, struct ext4_extent *ex,
		       void *data)
{
	__u64	logical;
	__u64	physical;
	__u64	length;
	__u32	flags = 0;
	int		ret = 0;
	struct fiemap_extent_info *fieinfo = data;
	unsigned char blksize_bits;

	blksize_bits = inode->i_sb->s_blocksize_bits;
	logical = (__u64)newex->ec_block << blksize_bits;

	if (newex->ec_start == 0) {
		/*
		 * No extent in extent-tree contains block @newex->ec_start,
		 * then the block may stay in 1)a hole or 2)delayed-extent.
		 *
		 * Holes or delayed-extents are processed as follows.
		 * 1. lookup dirty pages with specified range in pagecache.
		 *    If no page is got, then there is no delayed-extent and
		 *    return with EXT_CONTINUE.
		 * 2. find the 1st mapped buffer,
		 * 3. check if the mapped buffer is both in the request range
		 *    and a delayed buffer. If not, there is no delayed-extent,
		 *    then return.
		 * 4. a delayed-extent is found, the extent will be collected.
		 */
		ext4_lblk_t	end = 0;
		pgoff_t		last_offset;
		pgoff_t		offset;
		pgoff_t		index;
		pgoff_t		start_index = 0;
		struct page	**pages = NULL;
		struct buffer_head *bh = NULL;
		struct buffer_head *head = NULL;
		/* one page's worth of page pointers per batch */
		unsigned int nr_pages = PAGE_SIZE / sizeof(struct page *);

		pages = kmalloc(PAGE_SIZE, GFP_KERNEL);
		if (pages == NULL)
			return -ENOMEM;

		offset = logical >> PAGE_SHIFT;
repeat:
		/* Fetch the next batch of dirty pages starting at @offset. */
		last_offset = offset;
		head = NULL;
		ret = find_get_pages_tag(inode->i_mapping, &offset,
					PAGECACHE_TAG_DIRTY, nr_pages, pages);

		if (!(flags & FIEMAP_EXTENT_DELALLOC)) {
			/* First time, try to find a mapped buffer. */
			if (ret == 0) {
out:
				/* common exit: drop page refs and report hole */
				for (index = 0; index < ret; index++)
					page_cache_release(pages[index]);
				/* just a hole. */
				kfree(pages);
				return EXT_CONTINUE;
			}
			index = 0;

next_page:
			/* Try to find the 1st mapped buffer. */
			end = ((__u64)pages[index]->index << PAGE_SHIFT) >>
				  blksize_bits;
			if (!page_has_buffers(pages[index]))
				goto out;
			head = page_buffers(pages[index]);
			if (!head)
				goto out;

			index++;
			bh = head;
			do {
				if (end >= newex->ec_block +
					newex->ec_len)
					/* The buffer is out of
					 * the request range.
					 */
					goto out;

				if (buffer_mapped(bh) &&
				    end >= newex->ec_block) {
					start_index = index - 1;
					/* get the 1st mapped buffer. */
					goto found_mapped_buffer;
				}

				bh = bh->b_this_page;
				end++;
			} while (bh != head);

			/* No mapped buffer in the range found in this page,
			 * We need to look up next page.
			 */
			if (index >= ret) {
				/* There is no page left, but we need to limit
				 * newex->ec_len.
				 */
				newex->ec_len = end - newex->ec_block;
				goto out;
			}
			goto next_page;
		} else {
			/*Find contiguous delayed buffers. */
			if (ret > 0 && pages[0]->index == last_offset)
				head = page_buffers(pages[0]);
			bh = head;
			index = 1;
			start_index = 0;
		}

found_mapped_buffer:
		if (bh != NULL && buffer_delay(bh)) {
			/* 1st or contiguous delayed buffer found. */
			if (!(flags & FIEMAP_EXTENT_DELALLOC)) {
				/*
				 * 1st delayed buffer found, record
				 * the start of extent.
				 */
				flags |= FIEMAP_EXTENT_DELALLOC;
				newex->ec_block = end;
				logical = (__u64)end << blksize_bits;
			}
			/* Find contiguous delayed buffers. */
			do {
				if (!buffer_delay(bh))
					goto found_delayed_extent;
				bh = bh->b_this_page;
				end++;
			} while (bh != head);

			/* Extend across the remaining pages in the batch. */
			for (; index < ret; index++) {
				if (!page_has_buffers(pages[index])) {
					bh = NULL;
					break;
				}
				head = page_buffers(pages[index]);
				if (!head) {
					bh = NULL;
					break;
				}

				if (pages[index]->index !=
				    pages[start_index]->index + index
				    - start_index) {
					/* Blocks are not contiguous. */
					bh = NULL;
					break;
				}
				bh = head;
				do {
					if (!buffer_delay(bh))
						/* Delayed-extent ends. */
						goto found_delayed_extent;
					bh = bh->b_this_page;
					end++;
				} while (bh != head);
			}
		} else if (!(flags & FIEMAP_EXTENT_DELALLOC))
			/* a hole found. */
			goto out;

found_delayed_extent:
		newex->ec_len = min(end - newex->ec_block,
						(ext4_lblk_t)EXT_INIT_MAX_LEN);
		if (ret == nr_pages && bh != NULL &&
			newex->ec_len < EXT_INIT_MAX_LEN &&
			buffer_delay(bh)) {
			/* Have not collected an extent and continue. */
			for (index = 0; index < ret; index++)
				page_cache_release(pages[index]);
			goto repeat;
		}

		for (index = 0; index < ret; index++)
			page_cache_release(pages[index]);
		kfree(pages);
	}

	physical = (__u64)newex->ec_start << blksize_bits;
	length =   (__u64)newex->ec_len << blksize_bits;

	if (ex && ext4_ext_is_uninitialized(ex))
		flags |= FIEMAP_EXTENT_UNWRITTEN;

	if (next == EXT_MAX_BLOCKS)
		flags |= FIEMAP_EXTENT_LAST;

	/* fiemap_fill_next_extent() returns 1 when the user buffer is full */
	ret = fiemap_fill_next_extent(fieinfo, logical, physical,
					length, flags);
	if (ret < 0)
		return ret;
	if (ret == 1)
		return EXT_BREAK;
	return EXT_CONTINUE;
}
42626873fa0dSEric Sandeen /* fiemap flags we can handle specified here */
42636873fa0dSEric Sandeen #define EXT4_FIEMAP_FLAGS	(FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
42646873fa0dSEric Sandeen 
42653a06d778SAneesh Kumar K.V static int ext4_xattr_fiemap(struct inode *inode,
42663a06d778SAneesh Kumar K.V 				struct fiemap_extent_info *fieinfo)
42676873fa0dSEric Sandeen {
42686873fa0dSEric Sandeen 	__u64 physical = 0;
42696873fa0dSEric Sandeen 	__u64 length;
42706873fa0dSEric Sandeen 	__u32 flags = FIEMAP_EXTENT_LAST;
42716873fa0dSEric Sandeen 	int blockbits = inode->i_sb->s_blocksize_bits;
42726873fa0dSEric Sandeen 	int error = 0;
42736873fa0dSEric Sandeen 
42746873fa0dSEric Sandeen 	/* in-inode? */
427519f5fb7aSTheodore Ts'o 	if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) {
42766873fa0dSEric Sandeen 		struct ext4_iloc iloc;
42776873fa0dSEric Sandeen 		int offset;	/* offset of xattr in inode */
42786873fa0dSEric Sandeen 
42796873fa0dSEric Sandeen 		error = ext4_get_inode_loc(inode, &iloc);
42806873fa0dSEric Sandeen 		if (error)
42816873fa0dSEric Sandeen 			return error;
42826873fa0dSEric Sandeen 		physical = iloc.bh->b_blocknr << blockbits;
42836873fa0dSEric Sandeen 		offset = EXT4_GOOD_OLD_INODE_SIZE +
42846873fa0dSEric Sandeen 				EXT4_I(inode)->i_extra_isize;
42856873fa0dSEric Sandeen 		physical += offset;
42866873fa0dSEric Sandeen 		length = EXT4_SB(inode->i_sb)->s_inode_size - offset;
42876873fa0dSEric Sandeen 		flags |= FIEMAP_EXTENT_DATA_INLINE;
4288fd2dd9fbSCurt Wohlgemuth 		brelse(iloc.bh);
42896873fa0dSEric Sandeen 	} else { /* external block */
42906873fa0dSEric Sandeen 		physical = EXT4_I(inode)->i_file_acl << blockbits;
42916873fa0dSEric Sandeen 		length = inode->i_sb->s_blocksize;
42926873fa0dSEric Sandeen 	}
42936873fa0dSEric Sandeen 
42946873fa0dSEric Sandeen 	if (physical)
42956873fa0dSEric Sandeen 		error = fiemap_fill_next_extent(fieinfo, 0, physical,
42966873fa0dSEric Sandeen 						length, flags);
42976873fa0dSEric Sandeen 	return (error < 0 ? error : 0);
42986873fa0dSEric Sandeen }
42996873fa0dSEric Sandeen 
4300a4bb6b64SAllison Henderson /*
4301a4bb6b64SAllison Henderson  * ext4_ext_punch_hole
4302a4bb6b64SAllison Henderson  *
4303a4bb6b64SAllison Henderson  * Punches a hole of "length" bytes in a file starting
4304a4bb6b64SAllison Henderson  * at byte "offset"
4305a4bb6b64SAllison Henderson  *
4306a4bb6b64SAllison Henderson  * @inode:  The inode of the file to punch a hole in
4307a4bb6b64SAllison Henderson  * @offset: The starting byte offset of the hole
4308a4bb6b64SAllison Henderson  * @length: The length of the hole
4309a4bb6b64SAllison Henderson  *
 * Returns 0 on success or a negative error code on failure
4311a4bb6b64SAllison Henderson  */
int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
{
	struct inode *inode = file->f_path.dentry->d_inode;
	struct super_block *sb = inode->i_sb;
	struct ext4_ext_cache cache_ex;
	ext4_lblk_t first_block, last_block, num_blocks, iblock, max_blocks;
	struct address_space *mapping = inode->i_mapping;
	struct ext4_map_blocks map;
	handle_t *handle;
	loff_t first_page, last_page, page_len;
	loff_t first_page_offset, last_page_offset;
	int ret, credits, blocks_released, err = 0;

	/* No need to punch hole beyond i_size */
	if (offset >= inode->i_size)
		return 0;

	/*
	 * If the hole extends beyond i_size, set the hole
	 * to end after the page that contains i_size
	 */
	if (offset + length > inode->i_size) {
		length = inode->i_size +
		   PAGE_CACHE_SIZE - (inode->i_size & (PAGE_CACHE_SIZE - 1)) -
		   offset;
	}

	/*
	 * first_block is rounded up and last_block rounded down, so only
	 * filesystem blocks fully inside the hole get deallocated; the
	 * partial blocks at either edge are zeroed in place further down.
	 */
	first_block = (offset + sb->s_blocksize - 1) >>
		EXT4_BLOCK_SIZE_BITS(sb);
	last_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb);

	/* Same round-up/round-down split for whole page-cache pages. */
	first_page = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
	last_page = (offset + length) >> PAGE_CACHE_SHIFT;

	first_page_offset = first_page << PAGE_CACHE_SHIFT;
	last_page_offset = last_page << PAGE_CACHE_SHIFT;

	/*
	 * Write out all dirty pages to avoid race conditions
	 * Then release them.
	 */
	if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
		err = filemap_write_and_wait_range(mapping,
			offset, offset + length - 1);

		if (err)
			return err;
	}

	/* Now release the pages */
	if (last_page_offset > first_page_offset) {
		truncate_inode_pages_range(mapping, first_page_offset,
					   last_page_offset-1);
	}

	/* finish any pending end_io work */
	ext4_flush_completed_IO(inode);

	credits = ext4_writepage_trans_blocks(inode);
	handle = ext4_journal_start(inode, credits);
	if (IS_ERR(handle))
		return PTR_ERR(handle);

	/*
	 * Put the inode on the orphan list so it is cleaned up if we
	 * crash mid-punch; removed again unconditionally at "out:".
	 */
	err = ext4_orphan_add(handle, inode);
	if (err)
		goto out;

	/*
	 * Now we need to zero out the non-page-aligned data in the
	 * pages at the start and tail of the hole, and unmap the buffer
	 * heads for the block aligned regions of the page that were
	 * completely zeroed.
	 */
	if (first_page > last_page) {
		/*
		 * If the file space being truncated is contained within a page
		 * just zero out and unmap the middle of that page
		 * (first_page > last_page only when the rounded-up start page
		 * passes the rounded-down end page, i.e. a sub-page hole).
		 */
		err = ext4_discard_partial_page_buffers(handle,
			mapping, offset, length, 0);

		if (err)
			goto out;
	} else {
		/*
		 * zero out and unmap the partial page that contains
		 * the start of the hole
		 */
		page_len  = first_page_offset - offset;
		if (page_len > 0) {
			err = ext4_discard_partial_page_buffers(handle, mapping,
						   offset, page_len, 0);
			if (err)
				goto out;
		}

		/*
		 * zero out and unmap the partial page that contains
		 * the end of the hole
		 */
		page_len = offset + length - last_page_offset;
		if (page_len > 0) {
			err = ext4_discard_partial_page_buffers(handle, mapping,
					last_page_offset, page_len, 0);
			if (err)
				goto out;
		}
	}


	/*
	 * If i_size is contained in the last page, we need to
	 * unmap and zero the partial page after i_size
	 */
	if (inode->i_size >> PAGE_CACHE_SHIFT == last_page &&
	   inode->i_size % PAGE_CACHE_SIZE != 0) {

		page_len = PAGE_CACHE_SIZE -
			(inode->i_size & (PAGE_CACHE_SIZE - 1));

		if (page_len > 0) {
			err = ext4_discard_partial_page_buffers(handle,
			  mapping, inode->i_size, page_len, 0);

			if (err)
				goto out;
		}
	}

	/* If there are no blocks to remove, return now */
	if (first_block >= last_block)
		goto out;

	/* Serialize against readers/writers of the extent tree. */
	down_write(&EXT4_I(inode)->i_data_sem);
	ext4_ext_invalidate_cache(inode);
	ext4_discard_preallocations(inode);

	/*
	 * Loop over all the blocks and identify blocks
	 * that need to be punched out
	 */
	iblock = first_block;
	blocks_released = 0;
	while (iblock < last_block) {
		max_blocks = last_block - iblock;
		num_blocks = 1;
		memset(&map, 0, sizeof(map));
		map.m_lblk = iblock;
		map.m_len = max_blocks;
		/* ret > 0: blocks punched; 0: hit a hole; < 0: error */
		ret = ext4_ext_map_blocks(handle, inode, &map,
			EXT4_GET_BLOCKS_PUNCH_OUT_EXT);

		if (ret > 0) {
			blocks_released += ret;
			num_blocks = ret;
		} else if (ret == 0) {
			/*
			 * If map blocks could not find the block,
			 * then it is in a hole.  If the hole was
			 * not already cached, then map blocks should
			 * put it in the cache.  So we can get the hole
			 * out of the cache
			 */
			memset(&cache_ex, 0, sizeof(cache_ex));
			if ((ext4_ext_check_cache(inode, iblock, &cache_ex)) &&
				!cache_ex.ec_start) {

				/* The hole is cached; skip past it */
				num_blocks = cache_ex.ec_block +
				cache_ex.ec_len - iblock;

			} else {
				/* The block could not be identified */
				err = -EIO;
				break;
			}
		} else {
			/* Map blocks error */
			err = ret;
			break;
		}

		if (num_blocks == 0) {
			/* This condition should never happen */
			ext_debug("Block lookup failed");
			err = -EIO;
			break;
		}

		iblock += num_blocks;
	}

	if (blocks_released > 0) {
		ext4_ext_invalidate_cache(inode);
		ext4_discard_preallocations(inode);
	}

	if (IS_SYNC(inode))
		ext4_handle_sync(handle);

	up_write(&EXT4_I(inode)->i_data_sem);

out:
	/* Drop from the orphan list and log timestamps, even on error. */
	ext4_orphan_del(handle, inode);
	inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
	ext4_mark_inode_dirty(handle, inode);
	ext4_journal_stop(handle);
	return err;
}
45216873fa0dSEric Sandeen int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
45226873fa0dSEric Sandeen 		__u64 start, __u64 len)
45236873fa0dSEric Sandeen {
45246873fa0dSEric Sandeen 	ext4_lblk_t start_blk;
45256873fa0dSEric Sandeen 	int error = 0;
45266873fa0dSEric Sandeen 
45276873fa0dSEric Sandeen 	/* fallback to generic here if not in extents fmt */
452812e9b892SDmitry Monakhov 	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
45296873fa0dSEric Sandeen 		return generic_block_fiemap(inode, fieinfo, start, len,
45306873fa0dSEric Sandeen 			ext4_get_block);
45316873fa0dSEric Sandeen 
45326873fa0dSEric Sandeen 	if (fiemap_check_flags(fieinfo, EXT4_FIEMAP_FLAGS))
45336873fa0dSEric Sandeen 		return -EBADR;
45346873fa0dSEric Sandeen 
45356873fa0dSEric Sandeen 	if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
45366873fa0dSEric Sandeen 		error = ext4_xattr_fiemap(inode, fieinfo);
45376873fa0dSEric Sandeen 	} else {
4538aca92ff6SLeonard Michlmayr 		ext4_lblk_t len_blks;
4539aca92ff6SLeonard Michlmayr 		__u64 last_blk;
4540aca92ff6SLeonard Michlmayr 
45416873fa0dSEric Sandeen 		start_blk = start >> inode->i_sb->s_blocksize_bits;
4542aca92ff6SLeonard Michlmayr 		last_blk = (start + len - 1) >> inode->i_sb->s_blocksize_bits;
4543f17722f9SLukas Czerner 		if (last_blk >= EXT_MAX_BLOCKS)
4544f17722f9SLukas Czerner 			last_blk = EXT_MAX_BLOCKS-1;
4545aca92ff6SLeonard Michlmayr 		len_blks = ((ext4_lblk_t) last_blk) - start_blk + 1;
45466873fa0dSEric Sandeen 
45476873fa0dSEric Sandeen 		/*
45486873fa0dSEric Sandeen 		 * Walk the extent tree gathering extent information.
45496873fa0dSEric Sandeen 		 * ext4_ext_fiemap_cb will push extents back to user.
45506873fa0dSEric Sandeen 		 */
45516873fa0dSEric Sandeen 		error = ext4_ext_walk_space(inode, start_blk, len_blks,
45526873fa0dSEric Sandeen 					  ext4_ext_fiemap_cb, fieinfo);
45536873fa0dSEric Sandeen 	}
45546873fa0dSEric Sandeen 
45556873fa0dSEric Sandeen 	return error;
45566873fa0dSEric Sandeen }
4557