xref: /openbmc/linux/fs/ocfs2/extent_map.c (revision fa60ce2c)
1921a3d4dSThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only
2*fa60ce2cSMasahiro Yamada /*
3ccd979bdSMark Fasheh  * extent_map.c
4ccd979bdSMark Fasheh  *
5363041a5SMark Fasheh  * Block/Cluster mapping functions
6ccd979bdSMark Fasheh  *
7ccd979bdSMark Fasheh  * Copyright (C) 2004 Oracle.  All rights reserved.
8ccd979bdSMark Fasheh  */
9ccd979bdSMark Fasheh 
10ccd979bdSMark Fasheh #include <linux/fs.h>
11ccd979bdSMark Fasheh #include <linux/init.h>
125a0e3ad6STejun Heo #include <linux/slab.h>
13ccd979bdSMark Fasheh #include <linux/types.h>
1400dc417fSMark Fasheh #include <linux/fiemap.h>
15ccd979bdSMark Fasheh 
16ccd979bdSMark Fasheh #include <cluster/masklog.h>
17ccd979bdSMark Fasheh 
18ccd979bdSMark Fasheh #include "ocfs2.h"
19ccd979bdSMark Fasheh 
20363041a5SMark Fasheh #include "alloc.h"
2100dc417fSMark Fasheh #include "dlmglue.h"
22ccd979bdSMark Fasheh #include "extent_map.h"
23ccd979bdSMark Fasheh #include "inode.h"
24ccd979bdSMark Fasheh #include "super.h"
2586239d59STristan Ye #include "symlink.h"
26ac604d3cSGang He #include "aops.h"
27a716357cSTao Ma #include "ocfs2_trace.h"
28ccd979bdSMark Fasheh 
29ccd979bdSMark Fasheh #include "buffer_head_io.h"
30ccd979bdSMark Fasheh 
31ccd979bdSMark Fasheh /*
3283418978SMark Fasheh  * The extent caching implementation is intentionally trivial.
3383418978SMark Fasheh  *
3483418978SMark Fasheh  * We only cache a small number of extents stored directly on the
3583418978SMark Fasheh  * inode, so linear order operations are acceptable. If we ever want
3683418978SMark Fasheh  * to increase the size of the extent map, then these algorithms must
3783418978SMark Fasheh  * get smarter.
3883418978SMark Fasheh  */
3983418978SMark Fasheh 
ocfs2_extent_map_init(struct inode * inode)4083418978SMark Fasheh void ocfs2_extent_map_init(struct inode *inode)
4183418978SMark Fasheh {
4283418978SMark Fasheh 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
4383418978SMark Fasheh 
4483418978SMark Fasheh 	oi->ip_extent_map.em_num_items = 0;
4583418978SMark Fasheh 	INIT_LIST_HEAD(&oi->ip_extent_map.em_list);
4683418978SMark Fasheh }
4783418978SMark Fasheh 
__ocfs2_extent_map_lookup(struct ocfs2_extent_map * em,unsigned int cpos,struct ocfs2_extent_map_item ** ret_emi)4883418978SMark Fasheh static void __ocfs2_extent_map_lookup(struct ocfs2_extent_map *em,
4983418978SMark Fasheh 				      unsigned int cpos,
5083418978SMark Fasheh 				      struct ocfs2_extent_map_item **ret_emi)
5183418978SMark Fasheh {
5283418978SMark Fasheh 	unsigned int range;
5383418978SMark Fasheh 	struct ocfs2_extent_map_item *emi;
5483418978SMark Fasheh 
5583418978SMark Fasheh 	*ret_emi = NULL;
5683418978SMark Fasheh 
5783418978SMark Fasheh 	list_for_each_entry(emi, &em->em_list, ei_list) {
5883418978SMark Fasheh 		range = emi->ei_cpos + emi->ei_clusters;
5983418978SMark Fasheh 
6083418978SMark Fasheh 		if (cpos >= emi->ei_cpos && cpos < range) {
6183418978SMark Fasheh 			list_move(&emi->ei_list, &em->em_list);
6283418978SMark Fasheh 
6383418978SMark Fasheh 			*ret_emi = emi;
6483418978SMark Fasheh 			break;
6583418978SMark Fasheh 		}
6683418978SMark Fasheh 	}
6783418978SMark Fasheh }
6883418978SMark Fasheh 
ocfs2_extent_map_lookup(struct inode * inode,unsigned int cpos,unsigned int * phys,unsigned int * len,unsigned int * flags)6983418978SMark Fasheh static int ocfs2_extent_map_lookup(struct inode *inode, unsigned int cpos,
7083418978SMark Fasheh 				   unsigned int *phys, unsigned int *len,
7183418978SMark Fasheh 				   unsigned int *flags)
7283418978SMark Fasheh {
7383418978SMark Fasheh 	unsigned int coff;
7483418978SMark Fasheh 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
7583418978SMark Fasheh 	struct ocfs2_extent_map_item *emi;
7683418978SMark Fasheh 
7783418978SMark Fasheh 	spin_lock(&oi->ip_lock);
7883418978SMark Fasheh 
7983418978SMark Fasheh 	__ocfs2_extent_map_lookup(&oi->ip_extent_map, cpos, &emi);
8083418978SMark Fasheh 	if (emi) {
8183418978SMark Fasheh 		coff = cpos - emi->ei_cpos;
8283418978SMark Fasheh 		*phys = emi->ei_phys + coff;
8383418978SMark Fasheh 		if (len)
8483418978SMark Fasheh 			*len = emi->ei_clusters - coff;
8583418978SMark Fasheh 		if (flags)
8683418978SMark Fasheh 			*flags = emi->ei_flags;
8783418978SMark Fasheh 	}
8883418978SMark Fasheh 
8983418978SMark Fasheh 	spin_unlock(&oi->ip_lock);
9083418978SMark Fasheh 
9183418978SMark Fasheh 	if (emi == NULL)
9283418978SMark Fasheh 		return -ENOENT;
9383418978SMark Fasheh 
9483418978SMark Fasheh 	return 0;
9583418978SMark Fasheh }
9683418978SMark Fasheh 
9783418978SMark Fasheh /*
9883418978SMark Fasheh  * Forget about all clusters equal to or greater than cpos.
9983418978SMark Fasheh  */
ocfs2_extent_map_trunc(struct inode * inode,unsigned int cpos)10083418978SMark Fasheh void ocfs2_extent_map_trunc(struct inode *inode, unsigned int cpos)
10183418978SMark Fasheh {
102800deef3SChristoph Hellwig 	struct ocfs2_extent_map_item *emi, *n;
10383418978SMark Fasheh 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
10483418978SMark Fasheh 	struct ocfs2_extent_map *em = &oi->ip_extent_map;
10583418978SMark Fasheh 	LIST_HEAD(tmp_list);
10683418978SMark Fasheh 	unsigned int range;
10783418978SMark Fasheh 
10883418978SMark Fasheh 	spin_lock(&oi->ip_lock);
109800deef3SChristoph Hellwig 	list_for_each_entry_safe(emi, n, &em->em_list, ei_list) {
11083418978SMark Fasheh 		if (emi->ei_cpos >= cpos) {
11183418978SMark Fasheh 			/* Full truncate of this record. */
11283418978SMark Fasheh 			list_move(&emi->ei_list, &tmp_list);
11383418978SMark Fasheh 			BUG_ON(em->em_num_items == 0);
11483418978SMark Fasheh 			em->em_num_items--;
11583418978SMark Fasheh 			continue;
11683418978SMark Fasheh 		}
11783418978SMark Fasheh 
11883418978SMark Fasheh 		range = emi->ei_cpos + emi->ei_clusters;
11983418978SMark Fasheh 		if (range > cpos) {
12083418978SMark Fasheh 			/* Partial truncate */
12183418978SMark Fasheh 			emi->ei_clusters = cpos - emi->ei_cpos;
12283418978SMark Fasheh 		}
12383418978SMark Fasheh 	}
12483418978SMark Fasheh 	spin_unlock(&oi->ip_lock);
12583418978SMark Fasheh 
126800deef3SChristoph Hellwig 	list_for_each_entry_safe(emi, n, &tmp_list, ei_list) {
12783418978SMark Fasheh 		list_del(&emi->ei_list);
12883418978SMark Fasheh 		kfree(emi);
12983418978SMark Fasheh 	}
13083418978SMark Fasheh }
13183418978SMark Fasheh 
13283418978SMark Fasheh /*
13383418978SMark Fasheh  * Is any part of emi2 contained within emi1
13483418978SMark Fasheh  */
ocfs2_ei_is_contained(struct ocfs2_extent_map_item * emi1,struct ocfs2_extent_map_item * emi2)13583418978SMark Fasheh static int ocfs2_ei_is_contained(struct ocfs2_extent_map_item *emi1,
13683418978SMark Fasheh 				 struct ocfs2_extent_map_item *emi2)
13783418978SMark Fasheh {
13883418978SMark Fasheh 	unsigned int range1, range2;
13983418978SMark Fasheh 
14083418978SMark Fasheh 	/*
14183418978SMark Fasheh 	 * Check if logical start of emi2 is inside emi1
14283418978SMark Fasheh 	 */
14383418978SMark Fasheh 	range1 = emi1->ei_cpos + emi1->ei_clusters;
14483418978SMark Fasheh 	if (emi2->ei_cpos >= emi1->ei_cpos && emi2->ei_cpos < range1)
14583418978SMark Fasheh 		return 1;
14683418978SMark Fasheh 
14783418978SMark Fasheh 	/*
14883418978SMark Fasheh 	 * Check if logical end of emi2 is inside emi1
14983418978SMark Fasheh 	 */
15083418978SMark Fasheh 	range2 = emi2->ei_cpos + emi2->ei_clusters;
15183418978SMark Fasheh 	if (range2 > emi1->ei_cpos && range2 <= range1)
15283418978SMark Fasheh 		return 1;
15383418978SMark Fasheh 
15483418978SMark Fasheh 	return 0;
15583418978SMark Fasheh }
15683418978SMark Fasheh 
ocfs2_copy_emi_fields(struct ocfs2_extent_map_item * dest,struct ocfs2_extent_map_item * src)15783418978SMark Fasheh static void ocfs2_copy_emi_fields(struct ocfs2_extent_map_item *dest,
15883418978SMark Fasheh 				  struct ocfs2_extent_map_item *src)
15983418978SMark Fasheh {
16083418978SMark Fasheh 	dest->ei_cpos = src->ei_cpos;
16183418978SMark Fasheh 	dest->ei_phys = src->ei_phys;
16283418978SMark Fasheh 	dest->ei_clusters = src->ei_clusters;
16383418978SMark Fasheh 	dest->ei_flags = src->ei_flags;
16483418978SMark Fasheh }
16583418978SMark Fasheh 
16683418978SMark Fasheh /*
16783418978SMark Fasheh  * Try to merge emi with ins. Returns 1 if merge succeeds, zero
16883418978SMark Fasheh  * otherwise.
16983418978SMark Fasheh  */
ocfs2_try_to_merge_extent_map(struct ocfs2_extent_map_item * emi,struct ocfs2_extent_map_item * ins)17083418978SMark Fasheh static int ocfs2_try_to_merge_extent_map(struct ocfs2_extent_map_item *emi,
17183418978SMark Fasheh 					 struct ocfs2_extent_map_item *ins)
17283418978SMark Fasheh {
17383418978SMark Fasheh 	/*
17483418978SMark Fasheh 	 * Handle contiguousness
17583418978SMark Fasheh 	 */
17683418978SMark Fasheh 	if (ins->ei_phys == (emi->ei_phys + emi->ei_clusters) &&
17783418978SMark Fasheh 	    ins->ei_cpos == (emi->ei_cpos + emi->ei_clusters) &&
17883418978SMark Fasheh 	    ins->ei_flags == emi->ei_flags) {
17983418978SMark Fasheh 		emi->ei_clusters += ins->ei_clusters;
18083418978SMark Fasheh 		return 1;
18183418978SMark Fasheh 	} else if ((ins->ei_phys + ins->ei_clusters) == emi->ei_phys &&
182bd6b0bf8SRoel Kluin 		   (ins->ei_cpos + ins->ei_clusters) == emi->ei_cpos &&
18383418978SMark Fasheh 		   ins->ei_flags == emi->ei_flags) {
18483418978SMark Fasheh 		emi->ei_phys = ins->ei_phys;
18583418978SMark Fasheh 		emi->ei_cpos = ins->ei_cpos;
18683418978SMark Fasheh 		emi->ei_clusters += ins->ei_clusters;
18783418978SMark Fasheh 		return 1;
18883418978SMark Fasheh 	}
18983418978SMark Fasheh 
19083418978SMark Fasheh 	/*
19183418978SMark Fasheh 	 * Overlapping extents - this shouldn't happen unless we've
19283418978SMark Fasheh 	 * split an extent to change it's flags. That is exceedingly
19383418978SMark Fasheh 	 * rare, so there's no sense in trying to optimize it yet.
19483418978SMark Fasheh 	 */
19583418978SMark Fasheh 	if (ocfs2_ei_is_contained(emi, ins) ||
19683418978SMark Fasheh 	    ocfs2_ei_is_contained(ins, emi)) {
19783418978SMark Fasheh 		ocfs2_copy_emi_fields(emi, ins);
19883418978SMark Fasheh 		return 1;
19983418978SMark Fasheh 	}
20083418978SMark Fasheh 
20183418978SMark Fasheh 	/* No merge was possible. */
20283418978SMark Fasheh 	return 0;
20383418978SMark Fasheh }
20483418978SMark Fasheh 
20583418978SMark Fasheh /*
20683418978SMark Fasheh  * In order to reduce complexity on the caller, this insert function
20783418978SMark Fasheh  * is intentionally liberal in what it will accept.
20883418978SMark Fasheh  *
20983418978SMark Fasheh  * The only rule is that the truncate call *must* be used whenever
21083418978SMark Fasheh  * records have been deleted. This avoids inserting overlapping
21183418978SMark Fasheh  * records with different physical mappings.
21283418978SMark Fasheh  */
ocfs2_extent_map_insert_rec(struct inode * inode,struct ocfs2_extent_rec * rec)21383418978SMark Fasheh void ocfs2_extent_map_insert_rec(struct inode *inode,
21483418978SMark Fasheh 				 struct ocfs2_extent_rec *rec)
21583418978SMark Fasheh {
21683418978SMark Fasheh 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
21783418978SMark Fasheh 	struct ocfs2_extent_map *em = &oi->ip_extent_map;
21883418978SMark Fasheh 	struct ocfs2_extent_map_item *emi, *new_emi = NULL;
21983418978SMark Fasheh 	struct ocfs2_extent_map_item ins;
22083418978SMark Fasheh 
22183418978SMark Fasheh 	ins.ei_cpos = le32_to_cpu(rec->e_cpos);
22283418978SMark Fasheh 	ins.ei_phys = ocfs2_blocks_to_clusters(inode->i_sb,
22383418978SMark Fasheh 					       le64_to_cpu(rec->e_blkno));
22483418978SMark Fasheh 	ins.ei_clusters = le16_to_cpu(rec->e_leaf_clusters);
22583418978SMark Fasheh 	ins.ei_flags = rec->e_flags;
22683418978SMark Fasheh 
22783418978SMark Fasheh search:
22883418978SMark Fasheh 	spin_lock(&oi->ip_lock);
22983418978SMark Fasheh 
23083418978SMark Fasheh 	list_for_each_entry(emi, &em->em_list, ei_list) {
23183418978SMark Fasheh 		if (ocfs2_try_to_merge_extent_map(emi, &ins)) {
23283418978SMark Fasheh 			list_move(&emi->ei_list, &em->em_list);
23383418978SMark Fasheh 			spin_unlock(&oi->ip_lock);
23483418978SMark Fasheh 			goto out;
23583418978SMark Fasheh 		}
23683418978SMark Fasheh 	}
23783418978SMark Fasheh 
23883418978SMark Fasheh 	/*
23983418978SMark Fasheh 	 * No item could be merged.
24083418978SMark Fasheh 	 *
24183418978SMark Fasheh 	 * Either allocate and add a new item, or overwrite the last recently
24283418978SMark Fasheh 	 * inserted.
24383418978SMark Fasheh 	 */
24483418978SMark Fasheh 
24583418978SMark Fasheh 	if (em->em_num_items < OCFS2_MAX_EXTENT_MAP_ITEMS) {
24683418978SMark Fasheh 		if (new_emi == NULL) {
24783418978SMark Fasheh 			spin_unlock(&oi->ip_lock);
24883418978SMark Fasheh 
24983418978SMark Fasheh 			new_emi = kmalloc(sizeof(*new_emi), GFP_NOFS);
25083418978SMark Fasheh 			if (new_emi == NULL)
25183418978SMark Fasheh 				goto out;
25283418978SMark Fasheh 
25383418978SMark Fasheh 			goto search;
25483418978SMark Fasheh 		}
25583418978SMark Fasheh 
25683418978SMark Fasheh 		ocfs2_copy_emi_fields(new_emi, &ins);
25783418978SMark Fasheh 		list_add(&new_emi->ei_list, &em->em_list);
25883418978SMark Fasheh 		em->em_num_items++;
25983418978SMark Fasheh 		new_emi = NULL;
26083418978SMark Fasheh 	} else {
26183418978SMark Fasheh 		BUG_ON(list_empty(&em->em_list) || em->em_num_items == 0);
26283418978SMark Fasheh 		emi = list_entry(em->em_list.prev,
26383418978SMark Fasheh 				 struct ocfs2_extent_map_item, ei_list);
26483418978SMark Fasheh 		list_move(&emi->ei_list, &em->em_list);
26583418978SMark Fasheh 		ocfs2_copy_emi_fields(emi, &ins);
26683418978SMark Fasheh 	}
26783418978SMark Fasheh 
26883418978SMark Fasheh 	spin_unlock(&oi->ip_lock);
26983418978SMark Fasheh 
27083418978SMark Fasheh out:
27183418978SMark Fasheh 	kfree(new_emi);
27283418978SMark Fasheh }
27383418978SMark Fasheh 
ocfs2_last_eb_is_empty(struct inode * inode,struct ocfs2_dinode * di)27400dc417fSMark Fasheh static int ocfs2_last_eb_is_empty(struct inode *inode,
27500dc417fSMark Fasheh 				  struct ocfs2_dinode *di)
27600dc417fSMark Fasheh {
27700dc417fSMark Fasheh 	int ret, next_free;
27800dc417fSMark Fasheh 	u64 last_eb_blk = le64_to_cpu(di->i_last_eb_blk);
27900dc417fSMark Fasheh 	struct buffer_head *eb_bh = NULL;
28000dc417fSMark Fasheh 	struct ocfs2_extent_block *eb;
28100dc417fSMark Fasheh 	struct ocfs2_extent_list *el;
28200dc417fSMark Fasheh 
2833d03a305SJoel Becker 	ret = ocfs2_read_extent_block(INODE_CACHE(inode), last_eb_blk, &eb_bh);
28400dc417fSMark Fasheh 	if (ret) {
28500dc417fSMark Fasheh 		mlog_errno(ret);
28600dc417fSMark Fasheh 		goto out;
28700dc417fSMark Fasheh 	}
28800dc417fSMark Fasheh 
28900dc417fSMark Fasheh 	eb = (struct ocfs2_extent_block *) eb_bh->b_data;
29000dc417fSMark Fasheh 	el = &eb->h_list;
29100dc417fSMark Fasheh 
29200dc417fSMark Fasheh 	if (el->l_tree_depth) {
29300dc417fSMark Fasheh 		ocfs2_error(inode->i_sb,
2947ecef14aSJoe Perches 			    "Inode %lu has non zero tree depth in leaf block %llu\n",
2957ecef14aSJoe Perches 			    inode->i_ino,
29600dc417fSMark Fasheh 			    (unsigned long long)eb_bh->b_blocknr);
29700dc417fSMark Fasheh 		ret = -EROFS;
29800dc417fSMark Fasheh 		goto out;
29900dc417fSMark Fasheh 	}
30000dc417fSMark Fasheh 
30100dc417fSMark Fasheh 	next_free = le16_to_cpu(el->l_next_free_rec);
30200dc417fSMark Fasheh 
30300dc417fSMark Fasheh 	if (next_free == 0 ||
30400dc417fSMark Fasheh 	    (next_free == 1 && ocfs2_is_empty_extent(&el->l_recs[0])))
30500dc417fSMark Fasheh 		ret = 1;
30600dc417fSMark Fasheh 
30700dc417fSMark Fasheh out:
30800dc417fSMark Fasheh 	brelse(eb_bh);
30900dc417fSMark Fasheh 	return ret;
31000dc417fSMark Fasheh }
31100dc417fSMark Fasheh 
31283418978SMark Fasheh /*
3134f902c37SMark Fasheh  * Return the 1st index within el which contains an extent start
3144f902c37SMark Fasheh  * larger than v_cluster.
3154f902c37SMark Fasheh  */
ocfs2_search_for_hole_index(struct ocfs2_extent_list * el,u32 v_cluster)3164f902c37SMark Fasheh static int ocfs2_search_for_hole_index(struct ocfs2_extent_list *el,
3174f902c37SMark Fasheh 				       u32 v_cluster)
3184f902c37SMark Fasheh {
3194f902c37SMark Fasheh 	int i;
3204f902c37SMark Fasheh 	struct ocfs2_extent_rec *rec;
3214f902c37SMark Fasheh 
3224f902c37SMark Fasheh 	for(i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
3234f902c37SMark Fasheh 		rec = &el->l_recs[i];
3244f902c37SMark Fasheh 
3254f902c37SMark Fasheh 		if (v_cluster < le32_to_cpu(rec->e_cpos))
3264f902c37SMark Fasheh 			break;
3274f902c37SMark Fasheh 	}
3284f902c37SMark Fasheh 
3294f902c37SMark Fasheh 	return i;
3304f902c37SMark Fasheh }
3314f902c37SMark Fasheh 
3324f902c37SMark Fasheh /*
3334f902c37SMark Fasheh  * Figure out the size of a hole which starts at v_cluster within the given
3344f902c37SMark Fasheh  * extent list.
3354f902c37SMark Fasheh  *
3364f902c37SMark Fasheh  * If there is no more allocation past v_cluster, we return the maximum
3374f902c37SMark Fasheh  * cluster size minus v_cluster.
3384f902c37SMark Fasheh  *
3394f902c37SMark Fasheh  * If we have in-inode extents, then el points to the dinode list and
3404f902c37SMark Fasheh  * eb_bh is NULL. Otherwise, eb_bh should point to the extent block
3414f902c37SMark Fasheh  * containing el.
3424f902c37SMark Fasheh  */
ocfs2_figure_hole_clusters(struct ocfs2_caching_info * ci,struct ocfs2_extent_list * el,struct buffer_head * eb_bh,u32 v_cluster,u32 * num_clusters)343e73a819dSTao Ma int ocfs2_figure_hole_clusters(struct ocfs2_caching_info *ci,
3444f902c37SMark Fasheh 			       struct ocfs2_extent_list *el,
3454f902c37SMark Fasheh 			       struct buffer_head *eb_bh,
3464f902c37SMark Fasheh 			       u32 v_cluster,
3474f902c37SMark Fasheh 			       u32 *num_clusters)
3484f902c37SMark Fasheh {
3494f902c37SMark Fasheh 	int ret, i;
3504f902c37SMark Fasheh 	struct buffer_head *next_eb_bh = NULL;
3514f902c37SMark Fasheh 	struct ocfs2_extent_block *eb, *next_eb;
3524f902c37SMark Fasheh 
3534f902c37SMark Fasheh 	i = ocfs2_search_for_hole_index(el, v_cluster);
3544f902c37SMark Fasheh 
3554f902c37SMark Fasheh 	if (i == le16_to_cpu(el->l_next_free_rec) && eb_bh) {
3564f902c37SMark Fasheh 		eb = (struct ocfs2_extent_block *)eb_bh->b_data;
3574f902c37SMark Fasheh 
3584f902c37SMark Fasheh 		/*
3594f902c37SMark Fasheh 		 * Check the next leaf for any extents.
3604f902c37SMark Fasheh 		 */
3614f902c37SMark Fasheh 
3624f902c37SMark Fasheh 		if (le64_to_cpu(eb->h_next_leaf_blk) == 0ULL)
3634f902c37SMark Fasheh 			goto no_more_extents;
3644f902c37SMark Fasheh 
365e73a819dSTao Ma 		ret = ocfs2_read_extent_block(ci,
3664f902c37SMark Fasheh 					      le64_to_cpu(eb->h_next_leaf_blk),
3670fcaa56aSJoel Becker 					      &next_eb_bh);
3684f902c37SMark Fasheh 		if (ret) {
3694f902c37SMark Fasheh 			mlog_errno(ret);
3704f902c37SMark Fasheh 			goto out;
3714f902c37SMark Fasheh 		}
3725e96581aSJoel Becker 
3734f902c37SMark Fasheh 		next_eb = (struct ocfs2_extent_block *)next_eb_bh->b_data;
3744f902c37SMark Fasheh 		el = &next_eb->h_list;
3754f902c37SMark Fasheh 		i = ocfs2_search_for_hole_index(el, v_cluster);
3764f902c37SMark Fasheh 	}
3774f902c37SMark Fasheh 
3784f902c37SMark Fasheh no_more_extents:
3794f902c37SMark Fasheh 	if (i == le16_to_cpu(el->l_next_free_rec)) {
3804f902c37SMark Fasheh 		/*
3814f902c37SMark Fasheh 		 * We're at the end of our existing allocation. Just
3824f902c37SMark Fasheh 		 * return the maximum number of clusters we could
3834f902c37SMark Fasheh 		 * possibly allocate.
3844f902c37SMark Fasheh 		 */
3854f902c37SMark Fasheh 		*num_clusters = UINT_MAX - v_cluster;
3864f902c37SMark Fasheh 	} else {
3874f902c37SMark Fasheh 		*num_clusters = le32_to_cpu(el->l_recs[i].e_cpos) - v_cluster;
3884f902c37SMark Fasheh 	}
3894f902c37SMark Fasheh 
3904f902c37SMark Fasheh 	ret = 0;
3914f902c37SMark Fasheh out:
3924f902c37SMark Fasheh 	brelse(next_eb_bh);
3934f902c37SMark Fasheh 	return ret;
3944f902c37SMark Fasheh }
3954f902c37SMark Fasheh 
/*
 * Find the extent record covering v_cluster by walking the extent tree
 * rooted in the dinode held by di_bh, without consulting the extent map
 * cache.
 *
 * @inode:    inode whose allocation is searched
 * @di_bh:    buffer holding the inode's on-disk dinode
 * @v_cluster: logical cluster to look up
 * @hole_len: optional; when v_cluster falls in a hole, receives the
 *            hole's length in clusters
 * @ret_rec:  zeroed on entry to this function; on a hit receives a copy
 *            of the on-disk record (fields stay little-endian). On a
 *            hole it stays zeroed, so callers test e_blkno == 0.
 * @is_last:  optional; set to 1 when the record found is judged to be
 *            the inode's last extent, 0 otherwise
 *
 * Returns 0 for both a hit and a hole. Returns -EROFS, after reporting
 * through ocfs2_error(), when the leaf has a non-zero tree depth, when
 * the extent list claims more used records than it has room for, or when
 * the record found has a zero block number. Errors from the helper
 * functions are passed through.
 */
static int ocfs2_get_clusters_nocache(struct inode *inode,
				      struct buffer_head *di_bh,
				      u32 v_cluster, unsigned int *hole_len,
				      struct ocfs2_extent_rec *ret_rec,
				      unsigned int *is_last)
{
	int i, ret, tree_height;
	/* u32 to match the num_clusters argument of the hole helper. */
	u32 len;
	struct ocfs2_dinode *di;
	struct ocfs2_extent_block *eb = NULL;
	struct ocfs2_extent_list *el;
	struct ocfs2_extent_rec *rec;
	struct buffer_head *eb_bh = NULL;

	memset(ret_rec, 0, sizeof(*ret_rec));
	if (is_last)
		*is_last = 0;

	di = (struct ocfs2_dinode *) di_bh->b_data;
	el = &di->id2.i_list;
	tree_height = le16_to_cpu(el->l_tree_depth);

	if (tree_height > 0) {
		ret = ocfs2_find_leaf(INODE_CACHE(inode), el, v_cluster,
				      &eb_bh);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		eb = (struct ocfs2_extent_block *) eb_bh->b_data;
		el = &eb->h_list;

		if (el->l_tree_depth) {
			ocfs2_error(inode->i_sb,
				    "Inode %lu has non zero tree depth in leaf block %llu\n",
				    inode->i_ino,
				    (unsigned long long)eb_bh->b_blocknr);
			ret = -EROFS;
			goto out;
		}
	}

	/*
	 * l_next_free_rec comes straight from disk and bounds every walk
	 * over l_recs[] below. Refuse a list that claims more used
	 * records than it has slots rather than reading past the array.
	 */
	if (le16_to_cpu(el->l_next_free_rec) > le16_to_cpu(el->l_count)) {
		ocfs2_error(inode->i_sb,
			    "Inode %lu has an invalid extent (next_free_rec %u, count %u)\n",
			    inode->i_ino,
			    le16_to_cpu(el->l_next_free_rec),
			    le16_to_cpu(el->l_count));
		ret = -EROFS;
		goto out;
	}

	i = ocfs2_search_extent_list(el, v_cluster);
	if (i == -1) {
		/*
		 * Holes can be larger than the maximum size of an
		 * extent, so we return their lengths in a separate
		 * field.
		 */
		if (hole_len) {
			ret = ocfs2_figure_hole_clusters(INODE_CACHE(inode),
							 el, eb_bh,
							 v_cluster, &len);
			if (ret) {
				mlog_errno(ret);
				goto out;
			}

			*hole_len = len;
		}
		goto out_hole;
	}

	rec = &el->l_recs[i];

	BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos));

	if (!rec->e_blkno) {
		ocfs2_error(inode->i_sb,
			    "Inode %lu has bad extent record (%u, %u, 0)\n",
			    inode->i_ino,
			    le32_to_cpu(rec->e_cpos),
			    ocfs2_rec_clusters(el, rec));
		ret = -EROFS;
		goto out;
	}

	*ret_rec = *rec;

	/*
	 * Checking for last extent is potentially expensive - we
	 * might have to look at the next leaf over to see if it's
	 * empty.
	 *
	 * The first two checks are to see whether the caller even
	 * cares for this information, and if the extent is at least
	 * the last in its list.
	 *
	 * If those hold true, then the extent is last if any of the
	 * additional conditions hold true:
	 *  - Extent list is in-inode
	 *  - Extent list is right-most
	 *  - Extent list is 2nd to rightmost, with empty right-most
	 *
	 * eb is only dereferenced when tree_height is non-zero, which is
	 * exactly when it was set from eb_bh above.
	 */
	if (is_last) {
		if (i == (le16_to_cpu(el->l_next_free_rec) - 1)) {
			if (tree_height == 0)
				*is_last = 1;
			else if (eb->h_blkno == di->i_last_eb_blk)
				*is_last = 1;
			else if (eb->h_next_leaf_blk == di->i_last_eb_blk) {
				ret = ocfs2_last_eb_is_empty(inode, di);
				if (ret < 0) {
					mlog_errno(ret);
					goto out;
				}
				if (ret == 1)
					*is_last = 1;
			}
		}
	}

out_hole:
	/* Both a hit and a hole are successful lookups. */
	ret = 0;
out:
	brelse(eb_bh);
	return ret;
}
51400dc417fSMark Fasheh 
/*
 * Translate v_cluster, which must lie within rec, into a physical
 * cluster.
 *
 * *p_cluster receives the record's starting physical cluster plus the
 * offset of v_cluster into the record. When num_clusters is non-NULL it
 * receives the number of clusters from v_cluster to the end of the
 * record, using rec->e_leaf_clusters as the record length.
 */
static void ocfs2_relative_extent_offsets(struct super_block *sb,
					  u32 v_cluster,
					  struct ocfs2_extent_rec *rec,
					  u32 *p_cluster, u32 *num_clusters)
{
	u32 delta = v_cluster - le32_to_cpu(rec->e_cpos);
	u32 start = ocfs2_blocks_to_clusters(sb, le64_to_cpu(rec->e_blkno));

	*p_cluster = start + delta;

	if (num_clusters)
		*num_clusters = le16_to_cpu(rec->e_leaf_clusters) - delta;
}
52949cb8d2dSMark Fasheh 
/*
 * Map logical cluster v_cluster to a physical cluster using the extent
 * tree rooted at el.
 *
 * On success *p_cluster receives the physical cluster and, when the
 * pointers are non-NULL, *num_clusters receives the clusters remaining in
 * the record from v_cluster onwards and *extent_flags receives the
 * record's flags.
 *
 * Unlike ocfs2_get_clusters(), a v_cluster that no record covers is an
 * error here. Returns 0 on success; -EROFS when no record covers
 * v_cluster, when the leaf has a non-zero tree depth, or when the record
 * found has a zero block number; or the error from ocfs2_find_leaf().
 */
int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster,
			     u32 *p_cluster, u32 *num_clusters,
			     struct ocfs2_extent_list *el,
			     unsigned int *extent_flags)
{
	struct buffer_head *leaf_bh = NULL;
	struct ocfs2_extent_block *eb;
	struct ocfs2_extent_rec *rec;
	int status = 0;
	int idx;
	u32 delta;

	if (el->l_tree_depth) {
		/* Descend to the leaf list that should cover v_cluster. */
		status = ocfs2_find_leaf(INODE_CACHE(inode), el, v_cluster,
					 &leaf_bh);
		if (status) {
			mlog_errno(status);
			goto out;
		}

		eb = (struct ocfs2_extent_block *) leaf_bh->b_data;
		el = &eb->h_list;

		if (el->l_tree_depth) {
			ocfs2_error(inode->i_sb,
				    "Inode %lu has non zero tree depth in xattr leaf block %llu\n",
				    inode->i_ino,
				    (unsigned long long)leaf_bh->b_blocknr);
			status = -EROFS;
			goto out;
		}
	}

	idx = ocfs2_search_extent_list(el, v_cluster);
	if (idx == -1) {
		status = -EROFS;
		mlog_errno(status);
		goto out;
	}

	rec = &el->l_recs[idx];
	BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos));

	if (!rec->e_blkno) {
		ocfs2_error(inode->i_sb,
			    "Inode %lu has bad extent record (%u, %u, 0) in xattr\n",
			    inode->i_ino,
			    le32_to_cpu(rec->e_cpos),
			    ocfs2_rec_clusters(el, rec));
		status = -EROFS;
		goto out;
	}

	/* Offset of v_cluster from the start of the record. */
	delta = v_cluster - le32_to_cpu(rec->e_cpos);
	*p_cluster = ocfs2_blocks_to_clusters(inode->i_sb,
					      le64_to_cpu(rec->e_blkno));
	*p_cluster = *p_cluster + delta;

	if (num_clusters)
		*num_clusters = ocfs2_rec_clusters(el, rec) - delta;

	if (extent_flags)
		*extent_flags = rec->e_flags;

out:
	brelse(leaf_bh);
	return status;
}
594f56654c4STao Ma 
/*
 * Map logical cluster v_cluster of inode to a physical cluster.
 *
 * The extent map cache is consulted first. On a miss the inode block is
 * read, the extent tree is searched, and any extent found is inserted
 * into the cache.
 *
 * On success *p_cluster receives the physical cluster, or 0 when
 * v_cluster lies in a hole. When num_clusters is non-NULL it receives the
 * clusters remaining in the extent, or the length of the hole. When
 * extent_flags is non-NULL it receives the extent's flags (0 for a hole).
 *
 * Returns 0 on success, -ERANGE for an inode flagged
 * OCFS2_INLINE_DATA_FL, or the error from reading the inode block or
 * from the extent tree search.
 */
int ocfs2_get_clusters(struct inode *inode, u32 v_cluster,
		       u32 *p_cluster, u32 *num_clusters,
		       unsigned int *extent_flags)
{
	struct buffer_head *di_bh = NULL;
	struct ocfs2_extent_rec rec;
	unsigned int hole_len;
	unsigned int flags = 0;
	int status;

	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
		status = -ERANGE;
		mlog_errno(status);
		goto out;
	}

	/* A cache hit fills in every output; nothing more to do. */
	status = ocfs2_extent_map_lookup(inode, v_cluster, p_cluster,
					 num_clusters, extent_flags);
	if (!status)
		goto out;

	status = ocfs2_read_inode_block(inode, &di_bh);
	if (status) {
		mlog_errno(status);
		goto out;
	}

	status = ocfs2_get_clusters_nocache(inode, di_bh, v_cluster,
					    &hole_len, &rec, NULL);
	if (status) {
		mlog_errno(status);
		goto out;
	}

	if (rec.e_blkno != 0ULL) {
		ocfs2_relative_extent_offsets(inode->i_sb, v_cluster, &rec,
					      p_cluster, num_clusters);
		flags = rec.e_flags;

		ocfs2_extent_map_insert_rec(inode, &rec);
	} else {
		/*
		 * A hole was found. Return some canned values that
		 * callers can key on. If asked for, num_clusters will
		 * be populated with the size of the hole.
		 */
		*p_cluster = 0;
		if (num_clusters)
			*num_clusters = hole_len;
	}

	if (extent_flags)
		*extent_flags = flags;

out:
	brelse(di_bh);
	return status;
}
653363041a5SMark Fasheh 
654363041a5SMark Fasheh /*
655363041a5SMark Fasheh  * This expects alloc_sem to be held. The allocation cannot change at
656363041a5SMark Fasheh  * all while the map is in the process of being updated.
657363041a5SMark Fasheh  */
ocfs2_extent_map_get_blocks(struct inode * inode,u64 v_blkno,u64 * p_blkno,u64 * ret_count,unsigned int * extent_flags)658363041a5SMark Fasheh int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno,
6594f902c37SMark Fasheh 				u64 *ret_count, unsigned int *extent_flags)
660363041a5SMark Fasheh {
661363041a5SMark Fasheh 	int ret;
662363041a5SMark Fasheh 	int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
663363041a5SMark Fasheh 	u32 cpos, num_clusters, p_cluster;
664363041a5SMark Fasheh 	u64 boff = 0;
665ccd979bdSMark Fasheh 
666ccd979bdSMark Fasheh 	cpos = ocfs2_blocks_to_clusters(inode->i_sb, v_blkno);
667ccd979bdSMark Fasheh 
66849cb8d2dSMark Fasheh 	ret = ocfs2_get_clusters(inode, cpos, &p_cluster, &num_clusters,
66949cb8d2dSMark Fasheh 				 extent_flags);
670ccd979bdSMark Fasheh 	if (ret) {
671ccd979bdSMark Fasheh 		mlog_errno(ret);
672363041a5SMark Fasheh 		goto out;
673ccd979bdSMark Fasheh 	}
674ccd979bdSMark Fasheh 
675363041a5SMark Fasheh 	/*
676363041a5SMark Fasheh 	 * p_cluster == 0 indicates a hole.
677363041a5SMark Fasheh 	 */
678363041a5SMark Fasheh 	if (p_cluster) {
679363041a5SMark Fasheh 		boff = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
680ccd979bdSMark Fasheh 		boff += (v_blkno & (u64)(bpc - 1));
681363041a5SMark Fasheh 	}
682363041a5SMark Fasheh 
683363041a5SMark Fasheh 	*p_blkno = boff;
684ccd979bdSMark Fasheh 
685ccd979bdSMark Fasheh 	if (ret_count) {
686363041a5SMark Fasheh 		*ret_count = ocfs2_clusters_to_blocks(inode->i_sb, num_clusters);
687363041a5SMark Fasheh 		*ret_count -= v_blkno & (u64)(bpc - 1);
688ccd979bdSMark Fasheh 	}
689ccd979bdSMark Fasheh 
690363041a5SMark Fasheh out:
691363041a5SMark Fasheh 	return ret;
692ccd979bdSMark Fasheh }
69300dc417fSMark Fasheh 
69486239d59STristan Ye /*
69586239d59STristan Ye  * The ocfs2_fiemap_inline() may be a little bit misleading, since
69686239d59STristan Ye  * it not only handles the fiemap for inlined files, but also deals
69786239d59STristan Ye  * with the fast symlink, cause they have no difference for extent
69886239d59STristan Ye  * mapping per se.
69986239d59STristan Ye  */
ocfs2_fiemap_inline(struct inode * inode,struct buffer_head * di_bh,struct fiemap_extent_info * fieinfo,u64 map_start)70000dc417fSMark Fasheh static int ocfs2_fiemap_inline(struct inode *inode, struct buffer_head *di_bh,
70100dc417fSMark Fasheh 			       struct fiemap_extent_info *fieinfo,
70200dc417fSMark Fasheh 			       u64 map_start)
70300dc417fSMark Fasheh {
70400dc417fSMark Fasheh 	int ret;
70500dc417fSMark Fasheh 	unsigned int id_count;
70600dc417fSMark Fasheh 	struct ocfs2_dinode *di;
70700dc417fSMark Fasheh 	u64 phys;
70800dc417fSMark Fasheh 	u32 flags = FIEMAP_EXTENT_DATA_INLINE|FIEMAP_EXTENT_LAST;
70900dc417fSMark Fasheh 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
71000dc417fSMark Fasheh 
71100dc417fSMark Fasheh 	di = (struct ocfs2_dinode *)di_bh->b_data;
71286239d59STristan Ye 	if (ocfs2_inode_is_fast_symlink(inode))
71386239d59STristan Ye 		id_count = ocfs2_fast_symlink_chars(inode->i_sb);
71486239d59STristan Ye 	else
71500dc417fSMark Fasheh 		id_count = le16_to_cpu(di->id2.i_data.id_count);
71600dc417fSMark Fasheh 
71700dc417fSMark Fasheh 	if (map_start < id_count) {
71800dc417fSMark Fasheh 		phys = oi->ip_blkno << inode->i_sb->s_blocksize_bits;
71986239d59STristan Ye 		if (ocfs2_inode_is_fast_symlink(inode))
72086239d59STristan Ye 			phys += offsetof(struct ocfs2_dinode, id2.i_symlink);
72186239d59STristan Ye 		else
72286239d59STristan Ye 			phys += offsetof(struct ocfs2_dinode,
72386239d59STristan Ye 					 id2.i_data.id_data);
72400dc417fSMark Fasheh 
72500dc417fSMark Fasheh 		ret = fiemap_fill_next_extent(fieinfo, 0, phys, id_count,
72600dc417fSMark Fasheh 					      flags);
72700dc417fSMark Fasheh 		if (ret < 0)
72800dc417fSMark Fasheh 			return ret;
72900dc417fSMark Fasheh 	}
73000dc417fSMark Fasheh 
73100dc417fSMark Fasheh 	return 0;
73200dc417fSMark Fasheh }
73300dc417fSMark Fasheh 
/*
 * ocfs2_fiemap() - report the extents backing a byte range of @inode.
 *
 * @inode:     inode being queried
 * @fieinfo:   fiemap request/result descriptor, filled through
 *             fiemap_fill_next_extent()
 * @map_start: first byte of the range
 * @map_len:   length of the range in bytes (may be adjusted by fiemap_prep())
 *
 * Takes the inode lock and ip_alloc_sem (read) for the walk.  Inline-data
 * inodes and fast symlinks are delegated to ocfs2_fiemap_inline(); for
 * everything else the extent list is walked cluster by cluster, holes are
 * skipped, and each extent record is translated to byte units and reported.
 *
 * Returns 0 on success (including when fiemap_fill_next_extent() signals
 * that no more extents are wanted by returning a positive value), or a
 * negative error code.
 */
int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
		 u64 map_start, u64 map_len)
{
	int ret, is_last;
	u32 mapping_end, cpos;
	unsigned int hole_size;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	u64 len_bytes, phys_bytes, virt_bytes;
	struct buffer_head *di_bh = NULL;
	struct ocfs2_extent_rec rec;

	ret = fiemap_prep(inode, fieinfo, map_start, &map_len, 0);
	if (ret)
		return ret;

	ret = ocfs2_inode_lock(inode, &di_bh, 0);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	down_read(&OCFS2_I(inode)->ip_alloc_sem);

	/*
	 * Handle inline-data and fast symlink separately.
	 */
	if ((OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) ||
	    ocfs2_inode_is_fast_symlink(inode)) {
		ret = ocfs2_fiemap_inline(inode, di_bh, fieinfo, map_start);
		goto out_unlock;
	}

	cpos = map_start >> osb->s_clustersize_bits;
	mapping_end = ocfs2_clusters_for_bytes(inode->i_sb,
					       map_start + map_len);
	is_last = 0;
	while (cpos < mapping_end && !is_last) {
		u32 fe_flags;

		ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos,
						 &hole_size, &rec, &is_last);
		if (ret) {
			mlog_errno(ret);
			goto out_unlock;
		}

		/* A zero block number marks a hole: skip past it. */
		if (rec.e_blkno == 0ULL) {
			cpos += hole_size;
			continue;
		}

		fe_flags = 0;
		if (rec.e_flags & OCFS2_EXT_UNWRITTEN)
			fe_flags |= FIEMAP_EXTENT_UNWRITTEN;
		if (rec.e_flags & OCFS2_EXT_REFCOUNTED)
			fe_flags |= FIEMAP_EXTENT_SHARED;
		if (is_last)
			fe_flags |= FIEMAP_EXTENT_LAST;
		len_bytes = (u64)le16_to_cpu(rec.e_leaf_clusters) << osb->s_clustersize_bits;
		phys_bytes = le64_to_cpu(rec.e_blkno) << osb->sb->s_blocksize_bits;
		virt_bytes = (u64)le32_to_cpu(rec.e_cpos) << osb->s_clustersize_bits;

		/*
		 * fiemap_fill_next_extent() copies the extent into the
		 * caller's user-space buffer and may therefore page fault.
		 * If that buffer is an mmap of this very file, the fault
		 * handler can need ip_alloc_sem itself, which would deadlock
		 * against the read lock held here.  Drop the semaphore for
		 * the duration of the copy; the next iteration looks the
		 * extent up again from cpos, so nothing cached is relied on
		 * across the gap.
		 */
		up_read(&OCFS2_I(inode)->ip_alloc_sem);
		ret = fiemap_fill_next_extent(fieinfo, virt_bytes, phys_bytes,
					      len_bytes, fe_flags);
		down_read(&OCFS2_I(inode)->ip_alloc_sem);
		if (ret)
			break;

		cpos = le32_to_cpu(rec.e_cpos)+ le16_to_cpu(rec.e_leaf_clusters);
	}

	/* A positive value only means "stop filling"; it is not an error. */
	if (ret > 0)
		ret = 0;

out_unlock:
	brelse(di_bh);

	up_read(&OCFS2_I(inode)->ip_alloc_sem);

	ocfs2_inode_unlock(inode, 0);
out:

	return ret;
}
817a8549fb5SJoel Becker 
818ac604d3cSGang He /* Is IO overwriting allocated blocks? */
ocfs2_overwrite_io(struct inode * inode,struct buffer_head * di_bh,u64 map_start,u64 map_len)819ac604d3cSGang He int ocfs2_overwrite_io(struct inode *inode, struct buffer_head *di_bh,
820ac604d3cSGang He 		       u64 map_start, u64 map_len)
821ac604d3cSGang He {
822ac604d3cSGang He 	int ret = 0, is_last;
823ac604d3cSGang He 	u32 mapping_end, cpos;
824ac604d3cSGang He 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
825ac604d3cSGang He 	struct ocfs2_extent_rec rec;
826ac604d3cSGang He 
827ac604d3cSGang He 	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
828ac604d3cSGang He 		if (ocfs2_size_fits_inline_data(di_bh, map_start + map_len))
829ac604d3cSGang He 			return ret;
830ac604d3cSGang He 		else
831ac604d3cSGang He 			return -EAGAIN;
832ac604d3cSGang He 	}
833ac604d3cSGang He 
834ac604d3cSGang He 	cpos = map_start >> osb->s_clustersize_bits;
835ac604d3cSGang He 	mapping_end = ocfs2_clusters_for_bytes(inode->i_sb,
836ac604d3cSGang He 					       map_start + map_len);
837ac604d3cSGang He 	is_last = 0;
838ac604d3cSGang He 	while (cpos < mapping_end && !is_last) {
839ac604d3cSGang He 		ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos,
840ac604d3cSGang He 						 NULL, &rec, &is_last);
841ac604d3cSGang He 		if (ret) {
842ac604d3cSGang He 			mlog_errno(ret);
843ac604d3cSGang He 			goto out;
844ac604d3cSGang He 		}
845ac604d3cSGang He 
846ac604d3cSGang He 		if (rec.e_blkno == 0ULL)
847ac604d3cSGang He 			break;
848ac604d3cSGang He 
849ac604d3cSGang He 		if (rec.e_flags & OCFS2_EXT_REFCOUNTED)
850ac604d3cSGang He 			break;
851ac604d3cSGang He 
852ac604d3cSGang He 		cpos = le32_to_cpu(rec.e_cpos) +
853ac604d3cSGang He 			le16_to_cpu(rec.e_leaf_clusters);
854ac604d3cSGang He 	}
855ac604d3cSGang He 
856ac604d3cSGang He 	if (cpos < mapping_end)
857ac604d3cSGang He 		ret = -EAGAIN;
858ac604d3cSGang He out:
859ac604d3cSGang He 	return ret;
860ac604d3cSGang He }
861ac604d3cSGang He 
/*
 * Implement SEEK_DATA / SEEK_HOLE for @file.
 *
 * On entry *@offset is the position to search from; on success it is
 * advanced (never moved backwards) to the start of the next region of the
 * requested kind.  Unwritten extents and holes both count as "hole".
 *
 * Returns 0 on success, -ENXIO when *@offset is at or past i_size or when
 * SEEK_DATA finds no further data, or an error from the inode lock or the
 * extent lookup.  @whence must be SEEK_DATA or SEEK_HOLE.
 */
int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int whence)
{
	struct inode *inode = file->f_mapping->host;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	const unsigned int want_data = (whence == SEEK_DATA);
	u16 shift = OCFS2_SB(inode->i_sb)->s_clustersize_bits;
	struct buffer_head *di_bh = NULL;
	struct ocfs2_extent_rec rec;
	unsigned int last = 0, have_data;
	u32 cluster, end_cluster, span = 0, hole_len;
	u64 start_bytes, span_bytes;
	int status;

	BUG_ON(whence != SEEK_DATA && whence != SEEK_HOLE);

	status = ocfs2_inode_lock(inode, &di_bh, 0);
	if (status) {
		mlog_errno(status);
		return status;
	}

	down_read(&oi->ip_alloc_sem);

	if (*offset >= i_size_read(inode)) {
		status = -ENXIO;
		goto out_unlock;
	}

	/*
	 * Inline data: SEEK_DATA leaves the offset where it is, SEEK_HOLE
	 * moves it to i_size.
	 */
	if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
		if (!want_data)
			*offset = i_size_read(inode);
		goto out_unlock;
	}

	cluster = *offset >> shift;
	end_cluster = ocfs2_clusters_for_bytes(inode->i_sb,
					       i_size_read(inode));

	while (cluster < end_cluster && !last) {
		status = ocfs2_get_clusters_nocache(inode, di_bh, cluster,
						    &hole_len, &rec, &last);
		if (status) {
			mlog_errno(status);
			goto out_unlock;
		}

		start_bytes = (u64)cluster << shift;

		if (rec.e_blkno == 0ULL) {
			/* Hole: its length comes straight from the lookup. */
			span = hole_len;
			have_data = 0;
		} else {
			/* Clusters left in this extent counted from 'cluster'. */
			span = le16_to_cpu(rec.e_leaf_clusters) -
				(cluster - le32_to_cpu(rec.e_cpos));
			have_data = !(rec.e_flags & OCFS2_EXT_UNWRITTEN);
		}

		/* Found the kind of region the caller asked for. */
		if (have_data == want_data) {
			if (start_bytes > *offset)
				*offset = start_bytes;
			goto out_unlock;
		}

		if (!last)
			cluster += span;
	}

	/* Ran out of extents: SEEK_DATA has nothing more to report. */
	if (want_data) {
		status = -ENXIO;
		goto out_unlock;
	}

	/*
	 * SEEK_HOLE: report the end of the last region examined, clamped
	 * to i_size.
	 */
	start_bytes = (u64)cluster << shift;
	span_bytes = (u64)span << shift;

	if ((start_bytes + span_bytes) > i_size_read(inode))
		span_bytes = i_size_read(inode) - start_bytes;
	start_bytes += span_bytes;
	if (start_bytes > *offset)
		*offset = start_bytes;

out_unlock:
	brelse(di_bh);
	up_read(&oi->ip_alloc_sem);
	ocfs2_inode_unlock(inode, 0);

	return status;
}
95593862d5eSSunil Mushran 
/*
 * Read @nr blocks of @inode starting at virtual block @v_block into @bhs.
 *
 * The virtual range is resolved one physically contiguous run at a time
 * (holding ip_alloc_sem only for each lookup) and each run is passed to
 * ocfs2_read_blocks() together with @flags and @validate.
 *
 * A range whose last block starts at or beyond i_size is only legal for
 * readahead (OCFS2_BH_READAHEAD) and is silently skipped.  Hitting a hole
 * is reported as -EIO.
 *
 * Returns 0 on success or the first error encountered.
 */
int ocfs2_read_virt_blocks(struct inode *inode, u64 v_block, int nr,
			   struct buffer_head *bhs[], int flags,
			   int (*validate)(struct super_block *sb,
					   struct buffer_head *bh))
{
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	u64 phys_start, phys_run;
	int done, count, remaining, idx;
	int rc = 0;

	trace_ocfs2_read_virt_blocks(
	     inode, (unsigned long long)v_block, nr, bhs, flags,
	     validate);

	if (((v_block + nr - 1) << inode->i_sb->s_blocksize_bits) >=
	    i_size_read(inode)) {
		BUG_ON(!(flags & OCFS2_BH_READAHEAD));
		return rc;
	}

	for (done = 0; done < nr; done += count) {
		down_read(&oi->ip_alloc_sem);
		rc = ocfs2_extent_map_get_blocks(inode, v_block + done,
						 &phys_start, &phys_run, NULL);
		up_read(&oi->ip_alloc_sem);
		if (rc) {
			mlog_errno(rc);
			return rc;
		}

		if (!phys_start) {
			rc = -EIO;
			mlog(ML_ERROR,
			     "Inode #%llu contains a hole at offset %llu\n",
			     (unsigned long long)OCFS2_I(inode)->ip_blkno,
			     (unsigned long long)(v_block + done) <<
			     inode->i_sb->s_blocksize_bits);
			return rc;
		}

		/* Read no more than the contiguous run or what is left. */
		remaining = nr - done;
		count = (phys_run < remaining) ? phys_run : remaining;

		/*
		 * Any buffer heads supplied by the caller are expected to
		 * come from an earlier readahead pass through this function,
		 * so their b_blocknr must already match the mapping.
		 */
		for (idx = 0; idx < count; idx++)
			BUG_ON(bhs[done + idx] &&
			       bhs[done + idx]->b_blocknr !=
			       (phys_start + idx));

		rc = ocfs2_read_blocks(INODE_CACHE(inode), phys_start, count,
				       bhs + done, flags, validate);
		if (rc) {
			mlog_errno(rc);
			return rc;
		}
	}

	return rc;
}
1022a8549fb5SJoel Becker 
1023a8549fb5SJoel Becker 
1024