xref: /openbmc/linux/net/ceph/striper.c (revision 0898782247ae533d1f4e47a06bc5d4870931b284)
1ed0811d2SIlya Dryomov /* SPDX-License-Identifier: GPL-2.0 */
2ed0811d2SIlya Dryomov 
3ed0811d2SIlya Dryomov #include <linux/ceph/ceph_debug.h>
4ed0811d2SIlya Dryomov 
5ed0811d2SIlya Dryomov #include <linux/math64.h>
6ed0811d2SIlya Dryomov #include <linux/slab.h>
7ed0811d2SIlya Dryomov 
8ed0811d2SIlya Dryomov #include <linux/ceph/striper.h>
9ed0811d2SIlya Dryomov #include <linux/ceph/types.h>
10ed0811d2SIlya Dryomov 
11ed0811d2SIlya Dryomov /*
1208c1ac50SIlya Dryomov  * Map a file extent to a stripe unit within an object.
1308c1ac50SIlya Dryomov  * Fill in objno, offset into object, and object extent length (i.e. the
1408c1ac50SIlya Dryomov  * number of bytes mapped, less than or equal to @l->stripe_unit).
1508c1ac50SIlya Dryomov  *
1608c1ac50SIlya Dryomov  * Example for stripe_count = 3, stripes_per_object = 4:
1708c1ac50SIlya Dryomov  *
1808c1ac50SIlya Dryomov  * blockno   |  0  3  6  9 |  1  4  7 10 |  2  5  8 11 | 12 15 18 21 | 13 16 19
1908c1ac50SIlya Dryomov  * stripeno  |  0  1  2  3 |  0  1  2  3 |  0  1  2  3 |  4  5  6  7 |  4  5  6
2008c1ac50SIlya Dryomov  * stripepos |      0      |      1      |      2      |      0      |      1
2108c1ac50SIlya Dryomov  * objno     |      0      |      1      |      2      |      3      |      4
2208c1ac50SIlya Dryomov  * objsetno  |                    0                    |                    1
2308c1ac50SIlya Dryomov  */
ceph_calc_file_object_mapping(struct ceph_file_layout * l,u64 off,u64 len,u64 * objno,u64 * objoff,u32 * xlen)2408c1ac50SIlya Dryomov void ceph_calc_file_object_mapping(struct ceph_file_layout *l,
2508c1ac50SIlya Dryomov 				   u64 off, u64 len,
2608c1ac50SIlya Dryomov 				   u64 *objno, u64 *objoff, u32 *xlen)
2708c1ac50SIlya Dryomov {
2808c1ac50SIlya Dryomov 	u32 stripes_per_object = l->object_size / l->stripe_unit;
2908c1ac50SIlya Dryomov 	u64 blockno;	/* which su in the file (i.e. globally) */
3008c1ac50SIlya Dryomov 	u32 blockoff;	/* offset into su */
3108c1ac50SIlya Dryomov 	u64 stripeno;	/* which stripe */
3208c1ac50SIlya Dryomov 	u32 stripepos;	/* which su in the stripe,
3308c1ac50SIlya Dryomov 			   which object in the object set */
3408c1ac50SIlya Dryomov 	u64 objsetno;	/* which object set */
3508c1ac50SIlya Dryomov 	u32 objsetpos;	/* which stripe in the object set */
3608c1ac50SIlya Dryomov 
3708c1ac50SIlya Dryomov 	blockno = div_u64_rem(off, l->stripe_unit, &blockoff);
3808c1ac50SIlya Dryomov 	stripeno = div_u64_rem(blockno, l->stripe_count, &stripepos);
3908c1ac50SIlya Dryomov 	objsetno = div_u64_rem(stripeno, stripes_per_object, &objsetpos);
4008c1ac50SIlya Dryomov 
4108c1ac50SIlya Dryomov 	*objno = objsetno * l->stripe_count + stripepos;
4208c1ac50SIlya Dryomov 	*objoff = objsetpos * l->stripe_unit + blockoff;
4308c1ac50SIlya Dryomov 	*xlen = min_t(u64, len, l->stripe_unit - blockoff);
4408c1ac50SIlya Dryomov }
4508c1ac50SIlya Dryomov EXPORT_SYMBOL(ceph_calc_file_object_mapping);
4608c1ac50SIlya Dryomov 
4708c1ac50SIlya Dryomov /*
48ed0811d2SIlya Dryomov  * Return the last extent with given objno (@object_extents is sorted
49ed0811d2SIlya Dryomov  * by objno).  If not found, return NULL and set @add_pos so that the
50ed0811d2SIlya Dryomov  * new extent can be added with list_add(add_pos, new_ex).
51ed0811d2SIlya Dryomov  */
52ed0811d2SIlya Dryomov static struct ceph_object_extent *
lookup_last(struct list_head * object_extents,u64 objno,struct list_head ** add_pos)53ed0811d2SIlya Dryomov lookup_last(struct list_head *object_extents, u64 objno,
54ed0811d2SIlya Dryomov 	    struct list_head **add_pos)
55ed0811d2SIlya Dryomov {
56ed0811d2SIlya Dryomov 	struct list_head *pos;
57ed0811d2SIlya Dryomov 
58ed0811d2SIlya Dryomov 	list_for_each_prev(pos, object_extents) {
59ed0811d2SIlya Dryomov 		struct ceph_object_extent *ex =
60ed0811d2SIlya Dryomov 		    list_entry(pos, typeof(*ex), oe_item);
61ed0811d2SIlya Dryomov 
62ed0811d2SIlya Dryomov 		if (ex->oe_objno == objno)
63ed0811d2SIlya Dryomov 			return ex;
64ed0811d2SIlya Dryomov 
65ed0811d2SIlya Dryomov 		if (ex->oe_objno < objno)
66ed0811d2SIlya Dryomov 			break;
67ed0811d2SIlya Dryomov 	}
68ed0811d2SIlya Dryomov 
69ed0811d2SIlya Dryomov 	*add_pos = pos;
70ed0811d2SIlya Dryomov 	return NULL;
71ed0811d2SIlya Dryomov }
72ed0811d2SIlya Dryomov 
73ed0811d2SIlya Dryomov static struct ceph_object_extent *
lookup_containing(struct list_head * object_extents,u64 objno,u64 objoff,u32 xlen)74ed0811d2SIlya Dryomov lookup_containing(struct list_head *object_extents, u64 objno,
75ed0811d2SIlya Dryomov 		  u64 objoff, u32 xlen)
76ed0811d2SIlya Dryomov {
77ed0811d2SIlya Dryomov 	struct ceph_object_extent *ex;
78ed0811d2SIlya Dryomov 
79ed0811d2SIlya Dryomov 	list_for_each_entry(ex, object_extents, oe_item) {
80ed0811d2SIlya Dryomov 		if (ex->oe_objno == objno &&
81ed0811d2SIlya Dryomov 		    ex->oe_off <= objoff &&
82ed0811d2SIlya Dryomov 		    ex->oe_off + ex->oe_len >= objoff + xlen) /* paranoia */
83ed0811d2SIlya Dryomov 			return ex;
84ed0811d2SIlya Dryomov 
85ed0811d2SIlya Dryomov 		if (ex->oe_objno > objno)
86ed0811d2SIlya Dryomov 			break;
87ed0811d2SIlya Dryomov 	}
88ed0811d2SIlya Dryomov 
89ed0811d2SIlya Dryomov 	return NULL;
90ed0811d2SIlya Dryomov }
91ed0811d2SIlya Dryomov 
92ed0811d2SIlya Dryomov /*
93ed0811d2SIlya Dryomov  * Map a file extent to a sorted list of object extents.
94ed0811d2SIlya Dryomov  *
95ed0811d2SIlya Dryomov  * We want only one (or as few as possible) object extents per object.
96ed0811d2SIlya Dryomov  * Adjacent object extents will be merged together, each returned object
97ed0811d2SIlya Dryomov  * extent may reverse map to multiple different file extents.
98ed0811d2SIlya Dryomov  *
99ed0811d2SIlya Dryomov  * Call @alloc_fn for each new object extent and @action_fn for each
100ed0811d2SIlya Dryomov  * mapped stripe unit, whether it was merged into an already allocated
101ed0811d2SIlya Dryomov  * object extent or started a new object extent.
102ed0811d2SIlya Dryomov  *
103ed0811d2SIlya Dryomov  * Newly allocated object extents are added to @object_extents.
104ed0811d2SIlya Dryomov  * To keep @object_extents sorted, successive calls to this function
105ed0811d2SIlya Dryomov  * must map successive file extents (i.e. the list of file extents that
106ed0811d2SIlya Dryomov  * are mapped using the same @object_extents must be sorted).
107ed0811d2SIlya Dryomov  *
108ed0811d2SIlya Dryomov  * The caller is responsible for @object_extents.
109ed0811d2SIlya Dryomov  */
ceph_file_to_extents(struct ceph_file_layout * l,u64 off,u64 len,struct list_head * object_extents,struct ceph_object_extent * alloc_fn (void * arg),void * alloc_arg,ceph_object_extent_fn_t action_fn,void * action_arg)110ed0811d2SIlya Dryomov int ceph_file_to_extents(struct ceph_file_layout *l, u64 off, u64 len,
111ed0811d2SIlya Dryomov 			 struct list_head *object_extents,
112ed0811d2SIlya Dryomov 			 struct ceph_object_extent *alloc_fn(void *arg),
113ed0811d2SIlya Dryomov 			 void *alloc_arg,
114ed0811d2SIlya Dryomov 			 ceph_object_extent_fn_t action_fn,
115ed0811d2SIlya Dryomov 			 void *action_arg)
116ed0811d2SIlya Dryomov {
117ed0811d2SIlya Dryomov 	struct ceph_object_extent *last_ex, *ex;
118ed0811d2SIlya Dryomov 
119ed0811d2SIlya Dryomov 	while (len) {
120ed0811d2SIlya Dryomov 		struct list_head *add_pos = NULL;
121ed0811d2SIlya Dryomov 		u64 objno, objoff;
122ed0811d2SIlya Dryomov 		u32 xlen;
123ed0811d2SIlya Dryomov 
124ed0811d2SIlya Dryomov 		ceph_calc_file_object_mapping(l, off, len, &objno, &objoff,
125ed0811d2SIlya Dryomov 					      &xlen);
126ed0811d2SIlya Dryomov 
127ed0811d2SIlya Dryomov 		last_ex = lookup_last(object_extents, objno, &add_pos);
128ed0811d2SIlya Dryomov 		if (!last_ex || last_ex->oe_off + last_ex->oe_len != objoff) {
129ed0811d2SIlya Dryomov 			ex = alloc_fn(alloc_arg);
130ed0811d2SIlya Dryomov 			if (!ex)
131ed0811d2SIlya Dryomov 				return -ENOMEM;
132ed0811d2SIlya Dryomov 
133ed0811d2SIlya Dryomov 			ex->oe_objno = objno;
134ed0811d2SIlya Dryomov 			ex->oe_off = objoff;
135ed0811d2SIlya Dryomov 			ex->oe_len = xlen;
136ed0811d2SIlya Dryomov 			if (action_fn)
137ed0811d2SIlya Dryomov 				action_fn(ex, xlen, action_arg);
138ed0811d2SIlya Dryomov 
139ed0811d2SIlya Dryomov 			if (!last_ex)
140ed0811d2SIlya Dryomov 				list_add(&ex->oe_item, add_pos);
141ed0811d2SIlya Dryomov 			else
142ed0811d2SIlya Dryomov 				list_add(&ex->oe_item, &last_ex->oe_item);
143ed0811d2SIlya Dryomov 		} else {
144ed0811d2SIlya Dryomov 			last_ex->oe_len += xlen;
145ed0811d2SIlya Dryomov 			if (action_fn)
146ed0811d2SIlya Dryomov 				action_fn(last_ex, xlen, action_arg);
147ed0811d2SIlya Dryomov 		}
148ed0811d2SIlya Dryomov 
149ed0811d2SIlya Dryomov 		off += xlen;
150ed0811d2SIlya Dryomov 		len -= xlen;
151ed0811d2SIlya Dryomov 	}
152ed0811d2SIlya Dryomov 
153ed0811d2SIlya Dryomov 	for (last_ex = list_first_entry(object_extents, typeof(*ex), oe_item),
154ed0811d2SIlya Dryomov 	     ex = list_next_entry(last_ex, oe_item);
155ed0811d2SIlya Dryomov 	     &ex->oe_item != object_extents;
156ed0811d2SIlya Dryomov 	     last_ex = ex, ex = list_next_entry(ex, oe_item)) {
157ed0811d2SIlya Dryomov 		if (last_ex->oe_objno > ex->oe_objno ||
158ed0811d2SIlya Dryomov 		    (last_ex->oe_objno == ex->oe_objno &&
159ed0811d2SIlya Dryomov 		     last_ex->oe_off + last_ex->oe_len >= ex->oe_off)) {
160ed0811d2SIlya Dryomov 			WARN(1, "%s: object_extents list not sorted!\n",
161ed0811d2SIlya Dryomov 			     __func__);
162ed0811d2SIlya Dryomov 			return -EINVAL;
163ed0811d2SIlya Dryomov 		}
164ed0811d2SIlya Dryomov 	}
165ed0811d2SIlya Dryomov 
166ed0811d2SIlya Dryomov 	return 0;
167ed0811d2SIlya Dryomov }
168ed0811d2SIlya Dryomov EXPORT_SYMBOL(ceph_file_to_extents);
169ed0811d2SIlya Dryomov 
170ed0811d2SIlya Dryomov /*
171ed0811d2SIlya Dryomov  * A stripped down, non-allocating version of ceph_file_to_extents(),
172ed0811d2SIlya Dryomov  * for when @object_extents is already populated.
173ed0811d2SIlya Dryomov  */
ceph_iterate_extents(struct ceph_file_layout * l,u64 off,u64 len,struct list_head * object_extents,ceph_object_extent_fn_t action_fn,void * action_arg)174ed0811d2SIlya Dryomov int ceph_iterate_extents(struct ceph_file_layout *l, u64 off, u64 len,
175ed0811d2SIlya Dryomov 			 struct list_head *object_extents,
176ed0811d2SIlya Dryomov 			 ceph_object_extent_fn_t action_fn,
177ed0811d2SIlya Dryomov 			 void *action_arg)
178ed0811d2SIlya Dryomov {
179ed0811d2SIlya Dryomov 	while (len) {
180ed0811d2SIlya Dryomov 		struct ceph_object_extent *ex;
181ed0811d2SIlya Dryomov 		u64 objno, objoff;
182ed0811d2SIlya Dryomov 		u32 xlen;
183ed0811d2SIlya Dryomov 
184ed0811d2SIlya Dryomov 		ceph_calc_file_object_mapping(l, off, len, &objno, &objoff,
185ed0811d2SIlya Dryomov 					      &xlen);
186ed0811d2SIlya Dryomov 
187ed0811d2SIlya Dryomov 		ex = lookup_containing(object_extents, objno, objoff, xlen);
188ed0811d2SIlya Dryomov 		if (!ex) {
189ed0811d2SIlya Dryomov 			WARN(1, "%s: objno %llu %llu~%u not found!\n",
190ed0811d2SIlya Dryomov 			     __func__, objno, objoff, xlen);
191ed0811d2SIlya Dryomov 			return -EINVAL;
192ed0811d2SIlya Dryomov 		}
193ed0811d2SIlya Dryomov 
194ed0811d2SIlya Dryomov 		action_fn(ex, xlen, action_arg);
195ed0811d2SIlya Dryomov 
196ed0811d2SIlya Dryomov 		off += xlen;
197ed0811d2SIlya Dryomov 		len -= xlen;
198ed0811d2SIlya Dryomov 	}
199ed0811d2SIlya Dryomov 
200ed0811d2SIlya Dryomov 	return 0;
201ed0811d2SIlya Dryomov }
202ed0811d2SIlya Dryomov EXPORT_SYMBOL(ceph_iterate_extents);
203ed0811d2SIlya Dryomov 
204ed0811d2SIlya Dryomov /*
205ed0811d2SIlya Dryomov  * Reverse map an object extent to a sorted list of file extents.
206ed0811d2SIlya Dryomov  *
207ed0811d2SIlya Dryomov  * On success, the caller is responsible for:
208ed0811d2SIlya Dryomov  *
209ed0811d2SIlya Dryomov  *     kfree(file_extents)
210ed0811d2SIlya Dryomov  */
ceph_extent_to_file(struct ceph_file_layout * l,u64 objno,u64 objoff,u64 objlen,struct ceph_file_extent ** file_extents,u32 * num_file_extents)211ed0811d2SIlya Dryomov int ceph_extent_to_file(struct ceph_file_layout *l,
212ed0811d2SIlya Dryomov 			u64 objno, u64 objoff, u64 objlen,
213ed0811d2SIlya Dryomov 			struct ceph_file_extent **file_extents,
214ed0811d2SIlya Dryomov 			u32 *num_file_extents)
215ed0811d2SIlya Dryomov {
216ed0811d2SIlya Dryomov 	u32 stripes_per_object = l->object_size / l->stripe_unit;
217ed0811d2SIlya Dryomov 	u64 blockno;	/* which su */
218ed0811d2SIlya Dryomov 	u32 blockoff;	/* offset into su */
219ed0811d2SIlya Dryomov 	u64 stripeno;	/* which stripe */
220ed0811d2SIlya Dryomov 	u32 stripepos;	/* which su in the stripe,
221ed0811d2SIlya Dryomov 			   which object in the object set */
222ed0811d2SIlya Dryomov 	u64 objsetno;	/* which object set */
223ed0811d2SIlya Dryomov 	u32 i = 0;
224ed0811d2SIlya Dryomov 
225ed0811d2SIlya Dryomov 	if (!objlen) {
226ed0811d2SIlya Dryomov 		*file_extents = NULL;
227ed0811d2SIlya Dryomov 		*num_file_extents = 0;
228ed0811d2SIlya Dryomov 		return 0;
229ed0811d2SIlya Dryomov 	}
230ed0811d2SIlya Dryomov 
231ed0811d2SIlya Dryomov 	*num_file_extents = DIV_ROUND_UP_ULL(objoff + objlen, l->stripe_unit) -
232ed0811d2SIlya Dryomov 				     DIV_ROUND_DOWN_ULL(objoff, l->stripe_unit);
233ed0811d2SIlya Dryomov 	*file_extents = kmalloc_array(*num_file_extents, sizeof(**file_extents),
234ed0811d2SIlya Dryomov 				      GFP_NOIO);
235ed0811d2SIlya Dryomov 	if (!*file_extents)
236ed0811d2SIlya Dryomov 		return -ENOMEM;
237ed0811d2SIlya Dryomov 
238ed0811d2SIlya Dryomov 	div_u64_rem(objoff, l->stripe_unit, &blockoff);
239ed0811d2SIlya Dryomov 	while (objlen) {
240ed0811d2SIlya Dryomov 		u64 off, len;
241ed0811d2SIlya Dryomov 
242ed0811d2SIlya Dryomov 		objsetno = div_u64_rem(objno, l->stripe_count, &stripepos);
243ed0811d2SIlya Dryomov 		stripeno = div_u64(objoff, l->stripe_unit) +
244ed0811d2SIlya Dryomov 						objsetno * stripes_per_object;
245ed0811d2SIlya Dryomov 		blockno = stripeno * l->stripe_count + stripepos;
246ed0811d2SIlya Dryomov 		off = blockno * l->stripe_unit + blockoff;
247ed0811d2SIlya Dryomov 		len = min_t(u64, objlen, l->stripe_unit - blockoff);
248ed0811d2SIlya Dryomov 
249ed0811d2SIlya Dryomov 		(*file_extents)[i].fe_off = off;
250ed0811d2SIlya Dryomov 		(*file_extents)[i].fe_len = len;
251ed0811d2SIlya Dryomov 
252ed0811d2SIlya Dryomov 		blockoff = 0;
253ed0811d2SIlya Dryomov 		objoff += len;
254ed0811d2SIlya Dryomov 		objlen -= len;
255ed0811d2SIlya Dryomov 		i++;
256ed0811d2SIlya Dryomov 	}
257ed0811d2SIlya Dryomov 
258ed0811d2SIlya Dryomov 	BUG_ON(i != *num_file_extents);
259ed0811d2SIlya Dryomov 	return 0;
260ed0811d2SIlya Dryomov }
261ed0811d2SIlya Dryomov EXPORT_SYMBOL(ceph_extent_to_file);
262*22e8bd51SIlya Dryomov 
ceph_get_num_objects(struct ceph_file_layout * l,u64 size)263*22e8bd51SIlya Dryomov u64 ceph_get_num_objects(struct ceph_file_layout *l, u64 size)
264*22e8bd51SIlya Dryomov {
265*22e8bd51SIlya Dryomov 	u64 period = (u64)l->stripe_count * l->object_size;
266*22e8bd51SIlya Dryomov 	u64 num_periods = DIV64_U64_ROUND_UP(size, period);
267*22e8bd51SIlya Dryomov 	u64 remainder_bytes;
268*22e8bd51SIlya Dryomov 	u64 remainder_objs = 0;
269*22e8bd51SIlya Dryomov 
270*22e8bd51SIlya Dryomov 	div64_u64_rem(size, period, &remainder_bytes);
271*22e8bd51SIlya Dryomov 	if (remainder_bytes > 0 &&
272*22e8bd51SIlya Dryomov 	    remainder_bytes < (u64)l->stripe_count * l->stripe_unit)
273*22e8bd51SIlya Dryomov 		remainder_objs = l->stripe_count -
274*22e8bd51SIlya Dryomov 			    DIV_ROUND_UP_ULL(remainder_bytes, l->stripe_unit);
275*22e8bd51SIlya Dryomov 
276*22e8bd51SIlya Dryomov 	return num_periods * l->stripe_count - remainder_objs;
277*22e8bd51SIlya Dryomov }
278*22e8bd51SIlya Dryomov EXPORT_SYMBOL(ceph_get_num_objects);
279