1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /*
3  * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
4  * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
5  */
6 
7 #include "rxe.h"
8 
/* Upper bound, in jiffies, on how long __rxe_cleanup() waits for the
 * remaining references to a pool element to be dropped.
 */
#define RXE_POOL_TIMEOUT	(200)
/* Object sizes are rounded up to a multiple of this value when a pool
 * is initialized (see rxe_pool_init()).
 */
#define RXE_POOL_ALIGN		(16)
11 
12 static const struct rxe_type_info {
13 	const char *name;
14 	size_t size;
15 	size_t elem_offset;
16 	void (*cleanup)(struct rxe_pool_elem *elem);
17 	u32 min_index;
18 	u32 max_index;
19 	u32 max_elem;
20 } rxe_type_info[RXE_NUM_TYPES] = {
21 	[RXE_TYPE_UC] = {
22 		.name		= "uc",
23 		.size		= sizeof(struct rxe_ucontext),
24 		.elem_offset	= offsetof(struct rxe_ucontext, elem),
25 		.min_index	= 1,
26 		.max_index	= UINT_MAX,
27 		.max_elem	= UINT_MAX,
28 	},
29 	[RXE_TYPE_PD] = {
30 		.name		= "pd",
31 		.size		= sizeof(struct rxe_pd),
32 		.elem_offset	= offsetof(struct rxe_pd, elem),
33 		.min_index	= 1,
34 		.max_index	= UINT_MAX,
35 		.max_elem	= UINT_MAX,
36 	},
37 	[RXE_TYPE_AH] = {
38 		.name		= "ah",
39 		.size		= sizeof(struct rxe_ah),
40 		.elem_offset	= offsetof(struct rxe_ah, elem),
41 		.min_index	= RXE_MIN_AH_INDEX,
42 		.max_index	= RXE_MAX_AH_INDEX,
43 		.max_elem	= RXE_MAX_AH_INDEX - RXE_MIN_AH_INDEX + 1,
44 	},
45 	[RXE_TYPE_SRQ] = {
46 		.name		= "srq",
47 		.size		= sizeof(struct rxe_srq),
48 		.elem_offset	= offsetof(struct rxe_srq, elem),
49 		.cleanup	= rxe_srq_cleanup,
50 		.min_index	= RXE_MIN_SRQ_INDEX,
51 		.max_index	= RXE_MAX_SRQ_INDEX,
52 		.max_elem	= RXE_MAX_SRQ_INDEX - RXE_MIN_SRQ_INDEX + 1,
53 	},
54 	[RXE_TYPE_QP] = {
55 		.name		= "qp",
56 		.size		= sizeof(struct rxe_qp),
57 		.elem_offset	= offsetof(struct rxe_qp, elem),
58 		.cleanup	= rxe_qp_cleanup,
59 		.min_index	= RXE_MIN_QP_INDEX,
60 		.max_index	= RXE_MAX_QP_INDEX,
61 		.max_elem	= RXE_MAX_QP_INDEX - RXE_MIN_QP_INDEX + 1,
62 	},
63 	[RXE_TYPE_CQ] = {
64 		.name		= "cq",
65 		.size		= sizeof(struct rxe_cq),
66 		.elem_offset	= offsetof(struct rxe_cq, elem),
67 		.cleanup	= rxe_cq_cleanup,
68 		.min_index	= 1,
69 		.max_index	= UINT_MAX,
70 		.max_elem	= UINT_MAX,
71 	},
72 	[RXE_TYPE_MR] = {
73 		.name		= "mr",
74 		.size		= sizeof(struct rxe_mr),
75 		.elem_offset	= offsetof(struct rxe_mr, elem),
76 		.cleanup	= rxe_mr_cleanup,
77 		.min_index	= RXE_MIN_MR_INDEX,
78 		.max_index	= RXE_MAX_MR_INDEX,
79 		.max_elem	= RXE_MAX_MR_INDEX - RXE_MIN_MR_INDEX + 1,
80 	},
81 	[RXE_TYPE_MW] = {
82 		.name		= "mw",
83 		.size		= sizeof(struct rxe_mw),
84 		.elem_offset	= offsetof(struct rxe_mw, elem),
85 		.cleanup	= rxe_mw_cleanup,
86 		.min_index	= RXE_MIN_MW_INDEX,
87 		.max_index	= RXE_MAX_MW_INDEX,
88 		.max_elem	= RXE_MAX_MW_INDEX - RXE_MIN_MW_INDEX + 1,
89 	},
90 };
91 
92 void rxe_pool_init(struct rxe_dev *rxe, struct rxe_pool *pool,
93 		   enum rxe_elem_type type)
94 {
95 	const struct rxe_type_info *info = &rxe_type_info[type];
96 
97 	memset(pool, 0, sizeof(*pool));
98 
99 	pool->rxe		= rxe;
100 	pool->name		= info->name;
101 	pool->type		= type;
102 	pool->max_elem		= info->max_elem;
103 	pool->elem_size		= ALIGN(info->size, RXE_POOL_ALIGN);
104 	pool->elem_offset	= info->elem_offset;
105 	pool->cleanup		= info->cleanup;
106 
107 	atomic_set(&pool->num_elem, 0);
108 
109 	xa_init_flags(&pool->xa, XA_FLAGS_ALLOC);
110 	pool->limit.min = info->min_index;
111 	pool->limit.max = info->max_index;
112 }
113 
114 void rxe_pool_cleanup(struct rxe_pool *pool)
115 {
116 	WARN_ON(!xa_empty(&pool->xa));
117 }
118 
119 void *rxe_alloc(struct rxe_pool *pool)
120 {
121 	struct rxe_pool_elem *elem;
122 	void *obj;
123 	int err;
124 
125 	if (WARN_ON(!(pool->type == RXE_TYPE_MR)))
126 		return NULL;
127 
128 	if (atomic_inc_return(&pool->num_elem) > pool->max_elem)
129 		goto err_cnt;
130 
131 	obj = kzalloc(pool->elem_size, GFP_KERNEL);
132 	if (!obj)
133 		goto err_cnt;
134 
135 	elem = (struct rxe_pool_elem *)((u8 *)obj + pool->elem_offset);
136 
137 	elem->pool = pool;
138 	elem->obj = obj;
139 	kref_init(&elem->ref_cnt);
140 	init_completion(&elem->complete);
141 
142 	/* allocate index in array but leave pointer as NULL so it
143 	 * can't be looked up until rxe_finalize() is called
144 	 */
145 	err = xa_alloc_cyclic(&pool->xa, &elem->index, NULL, pool->limit,
146 			      &pool->next, GFP_KERNEL);
147 	if (err < 0)
148 		goto err_free;
149 
150 	return obj;
151 
152 err_free:
153 	kfree(obj);
154 err_cnt:
155 	atomic_dec(&pool->num_elem);
156 	return NULL;
157 }
158 
159 int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_elem *elem,
160 				bool sleepable)
161 {
162 	int err;
163 	gfp_t gfp_flags;
164 
165 	if (WARN_ON(pool->type == RXE_TYPE_MR))
166 		return -EINVAL;
167 
168 	if (atomic_inc_return(&pool->num_elem) > pool->max_elem)
169 		goto err_cnt;
170 
171 	elem->pool = pool;
172 	elem->obj = (u8 *)elem - pool->elem_offset;
173 	kref_init(&elem->ref_cnt);
174 	init_completion(&elem->complete);
175 
176 	/* AH objects are unique in that the create_ah verb
177 	 * can be called in atomic context. If the create_ah
178 	 * call is not sleepable use GFP_ATOMIC.
179 	 */
180 	gfp_flags = sleepable ? GFP_KERNEL : GFP_ATOMIC;
181 
182 	if (sleepable)
183 		might_sleep();
184 	err = xa_alloc_cyclic(&pool->xa, &elem->index, NULL, pool->limit,
185 			      &pool->next, gfp_flags);
186 	if (err < 0)
187 		goto err_cnt;
188 
189 	return 0;
190 
191 err_cnt:
192 	atomic_dec(&pool->num_elem);
193 	return -EINVAL;
194 }
195 
196 void *rxe_pool_get_index(struct rxe_pool *pool, u32 index)
197 {
198 	struct rxe_pool_elem *elem;
199 	struct xarray *xa = &pool->xa;
200 	void *obj;
201 
202 	rcu_read_lock();
203 	elem = xa_load(xa, index);
204 	if (elem && kref_get_unless_zero(&elem->ref_cnt))
205 		obj = elem->obj;
206 	else
207 		obj = NULL;
208 	rcu_read_unlock();
209 
210 	return obj;
211 }
212 
213 static void rxe_elem_release(struct kref *kref)
214 {
215 	struct rxe_pool_elem *elem = container_of(kref, typeof(*elem), ref_cnt);
216 
217 	complete(&elem->complete);
218 }
219 
220 int __rxe_cleanup(struct rxe_pool_elem *elem, bool sleepable)
221 {
222 	struct rxe_pool *pool = elem->pool;
223 	struct xarray *xa = &pool->xa;
224 	static int timeout = RXE_POOL_TIMEOUT;
225 	int ret, err = 0;
226 	void *xa_ret;
227 
228 	if (sleepable)
229 		might_sleep();
230 
231 	/* erase xarray entry to prevent looking up
232 	 * the pool elem from its index
233 	 */
234 	xa_ret = xa_erase(xa, elem->index);
235 	WARN_ON(xa_err(xa_ret));
236 
237 	/* if this is the last call to rxe_put complete the
238 	 * object. It is safe to touch obj->elem after this since
239 	 * it is freed below
240 	 */
241 	__rxe_put(elem);
242 
243 	/* wait until all references to the object have been
244 	 * dropped before final object specific cleanup and
245 	 * return to rdma-core
246 	 */
247 	if (sleepable) {
248 		if (!completion_done(&elem->complete) && timeout) {
249 			ret = wait_for_completion_timeout(&elem->complete,
250 					timeout);
251 
252 			/* Shouldn't happen. There are still references to
253 			 * the object but, rather than deadlock, free the
254 			 * object or pass back to rdma-core.
255 			 */
256 			if (WARN_ON(!ret))
257 				err = -EINVAL;
258 		}
259 	} else {
260 		unsigned long until = jiffies + timeout;
261 
262 		/* AH objects are unique in that the destroy_ah verb
263 		 * can be called in atomic context. This delay
264 		 * replaces the wait_for_completion call above
265 		 * when the destroy_ah call is not sleepable
266 		 */
267 		while (!completion_done(&elem->complete) &&
268 				time_before(jiffies, until))
269 			mdelay(1);
270 
271 		if (WARN_ON(!completion_done(&elem->complete)))
272 			err = -EINVAL;
273 	}
274 
275 	if (pool->cleanup)
276 		pool->cleanup(elem);
277 
278 	if (pool->type == RXE_TYPE_MR)
279 		kfree_rcu(elem->obj);
280 
281 	atomic_dec(&pool->num_elem);
282 
283 	return err;
284 }
285 
286 int __rxe_get(struct rxe_pool_elem *elem)
287 {
288 	return kref_get_unless_zero(&elem->ref_cnt);
289 }
290 
291 int __rxe_put(struct rxe_pool_elem *elem)
292 {
293 	return kref_put(&elem->ref_cnt, rxe_elem_release);
294 }
295 
296 void __rxe_finalize(struct rxe_pool_elem *elem)
297 {
298 	void *xa_ret;
299 
300 	xa_ret = xa_store(&elem->pool->xa, elem->index, elem, GFP_KERNEL);
301 	WARN_ON(xa_err(xa_ret));
302 }
303