1 /*
2  * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
3  * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
4  *
5  * This software is available to you under a choice of one of two
6  * licenses.  You may choose to be licensed under the terms of the GNU
7  * General Public License (GPL) Version 2, available from the file
8  * COPYING in the main directory of this source tree, or the
9  * OpenIB.org BSD license below:
10  *
11  *	   Redistribution and use in source and binary forms, with or
12  *	   without modification, are permitted provided that the following
13  *	   conditions are met:
14  *
15  *		- Redistributions of source code must retain the above
16  *		  copyright notice, this list of conditions and the following
17  *		  disclaimer.
18  *
19  *		- Redistributions in binary form must reproduce the above
20  *		  copyright notice, this list of conditions and the following
21  *		  disclaimer in the documentation and/or other materials
22  *		  provided with the distribution.
23  *
24  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31  * SOFTWARE.
32  */
33 
34 #include "rxe.h"
35 #include "rxe_loc.h"
36 
/* info about object pools
 * note that MR and MW share a single index space
 * so that one can map an lkey to the correct type of object
 */
41 struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = {
42 	[RXE_TYPE_UC] = {
43 		.name		= "rxe-uc",
44 		.size		= sizeof(struct rxe_ucontext),
		.flags		= RXE_POOL_NO_ALLOC,
46 	},
47 	[RXE_TYPE_PD] = {
48 		.name		= "rxe-pd",
49 		.size		= sizeof(struct rxe_pd),
50 		.flags		= RXE_POOL_NO_ALLOC,
51 	},
52 	[RXE_TYPE_AH] = {
53 		.name		= "rxe-ah",
54 		.size		= sizeof(struct rxe_ah),
55 		.flags		= RXE_POOL_ATOMIC,
56 	},
57 	[RXE_TYPE_SRQ] = {
58 		.name		= "rxe-srq",
59 		.size		= sizeof(struct rxe_srq),
60 		.flags		= RXE_POOL_INDEX,
61 		.min_index	= RXE_MIN_SRQ_INDEX,
62 		.max_index	= RXE_MAX_SRQ_INDEX,
63 	},
64 	[RXE_TYPE_QP] = {
65 		.name		= "rxe-qp",
66 		.size		= sizeof(struct rxe_qp),
67 		.cleanup	= rxe_qp_cleanup,
68 		.flags		= RXE_POOL_INDEX,
69 		.min_index	= RXE_MIN_QP_INDEX,
70 		.max_index	= RXE_MAX_QP_INDEX,
71 	},
72 	[RXE_TYPE_CQ] = {
73 		.name		= "rxe-cq",
74 		.size		= sizeof(struct rxe_cq),
75 		.cleanup	= rxe_cq_cleanup,
76 	},
77 	[RXE_TYPE_MR] = {
78 		.name		= "rxe-mr",
79 		.size		= sizeof(struct rxe_mem),
80 		.cleanup	= rxe_mem_cleanup,
81 		.flags		= RXE_POOL_INDEX,
		.min_index	= RXE_MIN_MR_INDEX,
		.max_index	= RXE_MAX_MR_INDEX,
84 	},
85 	[RXE_TYPE_MW] = {
86 		.name		= "rxe-mw",
87 		.size		= sizeof(struct rxe_mem),
88 		.flags		= RXE_POOL_INDEX,
		.min_index	= RXE_MIN_MW_INDEX,
		.max_index	= RXE_MAX_MW_INDEX,
91 	},
92 	[RXE_TYPE_MC_GRP] = {
93 		.name		= "rxe-mc_grp",
94 		.size		= sizeof(struct rxe_mc_grp),
95 		.cleanup	= rxe_mc_cleanup,
96 		.flags		= RXE_POOL_KEY,
97 		.key_offset	= offsetof(struct rxe_mc_grp, mgid),
98 		.key_size	= sizeof(union ib_gid),
99 	},
100 	[RXE_TYPE_MC_ELEM] = {
101 		.name		= "rxe-mc_elem",
102 		.size		= sizeof(struct rxe_mc_elem),
103 		.flags		= RXE_POOL_ATOMIC,
104 	},
105 };
106 
107 static inline const char *pool_name(struct rxe_pool *pool)
108 {
109 	return rxe_type_info[pool->type].name;
110 }
111 
112 static inline struct kmem_cache *pool_cache(struct rxe_pool *pool)
113 {
114 	return rxe_type_info[pool->type].cache;
115 }
116 
/* destroy the kmem caches for the first cnt object types; used for
 * normal teardown and to unwind a partially completed rxe_cache_init()
 */
static void rxe_cache_clean(size_t cnt)
{
	size_t i;
	struct rxe_type_info *type;
121 
122 	for (i = 0; i < cnt; i++) {
123 		type = &rxe_type_info[i];
124 		if (!(type->flags & RXE_POOL_NO_ALLOC)) {
125 			kmem_cache_destroy(type->cache);
126 			type->cache = NULL;
127 		}
128 	}
129 }
130 
131 int rxe_cache_init(void)
132 {
133 	int err;
134 	int i;
135 	size_t size;
136 	struct rxe_type_info *type;
137 
138 	for (i = 0; i < RXE_NUM_TYPES; i++) {
139 		type = &rxe_type_info[i];
140 		size = ALIGN(type->size, RXE_POOL_ALIGN);
141 		if (!(type->flags & RXE_POOL_NO_ALLOC)) {
142 			type->cache =
143 				kmem_cache_create(type->name, size,
144 						  RXE_POOL_ALIGN,
145 						  RXE_POOL_CACHE_FLAGS, NULL);
146 			if (!type->cache) {
147 				pr_err("Unable to init kmem cache for %s\n",
148 				       type->name);
149 				err = -ENOMEM;
150 				goto err1;
151 			}
152 		}
153 	}
154 
155 	return 0;
156 
157 err1:
158 	rxe_cache_clean(i);
159 
160 	return err;
161 }
162 
163 void rxe_cache_exit(void)
164 {
165 	rxe_cache_clean(RXE_NUM_TYPES);
166 }
167 
168 static int rxe_pool_init_index(struct rxe_pool *pool, u32 max, u32 min)
169 {
170 	int err = 0;
171 	size_t size;
172 
173 	if ((max - min + 1) < pool->max_elem) {
174 		pr_warn("not enough indices for max_elem\n");
175 		err = -EINVAL;
176 		goto out;
177 	}
178 
179 	pool->max_index = max;
180 	pool->min_index = min;
181 
182 	size = BITS_TO_LONGS(max - min + 1) * sizeof(long);
183 	pool->table = kmalloc(size, GFP_KERNEL);
184 	if (!pool->table) {
185 		err = -ENOMEM;
186 		goto out;
187 	}
188 
189 	pool->table_size = size;
190 	bitmap_zero(pool->table, max - min + 1);
191 
192 out:
193 	return err;
194 }
195 
196 int rxe_pool_init(
197 	struct rxe_dev		*rxe,
198 	struct rxe_pool		*pool,
199 	enum rxe_elem_type	type,
200 	unsigned int		max_elem)
201 {
202 	int			err = 0;
203 	size_t			size = rxe_type_info[type].size;
204 
205 	memset(pool, 0, sizeof(*pool));
206 
207 	pool->rxe		= rxe;
208 	pool->type		= type;
209 	pool->max_elem		= max_elem;
210 	pool->elem_size		= ALIGN(size, RXE_POOL_ALIGN);
211 	pool->flags		= rxe_type_info[type].flags;
212 	pool->tree		= RB_ROOT;
213 	pool->cleanup		= rxe_type_info[type].cleanup;
214 
215 	atomic_set(&pool->num_elem, 0);
216 
217 	kref_init(&pool->ref_cnt);
218 
219 	rwlock_init(&pool->pool_lock);
220 
221 	if (rxe_type_info[type].flags & RXE_POOL_INDEX) {
222 		err = rxe_pool_init_index(pool,
223 					  rxe_type_info[type].max_index,
224 					  rxe_type_info[type].min_index);
225 		if (err)
226 			goto out;
227 	}
228 
229 	if (rxe_type_info[type].flags & RXE_POOL_KEY) {
230 		pool->key_offset = rxe_type_info[type].key_offset;
231 		pool->key_size = rxe_type_info[type].key_size;
232 	}
233 
234 	pool->state = RXE_POOL_STATE_VALID;
235 
236 out:
237 	return err;
238 }
239 
240 static void rxe_pool_release(struct kref *kref)
241 {
242 	struct rxe_pool *pool = container_of(kref, struct rxe_pool, ref_cnt);
243 
244 	pool->state = RXE_POOL_STATE_INVALID;
245 	kfree(pool->table);
246 }
247 
248 static void rxe_pool_put(struct rxe_pool *pool)
249 {
250 	kref_put(&pool->ref_cnt, rxe_pool_release);
251 }
252 
253 void rxe_pool_cleanup(struct rxe_pool *pool)
254 {
255 	unsigned long flags;
256 
257 	write_lock_irqsave(&pool->pool_lock, flags);
258 	pool->state = RXE_POOL_STATE_INVALID;
259 	if (atomic_read(&pool->num_elem) > 0)
260 		pr_warn("%s pool destroyed with unfree'd elem\n",
261 			pool_name(pool));
262 	write_unlock_irqrestore(&pool->pool_lock, flags);
263 
264 	rxe_pool_put(pool);
265 }
266 
/* next-fit search for a free index: resume from the last allocated
 * position and wrap to the beginning once; caller must hold
 * pool->pool_lock for writing
 */
static u32 alloc_index(struct rxe_pool *pool)
268 {
269 	u32 index;
270 	u32 range = pool->max_index - pool->min_index + 1;
271 
272 	index = find_next_zero_bit(pool->table, range, pool->last);
273 	if (index >= range)
274 		index = find_first_zero_bit(pool->table, range);
275 
276 	WARN_ON_ONCE(index >= range);
277 	set_bit(index, pool->table);
278 	pool->last = index;
279 	return index + pool->min_index;
280 }
281 
/* insert an indexed object into the pool's red-black tree; a duplicate
 * index is rejected with a warning; caller must hold pool->pool_lock
 * for writing
 */
static void insert_index(struct rxe_pool *pool, struct rxe_pool_entry *new)
283 {
284 	struct rb_node **link = &pool->tree.rb_node;
285 	struct rb_node *parent = NULL;
286 	struct rxe_pool_entry *elem;
287 
288 	while (*link) {
289 		parent = *link;
290 		elem = rb_entry(parent, struct rxe_pool_entry, node);
291 
292 		if (elem->index == new->index) {
293 			pr_warn("element already exists!\n");
294 			goto out;
295 		}
296 
297 		if (elem->index > new->index)
298 			link = &(*link)->rb_left;
299 		else
300 			link = &(*link)->rb_right;
301 	}
302 
303 	rb_link_node(&new->node, parent, link);
304 	rb_insert_color(&new->node, &pool->tree);
305 out:
306 	return;
307 }
308 
/* insert a keyed object into the pool's red-black tree, ordered by
 * memcmp() on the key; a duplicate key is rejected with a warning;
 * caller must hold pool->pool_lock for writing
 */
static void insert_key(struct rxe_pool *pool, struct rxe_pool_entry *new)
310 {
311 	struct rb_node **link = &pool->tree.rb_node;
312 	struct rb_node *parent = NULL;
313 	struct rxe_pool_entry *elem;
314 	int cmp;
315 
316 	while (*link) {
317 		parent = *link;
318 		elem = rb_entry(parent, struct rxe_pool_entry, node);
319 
320 		cmp = memcmp((u8 *)elem + pool->key_offset,
321 			     (u8 *)new + pool->key_offset, pool->key_size);
322 
323 		if (cmp == 0) {
324 			pr_warn("key already exists!\n");
325 			goto out;
326 		}
327 
328 		if (cmp > 0)
329 			link = &(*link)->rb_left;
330 		else
331 			link = &(*link)->rb_right;
332 	}
333 
334 	rb_link_node(&new->node, parent, link);
335 	rb_insert_color(&new->node, &pool->tree);
336 out:
337 	return;
338 }
339 
/* unlike the insert_* helpers above, the rxe_add_*() and rxe_drop_*()
 * wrappers below take pool->pool_lock themselves
 */
void rxe_add_key(void *arg, void *key)
341 {
342 	struct rxe_pool_entry *elem = arg;
343 	struct rxe_pool *pool = elem->pool;
344 	unsigned long flags;
345 
346 	write_lock_irqsave(&pool->pool_lock, flags);
347 	memcpy((u8 *)elem + pool->key_offset, key, pool->key_size);
348 	insert_key(pool, elem);
349 	write_unlock_irqrestore(&pool->pool_lock, flags);
350 }
351 
352 void rxe_drop_key(void *arg)
353 {
354 	struct rxe_pool_entry *elem = arg;
355 	struct rxe_pool *pool = elem->pool;
356 	unsigned long flags;
357 
358 	write_lock_irqsave(&pool->pool_lock, flags);
359 	rb_erase(&elem->node, &pool->tree);
360 	write_unlock_irqrestore(&pool->pool_lock, flags);
361 }
362 
363 void rxe_add_index(void *arg)
364 {
365 	struct rxe_pool_entry *elem = arg;
366 	struct rxe_pool *pool = elem->pool;
367 	unsigned long flags;
368 
369 	write_lock_irqsave(&pool->pool_lock, flags);
370 	elem->index = alloc_index(pool);
371 	insert_index(pool, elem);
372 	write_unlock_irqrestore(&pool->pool_lock, flags);
373 }
374 
375 void rxe_drop_index(void *arg)
376 {
377 	struct rxe_pool_entry *elem = arg;
378 	struct rxe_pool *pool = elem->pool;
379 	unsigned long flags;
380 
381 	write_lock_irqsave(&pool->pool_lock, flags);
382 	clear_bit(elem->index - pool->min_index, pool->table);
383 	rb_erase(&elem->node, &pool->tree);
384 	write_unlock_irqrestore(&pool->pool_lock, flags);
385 }
386 
/* allocate a new, zeroed object from the pool's kmem cache, taking a
 * reference on the pool and its ib_device; returns NULL on failure
 */
void *rxe_alloc(struct rxe_pool *pool)
388 {
389 	struct rxe_pool_entry *elem;
390 	unsigned long flags;
391 
392 	might_sleep_if(!(pool->flags & RXE_POOL_ATOMIC));
393 
394 	read_lock_irqsave(&pool->pool_lock, flags);
395 	if (pool->state != RXE_POOL_STATE_VALID) {
396 		read_unlock_irqrestore(&pool->pool_lock, flags);
397 		return NULL;
398 	}
399 	kref_get(&pool->ref_cnt);
400 	read_unlock_irqrestore(&pool->pool_lock, flags);
401 
402 	if (!ib_device_try_get(&pool->rxe->ib_dev))
403 		goto out_put_pool;
404 
405 	if (atomic_inc_return(&pool->num_elem) > pool->max_elem)
406 		goto out_cnt;
407 
408 	elem = kmem_cache_zalloc(pool_cache(pool),
409 				 (pool->flags & RXE_POOL_ATOMIC) ?
410 				 GFP_ATOMIC : GFP_KERNEL);
411 	if (!elem)
412 		goto out_cnt;
413 
414 	elem->pool = pool;
415 	kref_init(&elem->ref_cnt);
416 
417 	return elem;
418 
419 out_cnt:
420 	atomic_dec(&pool->num_elem);
421 	ib_device_put(&pool->rxe->ib_dev);
422 out_put_pool:
423 	rxe_pool_put(pool);
424 	return NULL;
425 }
426 
/* add an externally allocated object to the pool; used for
 * RXE_POOL_NO_ALLOC types whose memory is owned by the containing
 * ib core object rather than by the pool's kmem cache
 */
int rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_entry *elem)
428 {
429 	unsigned long flags;
430 
431 	might_sleep_if(!(pool->flags & RXE_POOL_ATOMIC));
432 
433 	read_lock_irqsave(&pool->pool_lock, flags);
434 	if (pool->state != RXE_POOL_STATE_VALID) {
435 		read_unlock_irqrestore(&pool->pool_lock, flags);
436 		return -EINVAL;
437 	}
438 	kref_get(&pool->ref_cnt);
439 	read_unlock_irqrestore(&pool->pool_lock, flags);
440 
441 	if (!ib_device_try_get(&pool->rxe->ib_dev))
442 		goto out_put_pool;
443 
444 	if (atomic_inc_return(&pool->num_elem) > pool->max_elem)
445 		goto out_cnt;
446 
447 	elem->pool = pool;
448 	kref_init(&elem->ref_cnt);
449 
450 	return 0;
451 
452 out_cnt:
453 	atomic_dec(&pool->num_elem);
454 	ib_device_put(&pool->rxe->ib_dev);
455 out_put_pool:
456 	rxe_pool_put(pool);
457 	return -EINVAL;
458 }
459 
/* kref release callback: run the type's cleanup hook, free the memory
 * if the pool owns it, and drop the pool and ib_device references
 */
void rxe_elem_release(struct kref *kref)
461 {
462 	struct rxe_pool_entry *elem =
463 		container_of(kref, struct rxe_pool_entry, ref_cnt);
464 	struct rxe_pool *pool = elem->pool;
465 
466 	if (pool->cleanup)
467 		pool->cleanup(elem);
468 
469 	if (!(pool->flags & RXE_POOL_NO_ALLOC))
470 		kmem_cache_free(pool_cache(pool), elem);
471 	atomic_dec(&pool->num_elem);
472 	ib_device_put(&pool->rxe->ib_dev);
473 	rxe_pool_put(pool);
474 }
475 
/* look up an object by index; if found it is returned with a
 * reference held, which the caller must drop when done
 */
void *rxe_pool_get_index(struct rxe_pool *pool, u32 index)
477 {
478 	struct rb_node *node = NULL;
479 	struct rxe_pool_entry *elem = NULL;
480 	unsigned long flags;
481 
482 	read_lock_irqsave(&pool->pool_lock, flags);
483 
484 	if (pool->state != RXE_POOL_STATE_VALID)
485 		goto out;
486 
487 	node = pool->tree.rb_node;
488 
489 	while (node) {
490 		elem = rb_entry(node, struct rxe_pool_entry, node);
491 
492 		if (elem->index > index)
493 			node = node->rb_left;
494 		else if (elem->index < index)
495 			node = node->rb_right;
496 		else {
497 			kref_get(&elem->ref_cnt);
498 			break;
499 		}
500 	}
501 
502 out:
503 	read_unlock_irqrestore(&pool->pool_lock, flags);
504 	return node ? elem : NULL;
505 }
506 
/* look up an object by key; if found it is returned with a
 * reference held, which the caller must drop when done
 */
void *rxe_pool_get_key(struct rxe_pool *pool, void *key)
508 {
509 	struct rb_node *node = NULL;
510 	struct rxe_pool_entry *elem = NULL;
511 	int cmp;
512 	unsigned long flags;
513 
514 	read_lock_irqsave(&pool->pool_lock, flags);
515 
516 	if (pool->state != RXE_POOL_STATE_VALID)
517 		goto out;
518 
519 	node = pool->tree.rb_node;
520 
521 	while (node) {
522 		elem = rb_entry(node, struct rxe_pool_entry, node);
523 
524 		cmp = memcmp((u8 *)elem + pool->key_offset,
525 			     key, pool->key_size);
526 
527 		if (cmp > 0)
528 			node = node->rb_left;
529 		else if (cmp < 0)
530 			node = node->rb_right;
531 		else
532 			break;
533 	}
534 
535 	if (node)
536 		kref_get(&elem->ref_cnt);
537 
538 out:
539 	read_unlock_irqrestore(&pool->pool_lock, flags);
540 	return node ? elem : NULL;
541 }
542