1 /*
2  * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
3  * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
4  *
5  * This software is available to you under a choice of one of two
6  * licenses.  You may choose to be licensed under the terms of the GNU
7  * General Public License (GPL) Version 2, available from the file
8  * COPYING in the main directory of this source tree, or the
9  * OpenIB.org BSD license below:
10  *
11  *	   Redistribution and use in source and binary forms, with or
12  *	   without modification, are permitted provided that the following
13  *	   conditions are met:
14  *
15  *		- Redistributions of source code must retain the above
16  *		  copyright notice, this list of conditions and the following
17  *		  disclaimer.
18  *
19  *		- Redistributions in binary form must reproduce the above
20  *		  copyright notice, this list of conditions and the following
21  *		  disclaimer in the documentation and/or other materials
22  *		  provided with the distribution.
23  *
24  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31  * SOFTWARE.
32  */
33 
34 #include "rxe.h"
35 #include "rxe_loc.h"
36 
37 /* info about object pools
38  * note that mr and mw share a single index space
39  * so that one can map an lkey to the correct type of object
40  */
41 struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = {
42 	[RXE_TYPE_UC] = {
43 		.name		= "rxe-uc",
44 		.size		= sizeof(struct rxe_ucontext),
45 		.flags          = RXE_POOL_NO_ALLOC,
46 	},
47 	[RXE_TYPE_PD] = {
48 		.name		= "rxe-pd",
49 		.size		= sizeof(struct rxe_pd),
50 		.flags		= RXE_POOL_NO_ALLOC,
51 	},
52 	[RXE_TYPE_AH] = {
53 		.name		= "rxe-ah",
54 		.size		= sizeof(struct rxe_ah),
55 		.flags		= RXE_POOL_ATOMIC | RXE_POOL_NO_ALLOC,
56 	},
57 	[RXE_TYPE_SRQ] = {
58 		.name		= "rxe-srq",
59 		.size		= sizeof(struct rxe_srq),
60 		.flags		= RXE_POOL_INDEX | RXE_POOL_NO_ALLOC,
61 		.min_index	= RXE_MIN_SRQ_INDEX,
62 		.max_index	= RXE_MAX_SRQ_INDEX,
63 	},
64 	[RXE_TYPE_QP] = {
65 		.name		= "rxe-qp",
66 		.size		= sizeof(struct rxe_qp),
67 		.cleanup	= rxe_qp_cleanup,
68 		.flags		= RXE_POOL_INDEX,
69 		.min_index	= RXE_MIN_QP_INDEX,
70 		.max_index	= RXE_MAX_QP_INDEX,
71 	},
72 	[RXE_TYPE_CQ] = {
73 		.name		= "rxe-cq",
74 		.size		= sizeof(struct rxe_cq),
75 		.flags          = RXE_POOL_NO_ALLOC,
76 		.cleanup	= rxe_cq_cleanup,
77 	},
78 	[RXE_TYPE_MR] = {
79 		.name		= "rxe-mr",
80 		.size		= sizeof(struct rxe_mem),
81 		.cleanup	= rxe_mem_cleanup,
82 		.flags		= RXE_POOL_INDEX,
83 		.max_index	= RXE_MAX_MR_INDEX,
84 		.min_index	= RXE_MIN_MR_INDEX,
85 	},
86 	[RXE_TYPE_MW] = {
87 		.name		= "rxe-mw",
88 		.size		= sizeof(struct rxe_mem),
89 		.flags		= RXE_POOL_INDEX,
90 		.max_index	= RXE_MAX_MW_INDEX,
91 		.min_index	= RXE_MIN_MW_INDEX,
92 	},
93 	[RXE_TYPE_MC_GRP] = {
94 		.name		= "rxe-mc_grp",
95 		.size		= sizeof(struct rxe_mc_grp),
96 		.cleanup	= rxe_mc_cleanup,
97 		.flags		= RXE_POOL_KEY,
98 		.key_offset	= offsetof(struct rxe_mc_grp, mgid),
99 		.key_size	= sizeof(union ib_gid),
100 	},
101 	[RXE_TYPE_MC_ELEM] = {
102 		.name		= "rxe-mc_elem",
103 		.size		= sizeof(struct rxe_mc_elem),
104 		.flags		= RXE_POOL_ATOMIC,
105 	},
106 };
107 
108 static inline const char *pool_name(struct rxe_pool *pool)
109 {
110 	return rxe_type_info[pool->type].name;
111 }
112 
113 static inline struct kmem_cache *pool_cache(struct rxe_pool *pool)
114 {
115 	return rxe_type_info[pool->type].cache;
116 }
117 
118 static void rxe_cache_clean(size_t cnt)
119 {
120 	int i;
121 	struct rxe_type_info *type;
122 
123 	for (i = 0; i < cnt; i++) {
124 		type = &rxe_type_info[i];
125 		if (!(type->flags & RXE_POOL_NO_ALLOC)) {
126 			kmem_cache_destroy(type->cache);
127 			type->cache = NULL;
128 		}
129 	}
130 }
131 
132 int rxe_cache_init(void)
133 {
134 	int err;
135 	int i;
136 	size_t size;
137 	struct rxe_type_info *type;
138 
139 	for (i = 0; i < RXE_NUM_TYPES; i++) {
140 		type = &rxe_type_info[i];
141 		size = ALIGN(type->size, RXE_POOL_ALIGN);
142 		if (!(type->flags & RXE_POOL_NO_ALLOC)) {
143 			type->cache =
144 				kmem_cache_create(type->name, size,
145 						  RXE_POOL_ALIGN,
146 						  RXE_POOL_CACHE_FLAGS, NULL);
147 			if (!type->cache) {
148 				pr_err("Unable to init kmem cache for %s\n",
149 				       type->name);
150 				err = -ENOMEM;
151 				goto err1;
152 			}
153 		}
154 	}
155 
156 	return 0;
157 
158 err1:
159 	rxe_cache_clean(i);
160 
161 	return err;
162 }
163 
164 void rxe_cache_exit(void)
165 {
166 	rxe_cache_clean(RXE_NUM_TYPES);
167 }
168 
169 static int rxe_pool_init_index(struct rxe_pool *pool, u32 max, u32 min)
170 {
171 	int err = 0;
172 	size_t size;
173 
174 	if ((max - min + 1) < pool->max_elem) {
175 		pr_warn("not enough indices for max_elem\n");
176 		err = -EINVAL;
177 		goto out;
178 	}
179 
180 	pool->max_index = max;
181 	pool->min_index = min;
182 
183 	size = BITS_TO_LONGS(max - min + 1) * sizeof(long);
184 	pool->table = kmalloc(size, GFP_KERNEL);
185 	if (!pool->table) {
186 		err = -ENOMEM;
187 		goto out;
188 	}
189 
190 	pool->table_size = size;
191 	bitmap_zero(pool->table, max - min + 1);
192 
193 out:
194 	return err;
195 }
196 
197 int rxe_pool_init(
198 	struct rxe_dev		*rxe,
199 	struct rxe_pool		*pool,
200 	enum rxe_elem_type	type,
201 	unsigned int		max_elem)
202 {
203 	int			err = 0;
204 	size_t			size = rxe_type_info[type].size;
205 
206 	memset(pool, 0, sizeof(*pool));
207 
208 	pool->rxe		= rxe;
209 	pool->type		= type;
210 	pool->max_elem		= max_elem;
211 	pool->elem_size		= ALIGN(size, RXE_POOL_ALIGN);
212 	pool->flags		= rxe_type_info[type].flags;
213 	pool->tree		= RB_ROOT;
214 	pool->cleanup		= rxe_type_info[type].cleanup;
215 
216 	atomic_set(&pool->num_elem, 0);
217 
218 	kref_init(&pool->ref_cnt);
219 
220 	rwlock_init(&pool->pool_lock);
221 
222 	if (rxe_type_info[type].flags & RXE_POOL_INDEX) {
223 		err = rxe_pool_init_index(pool,
224 					  rxe_type_info[type].max_index,
225 					  rxe_type_info[type].min_index);
226 		if (err)
227 			goto out;
228 	}
229 
230 	if (rxe_type_info[type].flags & RXE_POOL_KEY) {
231 		pool->key_offset = rxe_type_info[type].key_offset;
232 		pool->key_size = rxe_type_info[type].key_size;
233 	}
234 
235 	pool->state = RXE_POOL_STATE_VALID;
236 
237 out:
238 	return err;
239 }
240 
241 static void rxe_pool_release(struct kref *kref)
242 {
243 	struct rxe_pool *pool = container_of(kref, struct rxe_pool, ref_cnt);
244 
245 	pool->state = RXE_POOL_STATE_INVALID;
246 	kfree(pool->table);
247 }
248 
249 static void rxe_pool_put(struct rxe_pool *pool)
250 {
251 	kref_put(&pool->ref_cnt, rxe_pool_release);
252 }
253 
254 void rxe_pool_cleanup(struct rxe_pool *pool)
255 {
256 	unsigned long flags;
257 
258 	write_lock_irqsave(&pool->pool_lock, flags);
259 	pool->state = RXE_POOL_STATE_INVALID;
260 	if (atomic_read(&pool->num_elem) > 0)
261 		pr_warn("%s pool destroyed with unfree'd elem\n",
262 			pool_name(pool));
263 	write_unlock_irqrestore(&pool->pool_lock, flags);
264 
265 	rxe_pool_put(pool);
266 }
267 
268 static u32 alloc_index(struct rxe_pool *pool)
269 {
270 	u32 index;
271 	u32 range = pool->max_index - pool->min_index + 1;
272 
273 	index = find_next_zero_bit(pool->table, range, pool->last);
274 	if (index >= range)
275 		index = find_first_zero_bit(pool->table, range);
276 
277 	WARN_ON_ONCE(index >= range);
278 	set_bit(index, pool->table);
279 	pool->last = index;
280 	return index + pool->min_index;
281 }
282 
283 static void insert_index(struct rxe_pool *pool, struct rxe_pool_entry *new)
284 {
285 	struct rb_node **link = &pool->tree.rb_node;
286 	struct rb_node *parent = NULL;
287 	struct rxe_pool_entry *elem;
288 
289 	while (*link) {
290 		parent = *link;
291 		elem = rb_entry(parent, struct rxe_pool_entry, node);
292 
293 		if (elem->index == new->index) {
294 			pr_warn("element already exists!\n");
295 			goto out;
296 		}
297 
298 		if (elem->index > new->index)
299 			link = &(*link)->rb_left;
300 		else
301 			link = &(*link)->rb_right;
302 	}
303 
304 	rb_link_node(&new->node, parent, link);
305 	rb_insert_color(&new->node, &pool->tree);
306 out:
307 	return;
308 }
309 
310 static void insert_key(struct rxe_pool *pool, struct rxe_pool_entry *new)
311 {
312 	struct rb_node **link = &pool->tree.rb_node;
313 	struct rb_node *parent = NULL;
314 	struct rxe_pool_entry *elem;
315 	int cmp;
316 
317 	while (*link) {
318 		parent = *link;
319 		elem = rb_entry(parent, struct rxe_pool_entry, node);
320 
321 		cmp = memcmp((u8 *)elem + pool->key_offset,
322 			     (u8 *)new + pool->key_offset, pool->key_size);
323 
324 		if (cmp == 0) {
325 			pr_warn("key already exists!\n");
326 			goto out;
327 		}
328 
329 		if (cmp > 0)
330 			link = &(*link)->rb_left;
331 		else
332 			link = &(*link)->rb_right;
333 	}
334 
335 	rb_link_node(&new->node, parent, link);
336 	rb_insert_color(&new->node, &pool->tree);
337 out:
338 	return;
339 }
340 
341 void rxe_add_key(void *arg, void *key)
342 {
343 	struct rxe_pool_entry *elem = arg;
344 	struct rxe_pool *pool = elem->pool;
345 	unsigned long flags;
346 
347 	write_lock_irqsave(&pool->pool_lock, flags);
348 	memcpy((u8 *)elem + pool->key_offset, key, pool->key_size);
349 	insert_key(pool, elem);
350 	write_unlock_irqrestore(&pool->pool_lock, flags);
351 }
352 
353 void rxe_drop_key(void *arg)
354 {
355 	struct rxe_pool_entry *elem = arg;
356 	struct rxe_pool *pool = elem->pool;
357 	unsigned long flags;
358 
359 	write_lock_irqsave(&pool->pool_lock, flags);
360 	rb_erase(&elem->node, &pool->tree);
361 	write_unlock_irqrestore(&pool->pool_lock, flags);
362 }
363 
364 void rxe_add_index(void *arg)
365 {
366 	struct rxe_pool_entry *elem = arg;
367 	struct rxe_pool *pool = elem->pool;
368 	unsigned long flags;
369 
370 	write_lock_irqsave(&pool->pool_lock, flags);
371 	elem->index = alloc_index(pool);
372 	insert_index(pool, elem);
373 	write_unlock_irqrestore(&pool->pool_lock, flags);
374 }
375 
376 void rxe_drop_index(void *arg)
377 {
378 	struct rxe_pool_entry *elem = arg;
379 	struct rxe_pool *pool = elem->pool;
380 	unsigned long flags;
381 
382 	write_lock_irqsave(&pool->pool_lock, flags);
383 	clear_bit(elem->index - pool->min_index, pool->table);
384 	rb_erase(&elem->node, &pool->tree);
385 	write_unlock_irqrestore(&pool->pool_lock, flags);
386 }
387 
388 void *rxe_alloc(struct rxe_pool *pool)
389 {
390 	struct rxe_pool_entry *elem;
391 	unsigned long flags;
392 
393 	might_sleep_if(!(pool->flags & RXE_POOL_ATOMIC));
394 
395 	read_lock_irqsave(&pool->pool_lock, flags);
396 	if (pool->state != RXE_POOL_STATE_VALID) {
397 		read_unlock_irqrestore(&pool->pool_lock, flags);
398 		return NULL;
399 	}
400 	kref_get(&pool->ref_cnt);
401 	read_unlock_irqrestore(&pool->pool_lock, flags);
402 
403 	if (!ib_device_try_get(&pool->rxe->ib_dev))
404 		goto out_put_pool;
405 
406 	if (atomic_inc_return(&pool->num_elem) > pool->max_elem)
407 		goto out_cnt;
408 
409 	elem = kmem_cache_zalloc(pool_cache(pool),
410 				 (pool->flags & RXE_POOL_ATOMIC) ?
411 				 GFP_ATOMIC : GFP_KERNEL);
412 	if (!elem)
413 		goto out_cnt;
414 
415 	elem->pool = pool;
416 	kref_init(&elem->ref_cnt);
417 
418 	return elem;
419 
420 out_cnt:
421 	atomic_dec(&pool->num_elem);
422 	ib_device_put(&pool->rxe->ib_dev);
423 out_put_pool:
424 	rxe_pool_put(pool);
425 	return NULL;
426 }
427 
428 int rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_entry *elem)
429 {
430 	unsigned long flags;
431 
432 	might_sleep_if(!(pool->flags & RXE_POOL_ATOMIC));
433 
434 	read_lock_irqsave(&pool->pool_lock, flags);
435 	if (pool->state != RXE_POOL_STATE_VALID) {
436 		read_unlock_irqrestore(&pool->pool_lock, flags);
437 		return -EINVAL;
438 	}
439 	kref_get(&pool->ref_cnt);
440 	read_unlock_irqrestore(&pool->pool_lock, flags);
441 
442 	if (!ib_device_try_get(&pool->rxe->ib_dev))
443 		goto out_put_pool;
444 
445 	if (atomic_inc_return(&pool->num_elem) > pool->max_elem)
446 		goto out_cnt;
447 
448 	elem->pool = pool;
449 	kref_init(&elem->ref_cnt);
450 
451 	return 0;
452 
453 out_cnt:
454 	atomic_dec(&pool->num_elem);
455 	ib_device_put(&pool->rxe->ib_dev);
456 out_put_pool:
457 	rxe_pool_put(pool);
458 	return -EINVAL;
459 }
460 
461 void rxe_elem_release(struct kref *kref)
462 {
463 	struct rxe_pool_entry *elem =
464 		container_of(kref, struct rxe_pool_entry, ref_cnt);
465 	struct rxe_pool *pool = elem->pool;
466 
467 	if (pool->cleanup)
468 		pool->cleanup(elem);
469 
470 	if (!(pool->flags & RXE_POOL_NO_ALLOC))
471 		kmem_cache_free(pool_cache(pool), elem);
472 	atomic_dec(&pool->num_elem);
473 	ib_device_put(&pool->rxe->ib_dev);
474 	rxe_pool_put(pool);
475 }
476 
477 void *rxe_pool_get_index(struct rxe_pool *pool, u32 index)
478 {
479 	struct rb_node *node = NULL;
480 	struct rxe_pool_entry *elem = NULL;
481 	unsigned long flags;
482 
483 	read_lock_irqsave(&pool->pool_lock, flags);
484 
485 	if (pool->state != RXE_POOL_STATE_VALID)
486 		goto out;
487 
488 	node = pool->tree.rb_node;
489 
490 	while (node) {
491 		elem = rb_entry(node, struct rxe_pool_entry, node);
492 
493 		if (elem->index > index)
494 			node = node->rb_left;
495 		else if (elem->index < index)
496 			node = node->rb_right;
497 		else {
498 			kref_get(&elem->ref_cnt);
499 			break;
500 		}
501 	}
502 
503 out:
504 	read_unlock_irqrestore(&pool->pool_lock, flags);
505 	return node ? elem : NULL;
506 }
507 
508 void *rxe_pool_get_key(struct rxe_pool *pool, void *key)
509 {
510 	struct rb_node *node = NULL;
511 	struct rxe_pool_entry *elem = NULL;
512 	int cmp;
513 	unsigned long flags;
514 
515 	read_lock_irqsave(&pool->pool_lock, flags);
516 
517 	if (pool->state != RXE_POOL_STATE_VALID)
518 		goto out;
519 
520 	node = pool->tree.rb_node;
521 
522 	while (node) {
523 		elem = rb_entry(node, struct rxe_pool_entry, node);
524 
525 		cmp = memcmp((u8 *)elem + pool->key_offset,
526 			     key, pool->key_size);
527 
528 		if (cmp > 0)
529 			node = node->rb_left;
530 		else if (cmp < 0)
531 			node = node->rb_right;
532 		else
533 			break;
534 	}
535 
536 	if (node)
537 		kref_get(&elem->ref_cnt);
538 
539 out:
540 	read_unlock_irqrestore(&pool->pool_lock, flags);
541 	return node ? elem : NULL;
542 }
543