// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */

#include "dr_types.h"

#define DR_ICM_MODIFY_HDR_ALIGN_BASE 64
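/* Hot memory (chunks freed by SW but not yet synced to HW) may accumulate
 * up to this threshold; once crossed, the pool syncs the HW steering and
 * reclaims the hot chunks back into their buddy allocators.
 */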
#define DR_ICM_SYNC_THRESHOLD_POOL (64 * 1024 * 1024)

struct mlx5dr_icm_pool {
	enum mlx5dr_icm_type icm_type;
	enum mlx5dr_icm_chunk_size max_log_chunk_sz;
	struct mlx5dr_domain *dmn;
	/* memory management */
	struct mutex mutex; /* protect the ICM pool and ICM buddy */
	struct list_head buddy_mem_list;
	u64 hot_memory_size;
};

struct mlx5dr_icm_dm {
	u32 obj_id;
	enum mlx5_sw_icm_type type;
	phys_addr_t addr;
	size_t length;
};

struct mlx5dr_icm_mr {
	u32 mkey;
	struct mlx5dr_icm_dm dm;
	struct mlx5dr_domain *dmn;
	size_t length;
	u64 icm_start_addr;
};

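/* Register the SW ICM block as an mkey. The key is handed out to chunks
 * (as chunk->rkey) for accessing the underlying device memory.
 */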
static int dr_icm_create_dm_mkey(struct mlx5_core_dev *mdev,
				 u32 pd, u64 length, u64 start_addr, int mode,
				 u32 *mkey)
{
	u32 inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
	u32 in[MLX5_ST_SZ_DW(create_mkey_in)] = {};
	void *mkc;

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);

	MLX5_SET(mkc, mkc, access_mode_1_0, mode);
	MLX5_SET(mkc, mkc, access_mode_4_2, (mode >> 2) & 0x7);
	MLX5_SET(mkc, mkc, lw, 1);
	MLX5_SET(mkc, mkc, lr, 1);
	if (mode == MLX5_MKC_ACCESS_MODE_SW_ICM) {
		MLX5_SET(mkc, mkc, rw, 1);
		MLX5_SET(mkc, mkc, rr, 1);
	}

	MLX5_SET64(mkc, mkc, len, length);
	MLX5_SET(mkc, mkc, pd, pd);
	MLX5_SET(mkc, mkc, qpn, 0xffffff);
	MLX5_SET64(mkc, mkc, start_addr, start_addr);

	return mlx5_core_create_mkey(mdev, mkey, in, inlen);
}

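/* Allocate one SW ICM block of the pool's maximum chunk size and register
 * it for access; each such block backs a single buddy allocator.
 */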
static struct mlx5dr_icm_mr *
dr_icm_pool_mr_create(struct mlx5dr_icm_pool *pool)
{
	struct mlx5_core_dev *mdev = pool->dmn->mdev;
	enum mlx5_sw_icm_type dm_type;
	struct mlx5dr_icm_mr *icm_mr;
	size_t log_align_base;
	int err;

	icm_mr = kvzalloc(sizeof(*icm_mr), GFP_KERNEL);
	if (!icm_mr)
		return NULL;

	icm_mr->dmn = pool->dmn;

	icm_mr->dm.length = mlx5dr_icm_pool_chunk_size_to_byte(pool->max_log_chunk_sz,
							       pool->icm_type);

	if (pool->icm_type == DR_ICM_TYPE_STE) {
		dm_type = MLX5_SW_ICM_TYPE_STEERING;
		log_align_base = ilog2(icm_mr->dm.length);
	} else {
		dm_type = MLX5_SW_ICM_TYPE_HEADER_MODIFY;
		/* Align base is 64B */
		log_align_base = ilog2(DR_ICM_MODIFY_HDR_ALIGN_BASE);
	}
	icm_mr->dm.type = dm_type;

	err = mlx5_dm_sw_icm_alloc(mdev, icm_mr->dm.type, icm_mr->dm.length,
				   log_align_base, 0, &icm_mr->dm.addr,
				   &icm_mr->dm.obj_id);
	if (err) {
		mlx5dr_err(pool->dmn, "Failed to allocate SW ICM memory, err (%d)\n", err);
		goto free_icm_mr;
	}

	/* Register device memory */
	err = dr_icm_create_dm_mkey(mdev, pool->dmn->pdn,
				    icm_mr->dm.length,
				    icm_mr->dm.addr,
				    MLX5_MKC_ACCESS_MODE_SW_ICM,
				    &icm_mr->mkey);
	if (err) {
		mlx5dr_err(pool->dmn, "Failed to create SW ICM MKEY, err (%d)\n", err);
		goto free_dm;
	}

	icm_mr->icm_start_addr = icm_mr->dm.addr;

	if (icm_mr->icm_start_addr & (BIT(log_align_base) - 1)) {
		mlx5dr_err(pool->dmn, "Failed to get Aligned ICM mem (asked: %zu)\n",
			   log_align_base);
		goto free_mkey;
	}

	return icm_mr;

free_mkey:
	mlx5_core_destroy_mkey(mdev, icm_mr->mkey);
free_dm:
	mlx5_dm_sw_icm_dealloc(mdev, icm_mr->dm.type, icm_mr->dm.length, 0,
			       icm_mr->dm.addr, icm_mr->dm.obj_id);
free_icm_mr:
	kvfree(icm_mr);
	return NULL;
}

static void dr_icm_pool_mr_destroy(struct mlx5dr_icm_mr *icm_mr)
{
	struct mlx5_core_dev *mdev = icm_mr->dmn->mdev;
	struct mlx5dr_icm_dm *dm = &icm_mr->dm;

	mlx5_core_destroy_mkey(mdev, icm_mr->mkey);
	mlx5_dm_sw_icm_dealloc(mdev, dm->type, dm->length, 0,
			       dm->addr, dm->obj_id);
	kvfree(icm_mr);
}

static int dr_icm_buddy_get_ste_size(struct mlx5dr_icm_buddy_mem *buddy)
{
	/* We support only one type of STE size, both for ConnectX-5 and later
	 * devices. Once the support for match STE which has a larger tag is
	 * added (32B instead of 16B), the STE size for devices later than
	 * ConnectX-5 needs to account for that.
	 */
	return DR_STE_SIZE_REDUCED;
}

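/* Point the chunk at its slice of the buddy's preallocated STE cache,
 * based on the chunk's byte offset within the buddy's memory.
 */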
static void dr_icm_chunk_ste_init(struct mlx5dr_icm_chunk *chunk, int offset)
{
	struct mlx5dr_icm_buddy_mem *buddy = chunk->buddy_mem;
	int index = offset / DR_STE_SIZE;

	chunk->ste_arr = &buddy->ste_arr[index];
	chunk->miss_list = &buddy->miss_list[index];
	chunk->hw_ste_arr = buddy->hw_ste_arr +
			    index * dr_icm_buddy_get_ste_size(buddy);
}

static void dr_icm_chunk_ste_cleanup(struct mlx5dr_icm_chunk *chunk)
{
	struct mlx5dr_icm_buddy_mem *buddy = chunk->buddy_mem;

	memset(chunk->hw_ste_arr, 0,
	       chunk->num_of_entries * dr_icm_buddy_get_ste_size(buddy));
	memset(chunk->ste_arr, 0,
	       chunk->num_of_entries * sizeof(chunk->ste_arr[0]));
}

static enum mlx5dr_icm_type
get_chunk_icm_type(struct mlx5dr_icm_chunk *chunk)
{
	return chunk->buddy_mem->pool->icm_type;
}

static void dr_icm_chunk_destroy(struct mlx5dr_icm_chunk *chunk,
				 struct mlx5dr_icm_buddy_mem *buddy)
{
	enum mlx5dr_icm_type icm_type = get_chunk_icm_type(chunk);

	buddy->used_memory -= chunk->byte_size;
	list_del(&chunk->chunk_list);

	if (icm_type == DR_ICM_TYPE_STE)
		dr_icm_chunk_ste_cleanup(chunk);

	kvfree(chunk);
}

static int dr_icm_buddy_init_ste_cache(struct mlx5dr_icm_buddy_mem *buddy)
{
	int num_of_entries =
		mlx5dr_icm_pool_chunk_size_to_entries(buddy->pool->max_log_chunk_sz);

	buddy->ste_arr = kvcalloc(num_of_entries,
				  sizeof(struct mlx5dr_ste), GFP_KERNEL);
	if (!buddy->ste_arr)
		return -ENOMEM;

	/* Preallocate full STE size on non-ConnectX-5 devices since
	 * we need to support both full and reduced with the same cache.
	 */
	buddy->hw_ste_arr = kvcalloc(num_of_entries,
				     dr_icm_buddy_get_ste_size(buddy), GFP_KERNEL);
	if (!buddy->hw_ste_arr)
		goto free_ste_arr;

	buddy->miss_list = kvmalloc(num_of_entries * sizeof(struct list_head), GFP_KERNEL);
	if (!buddy->miss_list)
		goto free_hw_ste_arr;

	return 0;

free_hw_ste_arr:
	kvfree(buddy->hw_ste_arr);
free_ste_arr:
	kvfree(buddy->ste_arr);
	return -ENOMEM;
}

static void dr_icm_buddy_cleanup_ste_cache(struct mlx5dr_icm_buddy_mem *buddy)
{
	kvfree(buddy->ste_arr);
	kvfree(buddy->hw_ste_arr);
	kvfree(buddy->miss_list);
}

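/* Create a new buddy allocator: allocate and register its backing SW ICM
 * memory, init the buddy metadata and (for STEs) the shadow STE cache.
 */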
static int dr_icm_buddy_create(struct mlx5dr_icm_pool *pool)
{
	struct mlx5dr_icm_buddy_mem *buddy;
	struct mlx5dr_icm_mr *icm_mr;

	icm_mr = dr_icm_pool_mr_create(pool);
	if (!icm_mr)
		return -ENOMEM;

	buddy = kvzalloc(sizeof(*buddy), GFP_KERNEL);
	if (!buddy)
		goto free_mr;

	if (mlx5dr_buddy_init(buddy, pool->max_log_chunk_sz))
		goto err_free_buddy;

	buddy->icm_mr = icm_mr;
	buddy->pool = pool;

	if (pool->icm_type == DR_ICM_TYPE_STE) {
		/* Reduce allocations by preallocating and reusing the STE structures */
		if (dr_icm_buddy_init_ste_cache(buddy))
			goto err_cleanup_buddy;
	}

	/* add it to the start of the list so that it is searched first */
	list_add(&buddy->list_node, &pool->buddy_mem_list);

	return 0;

err_cleanup_buddy:
	mlx5dr_buddy_cleanup(buddy);
err_free_buddy:
	kvfree(buddy);
free_mr:
	dr_icm_pool_mr_destroy(icm_mr);
	return -ENOMEM;
}

static void dr_icm_buddy_destroy(struct mlx5dr_icm_buddy_mem *buddy)
{
	struct mlx5dr_icm_chunk *chunk, *next;

	list_for_each_entry_safe(chunk, next, &buddy->hot_list, chunk_list)
		dr_icm_chunk_destroy(chunk, buddy);

	list_for_each_entry_safe(chunk, next, &buddy->used_list, chunk_list)
		dr_icm_chunk_destroy(chunk, buddy);

	dr_icm_pool_mr_destroy(buddy->icm_mr);

	mlx5dr_buddy_cleanup(buddy);

	if (buddy->pool->icm_type == DR_ICM_TYPE_STE)
		dr_icm_buddy_cleanup_ste_cache(buddy);

	kvfree(buddy);
}

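/* Carve a chunk out of the given buddy memory at segment @seg and add it
 * to the buddy's used list.
 */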
static struct mlx5dr_icm_chunk *
dr_icm_chunk_create(struct mlx5dr_icm_pool *pool,
		    enum mlx5dr_icm_chunk_size chunk_size,
		    struct mlx5dr_icm_buddy_mem *buddy_mem_pool,
		    unsigned int seg)
{
	struct mlx5dr_icm_chunk *chunk;
	int offset;

	chunk = kvzalloc(sizeof(*chunk), GFP_KERNEL);
	if (!chunk)
		return NULL;

	offset = mlx5dr_icm_pool_dm_type_to_entry_size(pool->icm_type) * seg;

	chunk->rkey = buddy_mem_pool->icm_mr->mkey;
	chunk->mr_addr = offset;
	chunk->icm_addr =
		(uintptr_t)buddy_mem_pool->icm_mr->icm_start_addr + offset;
	chunk->num_of_entries =
		mlx5dr_icm_pool_chunk_size_to_entries(chunk_size);
	chunk->byte_size =
		mlx5dr_icm_pool_chunk_size_to_byte(chunk_size, pool->icm_type);
	chunk->seg = seg;
	chunk->buddy_mem = buddy_mem_pool;

	if (pool->icm_type == DR_ICM_TYPE_STE)
		dr_icm_chunk_ste_init(chunk, offset);

	buddy_mem_pool->used_memory += chunk->byte_size;
	INIT_LIST_HEAD(&chunk->chunk_list);

	/* the chunk is now part of the used_list */
	list_add_tail(&chunk->chunk_list, &buddy_mem_pool->used_list);

	return chunk;
}

static bool dr_icm_pool_is_sync_required(struct mlx5dr_icm_pool *pool)
{
	if (pool->hot_memory_size > DR_ICM_SYNC_THRESHOLD_POOL)
		return true;

	return false;
}

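/* Sync the HW steering so it no longer references freed chunks, then
 * return all hot chunks to their buddies and destroy unused STE buddies.
 */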
static int dr_icm_pool_sync_all_buddy_pools(struct mlx5dr_icm_pool *pool)
{
	struct mlx5dr_icm_buddy_mem *buddy, *tmp_buddy;
	int err;

	err = mlx5dr_cmd_sync_steering(pool->dmn->mdev);
	if (err) {
		mlx5dr_err(pool->dmn, "Failed to sync to HW (err: %d)\n", err);
		return err;
	}

	list_for_each_entry_safe(buddy, tmp_buddy, &pool->buddy_mem_list, list_node) {
		struct mlx5dr_icm_chunk *chunk, *tmp_chunk;

		list_for_each_entry_safe(chunk, tmp_chunk, &buddy->hot_list, chunk_list) {
			mlx5dr_buddy_free_mem(buddy, chunk->seg,
					      ilog2(chunk->num_of_entries));
			pool->hot_memory_size -= chunk->byte_size;
			dr_icm_chunk_destroy(chunk, buddy);
		}

		if (!buddy->used_memory && pool->icm_type == DR_ICM_TYPE_STE)
			dr_icm_buddy_destroy(buddy);
	}

	return 0;
}

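/* Find a buddy with a free segment of the requested order; if all existing
 * buddies are full, create a new one (at most once per call).
 */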
static int dr_icm_handle_buddies_get_mem(struct mlx5dr_icm_pool *pool,
					 enum mlx5dr_icm_chunk_size chunk_size,
					 struct mlx5dr_icm_buddy_mem **buddy,
					 unsigned int *seg)
{
	struct mlx5dr_icm_buddy_mem *buddy_mem_pool;
	bool new_mem = false;
	int err;

alloc_buddy_mem:
	/* find the next free place from the buddy list */
	list_for_each_entry(buddy_mem_pool, &pool->buddy_mem_list, list_node) {
		err = mlx5dr_buddy_alloc_mem(buddy_mem_pool,
					     chunk_size, seg);
		if (!err)
			goto found;

		if (WARN_ON(new_mem)) {
			/* We have a new memory pool, first in the list */
			mlx5dr_err(pool->dmn,
				   "No memory for order: %d\n",
				   chunk_size);
			goto out;
		}
	}

	/* no more available allocators in this pool, create a new one */
	err = dr_icm_buddy_create(pool);
	if (err) {
		mlx5dr_err(pool->dmn,
			   "Failed creating buddy for order %d\n",
			   chunk_size);
		goto out;
	}

	/* mark that we have new memory, first in the list */
	new_mem = true;
	goto alloc_buddy_mem;

found:
	*buddy = buddy_mem_pool;
out:
	return err;
}

/* Allocate an ICM chunk. Each chunk holds a piece of ICM memory and, for
 * STE chunks, the shadow structures used to manage the HW STEs.
 */
struct mlx5dr_icm_chunk *
mlx5dr_icm_alloc_chunk(struct mlx5dr_icm_pool *pool,
		       enum mlx5dr_icm_chunk_size chunk_size)
{
	struct mlx5dr_icm_chunk *chunk = NULL;
	struct mlx5dr_icm_buddy_mem *buddy;
	unsigned int seg;
	int ret;

	if (chunk_size > pool->max_log_chunk_sz)
		return NULL;

	mutex_lock(&pool->mutex);
	/* find mem, get back the relevant buddy pool and seg in that mem */
	ret = dr_icm_handle_buddies_get_mem(pool, chunk_size, &buddy, &seg);
	if (ret)
		goto out;

	chunk = dr_icm_chunk_create(pool, chunk_size, buddy, seg);
	if (!chunk)
		goto out_err;

	goto out;

out_err:
	mlx5dr_buddy_free_mem(buddy, seg, chunk_size);
out:
	mutex_unlock(&pool->mutex);
	return chunk;
}

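/* Freeing a chunk only moves it to the hot list; the memory is returned
 * to the buddy allocator once the HW has been synced.
 */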
void mlx5dr_icm_free_chunk(struct mlx5dr_icm_chunk *chunk)
{
	struct mlx5dr_icm_buddy_mem *buddy = chunk->buddy_mem;
	struct mlx5dr_icm_pool *pool = buddy->pool;

	/* move the memory to the waiting ("hot") list */
	mutex_lock(&pool->mutex);
	list_move_tail(&chunk->chunk_list, &buddy->hot_list);
	pool->hot_memory_size += chunk->byte_size;

	/* Check if we have chunks that are waiting for sync-ste */
	if (dr_icm_pool_is_sync_required(pool))
		dr_icm_pool_sync_all_buddy_pools(pool);

	mutex_unlock(&pool->mutex);
}

struct mlx5dr_icm_pool *mlx5dr_icm_pool_create(struct mlx5dr_domain *dmn,
					       enum mlx5dr_icm_type icm_type)
{
	enum mlx5dr_icm_chunk_size max_log_chunk_sz;
	struct mlx5dr_icm_pool *pool;

	if (icm_type == DR_ICM_TYPE_STE)
		max_log_chunk_sz = dmn->info.max_log_sw_icm_sz;
	else
		max_log_chunk_sz = dmn->info.max_log_action_icm_sz;

	pool = kvzalloc(sizeof(*pool), GFP_KERNEL);
	if (!pool)
		return NULL;

	pool->dmn = dmn;
	pool->icm_type = icm_type;
	pool->max_log_chunk_sz = max_log_chunk_sz;

	INIT_LIST_HEAD(&pool->buddy_mem_list);

	mutex_init(&pool->mutex);

	return pool;
}

void mlx5dr_icm_pool_destroy(struct mlx5dr_icm_pool *pool)
{
	struct mlx5dr_icm_buddy_mem *buddy, *tmp_buddy;

	list_for_each_entry_safe(buddy, tmp_buddy, &pool->buddy_mem_list, list_node)
		dr_icm_buddy_destroy(buddy);

	mutex_destroy(&pool->mutex);
	kvfree(pool);
}