// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */

#include "dr_types.h"

#define DR_ICM_MODIFY_HDR_ALIGN_BASE 64
#define DR_ICM_SYNC_THRESHOLD_POOL (64 * 1024 * 1024)

struct mlx5dr_icm_pool {
	enum mlx5dr_icm_type icm_type;
	enum mlx5dr_icm_chunk_size max_log_chunk_sz;
	struct mlx5dr_domain *dmn;
	/* memory management */
	struct mutex mutex; /* protect the ICM pool and ICM buddy */
	struct list_head buddy_mem_list;
	u64 hot_memory_size;
};

struct mlx5dr_icm_dm {
	u32 obj_id;
	enum mlx5_sw_icm_type type;
	phys_addr_t addr;
	size_t length;
};

struct mlx5dr_icm_mr {
	u32 mkey;
	struct mlx5dr_icm_dm dm;
	struct mlx5dr_domain *dmn;
	size_t length;
	u64 icm_start_addr;
};

static int dr_icm_create_dm_mkey(struct mlx5_core_dev *mdev,
				 u32 pd, u64 length, u64 start_addr, int mode,
				 u32 *mkey)
{
	u32 inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
	u32 in[MLX5_ST_SZ_DW(create_mkey_in)] = {};
	void *mkc;

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);

	MLX5_SET(mkc, mkc, access_mode_1_0, mode);
	MLX5_SET(mkc, mkc, access_mode_4_2, (mode >> 2) & 0x7);
	MLX5_SET(mkc, mkc, lw, 1);
	MLX5_SET(mkc, mkc, lr, 1);
	if (mode == MLX5_MKC_ACCESS_MODE_SW_ICM) {
		MLX5_SET(mkc, mkc, rw, 1);
		MLX5_SET(mkc, mkc, rr, 1);
	}

	MLX5_SET64(mkc, mkc, len, length);
	MLX5_SET(mkc, mkc, pd, pd);
	MLX5_SET(mkc, mkc, qpn, 0xffffff);
	MLX5_SET64(mkc, mkc, start_addr, start_addr);

	return mlx5_core_create_mkey(mdev, mkey, in, inlen);
}

static struct mlx5dr_icm_mr *
dr_icm_pool_mr_create(struct mlx5dr_icm_pool *pool)
{
	struct mlx5_core_dev *mdev = pool->dmn->mdev;
	enum mlx5_sw_icm_type dm_type;
	struct mlx5dr_icm_mr *icm_mr;
	size_t log_align_base;
	int err;

	icm_mr = kvzalloc(sizeof(*icm_mr), GFP_KERNEL);
	if (!icm_mr)
		return NULL;

	icm_mr->dmn = pool->dmn;

	icm_mr->dm.length = mlx5dr_icm_pool_chunk_size_to_byte(pool->max_log_chunk_sz,
							       pool->icm_type);

	if (pool->icm_type == DR_ICM_TYPE_STE) {
		dm_type = MLX5_SW_ICM_TYPE_STEERING;
		log_align_base = ilog2(icm_mr->dm.length);
	} else {
		dm_type = MLX5_SW_ICM_TYPE_HEADER_MODIFY;
		/* Align base is 64B */
		log_align_base = ilog2(DR_ICM_MODIFY_HDR_ALIGN_BASE);
	}
	icm_mr->dm.type = dm_type;

	err = mlx5_dm_sw_icm_alloc(mdev, icm_mr->dm.type, icm_mr->dm.length,
				   log_align_base, 0, &icm_mr->dm.addr,
				   &icm_mr->dm.obj_id);
	if (err) {
		mlx5dr_err(pool->dmn, "Failed to allocate SW ICM memory, err (%d)\n", err);
		goto free_icm_mr;
	}

	/* Register device memory */
	err = dr_icm_create_dm_mkey(mdev, pool->dmn->pdn,
				    icm_mr->dm.length,
				    icm_mr->dm.addr,
				    MLX5_MKC_ACCESS_MODE_SW_ICM,
				    &icm_mr->mkey);
	if (err) {
		mlx5dr_err(pool->dmn, "Failed to create SW ICM MKEY, err (%d)\n", err);
		goto free_dm;
	}

	icm_mr->icm_start_addr = icm_mr->dm.addr;

	if (icm_mr->icm_start_addr & (BIT(log_align_base) - 1)) {
		mlx5dr_err(pool->dmn, "Failed to get aligned ICM memory (requested alignment log: %zu)\n",
			   log_align_base);
		goto free_mkey;
	}

	return icm_mr;

free_mkey:
	mlx5_core_destroy_mkey(mdev, icm_mr->mkey);
free_dm:
	mlx5_dm_sw_icm_dealloc(mdev, icm_mr->dm.type, icm_mr->dm.length, 0,
			       icm_mr->dm.addr, icm_mr->dm.obj_id);
free_icm_mr:
	kvfree(icm_mr);
	return NULL;
}
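/* Worked example of the alignment requirement checked above (numbers are
 * illustrative only): an STE pool whose max chunk size maps to 2^20 bytes
 * gets log_align_base = ilog2(2^20) = 20, so the low 20 bits of dm.addr
 * must be clear, i.e. dm.addr & (BIT(20) - 1) == 0. A modify-header pool
 * only needs 64B alignment: log_align_base = ilog2(64) = 6.
 */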
static void dr_icm_pool_mr_destroy(struct mlx5dr_icm_mr *icm_mr)
{
	struct mlx5_core_dev *mdev = icm_mr->dmn->mdev;
	struct mlx5dr_icm_dm *dm = &icm_mr->dm;

	mlx5_core_destroy_mkey(mdev, icm_mr->mkey);
	mlx5_dm_sw_icm_dealloc(mdev, dm->type, dm->length, 0,
			       dm->addr, dm->obj_id);
	kvfree(icm_mr);
}

static int dr_icm_buddy_get_ste_size(struct mlx5dr_icm_buddy_mem *buddy)
{
	/* We support only one STE size, both for ConnectX-5 and later
	 * devices. Once support for match STEs with a larger tag (32B
	 * instead of 16B) is added, the STE size for devices later than
	 * ConnectX-5 will need to account for that.
	 */
	return DR_STE_SIZE_REDUCED;
}

static void dr_icm_chunk_ste_init(struct mlx5dr_icm_chunk *chunk, int offset)
{
	struct mlx5dr_icm_buddy_mem *buddy = chunk->buddy_mem;
	int index = offset / DR_STE_SIZE;

	chunk->ste_arr = &buddy->ste_arr[index];
	chunk->miss_list = &buddy->miss_list[index];
	chunk->hw_ste_arr = buddy->hw_ste_arr +
			    index * dr_icm_buddy_get_ste_size(buddy);
}

static void dr_icm_chunk_ste_cleanup(struct mlx5dr_icm_chunk *chunk)
{
	struct mlx5dr_icm_buddy_mem *buddy = chunk->buddy_mem;

	memset(chunk->hw_ste_arr, 0,
	       chunk->num_of_entries * dr_icm_buddy_get_ste_size(buddy));
	memset(chunk->ste_arr, 0,
	       chunk->num_of_entries * sizeof(chunk->ste_arr[0]));
}

static enum mlx5dr_icm_type
get_chunk_icm_type(struct mlx5dr_icm_chunk *chunk)
{
	return chunk->buddy_mem->pool->icm_type;
}

static void dr_icm_chunk_destroy(struct mlx5dr_icm_chunk *chunk,
				 struct mlx5dr_icm_buddy_mem *buddy)
{
	enum mlx5dr_icm_type icm_type = get_chunk_icm_type(chunk);

	buddy->used_memory -= chunk->byte_size;
	list_del(&chunk->chunk_list);

	if (icm_type == DR_ICM_TYPE_STE)
		dr_icm_chunk_ste_cleanup(chunk);

	kvfree(chunk);
}
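/* Layout note (descriptive only): the three arrays allocated below form a
 * per-buddy cache sized for the worst case (max_log_chunk_sz entries).
 * Chunks never allocate STE structures themselves; dr_icm_chunk_ste_init()
 * above simply points a chunk at its slice of these arrays, e.g. a chunk at
 * ICM byte offset `offset` owns entries starting at offset / DR_STE_SIZE.
 */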
static int dr_icm_buddy_init_ste_cache(struct mlx5dr_icm_buddy_mem *buddy)
{
	int num_of_entries =
		mlx5dr_icm_pool_chunk_size_to_entries(buddy->pool->max_log_chunk_sz);

	buddy->ste_arr = kvcalloc(num_of_entries,
				  sizeof(struct mlx5dr_ste), GFP_KERNEL);
	if (!buddy->ste_arr)
		return -ENOMEM;

	/* Preallocate full STE size on non-ConnectX-5 devices since
	 * we need to support both full and reduced with the same cache.
	 */
	buddy->hw_ste_arr = kvcalloc(num_of_entries,
				     dr_icm_buddy_get_ste_size(buddy), GFP_KERNEL);
	if (!buddy->hw_ste_arr)
		goto free_ste_arr;

	buddy->miss_list = kvmalloc_array(num_of_entries,
					  sizeof(struct list_head), GFP_KERNEL);
	if (!buddy->miss_list)
		goto free_hw_ste_arr;

	return 0;

free_hw_ste_arr:
	kvfree(buddy->hw_ste_arr);
free_ste_arr:
	kvfree(buddy->ste_arr);
	return -ENOMEM;
}

static void dr_icm_buddy_cleanup_ste_cache(struct mlx5dr_icm_buddy_mem *buddy)
{
	kvfree(buddy->ste_arr);
	kvfree(buddy->hw_ste_arr);
	kvfree(buddy->miss_list);
}

static int dr_icm_buddy_create(struct mlx5dr_icm_pool *pool)
{
	struct mlx5dr_icm_buddy_mem *buddy;
	struct mlx5dr_icm_mr *icm_mr;

	icm_mr = dr_icm_pool_mr_create(pool);
	if (!icm_mr)
		return -ENOMEM;

	buddy = kvzalloc(sizeof(*buddy), GFP_KERNEL);
	if (!buddy)
		goto free_mr;

	if (mlx5dr_buddy_init(buddy, pool->max_log_chunk_sz))
		goto err_free_buddy;

	buddy->icm_mr = icm_mr;
	buddy->pool = pool;

	if (pool->icm_type == DR_ICM_TYPE_STE) {
		/* Reduce allocations by preallocating and reusing the STE structures */
		if (dr_icm_buddy_init_ste_cache(buddy))
			goto err_cleanup_buddy;
	}

	/* Add the new buddy to the start of the list so it is searched first */
	list_add(&buddy->list_node, &pool->buddy_mem_list);

	return 0;

err_cleanup_buddy:
	mlx5dr_buddy_cleanup(buddy);
err_free_buddy:
	kvfree(buddy);
free_mr:
	dr_icm_pool_mr_destroy(icm_mr);
	return -ENOMEM;
}

static void dr_icm_buddy_destroy(struct mlx5dr_icm_buddy_mem *buddy)
{
	struct mlx5dr_icm_chunk *chunk, *next;

	list_for_each_entry_safe(chunk, next, &buddy->hot_list, chunk_list)
		dr_icm_chunk_destroy(chunk, buddy);

	list_for_each_entry_safe(chunk, next, &buddy->used_list, chunk_list)
		dr_icm_chunk_destroy(chunk, buddy);

	dr_icm_pool_mr_destroy(buddy->icm_mr);

	mlx5dr_buddy_cleanup(buddy);

	if (buddy->pool->icm_type == DR_ICM_TYPE_STE)
		dr_icm_buddy_cleanup_ste_cache(buddy);

	kvfree(buddy);
}

static struct mlx5dr_icm_chunk *
dr_icm_chunk_create(struct mlx5dr_icm_pool *pool,
		    enum mlx5dr_icm_chunk_size chunk_size,
		    struct mlx5dr_icm_buddy_mem *buddy_mem_pool,
		    unsigned int seg)
{
	struct mlx5dr_icm_chunk *chunk;
	int offset;

	chunk = kvzalloc(sizeof(*chunk), GFP_KERNEL);
	if (!chunk)
		return NULL;

	offset = mlx5dr_icm_pool_dm_type_to_entry_size(pool->icm_type) * seg;

	chunk->rkey = buddy_mem_pool->icm_mr->mkey;
	chunk->mr_addr = offset;
	chunk->icm_addr =
		(uintptr_t)buddy_mem_pool->icm_mr->icm_start_addr + offset;
	chunk->num_of_entries =
		mlx5dr_icm_pool_chunk_size_to_entries(chunk_size);
	chunk->byte_size =
		mlx5dr_icm_pool_chunk_size_to_byte(chunk_size, pool->icm_type);
	chunk->seg = seg;
	chunk->buddy_mem = buddy_mem_pool;

	if (pool->icm_type == DR_ICM_TYPE_STE)
		dr_icm_chunk_ste_init(chunk, offset);

	buddy_mem_pool->used_memory += chunk->byte_size;
	INIT_LIST_HEAD(&chunk->chunk_list);

	/* chunk now is part of the used_list */
	list_add_tail(&chunk->chunk_list, &buddy_mem_pool->used_list);

	return chunk;
}

static bool dr_icm_pool_is_sync_required(struct mlx5dr_icm_pool *pool)
{
	return pool->hot_memory_size > DR_ICM_SYNC_THRESHOLD_POOL;
}
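/* Example of the sync trigger (follows directly from the definitions above):
 * DR_ICM_SYNC_THRESHOLD_POOL is 64MB, so once the combined byte_size of the
 * chunks parked on the hot lists exceeds 64MB, the next call to
 * mlx5dr_icm_free_chunk() below syncs the HW and returns those chunks to
 * their buddy allocators.
 */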
static int dr_icm_pool_sync_all_buddy_pools(struct mlx5dr_icm_pool *pool)
{
	struct mlx5dr_icm_buddy_mem *buddy, *tmp_buddy;
	int err;

	err = mlx5dr_cmd_sync_steering(pool->dmn->mdev);
	if (err) {
		mlx5dr_err(pool->dmn, "Failed to sync to HW (err: %d)\n", err);
		return err;
	}

	list_for_each_entry_safe(buddy, tmp_buddy, &pool->buddy_mem_list, list_node) {
		struct mlx5dr_icm_chunk *chunk, *tmp_chunk;

		list_for_each_entry_safe(chunk, tmp_chunk, &buddy->hot_list, chunk_list) {
			mlx5dr_buddy_free_mem(buddy, chunk->seg,
					      ilog2(chunk->num_of_entries));
			pool->hot_memory_size -= chunk->byte_size;
			dr_icm_chunk_destroy(chunk, buddy);
		}

		if (!buddy->used_memory && pool->icm_type == DR_ICM_TYPE_STE)
			dr_icm_buddy_destroy(buddy);
	}

	return 0;
}

static int dr_icm_handle_buddies_get_mem(struct mlx5dr_icm_pool *pool,
					 enum mlx5dr_icm_chunk_size chunk_size,
					 struct mlx5dr_icm_buddy_mem **buddy,
					 unsigned int *seg)
{
	struct mlx5dr_icm_buddy_mem *buddy_mem_pool;
	bool new_mem = false;
	int err;

alloc_buddy_mem:
	/* Find the next free place in the buddy list */
	list_for_each_entry(buddy_mem_pool, &pool->buddy_mem_list, list_node) {
		err = mlx5dr_buddy_alloc_mem(buddy_mem_pool,
					     chunk_size, seg);
		if (!err)
			goto found;

		if (WARN_ON(new_mem)) {
			/* A fresh buddy was just added at the head of the
			 * list; if even it cannot satisfy the request,
			 * retrying is pointless.
			 */
			mlx5dr_err(pool->dmn,
				   "No memory for order: %d\n",
				   chunk_size);
			goto out;
		}
	}

	/* No buddy in the pool has free memory of this order, create a new one */
	err = dr_icm_buddy_create(pool);
	if (err) {
		mlx5dr_err(pool->dmn,
			   "Failed creating buddy for order %d\n",
			   chunk_size);
		goto out;
	}

	/* Mark that new memory was added at the head of the list and retry */
	new_mem = true;
	goto alloc_buddy_mem;

found:
	*buddy = buddy_mem_pool;
out:
	return err;
}
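/* Allocation flow summary (descriptive only): dr_icm_handle_buddies_get_mem()
 * makes at most two passes. Pass one scans the existing buddies; if none has
 * a free segment of the requested order, a single new buddy of
 * max_log_chunk_sz is created at the head of the list and the scan repeats.
 * Since the caller guarantees chunk_size <= max_log_chunk_sz, the second
 * pass can only fail if something is seriously wrong, hence the WARN_ON.
 */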
/* Allocate an ICM chunk. Each chunk holds a piece of ICM memory and, for
 * STE chunks, also the host-side memory used for HW STE management.
 */
struct mlx5dr_icm_chunk *
mlx5dr_icm_alloc_chunk(struct mlx5dr_icm_pool *pool,
		       enum mlx5dr_icm_chunk_size chunk_size)
{
	struct mlx5dr_icm_chunk *chunk = NULL;
	struct mlx5dr_icm_buddy_mem *buddy;
	unsigned int seg;
	int ret;

	if (chunk_size > pool->max_log_chunk_sz)
		return NULL;

	mutex_lock(&pool->mutex);
	/* find mem, get back the relevant buddy pool and seg in that mem */
	ret = dr_icm_handle_buddies_get_mem(pool, chunk_size, &buddy, &seg);
	if (ret)
		goto out;

	chunk = dr_icm_chunk_create(pool, chunk_size, buddy, seg);
	if (!chunk)
		goto out_err;

	goto out;

out_err:
	mlx5dr_buddy_free_mem(buddy, seg, chunk_size);
out:
	mutex_unlock(&pool->mutex);
	return chunk;
}

void mlx5dr_icm_free_chunk(struct mlx5dr_icm_chunk *chunk)
{
	struct mlx5dr_icm_buddy_mem *buddy = chunk->buddy_mem;
	struct mlx5dr_icm_pool *pool = buddy->pool;

	/* Move the chunk to the "hot" waiting list; its memory is not
	 * returned to the buddy allocator until the next HW sync.
	 */
	mutex_lock(&pool->mutex);
	list_move_tail(&chunk->chunk_list, &buddy->hot_list);
	pool->hot_memory_size += chunk->byte_size;

	/* Sync the HW if too much hot memory has accumulated */
	if (dr_icm_pool_is_sync_required(pool))
		dr_icm_pool_sync_all_buddy_pools(pool);

	mutex_unlock(&pool->mutex);
}

struct mlx5dr_icm_pool *mlx5dr_icm_pool_create(struct mlx5dr_domain *dmn,
					       enum mlx5dr_icm_type icm_type)
{
	enum mlx5dr_icm_chunk_size max_log_chunk_sz;
	struct mlx5dr_icm_pool *pool;

	if (icm_type == DR_ICM_TYPE_STE)
		max_log_chunk_sz = dmn->info.max_log_sw_icm_sz;
	else
		max_log_chunk_sz = dmn->info.max_log_action_icm_sz;

	pool = kvzalloc(sizeof(*pool), GFP_KERNEL);
	if (!pool)
		return NULL;

	pool->dmn = dmn;
	pool->icm_type = icm_type;
	pool->max_log_chunk_sz = max_log_chunk_sz;

	INIT_LIST_HEAD(&pool->buddy_mem_list);

	mutex_init(&pool->mutex);

	return pool;
}

void mlx5dr_icm_pool_destroy(struct mlx5dr_icm_pool *pool)
{
	struct mlx5dr_icm_buddy_mem *buddy, *tmp_buddy;

	list_for_each_entry_safe(buddy, tmp_buddy, &pool->buddy_mem_list, list_node)
		dr_icm_buddy_destroy(buddy);

	mutex_destroy(&pool->mutex);
	kvfree(pool);
}
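/* Minimal usage sketch of the pool API above (illustrative only; error
 * handling trimmed, and "dmn" stands for an already initialized
 * mlx5dr_domain set up elsewhere in the driver):
 *
 *	struct mlx5dr_icm_pool *pool;
 *	struct mlx5dr_icm_chunk *chunk;
 *
 *	pool = mlx5dr_icm_pool_create(dmn, DR_ICM_TYPE_STE);
 *	if (!pool)
 *		return -ENOMEM;
 *
 *	chunk = mlx5dr_icm_alloc_chunk(pool, chunk_size);
 *	...
 *	mlx5dr_icm_free_chunk(chunk);	(parks the chunk on the hot list)
 *	mlx5dr_icm_pool_destroy(pool);	(destroys any remaining buddies)
 */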