// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */

#include <linux/smp.h>
#include "dr_types.h"

#define QUEUE_SIZE 128
#define SIGNAL_PER_DIV_QUEUE 16
#define TH_NUMS_TO_DRAIN 2
#define DR_SEND_INFO_POOL_SIZE 1000

enum { CQ_OK = 0, CQ_EMPTY = -1, CQ_POLL_ERR = -2 };

struct dr_data_seg {
	u64 addr;
	u32 length;
	u32 lkey;
	unsigned int send_flags;
};

enum send_info_type {
	WRITE_ICM = 0,
	GTA_ARG = 1,
};

struct postsend_info {
	enum send_info_type type;
	struct dr_data_seg write;
	struct dr_data_seg read;
	u64 remote_addr;
	u32 rkey;
};

struct dr_qp_rtr_attr {
	struct mlx5dr_cmd_gid_attr dgid_attr;
	enum ib_mtu mtu;
	u32 qp_num;
	u16 port_num;
	u8 min_rnr_timer;
	u8 sgid_index;
	u16 udp_src_port;
	u8 fl:1;
};

struct dr_qp_rts_attr {
	u8 timeout;
	u8 retry_cnt;
	u8 rnr_retry;
};

struct dr_qp_init_attr {
	u32 cqn;
	u32 pdn;
	u32 max_send_wr;
	u32 max_send_sge;
	struct mlx5_uars_page *uar;
	u8 isolate_vl_tc:1;
};

struct mlx5dr_send_info_pool_obj {
	struct mlx5dr_ste_send_info ste_send_info;
	struct mlx5dr_send_info_pool *pool;
	struct list_head list_node;
};

struct mlx5dr_send_info_pool {
	struct list_head free_list;
};

static int dr_send_info_pool_fill(struct mlx5dr_send_info_pool *pool)
{
	struct mlx5dr_send_info_pool_obj *pool_obj, *tmp_pool_obj;
	int i;

	for (i = 0; i < DR_SEND_INFO_POOL_SIZE; i++) {
		pool_obj = kzalloc(sizeof(*pool_obj), GFP_KERNEL);
		if (!pool_obj)
			goto clean_pool;

		pool_obj->pool = pool;
		list_add_tail(&pool_obj->list_node, &pool->free_list);
	}

	return 0;

clean_pool:
	list_for_each_entry_safe(pool_obj, tmp_pool_obj, &pool->free_list, list_node) {
		list_del(&pool_obj->list_node);
		kfree(pool_obj);
	}

	return -ENOMEM;
}

static void dr_send_info_pool_destroy(struct mlx5dr_send_info_pool *pool)
{
	struct mlx5dr_send_info_pool_obj *pool_obj, *tmp_pool_obj;

	list_for_each_entry_safe(pool_obj, tmp_pool_obj, &pool->free_list, list_node) {
		list_del(&pool_obj->list_node);
		kfree(pool_obj);
	}

	kfree(pool);
}

void mlx5dr_send_info_pool_destroy(struct mlx5dr_domain *dmn)
{
	dr_send_info_pool_destroy(dmn->send_info_pool_tx);
	dr_send_info_pool_destroy(dmn->send_info_pool_rx);
}

static struct mlx5dr_send_info_pool *dr_send_info_pool_create(void)
{
	struct mlx5dr_send_info_pool *pool;
	int ret;

	pool = kzalloc(sizeof(*pool), GFP_KERNEL);
	if (!pool)
		return NULL;

	INIT_LIST_HEAD(&pool->free_list);

	ret = dr_send_info_pool_fill(pool);
	if (ret) {
		kfree(pool);
		return NULL;
	}

	return pool;
}

int mlx5dr_send_info_pool_create(struct mlx5dr_domain *dmn)
{
	dmn->send_info_pool_rx = dr_send_info_pool_create();
	if (!dmn->send_info_pool_rx)
		return -ENOMEM;

	dmn->send_info_pool_tx = dr_send_info_pool_create();
	if (!dmn->send_info_pool_tx) {
		dr_send_info_pool_destroy(dmn->send_info_pool_rx);
		return -ENOMEM;
	}

	return 0;
}
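
/* Allocate an STE send info object from the domain's per-nic-type pool.
 * If the free list is empty, the pool is refilled with another batch of
 * DR_SEND_INFO_POOL_SIZE objects. Returns NULL if the refill fails.
 */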
struct mlx5dr_ste_send_info
*mlx5dr_send_info_alloc(struct mlx5dr_domain *dmn,
			enum mlx5dr_domain_nic_type nic_type)
{
	struct mlx5dr_send_info_pool_obj *pool_obj;
	struct mlx5dr_send_info_pool *pool;
	int ret;

	pool = nic_type == DR_DOMAIN_NIC_TYPE_RX ? dmn->send_info_pool_rx :
						   dmn->send_info_pool_tx;

	if (unlikely(list_empty(&pool->free_list))) {
		ret = dr_send_info_pool_fill(pool);
		if (ret)
			return NULL;
	}

	pool_obj = list_first_entry_or_null(&pool->free_list,
					    struct mlx5dr_send_info_pool_obj,
					    list_node);

	if (likely(pool_obj)) {
		list_del_init(&pool_obj->list_node);
	} else {
		WARN_ONCE(!pool_obj, "Failed getting ste send info obj from pool");
		return NULL;
	}

	return &pool_obj->ste_send_info;
}

void mlx5dr_send_info_free(struct mlx5dr_ste_send_info *ste_send_info)
{
	struct mlx5dr_send_info_pool_obj *pool_obj;

	pool_obj = container_of(ste_send_info,
				struct mlx5dr_send_info_pool_obj,
				ste_send_info);

	list_add(&pool_obj->list_node, &pool_obj->pool->free_list);
}

static int dr_parse_cqe(struct mlx5dr_cq *dr_cq, struct mlx5_cqe64 *cqe64)
{
	unsigned int idx;
	u8 opcode;

	opcode = get_cqe_opcode(cqe64);
	if (opcode == MLX5_CQE_REQ_ERR) {
		idx = be16_to_cpu(cqe64->wqe_counter) &
			(dr_cq->qp->sq.wqe_cnt - 1);
		dr_cq->qp->sq.cc = dr_cq->qp->sq.wqe_head[idx] + 1;
	} else if (opcode == MLX5_CQE_RESP_ERR) {
		++dr_cq->qp->sq.cc;
	} else {
		idx = be16_to_cpu(cqe64->wqe_counter) &
			(dr_cq->qp->sq.wqe_cnt - 1);
		dr_cq->qp->sq.cc = dr_cq->qp->sq.wqe_head[idx] + 1;

		return CQ_OK;
	}

	return CQ_POLL_ERR;
}

static int dr_cq_poll_one(struct mlx5dr_cq *dr_cq)
{
	struct mlx5_cqe64 *cqe64;
	int err;

	cqe64 = mlx5_cqwq_get_cqe(&dr_cq->wq);
	if (!cqe64) {
		if (unlikely(dr_cq->mdev->state ==
			     MLX5_DEVICE_STATE_INTERNAL_ERROR)) {
			mlx5_core_dbg_once(dr_cq->mdev,
					   "Polling CQ while device is shutting down\n");
			return CQ_POLL_ERR;
		}
		return CQ_EMPTY;
	}

	mlx5_cqwq_pop(&dr_cq->wq);
	err = dr_parse_cqe(dr_cq, cqe64);
	mlx5_cqwq_update_db_record(&dr_cq->wq);

	return err;
}
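
/* Poll up to @ne completions from the CQ. Returns the number of CQEs
 * consumed (0 when the CQ is empty), or CQ_POLL_ERR on error.
 */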
static int dr_poll_cq(struct mlx5dr_cq *dr_cq, int ne)
{
	int npolled;
	int err = 0;

	for (npolled = 0; npolled < ne; ++npolled) {
		err = dr_cq_poll_one(dr_cq);
		if (err != CQ_OK)
			break;
	}

	return err == CQ_POLL_ERR ? err : npolled;
}

static int dr_qp_get_args_update_send_wqe_size(struct dr_qp_init_attr *attr)
{
	return roundup_pow_of_two(sizeof(struct mlx5_wqe_ctrl_seg) +
				  sizeof(struct mlx5_wqe_flow_update_ctrl_seg) +
				  sizeof(struct mlx5_wqe_header_modify_argument_update_seg));
}

/* Calculate the send WQE size for this specific RC QP: it must be large
 * enough for the scatter/gather write, the inline write and the
 * argument-update WQE variants.
 */
static int dr_qp_calc_rc_send_wqe(struct dr_qp_init_attr *attr)
{
	int update_arg_size;
	int inl_size = 0;
	int tot_size;
	int size;

	update_arg_size = dr_qp_get_args_update_send_wqe_size(attr);

	size = sizeof(struct mlx5_wqe_ctrl_seg) +
	       sizeof(struct mlx5_wqe_raddr_seg);
	inl_size = size + ALIGN(sizeof(struct mlx5_wqe_inline_seg) +
				DR_STE_SIZE, 16);

	size += attr->max_send_sge * sizeof(struct mlx5_wqe_data_seg);

	size = max(size, update_arg_size);

	tot_size = max(size, inl_size);

	return ALIGN(tot_size, MLX5_SEND_WQE_BB);
}
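
/* Illustrative sizing for the calculation above, assuming 16-byte
 * ctrl/raddr/data segments, a 4-byte inline segment header and 64-byte
 * STEs: with one SGE the gather WQE takes 16 + 16 + 16 = 48 bytes, the
 * inline variant 32 + ALIGN(4 + 64, 16) = 112 bytes, and the
 * argument-update WQE rounds up to 128 bytes, so the result aligns to
 * two 64-byte WQE basic blocks.
 */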
static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev,
					 struct dr_qp_init_attr *attr)
{
	u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
	u32 temp_qpc[MLX5_ST_SZ_DW(qpc)] = {};
	struct mlx5_wq_param wqp;
	struct mlx5dr_qp *dr_qp;
	int wqe_size;
	int inlen;
	void *qpc;
	void *in;
	int err;

	dr_qp = kzalloc(sizeof(*dr_qp), GFP_KERNEL);
	if (!dr_qp)
		return NULL;

	wqp.buf_numa_node = mdev->priv.numa_node;
	wqp.db_numa_node = mdev->priv.numa_node;

	dr_qp->rq.pc = 0;
	dr_qp->rq.cc = 0;
	dr_qp->rq.wqe_cnt = 256;
	dr_qp->sq.pc = 0;
	dr_qp->sq.cc = 0;
	dr_qp->sq.head = 0;
	dr_qp->sq.wqe_cnt = roundup_pow_of_two(attr->max_send_wr);

	MLX5_SET(qpc, temp_qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4);
	MLX5_SET(qpc, temp_qpc, log_rq_size, ilog2(dr_qp->rq.wqe_cnt));
	MLX5_SET(qpc, temp_qpc, log_sq_size, ilog2(dr_qp->sq.wqe_cnt));
	err = mlx5_wq_qp_create(mdev, &wqp, temp_qpc, &dr_qp->wq,
				&dr_qp->wq_ctrl);
	if (err) {
		mlx5_core_warn(mdev, "Can't create QP WQ\n");
		goto err_wq;
	}

	dr_qp->sq.wqe_head = kcalloc(dr_qp->sq.wqe_cnt,
				     sizeof(dr_qp->sq.wqe_head[0]),
				     GFP_KERNEL);

	if (!dr_qp->sq.wqe_head) {
		mlx5_core_warn(mdev, "Can't allocate wqe head\n");
		goto err_wqe_head;
	}

	inlen = MLX5_ST_SZ_BYTES(create_qp_in) +
		MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) *
		dr_qp->wq_ctrl.buf.npages;
	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_in;
	}

	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
	MLX5_SET(qpc, qpc, isolate_vl_tc, attr->isolate_vl_tc);
	MLX5_SET(qpc, qpc, pd, attr->pdn);
	MLX5_SET(qpc, qpc, uar_page, attr->uar->index);
	MLX5_SET(qpc, qpc, log_page_size,
		 dr_qp->wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
	MLX5_SET(qpc, qpc, fre, 1);
	MLX5_SET(qpc, qpc, rlky, 1);
	MLX5_SET(qpc, qpc, cqn_snd, attr->cqn);
	MLX5_SET(qpc, qpc, cqn_rcv, attr->cqn);
	MLX5_SET(qpc, qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4);
	MLX5_SET(qpc, qpc, log_rq_size, ilog2(dr_qp->rq.wqe_cnt));
	MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
	MLX5_SET(qpc, qpc, log_sq_size, ilog2(dr_qp->sq.wqe_cnt));
	MLX5_SET(qpc, qpc, ts_format, mlx5_get_qp_default_ts(mdev));
	MLX5_SET64(qpc, qpc, dbr_addr, dr_qp->wq_ctrl.db.dma);
	if (MLX5_CAP_GEN(mdev, cqe_version) == 1)
		MLX5_SET(qpc, qpc, user_index, 0xFFFFFF);
	mlx5_fill_page_frag_array(&dr_qp->wq_ctrl.buf,
				  (__be64 *)MLX5_ADDR_OF(create_qp_in,
							 in, pas));

	MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
	err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
	dr_qp->qpn = MLX5_GET(create_qp_out, out, qpn);
	kvfree(in);
	if (err)
		goto err_in;
	dr_qp->uar = attr->uar;
	wqe_size = dr_qp_calc_rc_send_wqe(attr);
	dr_qp->max_inline_data = min(wqe_size -
				     (sizeof(struct mlx5_wqe_ctrl_seg) +
				      sizeof(struct mlx5_wqe_raddr_seg) +
				      sizeof(struct mlx5_wqe_inline_seg)),
				     (2 * MLX5_SEND_WQE_BB -
				      (sizeof(struct mlx5_wqe_ctrl_seg) +
				       sizeof(struct mlx5_wqe_raddr_seg) +
				       sizeof(struct mlx5_wqe_inline_seg))));

	return dr_qp;

err_in:
	kfree(dr_qp->sq.wqe_head);
err_wqe_head:
	mlx5_wq_destroy(&dr_qp->wq_ctrl);
err_wq:
	kfree(dr_qp);
	return NULL;
}

static void dr_destroy_qp(struct mlx5_core_dev *mdev,
			  struct mlx5dr_qp *dr_qp)
{
	u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};

	MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
	MLX5_SET(destroy_qp_in, in, qpn, dr_qp->qpn);
	mlx5_cmd_exec_in(mdev, destroy_qp, in);

	kfree(dr_qp->sq.wqe_head);
	mlx5_wq_destroy(&dr_qp->wq_ctrl);
	kfree(dr_qp);
}

static void dr_cmd_notify_hw(struct mlx5dr_qp *dr_qp, void *ctrl)
{
	dma_wmb();
	*dr_qp->wq.sq.db = cpu_to_be32(dr_qp->sq.pc & 0xffff);

	/* After the wmb(), the HW is aware of the new work */
	wmb();

	mlx5_write64(ctrl, dr_qp->uar->map + MLX5_BF_OFFSET);
}

static void
dr_rdma_handle_flow_access_arg_segments(struct mlx5_wqe_ctrl_seg *wq_ctrl,
					u32 remote_addr,
					struct dr_data_seg *data_seg,
					int *size)
{
	struct mlx5_wqe_header_modify_argument_update_seg *wq_arg_seg;
	struct mlx5_wqe_flow_update_ctrl_seg *wq_flow_seg;

	wq_ctrl->general_id = cpu_to_be32(remote_addr);
	wq_flow_seg = (void *)(wq_ctrl + 1);

	/* mlx5_wqe_flow_update_ctrl_seg - all reserved */
	memset(wq_flow_seg, 0, sizeof(*wq_flow_seg));
	wq_arg_seg = (void *)(wq_flow_seg + 1);

	memcpy(wq_arg_seg->argument_list,
	       (void *)(uintptr_t)data_seg->addr,
	       data_seg->length);

	*size = (sizeof(*wq_ctrl) +	/* WQE ctrl segment */
		 sizeof(*wq_flow_seg) +	/* WQE flow update ctrl seg - reserved */
		 sizeof(*wq_arg_seg)) /	/* WQE hdr modify arg seg - data */
		MLX5_SEND_WQE_DS;
}

static int dr_set_data_inl_seg(struct mlx5dr_qp *dr_qp,
			       struct dr_data_seg *data_seg, void *wqe)
{
	int inline_header_size = sizeof(struct mlx5_wqe_ctrl_seg) +
				sizeof(struct mlx5_wqe_raddr_seg) +
				sizeof(struct mlx5_wqe_inline_seg);
	struct mlx5_wqe_inline_seg *seg;
	int left_space;
	int inl = 0;
	void *addr;
	int len;
	int idx;

	seg = wqe;
	wqe += sizeof(*seg);
	addr = (void *)(unsigned long)(data_seg->addr);
	len = data_seg->length;
	inl += len;
	left_space = MLX5_SEND_WQE_BB - inline_header_size;

	if (likely(len > left_space)) {
		/* The inline data spills over into the next WQE basic block */
		memcpy(wqe, addr, left_space);
		len -= left_space;
		addr += left_space;
		idx = (dr_qp->sq.pc + 1) & (dr_qp->sq.wqe_cnt - 1);
		wqe = mlx5_wq_cyc_get_wqe(&dr_qp->wq.sq, idx);
	}

	memcpy(wqe, addr, len);

	if (likely(inl)) {
		seg->byte_count = cpu_to_be32(inl | MLX5_INLINE_SEG);
		return DIV_ROUND_UP(inl + sizeof(seg->byte_count),
				    MLX5_SEND_WQE_DS);
	} else {
		return 0;
	}
}
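
/* Build the remote address segment and the data segment that follow the
 * already-initialized ctrl segment of an RDMA read/write WQE. When
 * IB_SEND_INLINE is set, the payload is copied into the WQE itself
 * instead of being referenced through an lkey.
 */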
static void
dr_rdma_handle_icm_write_segments(struct mlx5dr_qp *dr_qp,
				  struct mlx5_wqe_ctrl_seg *wq_ctrl,
				  u64 remote_addr,
				  u32 rkey,
				  struct dr_data_seg *data_seg,
				  unsigned int *size)
{
	struct mlx5_wqe_raddr_seg *wq_raddr;
	struct mlx5_wqe_data_seg *wq_dseg;

	wq_raddr = (void *)(wq_ctrl + 1);

	wq_raddr->raddr = cpu_to_be64(remote_addr);
	wq_raddr->rkey = cpu_to_be32(rkey);
	wq_raddr->reserved = 0;

	wq_dseg = (void *)(wq_raddr + 1);
	/* WQE ctrl segment + WQE remote addr segment */
	*size = (sizeof(*wq_ctrl) + sizeof(*wq_raddr)) / MLX5_SEND_WQE_DS;

	if (data_seg->send_flags & IB_SEND_INLINE) {
		*size += dr_set_data_inl_seg(dr_qp, data_seg, wq_dseg);
	} else {
		wq_dseg->byte_count = cpu_to_be32(data_seg->length);
		wq_dseg->lkey = cpu_to_be32(data_seg->lkey);
		wq_dseg->addr = cpu_to_be64(data_seg->addr);
		*size += sizeof(*wq_dseg) / MLX5_SEND_WQE_DS;	/* WQE data segment */
	}
}

static void dr_set_ctrl_seg(struct mlx5_wqe_ctrl_seg *wq_ctrl,
			    struct dr_data_seg *data_seg)
{
	wq_ctrl->signature = 0;
	wq_ctrl->rsvd[0] = 0;
	wq_ctrl->rsvd[1] = 0;
	wq_ctrl->fm_ce_se = data_seg->send_flags & IB_SEND_SIGNALED ?
			    MLX5_WQE_CTRL_CQ_UPDATE : 0;
	wq_ctrl->imm = 0;
}

static void dr_rdma_segments(struct mlx5dr_qp *dr_qp, u64 remote_addr,
			     u32 rkey, struct dr_data_seg *data_seg,
			     u32 opcode, bool notify_hw)
{
	struct mlx5_wqe_ctrl_seg *wq_ctrl;
	int opcode_mod = 0;
	unsigned int size;
	unsigned int idx;

	idx = dr_qp->sq.pc & (dr_qp->sq.wqe_cnt - 1);

	wq_ctrl = mlx5_wq_cyc_get_wqe(&dr_qp->wq.sq, idx);
	dr_set_ctrl_seg(wq_ctrl, data_seg);

	switch (opcode) {
	case MLX5_OPCODE_RDMA_READ:
	case MLX5_OPCODE_RDMA_WRITE:
		dr_rdma_handle_icm_write_segments(dr_qp, wq_ctrl, remote_addr,
						  rkey, data_seg, &size);
		break;
	case MLX5_OPCODE_FLOW_TBL_ACCESS:
		opcode_mod = MLX5_CMD_OP_MOD_UPDATE_HEADER_MODIFY_ARGUMENT;
		dr_rdma_handle_flow_access_arg_segments(wq_ctrl, remote_addr,
							data_seg, &size);
		break;
	default:
		WARN(true, "illegal opcode %d", opcode);
		return;
	}

	/* ---------------------------------------------------------
	 * |opcode_mod (8 bits)|wqe_index (16 bits)|opcode (8 bits)|
	 * ---------------------------------------------------------
	 */
	wq_ctrl->opmod_idx_opcode =
		cpu_to_be32((opcode_mod << 24) |
			    ((dr_qp->sq.pc & 0xffff) << 8) |
			    opcode);
	wq_ctrl->qpn_ds = cpu_to_be32(size | dr_qp->qpn << 8);

	dr_qp->sq.pc += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB);
	dr_qp->sq.wqe_head[idx] = dr_qp->sq.head++;

	if (notify_hw)
		dr_cmd_notify_hw(dr_qp, wq_ctrl);
}
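
/* For WRITE_ICM, an RDMA write is followed by an RDMA read of the same
 * remote address into the dedicated sync buffer; since the RC QP
 * executes WQEs in order, the read completing implies the write has
 * landed in ICM. GTA_ARG posts a single FLOW_TBL_ACCESS WQE.
 */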
static void dr_post_send(struct mlx5dr_qp *dr_qp, struct postsend_info *send_info)
{
	if (send_info->type == WRITE_ICM) {
		dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey,
				 &send_info->write, MLX5_OPCODE_RDMA_WRITE, false);
		dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey,
				 &send_info->read, MLX5_OPCODE_RDMA_READ, true);
	} else { /* GTA_ARG */
		dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey,
				 &send_info->write, MLX5_OPCODE_FLOW_TBL_ACCESS, true);
	}
}

/**
 * mlx5dr_send_fill_and_append_ste_send_info: Add data to be sent
 * with send_list parameters:
 *
 *     @ste:       The STE that the data is attached to
 *     @size:      Number of data bytes to write
 *     @offset:    Offset of the data from the start of the hw_ste entry
 *     @data:      Data
 *     @ste_info:  STE info to be sent with send_list
 *     @send_list: List to append the STE info to
 *     @copy_data: If true, the data is copied into ste_info, since it is
 *                 not backed up anywhere else (e.g. during re-hash).
 *                 If false, the data may still be updated after it was
 *                 added to the list.
 */
void mlx5dr_send_fill_and_append_ste_send_info(struct mlx5dr_ste *ste, u16 size,
					       u16 offset, u8 *data,
					       struct mlx5dr_ste_send_info *ste_info,
					       struct list_head *send_list,
					       bool copy_data)
{
	ste_info->size = size;
	ste_info->ste = ste;
	ste_info->offset = offset;

	if (copy_data) {
		memcpy(ste_info->data_cont, data, size);
		ste_info->data = ste_info->data_cont;
	} else {
		ste_info->data = data;
	}

	list_add_tail(&ste_info->send_list, send_list);
}
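
/* Completions are requested only on every signal_th-th posted WQE
 * (IB_SEND_SIGNALED), so each polled CQE retires signal_th WQEs at once.
 */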
/* The function consumes one work completion at a time, unless the queue
 * is full, i.e. the HW is a full queue length behind the SW; in that
 * case it drains the CQ until the pending WQEs drop below the signal
 * threshold again.
 */
static int dr_handle_pending_wc(struct mlx5dr_domain *dmn,
				struct mlx5dr_send_ring *send_ring)
{
	bool is_drain = false;
	int ne;

	if (send_ring->pending_wqe < send_ring->signal_th)
		return 0;

	/* Queue is full, start draining it */
	if (send_ring->pending_wqe >=
	    dmn->send_ring->signal_th * TH_NUMS_TO_DRAIN)
		is_drain = true;

	do {
		ne = dr_poll_cq(send_ring->cq, 1);
		if (unlikely(ne < 0)) {
			mlx5_core_warn_once(dmn->mdev, "SMFS QPN 0x%x is disabled/limited",
					    send_ring->qp->qpn);
			send_ring->err_state = true;
			return ne;
		} else if (ne == 1) {
			send_ring->pending_wqe -= send_ring->signal_th;
		}
	} while (ne == 1 ||
		 (is_drain && send_ring->pending_wqe >= send_ring->signal_th));

	return 0;
}

static void dr_fill_write_args_segs(struct mlx5dr_send_ring *send_ring,
				    struct postsend_info *send_info)
{
	send_ring->pending_wqe++;

	if (send_ring->pending_wqe % send_ring->signal_th == 0)
		send_info->write.send_flags |= IB_SEND_SIGNALED;
	else
		send_info->write.send_flags &= ~IB_SEND_SIGNALED;
}

static void dr_fill_write_icm_segs(struct mlx5dr_domain *dmn,
				   struct mlx5dr_send_ring *send_ring,
				   struct postsend_info *send_info)
{
	u32 buff_offset;

	if (send_info->write.length > dmn->info.max_inline_size) {
		buff_offset = (send_ring->tx_head &
			       (dmn->send_ring->signal_th - 1)) *
			      send_ring->max_post_send_size;
		/* Copy to ring mr */
		memcpy(send_ring->buf + buff_offset,
		       (void *)(uintptr_t)send_info->write.addr,
		       send_info->write.length);
		send_info->write.addr = (uintptr_t)send_ring->mr->dma_addr + buff_offset;
		send_info->write.lkey = send_ring->mr->mkey;

		send_ring->tx_head++;
	}

	send_ring->pending_wqe++;
	if (!send_info->write.lkey)
		send_info->write.send_flags |= IB_SEND_INLINE;

	if (send_ring->pending_wqe % send_ring->signal_th == 0)
		send_info->write.send_flags |= IB_SEND_SIGNALED;
	else
		send_info->write.send_flags &= ~IB_SEND_SIGNALED;

	send_ring->pending_wqe++;
	send_info->read.length = send_info->write.length;

	/* Read into dedicated sync buffer */
	send_info->read.addr = (uintptr_t)send_ring->sync_mr->dma_addr;
	send_info->read.lkey = send_ring->sync_mr->mkey;

	if (send_ring->pending_wqe % send_ring->signal_th == 0)
		send_info->read.send_flags |= IB_SEND_SIGNALED;
	else
		send_info->read.send_flags &= ~IB_SEND_SIGNALED;
}

static void dr_fill_data_segs(struct mlx5dr_domain *dmn,
			      struct mlx5dr_send_ring *send_ring,
			      struct postsend_info *send_info)
{
	if (send_info->type == WRITE_ICM)
		dr_fill_write_icm_segs(dmn, send_ring, send_info);
	else /* args */
		dr_fill_write_args_segs(send_ring, send_info);
}

static int dr_postsend_icm_data(struct mlx5dr_domain *dmn,
				struct postsend_info *send_info)
{
	struct mlx5dr_send_ring *send_ring = dmn->send_ring;
	int ret;

	if (unlikely(dmn->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR ||
		     send_ring->err_state)) {
		mlx5_core_dbg_once(dmn->mdev,
				   "Skipping post send: QP err state: %d, device state: %d\n",
				   send_ring->err_state, dmn->mdev->state);
		return 0;
	}

	spin_lock(&send_ring->lock);

	ret = dr_handle_pending_wc(dmn, send_ring);
	if (ret)
		goto out_unlock;

	dr_fill_data_segs(dmn, send_ring, send_info);
	dr_post_send(send_ring->qp, send_info);

out_unlock:
	spin_unlock(&send_ring->lock);
	return ret;
}
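
/* Compute how an ICM chunk is split into post-send iterations and
 * allocate a bounce buffer for one iteration. For example (illustrative
 * numbers), with a 64KB max_post_send_size and 64-byte STEs, a 256KB
 * chunk is written in four iterations of 1024 STEs each.
 */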
static int dr_get_tbl_copy_details(struct mlx5dr_domain *dmn,
				   struct mlx5dr_ste_htbl *htbl,
				   u8 **data,
				   u32 *byte_size,
				   int *iterations,
				   int *num_stes)
{
	u32 chunk_byte_size = mlx5dr_icm_pool_get_chunk_byte_size(htbl->chunk);
	int alloc_size;

	if (chunk_byte_size > dmn->send_ring->max_post_send_size) {
		*iterations = chunk_byte_size / dmn->send_ring->max_post_send_size;
		*byte_size = dmn->send_ring->max_post_send_size;
		alloc_size = *byte_size;
		*num_stes = *byte_size / DR_STE_SIZE;
	} else {
		*iterations = 1;
		*num_stes = mlx5dr_icm_pool_get_chunk_num_of_entries(htbl->chunk);
		alloc_size = *num_stes * DR_STE_SIZE;
	}

	*data = kvzalloc(alloc_size, GFP_KERNEL);
	if (!*data)
		return -ENOMEM;

	return 0;
}

/**
 * mlx5dr_send_postsend_ste: write size bytes into offset from the HW ICM.
 *
 *     @dmn:    Domain
 *     @ste:    The STE struct that contains the data (at
 *              least part of it)
 *     @data:   The data to send
 *     @size:   Number of bytes to write
 *     @offset: The offset from the ICM mapped data to
 *              start writing to; allows writing only part
 *              of the buffer.
 *
 * Return: 0 on success.
 */
int mlx5dr_send_postsend_ste(struct mlx5dr_domain *dmn, struct mlx5dr_ste *ste,
			     u8 *data, u16 size, u16 offset)
{
	struct postsend_info send_info = {};

	mlx5dr_ste_prepare_for_postsend(dmn->ste_ctx, data, size);

	send_info.write.addr = (uintptr_t)data;
	send_info.write.length = size;
	send_info.write.lkey = 0;
	send_info.remote_addr = mlx5dr_ste_get_mr_addr(ste) + offset;
	send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(ste->htbl->chunk);

	return dr_postsend_icm_data(dmn, &send_info);
}
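
/* Write a whole hash table to ICM. Used STEs are sent in reduced format
 * (DR_STE_SIZE_REDUCED bytes) followed by the table's bit mask
 * (DR_STE_SIZE_MASK bytes), while unused entries are sent as the given
 * formatted default STE.
 */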
int mlx5dr_send_postsend_htbl(struct mlx5dr_domain *dmn,
			      struct mlx5dr_ste_htbl *htbl,
			      u8 *formatted_ste, u8 *mask)
{
	u32 byte_size = mlx5dr_icm_pool_get_chunk_byte_size(htbl->chunk);
	int num_stes_per_iter;
	int iterations;
	u8 *data;
	int ret;
	int i;
	int j;

	ret = dr_get_tbl_copy_details(dmn, htbl, &data, &byte_size,
				      &iterations, &num_stes_per_iter);
	if (ret)
		return ret;

	mlx5dr_ste_prepare_for_postsend(dmn->ste_ctx, formatted_ste, DR_STE_SIZE);

	/* Send the data iteration times */
	for (i = 0; i < iterations; i++) {
		u32 ste_index = i * (byte_size / DR_STE_SIZE);
		struct postsend_info send_info = {};

		/* Copy all STEs to the data buffer; used entries also
		 * need the bit_mask appended.
		 */
		for (j = 0; j < num_stes_per_iter; j++) {
			struct mlx5dr_ste *ste = &htbl->chunk->ste_arr[ste_index + j];
			u32 ste_off = j * DR_STE_SIZE;

			if (mlx5dr_ste_is_not_used(ste)) {
				memcpy(data + ste_off,
				       formatted_ste, DR_STE_SIZE);
			} else {
				/* Copy data */
				memcpy(data + ste_off,
				       htbl->chunk->hw_ste_arr +
				       DR_STE_SIZE_REDUCED * (ste_index + j),
				       DR_STE_SIZE_REDUCED);
				/* Copy bit_mask */
				memcpy(data + ste_off + DR_STE_SIZE_REDUCED,
				       mask, DR_STE_SIZE_MASK);
				/* Only when we have a mask do we need to re-arrange the STE */
				mlx5dr_ste_prepare_for_postsend(dmn->ste_ctx,
								data + (j * DR_STE_SIZE),
								DR_STE_SIZE);
			}
		}

		send_info.write.addr = (uintptr_t)data;
		send_info.write.length = byte_size;
		send_info.write.lkey = 0;
		send_info.remote_addr =
			mlx5dr_ste_get_mr_addr(htbl->chunk->ste_arr + ste_index);
		send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(htbl->chunk);

		ret = dr_postsend_icm_data(dmn, &send_info);
		if (ret)
			goto out_free;
	}

out_free:
	kvfree(data);
	return ret;
}

/* Initialize htbl with default STEs */
int mlx5dr_send_postsend_formatted_htbl(struct mlx5dr_domain *dmn,
					struct mlx5dr_ste_htbl *htbl,
					u8 *ste_init_data,
					bool update_hw_ste)
{
	u32 byte_size = mlx5dr_icm_pool_get_chunk_byte_size(htbl->chunk);
	int iterations;
	int num_stes;
	u8 *copy_dst;
	u8 *data;
	int ret;
	int i;

	ret = dr_get_tbl_copy_details(dmn, htbl, &data, &byte_size,
				      &iterations, &num_stes);
	if (ret)
		return ret;

	if (update_hw_ste) {
		/* Copy the reduced STE to hash table ste_arr */
		for (i = 0; i < num_stes; i++) {
			copy_dst = htbl->chunk->hw_ste_arr + i * DR_STE_SIZE_REDUCED;
			memcpy(copy_dst, ste_init_data, DR_STE_SIZE_REDUCED);
		}
	}

	mlx5dr_ste_prepare_for_postsend(dmn->ste_ctx, ste_init_data, DR_STE_SIZE);

	/* Copy the same STE over the data buffer */
	for (i = 0; i < num_stes; i++) {
		copy_dst = data + i * DR_STE_SIZE;
		memcpy(copy_dst, ste_init_data, DR_STE_SIZE);
	}

	/* Send the data iteration times */
	for (i = 0; i < iterations; i++) {
		u32 ste_index = i * (byte_size / DR_STE_SIZE);
		struct postsend_info send_info = {};

		send_info.write.addr = (uintptr_t)data;
		send_info.write.length = byte_size;
		send_info.write.lkey = 0;
		send_info.remote_addr =
			mlx5dr_ste_get_mr_addr(htbl->chunk->ste_arr + ste_index);
		send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(htbl->chunk);

		ret = dr_postsend_icm_data(dmn, &send_info);
		if (ret)
			goto out_free;
	}

out_free:
	kvfree(data);
	return ret;
}

int mlx5dr_send_postsend_action(struct mlx5dr_domain *dmn,
				struct mlx5dr_action *action)
{
	struct postsend_info send_info = {};

	send_info.write.addr = (uintptr_t)action->rewrite->data;
	send_info.write.length = action->rewrite->num_of_actions *
				 DR_MODIFY_ACTION_SIZE;
	send_info.write.lkey = 0;
	send_info.remote_addr =
		mlx5dr_icm_pool_get_chunk_mr_addr(action->rewrite->chunk);
	send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(action->rewrite->chunk);

	return dr_postsend_icm_data(dmn, &send_info);
}

int mlx5dr_send_postsend_pattern(struct mlx5dr_domain *dmn,
				 struct mlx5dr_icm_chunk *chunk,
				 u16 num_of_actions,
				 u8 *data)
{
	struct postsend_info send_info = {};
	int ret;

	send_info.write.addr = (uintptr_t)data;
	send_info.write.length = num_of_actions * DR_MODIFY_ACTION_SIZE;
	send_info.remote_addr = mlx5dr_icm_pool_get_chunk_mr_addr(chunk);
	send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(chunk);

	ret = dr_postsend_icm_data(dmn, &send_info);
	if (ret)
		return ret;

	return 0;
}

int mlx5dr_send_postsend_args(struct mlx5dr_domain *dmn, u64 arg_id,
			      u16 num_of_actions, u8 *actions_data)
{
	int data_len, iter = 0, cur_sent;
	u64 addr;
	int ret;

	addr = (uintptr_t)actions_data;
	data_len = num_of_actions * DR_MODIFY_ACTION_SIZE;

	do {
		struct postsend_info send_info = {};

		send_info.type = GTA_ARG;
		send_info.write.addr = addr;
		cur_sent = min_t(u32, data_len, DR_ACTION_CACHE_LINE_SIZE);
		send_info.write.length = cur_sent;
		send_info.write.lkey = 0;
		send_info.remote_addr = arg_id + iter;

		ret = dr_postsend_icm_data(dmn, &send_info);
		if (ret)
			goto out;

		iter++;
		addr += cur_sent;
		data_len -= cur_sent;
	} while (data_len > 0);

out:
	return ret;
}
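
/* The QP bring-up follows the standard RC state machine,
 * RESET -> INIT -> RTR -> RTS, using the three modify-QP commands below,
 * orchestrated by dr_prepare_qp_to_rts().
 */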
static int dr_modify_qp_rst2init(struct mlx5_core_dev *mdev,
				 struct mlx5dr_qp *dr_qp,
				 int port)
{
	u32 in[MLX5_ST_SZ_DW(rst2init_qp_in)] = {};
	void *qpc;

	qpc = MLX5_ADDR_OF(rst2init_qp_in, in, qpc);

	MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, port);
	MLX5_SET(qpc, qpc, pm_state, MLX5_QPC_PM_STATE_MIGRATED);
	MLX5_SET(qpc, qpc, rre, 1);
	MLX5_SET(qpc, qpc, rwe, 1);

	MLX5_SET(rst2init_qp_in, in, opcode, MLX5_CMD_OP_RST2INIT_QP);
	MLX5_SET(rst2init_qp_in, in, qpn, dr_qp->qpn);

	return mlx5_cmd_exec_in(mdev, rst2init_qp, in);
}

static int dr_cmd_modify_qp_rtr2rts(struct mlx5_core_dev *mdev,
				    struct mlx5dr_qp *dr_qp,
				    struct dr_qp_rts_attr *attr)
{
	u32 in[MLX5_ST_SZ_DW(rtr2rts_qp_in)] = {};
	void *qpc;

	qpc = MLX5_ADDR_OF(rtr2rts_qp_in, in, qpc);

	MLX5_SET(rtr2rts_qp_in, in, qpn, dr_qp->qpn);

	MLX5_SET(qpc, qpc, retry_count, attr->retry_cnt);
	MLX5_SET(qpc, qpc, rnr_retry, attr->rnr_retry);
	MLX5_SET(qpc, qpc, primary_address_path.ack_timeout, 0x8); /* ~1ms */

	MLX5_SET(rtr2rts_qp_in, in, opcode, MLX5_CMD_OP_RTR2RTS_QP);
	MLX5_SET(rtr2rts_qp_in, in, qpn, dr_qp->qpn);

	return mlx5_cmd_exec_in(mdev, rtr2rts_qp, in);
}

static int dr_cmd_modify_qp_init2rtr(struct mlx5_core_dev *mdev,
				     struct mlx5dr_qp *dr_qp,
				     struct dr_qp_rtr_attr *attr)
{
	u32 in[MLX5_ST_SZ_DW(init2rtr_qp_in)] = {};
	void *qpc;

	qpc = MLX5_ADDR_OF(init2rtr_qp_in, in, qpc);

	MLX5_SET(init2rtr_qp_in, in, qpn, dr_qp->qpn);

	MLX5_SET(qpc, qpc, mtu, attr->mtu);
	MLX5_SET(qpc, qpc, log_msg_max, DR_CHUNK_SIZE_MAX - 1);
	MLX5_SET(qpc, qpc, remote_qpn, attr->qp_num);
	memcpy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rmac_47_32),
	       attr->dgid_attr.mac, sizeof(attr->dgid_attr.mac));
	memcpy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rgid_rip),
	       attr->dgid_attr.gid, sizeof(attr->dgid_attr.gid));
	MLX5_SET(qpc, qpc, primary_address_path.src_addr_index,
		 attr->sgid_index);

	if (attr->dgid_attr.roce_ver == MLX5_ROCE_VERSION_2)
		MLX5_SET(qpc, qpc, primary_address_path.udp_sport,
			 attr->udp_src_port);

	MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, attr->port_num);
	MLX5_SET(qpc, qpc, primary_address_path.fl, attr->fl);
	MLX5_SET(qpc, qpc, min_rnr_nak, 1);

	MLX5_SET(init2rtr_qp_in, in, opcode, MLX5_CMD_OP_INIT2RTR_QP);
	MLX5_SET(init2rtr_qp_in, in, qpn, dr_qp->qpn);

	return mlx5_cmd_exec_in(mdev, init2rtr_qp, in);
}

static bool dr_send_allow_fl(struct mlx5dr_cmd_caps *caps)
{
	/* Check whether RC RoCE QP creation with force loopback is allowed.
	 * There are two separate capability bits for this:
	 * - force loopback when RoCE is enabled
	 * - force loopback when RoCE is disabled
	 */
	return ((caps->roce_caps.roce_en &&
		 caps->roce_caps.fl_rc_qp_when_roce_enabled) ||
		(!caps->roce_caps.roce_en &&
		 caps->roce_caps.fl_rc_qp_when_roce_disabled));
}
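
/* Bring the send ring QP to RTS, connected to itself in loopback: the
 * remote QP number is the QP's own, and when force loopback is not
 * supported, the local GID at index 0 is queried and used for the path.
 */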
static int dr_prepare_qp_to_rts(struct mlx5dr_domain *dmn)
{
	struct mlx5dr_qp *dr_qp = dmn->send_ring->qp;
	struct dr_qp_rts_attr rts_attr = {};
	struct dr_qp_rtr_attr rtr_attr = {};
	enum ib_mtu mtu = IB_MTU_1024;
	u16 gid_index = 0;
	int port = 1;
	int ret;

	/* Init */
	ret = dr_modify_qp_rst2init(dmn->mdev, dr_qp, port);
	if (ret) {
		mlx5dr_err(dmn, "Failed modify QP rst2init\n");
		return ret;
	}

	/* RTR */
	rtr_attr.mtu = mtu;
	rtr_attr.qp_num = dr_qp->qpn;
	rtr_attr.min_rnr_timer = 12;
	rtr_attr.port_num = port;
	rtr_attr.udp_src_port = dmn->info.caps.roce_min_src_udp;

	/* If QP creation with force loopback is allowed, then there
	 * is no need for a GID index when creating the QP.
	 * Otherwise we query the GID attributes and use the GID index.
	 */
	rtr_attr.fl = dr_send_allow_fl(&dmn->info.caps);
	if (!rtr_attr.fl) {
		ret = mlx5dr_cmd_query_gid(dmn->mdev, port, gid_index,
					   &rtr_attr.dgid_attr);
		if (ret)
			return ret;

		rtr_attr.sgid_index = gid_index;
	}

	ret = dr_cmd_modify_qp_init2rtr(dmn->mdev, dr_qp, &rtr_attr);
	if (ret) {
		mlx5dr_err(dmn, "Failed modify QP init2rtr\n");
		return ret;
	}

	/* RTS */
	rts_attr.timeout = 14;
	rts_attr.retry_cnt = 7;
	rts_attr.rnr_retry = 7;

	ret = dr_cmd_modify_qp_rtr2rts(dmn->mdev, dr_qp, &rts_attr);
	if (ret) {
		mlx5dr_err(dmn, "Failed modify QP rtr2rts\n");
		return ret;
	}

	return 0;
}

static void dr_cq_complete(struct mlx5_core_cq *mcq,
			   struct mlx5_eqe *eqe)
{
	pr_err("CQ completion CQ: #%u\n", mcq->cqn);
}
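
/* Create the send ring CQ. It is used strictly in polling mode: it is
 * never armed, so dr_cq_complete() is not expected to run. All CQEs are
 * pre-initialized as invalid so that stale entries are never treated as
 * valid completions.
 */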
static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev,
				      struct mlx5_uars_page *uar,
				      size_t ncqe)
{
	u32 temp_cqc[MLX5_ST_SZ_DW(cqc)] = {};
	u32 out[MLX5_ST_SZ_DW(create_cq_out)];
	struct mlx5_wq_param wqp;
	struct mlx5_cqe64 *cqe;
	struct mlx5dr_cq *cq;
	int inlen, err, eqn;
	void *cqc, *in;
	__be64 *pas;
	int vector;
	u32 i;

	cq = kzalloc(sizeof(*cq), GFP_KERNEL);
	if (!cq)
		return NULL;

	ncqe = roundup_pow_of_two(ncqe);
	MLX5_SET(cqc, temp_cqc, log_cq_size, ilog2(ncqe));

	wqp.buf_numa_node = mdev->priv.numa_node;
	wqp.db_numa_node = mdev->priv.numa_node;

	err = mlx5_cqwq_create(mdev, &wqp, temp_cqc, &cq->wq,
			       &cq->wq_ctrl);
	if (err)
		goto out;

	for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) {
		cqe = mlx5_cqwq_get_wqe(&cq->wq, i);
		cqe->op_own = MLX5_CQE_INVALID << 4 | MLX5_CQE_OWNER_MASK;
	}

	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
		sizeof(u64) * cq->wq_ctrl.buf.npages;
	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in)
		goto err_cqwq;

	vector = raw_smp_processor_id() % mlx5_comp_vectors_max(mdev);
	err = mlx5_comp_eqn_get(mdev, vector, &eqn);
	if (err) {
		kvfree(in);
		goto err_cqwq;
	}

	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
	MLX5_SET(cqc, cqc, log_cq_size, ilog2(ncqe));
	MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
	MLX5_SET(cqc, cqc, uar_page, uar->index);
	MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
		 MLX5_ADAPTER_PAGE_SHIFT);
	MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma);

	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
	mlx5_fill_page_frag_array(&cq->wq_ctrl.buf, pas);

	cq->mcq.comp = dr_cq_complete;

	err = mlx5_core_create_cq(mdev, &cq->mcq, in, inlen, out, sizeof(out));
	kvfree(in);

	if (err)
		goto err_cqwq;

	cq->mcq.cqe_sz = 64;
	cq->mcq.set_ci_db = cq->wq_ctrl.db.db;
	cq->mcq.arm_db = cq->wq_ctrl.db.db + 1;
	*cq->mcq.set_ci_db = 0;

	/* Set a non-zero value in order to prevent the HW from running
	 * db-recovery on a CQ that is used in polling mode.
	 */
	*cq->mcq.arm_db = cpu_to_be32(2 << 28);

	cq->mcq.vector = 0;
	cq->mcq.uar = uar;
	cq->mdev = mdev;

	return cq;

err_cqwq:
	mlx5_wq_destroy(&cq->wq_ctrl);
out:
	kfree(cq);
	return NULL;
}

static void dr_destroy_cq(struct mlx5_core_dev *mdev, struct mlx5dr_cq *cq)
{
	mlx5_core_destroy_cq(mdev, &cq->mcq);
	mlx5_wq_destroy(&cq->wq_ctrl);
	kfree(cq);
}

static int dr_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, u32 *mkey)
{
	u32 in[MLX5_ST_SZ_DW(create_mkey_in)] = {};
	void *mkc;

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
	MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
	MLX5_SET(mkc, mkc, a, 1);
	MLX5_SET(mkc, mkc, rw, 1);
	MLX5_SET(mkc, mkc, rr, 1);
	MLX5_SET(mkc, mkc, lw, 1);
	MLX5_SET(mkc, mkc, lr, 1);

	MLX5_SET(mkc, mkc, pd, pdn);
	MLX5_SET(mkc, mkc, length64, 1);
	MLX5_SET(mkc, mkc, qpn, 0xffffff);

	return mlx5_core_create_mkey(mdev, mkey, in, sizeof(in));
}

static struct mlx5dr_mr *dr_reg_mr(struct mlx5_core_dev *mdev,
				   u32 pdn, void *buf, size_t size)
{
	struct mlx5dr_mr *mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	struct device *dma_device;
	dma_addr_t dma_addr;
	int err;

	if (!mr)
		return NULL;

	dma_device = mlx5_core_dma_dev(mdev);
	dma_addr = dma_map_single(dma_device, buf, size,
				  DMA_BIDIRECTIONAL);
	err = dma_mapping_error(dma_device, dma_addr);
	if (err) {
		mlx5_core_warn(mdev, "Can't dma map buf\n");
		kfree(mr);
		return NULL;
	}

	err = dr_create_mkey(mdev, pdn, &mr->mkey);
	if (err) {
		mlx5_core_warn(mdev, "Can't create mkey\n");
		dma_unmap_single(dma_device, dma_addr, size,
				 DMA_BIDIRECTIONAL);
		kfree(mr);
		return NULL;
	}

	mr->dma_addr = dma_addr;
	mr->size = size;
	mr->addr = buf;

	return mr;
}

static void dr_dereg_mr(struct mlx5_core_dev *mdev, struct mlx5dr_mr *mr)
{
	mlx5_core_destroy_mkey(mdev, mr->mkey);
	dma_unmap_single(mlx5_core_dma_dev(mdev), mr->dma_addr, mr->size,
			 DMA_BIDIRECTIONAL);
	kfree(mr);
}
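
/* Allocate the domain's send ring: the CQ, the loopback RC QP, a bounce
 * buffer MR that can hold signal_th posts of max_post_send_size bytes,
 * and a separate sync MR that serves as the target of the flushing RDMA
 * reads.
 */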
int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn)
{
	struct dr_qp_init_attr init_attr = {};
	int cq_size;
	int size;
	int ret;

	dmn->send_ring = kzalloc(sizeof(*dmn->send_ring), GFP_KERNEL);
	if (!dmn->send_ring)
		return -ENOMEM;

	cq_size = QUEUE_SIZE + 1;
	dmn->send_ring->cq = dr_create_cq(dmn->mdev, dmn->uar, cq_size);
	if (!dmn->send_ring->cq) {
		mlx5dr_err(dmn, "Failed creating CQ\n");
		ret = -ENOMEM;
		goto free_send_ring;
	}

	init_attr.cqn = dmn->send_ring->cq->mcq.cqn;
	init_attr.pdn = dmn->pdn;
	init_attr.uar = dmn->uar;
	init_attr.max_send_wr = QUEUE_SIZE;
	/* Must be set before QP creation, since the WQE size calculation
	 * depends on it.
	 */
	init_attr.max_send_sge = 1;

	/* Isolated VL is applicable only if force loopback is supported */
	if (dr_send_allow_fl(&dmn->info.caps))
		init_attr.isolate_vl_tc = dmn->info.caps.isolate_vl_tc;

	spin_lock_init(&dmn->send_ring->lock);

	dmn->send_ring->qp = dr_create_rc_qp(dmn->mdev, &init_attr);
	if (!dmn->send_ring->qp) {
		mlx5dr_err(dmn, "Failed creating QP\n");
		ret = -ENOMEM;
		goto clean_cq;
	}

	dmn->send_ring->cq->qp = dmn->send_ring->qp;

	dmn->info.max_send_wr = QUEUE_SIZE;
	dmn->info.max_inline_size = min(dmn->send_ring->qp->max_inline_data,
					DR_STE_SIZE);

	dmn->send_ring->signal_th = dmn->info.max_send_wr /
				    SIGNAL_PER_DIV_QUEUE;

	/* Prepare qp to be used */
	ret = dr_prepare_qp_to_rts(dmn);
	if (ret)
		goto clean_qp;

	dmn->send_ring->max_post_send_size =
		mlx5dr_icm_pool_chunk_size_to_byte(DR_CHUNK_SIZE_1K,
						   DR_ICM_TYPE_STE);

	/* Allocating the max size as a buffer for writing */
	size = dmn->send_ring->signal_th * dmn->send_ring->max_post_send_size;
	dmn->send_ring->buf = kzalloc(size, GFP_KERNEL);
	if (!dmn->send_ring->buf) {
		ret = -ENOMEM;
		goto clean_qp;
	}

	dmn->send_ring->buf_size = size;

	dmn->send_ring->mr = dr_reg_mr(dmn->mdev,
				       dmn->pdn, dmn->send_ring->buf, size);
	if (!dmn->send_ring->mr) {
		ret = -ENOMEM;
		goto free_mem;
	}

	dmn->send_ring->sync_buff = kzalloc(dmn->send_ring->max_post_send_size,
					    GFP_KERNEL);
	if (!dmn->send_ring->sync_buff) {
		ret = -ENOMEM;
		goto clean_mr;
	}

	dmn->send_ring->sync_mr = dr_reg_mr(dmn->mdev,
					    dmn->pdn, dmn->send_ring->sync_buff,
					    dmn->send_ring->max_post_send_size);
	if (!dmn->send_ring->sync_mr) {
		ret = -ENOMEM;
		goto free_sync_mem;
	}

	return 0;

free_sync_mem:
	kfree(dmn->send_ring->sync_buff);
clean_mr:
	dr_dereg_mr(dmn->mdev, dmn->send_ring->mr);
free_mem:
	kfree(dmn->send_ring->buf);
clean_qp:
	dr_destroy_qp(dmn->mdev, dmn->send_ring->qp);
clean_cq:
	dr_destroy_cq(dmn->mdev, dmn->send_ring->cq);
free_send_ring:
	kfree(dmn->send_ring);

	return ret;
}

void mlx5dr_send_ring_free(struct mlx5dr_domain *dmn,
			   struct mlx5dr_send_ring *send_ring)
{
	dr_destroy_qp(dmn->mdev, send_ring->qp);
	dr_destroy_cq(dmn->mdev, send_ring->cq);
	dr_dereg_mr(dmn->mdev, send_ring->sync_mr);
	dr_dereg_mr(dmn->mdev, send_ring->mr);
	kfree(send_ring->buf);
	kfree(send_ring->sync_buff);
	kfree(send_ring);
}

int mlx5dr_send_ring_force_drain(struct mlx5dr_domain *dmn)
{
	struct mlx5dr_send_ring *send_ring = dmn->send_ring;
	struct postsend_info send_info = {};
	u8 data[DR_STE_SIZE];
	int num_of_sends_req;
	int ret;
	int i;

	/* Sending this number of requests makes sure the queue is drained */
	num_of_sends_req = send_ring->signal_th * TH_NUMS_TO_DRAIN / 2;

	/* Send fake requests forcing the last to be signaled */
	send_info.write.addr = (uintptr_t)data;
	send_info.write.length = DR_STE_SIZE;
	send_info.write.lkey = 0;
	/* Using the sync_mr in order to write/read */
	send_info.remote_addr = (uintptr_t)send_ring->sync_mr->addr;
	send_info.rkey = send_ring->sync_mr->mkey;

	for (i = 0; i < num_of_sends_req; i++) {
		ret = dr_postsend_icm_data(dmn, &send_info);
		if (ret)
			return ret;
	}

	spin_lock(&send_ring->lock);
	ret = dr_handle_pending_wc(dmn, send_ring);
	spin_unlock(&send_ring->lock);

	return ret;
}