// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */

#include <linux/smp.h>
#include "dr_types.h"

#define QUEUE_SIZE 128
#define SIGNAL_PER_DIV_QUEUE 16
#define TH_NUMS_TO_DRAIN 2
#define DR_SEND_INFO_POOL_SIZE 1000

enum { CQ_OK = 0, CQ_EMPTY = -1, CQ_POLL_ERR = -2 };

struct dr_data_seg {
	u64 addr;
	u32 length;
	u32 lkey;
	unsigned int send_flags;
};

enum send_info_type {
	WRITE_ICM = 0,
	GTA_ARG = 1,
};

struct postsend_info {
	enum send_info_type type;
	struct dr_data_seg write;
	struct dr_data_seg read;
	u64 remote_addr;
	u32 rkey;
};

struct dr_qp_rtr_attr {
	struct mlx5dr_cmd_gid_attr dgid_attr;
	enum ib_mtu mtu;
	u32 qp_num;
	u16 port_num;
	u8 min_rnr_timer;
	u8 sgid_index;
	u16 udp_src_port;
	u8 fl:1;
};

struct dr_qp_rts_attr {
	u8 timeout;
	u8 retry_cnt;
	u8 rnr_retry;
};

struct dr_qp_init_attr {
	u32 cqn;
	u32 pdn;
	u32 max_send_wr;
	struct mlx5_uars_page *uar;
	u8 isolate_vl_tc:1;
};

struct mlx5dr_send_info_pool_obj {
	struct mlx5dr_ste_send_info ste_send_info;
	struct mlx5dr_send_info_pool *pool;
	struct list_head list_node;
};

struct mlx5dr_send_info_pool {
	struct list_head free_list;
};

static int dr_send_info_pool_fill(struct mlx5dr_send_info_pool *pool)
{
	struct mlx5dr_send_info_pool_obj *pool_obj, *tmp_pool_obj;
	int i;

	for (i = 0; i < DR_SEND_INFO_POOL_SIZE; i++) {
		pool_obj = kzalloc(sizeof(*pool_obj), GFP_KERNEL);
		if (!pool_obj)
			goto clean_pool;

		pool_obj->pool = pool;
		list_add_tail(&pool_obj->list_node, &pool->free_list);
	}

	return 0;

clean_pool:
	list_for_each_entry_safe(pool_obj, tmp_pool_obj, &pool->free_list, list_node) {
		list_del(&pool_obj->list_node);
		kfree(pool_obj);
	}

	return -ENOMEM;
}

static void dr_send_info_pool_destroy(struct mlx5dr_send_info_pool *pool)
{
	struct mlx5dr_send_info_pool_obj *pool_obj, *tmp_pool_obj;

	list_for_each_entry_safe(pool_obj, tmp_pool_obj, &pool->free_list, list_node) {
		list_del(&pool_obj->list_node);
		kfree(pool_obj);
	}

	kfree(pool);
}

void mlx5dr_send_info_pool_destroy(struct mlx5dr_domain *dmn)
{
	dr_send_info_pool_destroy(dmn->send_info_pool_tx);
	dr_send_info_pool_destroy(dmn->send_info_pool_rx);
}

static struct mlx5dr_send_info_pool *dr_send_info_pool_create(void)
{
	struct mlx5dr_send_info_pool *pool;
	int ret;

	pool = kzalloc(sizeof(*pool), GFP_KERNEL);
	if (!pool)
		return NULL;

	INIT_LIST_HEAD(&pool->free_list);

	ret = dr_send_info_pool_fill(pool);
	if (ret) {
		kfree(pool);
		return NULL;
	}

	return pool;
}

int mlx5dr_send_info_pool_create(struct mlx5dr_domain *dmn)
{
	dmn->send_info_pool_rx = dr_send_info_pool_create();
	if (!dmn->send_info_pool_rx)
		return -ENOMEM;

	dmn->send_info_pool_tx = dr_send_info_pool_create();
	if (!dmn->send_info_pool_tx) {
		dr_send_info_pool_destroy(dmn->send_info_pool_rx);
		return -ENOMEM;
	}

	return 0;
}

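/* Get a preallocated ste_send_info object from the domain's per-direction
 * pool; refill the pool if the free list is empty.
 */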
struct mlx5dr_ste_send_info
*mlx5dr_send_info_alloc(struct mlx5dr_domain *dmn,
			enum mlx5dr_domain_nic_type nic_type)
{
	struct mlx5dr_send_info_pool_obj *pool_obj;
	struct mlx5dr_send_info_pool *pool;
	int ret;

	pool = nic_type == DR_DOMAIN_NIC_TYPE_RX ? dmn->send_info_pool_rx :
						   dmn->send_info_pool_tx;

	if (unlikely(list_empty(&pool->free_list))) {
		ret = dr_send_info_pool_fill(pool);
		if (ret)
			return NULL;
	}

	pool_obj = list_first_entry_or_null(&pool->free_list,
					    struct mlx5dr_send_info_pool_obj,
					    list_node);

	if (likely(pool_obj)) {
		list_del_init(&pool_obj->list_node);
	} else {
		WARN_ONCE(!pool_obj, "Failed getting ste send info obj from pool");
		return NULL;
	}

	return &pool_obj->ste_send_info;
}

void mlx5dr_send_info_free(struct mlx5dr_ste_send_info *ste_send_info)
{
	struct mlx5dr_send_info_pool_obj *pool_obj;

	pool_obj = container_of(ste_send_info,
				struct mlx5dr_send_info_pool_obj,
				ste_send_info);

	list_add(&pool_obj->list_node, &pool_obj->pool->free_list);
}

static int dr_parse_cqe(struct mlx5dr_cq *dr_cq, struct mlx5_cqe64 *cqe64)
{
	unsigned int idx;
	u8 opcode;

	opcode = get_cqe_opcode(cqe64);
	if (opcode == MLX5_CQE_REQ_ERR) {
		idx = be16_to_cpu(cqe64->wqe_counter) &
			(dr_cq->qp->sq.wqe_cnt - 1);
		dr_cq->qp->sq.cc = dr_cq->qp->sq.wqe_head[idx] + 1;
	} else if (opcode == MLX5_CQE_RESP_ERR) {
		++dr_cq->qp->sq.cc;
	} else {
		idx = be16_to_cpu(cqe64->wqe_counter) &
			(dr_cq->qp->sq.wqe_cnt - 1);
		dr_cq->qp->sq.cc = dr_cq->qp->sq.wqe_head[idx] + 1;

		return CQ_OK;
	}

	return CQ_POLL_ERR;
}

static int dr_cq_poll_one(struct mlx5dr_cq *dr_cq)
{
	struct mlx5_cqe64 *cqe64;
	int err;

	cqe64 = mlx5_cqwq_get_cqe(&dr_cq->wq);
	if (!cqe64) {
		if (unlikely(dr_cq->mdev->state ==
			     MLX5_DEVICE_STATE_INTERNAL_ERROR)) {
			mlx5_core_dbg_once(dr_cq->mdev,
					   "Polling CQ while device is shutting down\n");
			return CQ_POLL_ERR;
		}
		return CQ_EMPTY;
	}

	mlx5_cqwq_pop(&dr_cq->wq);
	err = dr_parse_cqe(dr_cq, cqe64);
	mlx5_cqwq_update_db_record(&dr_cq->wq);

	return err;
}

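/* Poll up to @ne completions; return the number of CQEs consumed,
 * or CQ_POLL_ERR on error.
 */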
static int dr_poll_cq(struct mlx5dr_cq *dr_cq, int ne)
{
	int npolled;
	int err = 0;

	for (npolled = 0; npolled < ne; ++npolled) {
		err = dr_cq_poll_one(dr_cq);
		if (err != CQ_OK)
			break;
	}

	return err == CQ_POLL_ERR ? err : npolled;
}

static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev,
					 struct dr_qp_init_attr *attr)
{
	u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
	u32 temp_qpc[MLX5_ST_SZ_DW(qpc)] = {};
	struct mlx5_wq_param wqp;
	struct mlx5dr_qp *dr_qp;
	int inlen;
	void *qpc;
	void *in;
	int err;

	dr_qp = kzalloc(sizeof(*dr_qp), GFP_KERNEL);
	if (!dr_qp)
		return NULL;

	wqp.buf_numa_node = mdev->priv.numa_node;
	wqp.db_numa_node = mdev->priv.numa_node;

	dr_qp->rq.pc = 0;
	dr_qp->rq.cc = 0;
	dr_qp->rq.wqe_cnt = 256;
	dr_qp->sq.pc = 0;
	dr_qp->sq.cc = 0;
	dr_qp->sq.head = 0;
	dr_qp->sq.wqe_cnt = roundup_pow_of_two(attr->max_send_wr);

	MLX5_SET(qpc, temp_qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4);
	MLX5_SET(qpc, temp_qpc, log_rq_size, ilog2(dr_qp->rq.wqe_cnt));
	MLX5_SET(qpc, temp_qpc, log_sq_size, ilog2(dr_qp->sq.wqe_cnt));
	err = mlx5_wq_qp_create(mdev, &wqp, temp_qpc, &dr_qp->wq,
				&dr_qp->wq_ctrl);
	if (err) {
		mlx5_core_warn(mdev, "Can't create QP WQ\n");
		goto err_wq;
	}

	dr_qp->sq.wqe_head = kcalloc(dr_qp->sq.wqe_cnt,
				     sizeof(dr_qp->sq.wqe_head[0]),
				     GFP_KERNEL);

	if (!dr_qp->sq.wqe_head) {
		mlx5_core_warn(mdev, "Can't allocate wqe head\n");
		goto err_wqe_head;
	}

	inlen = MLX5_ST_SZ_BYTES(create_qp_in) +
		MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) *
		dr_qp->wq_ctrl.buf.npages;
	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_in;
	}

	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
	MLX5_SET(qpc, qpc, isolate_vl_tc, attr->isolate_vl_tc);
	MLX5_SET(qpc, qpc, pd, attr->pdn);
	MLX5_SET(qpc, qpc, uar_page, attr->uar->index);
	MLX5_SET(qpc, qpc, log_page_size,
		 dr_qp->wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
	MLX5_SET(qpc, qpc, fre, 1);
	MLX5_SET(qpc, qpc, rlky, 1);
	MLX5_SET(qpc, qpc, cqn_snd, attr->cqn);
	MLX5_SET(qpc, qpc, cqn_rcv, attr->cqn);
	MLX5_SET(qpc, qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4);
	MLX5_SET(qpc, qpc, log_rq_size, ilog2(dr_qp->rq.wqe_cnt));
	MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
	MLX5_SET(qpc, qpc, log_sq_size, ilog2(dr_qp->sq.wqe_cnt));
	MLX5_SET(qpc, qpc, ts_format, mlx5_get_qp_default_ts(mdev));
	MLX5_SET64(qpc, qpc, dbr_addr, dr_qp->wq_ctrl.db.dma);
	if (MLX5_CAP_GEN(mdev, cqe_version) == 1)
		MLX5_SET(qpc, qpc, user_index, 0xFFFFFF);
	mlx5_fill_page_frag_array(&dr_qp->wq_ctrl.buf,
				  (__be64 *)MLX5_ADDR_OF(create_qp_in,
							 in, pas));

	MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
	err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
	dr_qp->qpn = MLX5_GET(create_qp_out, out, qpn);
	kvfree(in);
	if (err)
		goto err_in;
	dr_qp->uar = attr->uar;

	return dr_qp;

err_in:
	kfree(dr_qp->sq.wqe_head);
err_wqe_head:
	mlx5_wq_destroy(&dr_qp->wq_ctrl);
err_wq:
	kfree(dr_qp);
	return NULL;
}

static void dr_destroy_qp(struct mlx5_core_dev *mdev,
			  struct mlx5dr_qp *dr_qp)
{
	u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};

	MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
	MLX5_SET(destroy_qp_in, in, qpn, dr_qp->qpn);
	mlx5_cmd_exec_in(mdev, destroy_qp, in);

	kfree(dr_qp->sq.wqe_head);
	mlx5_wq_destroy(&dr_qp->wq_ctrl);
	kfree(dr_qp);
}

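/* Ring the doorbell: publish the new SQ producer counter in the doorbell
 * record and write the WQE control segment to the UAR/BlueFlame register
 * so the HW starts processing the posted work.
 */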
static void dr_cmd_notify_hw(struct mlx5dr_qp *dr_qp, void *ctrl)
{
	dma_wmb();
	*dr_qp->wq.sq.db = cpu_to_be32(dr_qp->sq.pc & 0xffff);

	/* After wmb() the hw is aware of the new work */
	wmb();

	mlx5_write64(ctrl, dr_qp->uar->map + MLX5_BF_OFFSET);
}

static void
dr_rdma_handle_flow_access_arg_segments(struct mlx5_wqe_ctrl_seg *wq_ctrl,
					u32 remote_addr,
					struct dr_data_seg *data_seg,
					int *size)
{
	struct mlx5_wqe_header_modify_argument_update_seg *wq_arg_seg;
	struct mlx5_wqe_flow_update_ctrl_seg *wq_flow_seg;

	wq_ctrl->general_id = cpu_to_be32(remote_addr);
	wq_flow_seg = (void *)(wq_ctrl + 1);

	/* mlx5_wqe_flow_update_ctrl_seg - all reserved */
	memset(wq_flow_seg, 0, sizeof(*wq_flow_seg));
	wq_arg_seg = (void *)(wq_flow_seg + 1);

	memcpy(wq_arg_seg->argument_list,
	       (void *)(uintptr_t)data_seg->addr,
	       data_seg->length);

	*size = (sizeof(*wq_ctrl) +     /* WQE ctrl segment */
		 sizeof(*wq_flow_seg) + /* WQE flow update ctrl seg - reserved */
		 sizeof(*wq_arg_seg)) / /* WQE hdr modify arg seg - data */
		MLX5_SEND_WQE_DS;
}

static void
dr_rdma_handle_icm_write_segments(struct mlx5_wqe_ctrl_seg *wq_ctrl,
				  u64 remote_addr,
				  u32 rkey,
				  struct dr_data_seg *data_seg,
				  unsigned int *size)
{
	struct mlx5_wqe_raddr_seg *wq_raddr;
	struct mlx5_wqe_data_seg *wq_dseg;

	wq_raddr = (void *)(wq_ctrl + 1);

	wq_raddr->raddr = cpu_to_be64(remote_addr);
	wq_raddr->rkey = cpu_to_be32(rkey);
	wq_raddr->reserved = 0;

	wq_dseg = (void *)(wq_raddr + 1);

	wq_dseg->byte_count = cpu_to_be32(data_seg->length);
	wq_dseg->lkey = cpu_to_be32(data_seg->lkey);
	wq_dseg->addr = cpu_to_be64(data_seg->addr);

	*size = (sizeof(*wq_ctrl) +  /* WQE ctrl segment */
		 sizeof(*wq_dseg) +  /* WQE data segment */
		 sizeof(*wq_raddr)) / /* WQE remote addr segment */
		MLX5_SEND_WQE_DS;
}

static void dr_set_ctrl_seg(struct mlx5_wqe_ctrl_seg *wq_ctrl,
			    struct dr_data_seg *data_seg)
{
	wq_ctrl->signature = 0;
	wq_ctrl->rsvd[0] = 0;
	wq_ctrl->rsvd[1] = 0;
	wq_ctrl->fm_ce_se = data_seg->send_flags & IB_SEND_SIGNALED ?
		MLX5_WQE_CTRL_CQ_UPDATE : 0;
	wq_ctrl->imm = 0;
}

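/* Build a single WQE at the current SQ producer index: a control segment
 * followed by either RDMA read/write segments (ICM access) or flow table
 * access argument segments (GTA arg update), then advance the producer
 * counter and optionally ring the doorbell.
 */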
static void dr_rdma_segments(struct mlx5dr_qp *dr_qp, u64 remote_addr,
			     u32 rkey, struct dr_data_seg *data_seg,
			     u32 opcode, bool notify_hw)
{
	struct mlx5_wqe_ctrl_seg *wq_ctrl;
	int opcode_mod = 0;
	unsigned int size;
	unsigned int idx;

	idx = dr_qp->sq.pc & (dr_qp->sq.wqe_cnt - 1);

	wq_ctrl = mlx5_wq_cyc_get_wqe(&dr_qp->wq.sq, idx);
	dr_set_ctrl_seg(wq_ctrl, data_seg);

	switch (opcode) {
	case MLX5_OPCODE_RDMA_READ:
	case MLX5_OPCODE_RDMA_WRITE:
		dr_rdma_handle_icm_write_segments(wq_ctrl, remote_addr,
						  rkey, data_seg, &size);
		break;
	case MLX5_OPCODE_FLOW_TBL_ACCESS:
		opcode_mod = MLX5_CMD_OP_MOD_UPDATE_HEADER_MODIFY_ARGUMENT;
		dr_rdma_handle_flow_access_arg_segments(wq_ctrl, remote_addr,
							data_seg, &size);
		break;
	default:
		WARN(true, "illegal opcode %d", opcode);
		return;
	}

	/* ---------------------------------------------------------
	 * |opcode_mod (8 bits)|wqe_index (16 bits)|opcode (8 bits)|
	 * ---------------------------------------------------------
	 */
	wq_ctrl->opmod_idx_opcode =
		cpu_to_be32((opcode_mod << 24) |
			    ((dr_qp->sq.pc & 0xffff) << 8) |
			    opcode);
	wq_ctrl->qpn_ds = cpu_to_be32(size | dr_qp->qpn << 8);

	dr_qp->sq.pc += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB);
	dr_qp->sq.wqe_head[idx] = dr_qp->sq.head++;

	if (notify_hw)
		dr_cmd_notify_hw(dr_qp, wq_ctrl);
}

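/* Post one logical send: an ICM write is posted as an RDMA_WRITE followed
 * by an RDMA_READ back into the ring's sync buffer, while a GTA argument
 * update is posted as a single FLOW_TBL_ACCESS WQE.
 */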
static void dr_post_send(struct mlx5dr_qp *dr_qp, struct postsend_info *send_info)
{
	if (send_info->type == WRITE_ICM) {
		dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey,
				 &send_info->write, MLX5_OPCODE_RDMA_WRITE, false);
		dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey,
				 &send_info->read, MLX5_OPCODE_RDMA_READ, true);
	} else { /* GTA_ARG */
		dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey,
				 &send_info->write, MLX5_OPCODE_FLOW_TBL_ACCESS, true);
	}
}

/**
 * mlx5dr_send_fill_and_append_ste_send_info: Add data to be sent
 * with send_list parameters:
 *
 * @ste:       The STE that the data is attached to
 * @size:      Size of the data to write
 * @offset:    Offset of the data from the start of the hw_ste entry
 * @data:      Data
 * @ste_info:  ste info to be sent with send_list
 * @send_list: List to append into
 * @copy_data: If true, indicates that the data should be kept because
 *             it's not backed up anywhere (like in re-hash).
 *             If false, it lets the data be updated after
 *             it was added to the list.
 */
void mlx5dr_send_fill_and_append_ste_send_info(struct mlx5dr_ste *ste, u16 size,
					       u16 offset, u8 *data,
					       struct mlx5dr_ste_send_info *ste_info,
					       struct list_head *send_list,
					       bool copy_data)
{
	ste_info->size = size;
	ste_info->ste = ste;
	ste_info->offset = offset;

	if (copy_data) {
		memcpy(ste_info->data_cont, data, size);
		ste_info->data = ste_info->data_cont;
	} else {
		ste_info->data = data;
	}

	list_add_tail(&ste_info->send_list, send_list);
}

/* The function tries to consume one wc each time, unless the queue is full.
 * In that case, which means that the hw is behind the sw by a full queue
 * length, the function will drain the cq until it is empty.
 */
static int dr_handle_pending_wc(struct mlx5dr_domain *dmn,
				struct mlx5dr_send_ring *send_ring)
{
	bool is_drain = false;
	int ne;

	if (send_ring->pending_wqe < send_ring->signal_th)
		return 0;

	/* Queue is full, start draining it */
	if (send_ring->pending_wqe >=
	    dmn->send_ring->signal_th * TH_NUMS_TO_DRAIN)
		is_drain = true;

	do {
		ne = dr_poll_cq(send_ring->cq, 1);
		if (unlikely(ne < 0)) {
			mlx5_core_warn_once(dmn->mdev, "SMFS QPN 0x%x is disabled/limited",
					    send_ring->qp->qpn);
			send_ring->err_state = true;
			return ne;
		} else if (ne == 1) {
			send_ring->pending_wqe -= send_ring->signal_th;
		}
	} while (ne == 1 ||
		 (is_drain && send_ring->pending_wqe >= send_ring->signal_th));

	return 0;
}

static void dr_fill_write_args_segs(struct mlx5dr_send_ring *send_ring,
				    struct postsend_info *send_info)
{
	send_ring->pending_wqe++;

	if (send_ring->pending_wqe % send_ring->signal_th == 0)
		send_info->write.send_flags |= IB_SEND_SIGNALED;
	else
		send_info->write.send_flags = 0;
}

static void dr_fill_write_icm_segs(struct mlx5dr_domain *dmn,
				   struct mlx5dr_send_ring *send_ring,
				   struct postsend_info *send_info)
{
	u32 buff_offset;

	if (send_info->write.length > dmn->info.max_inline_size) {
		buff_offset = (send_ring->tx_head &
			       (dmn->send_ring->signal_th - 1)) *
			      send_ring->max_post_send_size;
		/* Copy to ring mr */
		memcpy(send_ring->buf + buff_offset,
		       (void *)(uintptr_t)send_info->write.addr,
		       send_info->write.length);
		send_info->write.addr = (uintptr_t)send_ring->mr->dma_addr + buff_offset;
		send_info->write.lkey = send_ring->mr->mkey;

		send_ring->tx_head++;
	}

	send_ring->pending_wqe++;

	if (send_ring->pending_wqe % send_ring->signal_th == 0)
		send_info->write.send_flags |= IB_SEND_SIGNALED;

	send_ring->pending_wqe++;
	send_info->read.length = send_info->write.length;

	/* Read into dedicated sync buffer */
	send_info->read.addr = (uintptr_t)send_ring->sync_mr->dma_addr;
	send_info->read.lkey = send_ring->sync_mr->mkey;

	if (send_ring->pending_wqe % send_ring->signal_th == 0)
		send_info->read.send_flags = IB_SEND_SIGNALED;
	else
		send_info->read.send_flags = 0;
}

static void dr_fill_data_segs(struct mlx5dr_domain *dmn,
			      struct mlx5dr_send_ring *send_ring,
			      struct postsend_info *send_info)
{
	if (send_info->type == WRITE_ICM)
		dr_fill_write_icm_segs(dmn, send_ring, send_info);
	else /* args */
		dr_fill_write_args_segs(send_ring, send_info);
}

static int dr_postsend_icm_data(struct mlx5dr_domain *dmn,
				struct postsend_info *send_info)
{
	struct mlx5dr_send_ring *send_ring = dmn->send_ring;
	int ret;

	if (unlikely(dmn->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR ||
		     send_ring->err_state)) {
		mlx5_core_dbg_once(dmn->mdev,
				   "Skipping post send: QP err state: %d, device state: %d\n",
				   send_ring->err_state, dmn->mdev->state);
		return 0;
	}

	spin_lock(&send_ring->lock);

	ret = dr_handle_pending_wc(dmn, send_ring);
	if (ret)
		goto out_unlock;

	dr_fill_data_segs(dmn, send_ring, send_info);
	dr_post_send(send_ring->qp, send_info);

out_unlock:
	spin_unlock(&send_ring->lock);
	return ret;
}

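/* Compute how many iterations are needed to copy an htbl chunk given the
 * ring's max post size, and allocate a zeroed bounce buffer for the data.
 */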
static int dr_get_tbl_copy_details(struct mlx5dr_domain *dmn,
				   struct mlx5dr_ste_htbl *htbl,
				   u8 **data,
				   u32 *byte_size,
				   int *iterations,
				   int *num_stes)
{
	u32 chunk_byte_size = mlx5dr_icm_pool_get_chunk_byte_size(htbl->chunk);
	int alloc_size;

	if (chunk_byte_size > dmn->send_ring->max_post_send_size) {
		*iterations = chunk_byte_size / dmn->send_ring->max_post_send_size;
		*byte_size = dmn->send_ring->max_post_send_size;
		alloc_size = *byte_size;
		*num_stes = *byte_size / DR_STE_SIZE;
	} else {
		*iterations = 1;
		*num_stes = mlx5dr_icm_pool_get_chunk_num_of_entries(htbl->chunk);
		alloc_size = *num_stes * DR_STE_SIZE;
	}

	*data = kvzalloc(alloc_size, GFP_KERNEL);
	if (!*data)
		return -ENOMEM;

	return 0;
}

/**
 * mlx5dr_send_postsend_ste: write size bytes into offset from the hw icm.
 *
 * @dmn:    Domain
 * @ste:    The ste struct that contains the data (at
 *          least part of it)
 * @data:   The real data to send
 * @size:   Number of bytes to write
 * @offset: The offset from the icm mapped data to
 *          start writing to; use this to write only part of the
 *          buffer.
 *
 * Return: 0 on success.
 */
int mlx5dr_send_postsend_ste(struct mlx5dr_domain *dmn, struct mlx5dr_ste *ste,
			     u8 *data, u16 size, u16 offset)
{
	struct postsend_info send_info = {};

	mlx5dr_ste_prepare_for_postsend(dmn->ste_ctx, data, size);

	send_info.write.addr = (uintptr_t)data;
	send_info.write.length = size;
	send_info.write.lkey = 0;
	send_info.remote_addr = mlx5dr_ste_get_mr_addr(ste) + offset;
	send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(ste->htbl->chunk);

	return dr_postsend_icm_data(dmn, &send_info);
}

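/* Write a whole hash table to ICM: used STEs are sent with their reduced
 * data plus the bit mask, while unused entries are sent as the formatted
 * (default) STE.
 */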
int mlx5dr_send_postsend_htbl(struct mlx5dr_domain *dmn,
			      struct mlx5dr_ste_htbl *htbl,
			      u8 *formatted_ste, u8 *mask)
{
	u32 byte_size = mlx5dr_icm_pool_get_chunk_byte_size(htbl->chunk);
	int num_stes_per_iter;
	int iterations;
	u8 *data;
	int ret;
	int i;
	int j;

	ret = dr_get_tbl_copy_details(dmn, htbl, &data, &byte_size,
				      &iterations, &num_stes_per_iter);
	if (ret)
		return ret;

	mlx5dr_ste_prepare_for_postsend(dmn->ste_ctx, formatted_ste, DR_STE_SIZE);

	/* Send the data iteration times */
	for (i = 0; i < iterations; i++) {
		u32 ste_index = i * (byte_size / DR_STE_SIZE);
		struct postsend_info send_info = {};

		/* Copy all the STEs into the data buffer;
		 * the bit_mask needs to be added.
		 */
		for (j = 0; j < num_stes_per_iter; j++) {
			struct mlx5dr_ste *ste = &htbl->chunk->ste_arr[ste_index + j];
			u32 ste_off = j * DR_STE_SIZE;

			if (mlx5dr_ste_is_not_used(ste)) {
				memcpy(data + ste_off,
				       formatted_ste, DR_STE_SIZE);
			} else {
				/* Copy data */
				memcpy(data + ste_off,
				       htbl->chunk->hw_ste_arr +
				       DR_STE_SIZE_REDUCED * (ste_index + j),
				       DR_STE_SIZE_REDUCED);
				/* Copy bit_mask */
				memcpy(data + ste_off + DR_STE_SIZE_REDUCED,
				       mask, DR_STE_SIZE_MASK);
				/* Only when we have a mask do we need to re-arrange the STE */
				mlx5dr_ste_prepare_for_postsend(dmn->ste_ctx,
								data + (j * DR_STE_SIZE),
								DR_STE_SIZE);
			}
		}

		send_info.write.addr = (uintptr_t)data;
		send_info.write.length = byte_size;
		send_info.write.lkey = 0;
		send_info.remote_addr =
			mlx5dr_ste_get_mr_addr(htbl->chunk->ste_arr + ste_index);
		send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(htbl->chunk);

		ret = dr_postsend_icm_data(dmn, &send_info);
		if (ret)
			goto out_free;
	}

out_free:
	kvfree(data);
	return ret;
}

/* Initialize htbl with default STEs */
int mlx5dr_send_postsend_formatted_htbl(struct mlx5dr_domain *dmn,
					struct mlx5dr_ste_htbl *htbl,
					u8 *ste_init_data,
					bool update_hw_ste)
{
	u32 byte_size = mlx5dr_icm_pool_get_chunk_byte_size(htbl->chunk);
	int iterations;
	int num_stes;
	u8 *copy_dst;
	u8 *data;
	int ret;
	int i;

	ret = dr_get_tbl_copy_details(dmn, htbl, &data, &byte_size,
				      &iterations, &num_stes);
	if (ret)
		return ret;

	if (update_hw_ste) {
		/* Copy the reduced STE to hash table ste_arr */
		for (i = 0; i < num_stes; i++) {
			copy_dst = htbl->chunk->hw_ste_arr + i * DR_STE_SIZE_REDUCED;
			memcpy(copy_dst, ste_init_data, DR_STE_SIZE_REDUCED);
		}
	}

	mlx5dr_ste_prepare_for_postsend(dmn->ste_ctx, ste_init_data, DR_STE_SIZE);

	/* Copy the same STE on the data buffer */
	for (i = 0; i < num_stes; i++) {
		copy_dst = data + i * DR_STE_SIZE;
		memcpy(copy_dst, ste_init_data, DR_STE_SIZE);
	}

	/* Send the data iteration times */
	for (i = 0; i < iterations; i++) {
		u32 ste_index = i * (byte_size / DR_STE_SIZE);
		struct postsend_info send_info = {};

		send_info.write.addr = (uintptr_t)data;
		send_info.write.length = byte_size;
		send_info.write.lkey = 0;
		send_info.remote_addr =
			mlx5dr_ste_get_mr_addr(htbl->chunk->ste_arr + ste_index);
		send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(htbl->chunk);

		ret = dr_postsend_icm_data(dmn, &send_info);
		if (ret)
			goto out_free;
	}

out_free:
	kvfree(data);
	return ret;
}

int mlx5dr_send_postsend_action(struct mlx5dr_domain *dmn,
				struct mlx5dr_action *action)
{
	struct postsend_info send_info = {};

	send_info.write.addr = (uintptr_t)action->rewrite->data;
	send_info.write.length = action->rewrite->num_of_actions *
				 DR_MODIFY_ACTION_SIZE;
	send_info.write.lkey = 0;
	send_info.remote_addr =
		mlx5dr_icm_pool_get_chunk_mr_addr(action->rewrite->chunk);
	send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(action->rewrite->chunk);

	return dr_postsend_icm_data(dmn, &send_info);
}

int mlx5dr_send_postsend_pattern(struct mlx5dr_domain *dmn,
				 struct mlx5dr_icm_chunk *chunk,
				 u16 num_of_actions,
				 u8 *data)
{
	struct postsend_info send_info = {};
	int ret;

	send_info.write.addr = (uintptr_t)data;
	send_info.write.length = num_of_actions * DR_MODIFY_ACTION_SIZE;
	send_info.remote_addr = mlx5dr_icm_pool_get_chunk_mr_addr(chunk);
	send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(chunk);

	ret = dr_postsend_icm_data(dmn, &send_info);
	if (ret)
		return ret;

	return 0;
}

int mlx5dr_send_postsend_args(struct mlx5dr_domain *dmn, u64 arg_id,
			      u16 num_of_actions, u8 *actions_data)
{
	int data_len, iter = 0, cur_sent;
	u64 addr;
	int ret;

	addr = (uintptr_t)actions_data;
	data_len = num_of_actions * DR_MODIFY_ACTION_SIZE;

	do {
		struct postsend_info send_info = {};

		send_info.type = GTA_ARG;
		send_info.write.addr = addr;
		cur_sent = min_t(u32, data_len, DR_ACTION_CACHE_LINE_SIZE);
		send_info.write.length = cur_sent;
		send_info.write.lkey = 0;
		send_info.remote_addr = arg_id + iter;

		ret = dr_postsend_icm_data(dmn, &send_info);
		if (ret)
			goto out;

		iter++;
		addr += cur_sent;
		data_len -= cur_sent;
	} while (data_len > 0);

out:
	return ret;
}

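/* Move the send ring QP from RESET to INIT state. */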
static int dr_modify_qp_rst2init(struct mlx5_core_dev *mdev,
				 struct mlx5dr_qp *dr_qp,
				 int port)
{
	u32 in[MLX5_ST_SZ_DW(rst2init_qp_in)] = {};
	void *qpc;

	qpc = MLX5_ADDR_OF(rst2init_qp_in, in, qpc);

	MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, port);
	MLX5_SET(qpc, qpc, pm_state, MLX5_QPC_PM_STATE_MIGRATED);
	MLX5_SET(qpc, qpc, rre, 1);
	MLX5_SET(qpc, qpc, rwe, 1);

	MLX5_SET(rst2init_qp_in, in, opcode, MLX5_CMD_OP_RST2INIT_QP);
	MLX5_SET(rst2init_qp_in, in, qpn, dr_qp->qpn);

	return mlx5_cmd_exec_in(mdev, rst2init_qp, in);
}

static int dr_cmd_modify_qp_rtr2rts(struct mlx5_core_dev *mdev,
				    struct mlx5dr_qp *dr_qp,
				    struct dr_qp_rts_attr *attr)
{
	u32 in[MLX5_ST_SZ_DW(rtr2rts_qp_in)] = {};
	void *qpc;

	qpc = MLX5_ADDR_OF(rtr2rts_qp_in, in, qpc);

	MLX5_SET(rtr2rts_qp_in, in, qpn, dr_qp->qpn);

	MLX5_SET(qpc, qpc, retry_count, attr->retry_cnt);
	MLX5_SET(qpc, qpc, rnr_retry, attr->rnr_retry);
	MLX5_SET(qpc, qpc, primary_address_path.ack_timeout, 0x8); /* ~1ms */

	MLX5_SET(rtr2rts_qp_in, in, opcode, MLX5_CMD_OP_RTR2RTS_QP);
	MLX5_SET(rtr2rts_qp_in, in, qpn, dr_qp->qpn);

	return mlx5_cmd_exec_in(mdev, rtr2rts_qp, in);
}

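/* Move the QP from INIT to RTR: program the MTU, the remote QPN (the QP is
 * connected to itself) and either the destination MAC/GID or force loopback,
 * depending on the device capabilities.
 */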
static int dr_cmd_modify_qp_init2rtr(struct mlx5_core_dev *mdev,
				     struct mlx5dr_qp *dr_qp,
				     struct dr_qp_rtr_attr *attr)
{
	u32 in[MLX5_ST_SZ_DW(init2rtr_qp_in)] = {};
	void *qpc;

	qpc = MLX5_ADDR_OF(init2rtr_qp_in, in, qpc);

	MLX5_SET(init2rtr_qp_in, in, qpn, dr_qp->qpn);

	MLX5_SET(qpc, qpc, mtu, attr->mtu);
	MLX5_SET(qpc, qpc, log_msg_max, DR_CHUNK_SIZE_MAX - 1);
	MLX5_SET(qpc, qpc, remote_qpn, attr->qp_num);
	memcpy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rmac_47_32),
	       attr->dgid_attr.mac, sizeof(attr->dgid_attr.mac));
	memcpy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rgid_rip),
	       attr->dgid_attr.gid, sizeof(attr->dgid_attr.gid));
	MLX5_SET(qpc, qpc, primary_address_path.src_addr_index,
		 attr->sgid_index);

	if (attr->dgid_attr.roce_ver == MLX5_ROCE_VERSION_2)
		MLX5_SET(qpc, qpc, primary_address_path.udp_sport,
			 attr->udp_src_port);

	MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, attr->port_num);
	MLX5_SET(qpc, qpc, primary_address_path.fl, attr->fl);
	MLX5_SET(qpc, qpc, min_rnr_nak, 1);

	MLX5_SET(init2rtr_qp_in, in, opcode, MLX5_CMD_OP_INIT2RTR_QP);
	MLX5_SET(init2rtr_qp_in, in, qpn, dr_qp->qpn);

	return mlx5_cmd_exec_in(mdev, init2rtr_qp, in);
}

static bool dr_send_allow_fl(struct mlx5dr_cmd_caps *caps)
{
	/* Check whether RC RoCE QP creation with force loopback is allowed.
	 * There are two separate capability bits for this:
	 *  - force loopback when RoCE is enabled
	 *  - force loopback when RoCE is disabled
	 */
	return ((caps->roce_caps.roce_en &&
		 caps->roce_caps.fl_rc_qp_when_roce_enabled) ||
		(!caps->roce_caps.roce_en &&
		 caps->roce_caps.fl_rc_qp_when_roce_disabled));
}

static int dr_prepare_qp_to_rts(struct mlx5dr_domain *dmn)
{
	struct mlx5dr_qp *dr_qp = dmn->send_ring->qp;
	struct dr_qp_rts_attr rts_attr = {};
	struct dr_qp_rtr_attr rtr_attr = {};
	enum ib_mtu mtu = IB_MTU_1024;
	u16 gid_index = 0;
	int port = 1;
	int ret;

	/* Init */
	ret = dr_modify_qp_rst2init(dmn->mdev, dr_qp, port);
	if (ret) {
		mlx5dr_err(dmn, "Failed modify QP rst2init\n");
		return ret;
	}

	/* RTR */
	rtr_attr.mtu = mtu;
	rtr_attr.qp_num = dr_qp->qpn;
	rtr_attr.min_rnr_timer = 12;
	rtr_attr.port_num = port;
	rtr_attr.udp_src_port = dmn->info.caps.roce_min_src_udp;

	/* If QP creation with force loopback is allowed, then there
	 * is no need for GID index when creating the QP.
	 * Otherwise we query GID attributes and use GID index.
	 */
	rtr_attr.fl = dr_send_allow_fl(&dmn->info.caps);
	if (!rtr_attr.fl) {
		ret = mlx5dr_cmd_query_gid(dmn->mdev, port, gid_index,
					   &rtr_attr.dgid_attr);
		if (ret)
			return ret;

		rtr_attr.sgid_index = gid_index;
	}

	ret = dr_cmd_modify_qp_init2rtr(dmn->mdev, dr_qp, &rtr_attr);
	if (ret) {
		mlx5dr_err(dmn, "Failed modify QP init2rtr\n");
		return ret;
	}

	/* RTS */
	rts_attr.timeout = 14;
	rts_attr.retry_cnt = 7;
	rts_attr.rnr_retry = 7;

	ret = dr_cmd_modify_qp_rtr2rts(dmn->mdev, dr_qp, &rts_attr);
	if (ret) {
		mlx5dr_err(dmn, "Failed modify QP rtr2rts\n");
		return ret;
	}

	return 0;
}

static void dr_cq_complete(struct mlx5_core_cq *mcq,
			   struct mlx5_eqe *eqe)
{
	pr_err("CQ completion CQ: #%u\n", mcq->cqn);
}

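/* Create the send ring CQ. The CQ is used in polling mode only, so
 * completions are never armed.
 */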
static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev,
				      struct mlx5_uars_page *uar,
				      size_t ncqe)
{
	u32 temp_cqc[MLX5_ST_SZ_DW(cqc)] = {};
	u32 out[MLX5_ST_SZ_DW(create_cq_out)];
	struct mlx5_wq_param wqp;
	struct mlx5_cqe64 *cqe;
	struct mlx5dr_cq *cq;
	int inlen, err, eqn;
	void *cqc, *in;
	__be64 *pas;
	int vector;
	u32 i;

	cq = kzalloc(sizeof(*cq), GFP_KERNEL);
	if (!cq)
		return NULL;

	ncqe = roundup_pow_of_two(ncqe);
	MLX5_SET(cqc, temp_cqc, log_cq_size, ilog2(ncqe));

	wqp.buf_numa_node = mdev->priv.numa_node;
	wqp.db_numa_node = mdev->priv.numa_node;

	err = mlx5_cqwq_create(mdev, &wqp, temp_cqc, &cq->wq,
			       &cq->wq_ctrl);
	if (err)
		goto out;

	for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) {
		cqe = mlx5_cqwq_get_wqe(&cq->wq, i);
		cqe->op_own = MLX5_CQE_INVALID << 4 | MLX5_CQE_OWNER_MASK;
	}

	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
		sizeof(u64) * cq->wq_ctrl.buf.npages;
	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in)
		goto err_cqwq;

	vector = raw_smp_processor_id() % mlx5_comp_vectors_count(mdev);
	err = mlx5_vector2eqn(mdev, vector, &eqn);
	if (err) {
		kvfree(in);
		goto err_cqwq;
	}

	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
	MLX5_SET(cqc, cqc, log_cq_size, ilog2(ncqe));
	MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
	MLX5_SET(cqc, cqc, uar_page, uar->index);
	MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
		 MLX5_ADAPTER_PAGE_SHIFT);
	MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma);

	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
	mlx5_fill_page_frag_array(&cq->wq_ctrl.buf, pas);

	cq->mcq.comp = dr_cq_complete;

	err = mlx5_core_create_cq(mdev, &cq->mcq, in, inlen, out, sizeof(out));
	kvfree(in);

	if (err)
		goto err_cqwq;

	cq->mcq.cqe_sz = 64;
	cq->mcq.set_ci_db = cq->wq_ctrl.db.db;
	cq->mcq.arm_db = cq->wq_ctrl.db.db + 1;
	*cq->mcq.set_ci_db = 0;

	/* Set a non-zero value in order to avoid the HW running
	 * db-recovery on a CQ that is used in polling mode.
	 */
	*cq->mcq.arm_db = cpu_to_be32(2 << 28);

	cq->mcq.vector = 0;
	cq->mcq.uar = uar;
	cq->mdev = mdev;

	return cq;

err_cqwq:
	mlx5_wq_destroy(&cq->wq_ctrl);
out:
	kfree(cq);
	return NULL;
}

static void dr_destroy_cq(struct mlx5_core_dev *mdev, struct mlx5dr_cq *cq)
{
	mlx5_core_destroy_cq(mdev, &cq->mcq);
	mlx5_wq_destroy(&cq->wq_ctrl);
	kfree(cq);
}

static int dr_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, u32 *mkey)
{
	u32 in[MLX5_ST_SZ_DW(create_mkey_in)] = {};
	void *mkc;

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
	MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
	MLX5_SET(mkc, mkc, a, 1);
	MLX5_SET(mkc, mkc, rw, 1);
	MLX5_SET(mkc, mkc, rr, 1);
	MLX5_SET(mkc, mkc, lw, 1);
	MLX5_SET(mkc, mkc, lr, 1);

	MLX5_SET(mkc, mkc, pd, pdn);
	MLX5_SET(mkc, mkc, length64, 1);
	MLX5_SET(mkc, mkc, qpn, 0xffffff);

	return mlx5_core_create_mkey(mdev, mkey, in, sizeof(in));
}

static struct mlx5dr_mr *dr_reg_mr(struct mlx5_core_dev *mdev,
				   u32 pdn, void *buf, size_t size)
{
	struct mlx5dr_mr *mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	struct device *dma_device;
	dma_addr_t dma_addr;
	int err;

	if (!mr)
		return NULL;

	dma_device = mlx5_core_dma_dev(mdev);
	dma_addr = dma_map_single(dma_device, buf, size,
				  DMA_BIDIRECTIONAL);
	err = dma_mapping_error(dma_device, dma_addr);
	if (err) {
		mlx5_core_warn(mdev, "Can't dma buf\n");
		kfree(mr);
		return NULL;
	}

	err = dr_create_mkey(mdev, pdn, &mr->mkey);
	if (err) {
		mlx5_core_warn(mdev, "Can't create mkey\n");
		dma_unmap_single(dma_device, dma_addr, size,
				 DMA_BIDIRECTIONAL);
		kfree(mr);
		return NULL;
	}

	mr->dma_addr = dma_addr;
	mr->size = size;
	mr->addr = buf;

	return mr;
}

static void dr_dereg_mr(struct mlx5_core_dev *mdev, struct mlx5dr_mr *mr)
{
	mlx5_core_destroy_mkey(mdev, mr->mkey);
	dma_unmap_single(mlx5_core_dma_dev(mdev), mr->dma_addr, mr->size,
			 DMA_BIDIRECTIONAL);
	kfree(mr);
}

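/* Allocate and initialize the domain send ring: the CQ, the RC QP (moved
 * all the way to RTS), and the data/sync buffers with their MRs that are
 * used for writing STEs and actions to ICM.
 */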
int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn)
{
	struct dr_qp_init_attr init_attr = {};
	int cq_size;
	int size;
	int ret;

	dmn->send_ring = kzalloc(sizeof(*dmn->send_ring), GFP_KERNEL);
	if (!dmn->send_ring)
		return -ENOMEM;

	cq_size = QUEUE_SIZE + 1;
	dmn->send_ring->cq = dr_create_cq(dmn->mdev, dmn->uar, cq_size);
	if (!dmn->send_ring->cq) {
		mlx5dr_err(dmn, "Failed creating CQ\n");
		ret = -ENOMEM;
		goto free_send_ring;
	}

	init_attr.cqn = dmn->send_ring->cq->mcq.cqn;
	init_attr.pdn = dmn->pdn;
	init_attr.uar = dmn->uar;
	init_attr.max_send_wr = QUEUE_SIZE;

	/* Isolated VL is applicable only if force loopback is supported */
	if (dr_send_allow_fl(&dmn->info.caps))
		init_attr.isolate_vl_tc = dmn->info.caps.isolate_vl_tc;

	spin_lock_init(&dmn->send_ring->lock);

	dmn->send_ring->qp = dr_create_rc_qp(dmn->mdev, &init_attr);
	if (!dmn->send_ring->qp) {
		mlx5dr_err(dmn, "Failed creating QP\n");
		ret = -ENOMEM;
		goto clean_cq;
	}

	dmn->send_ring->cq->qp = dmn->send_ring->qp;

	dmn->info.max_send_wr = QUEUE_SIZE;
	dmn->info.max_inline_size = min(dmn->send_ring->qp->max_inline_data,
					DR_STE_SIZE);

	dmn->send_ring->signal_th = dmn->info.max_send_wr /
		SIGNAL_PER_DIV_QUEUE;

	/* Prepare qp to be used */
	ret = dr_prepare_qp_to_rts(dmn);
	if (ret)
		goto clean_qp;

	dmn->send_ring->max_post_send_size =
		mlx5dr_icm_pool_chunk_size_to_byte(DR_CHUNK_SIZE_1K,
						   DR_ICM_TYPE_STE);

	/* Allocating the max size as a buffer for writing */
	size = dmn->send_ring->signal_th * dmn->send_ring->max_post_send_size;
	dmn->send_ring->buf = kzalloc(size, GFP_KERNEL);
	if (!dmn->send_ring->buf) {
		ret = -ENOMEM;
		goto clean_qp;
	}

	dmn->send_ring->buf_size = size;

	dmn->send_ring->mr = dr_reg_mr(dmn->mdev,
				       dmn->pdn, dmn->send_ring->buf, size);
	if (!dmn->send_ring->mr) {
		ret = -ENOMEM;
		goto free_mem;
	}

	dmn->send_ring->sync_buff = kzalloc(dmn->send_ring->max_post_send_size,
					    GFP_KERNEL);
	if (!dmn->send_ring->sync_buff) {
		ret = -ENOMEM;
		goto clean_mr;
	}

	dmn->send_ring->sync_mr = dr_reg_mr(dmn->mdev,
					    dmn->pdn, dmn->send_ring->sync_buff,
					    dmn->send_ring->max_post_send_size);
	if (!dmn->send_ring->sync_mr) {
		ret = -ENOMEM;
		goto free_sync_mem;
	}

	return 0;

free_sync_mem:
	kfree(dmn->send_ring->sync_buff);
clean_mr:
	dr_dereg_mr(dmn->mdev, dmn->send_ring->mr);
free_mem:
	kfree(dmn->send_ring->buf);
clean_qp:
	dr_destroy_qp(dmn->mdev, dmn->send_ring->qp);
clean_cq:
	dr_destroy_cq(dmn->mdev, dmn->send_ring->cq);
free_send_ring:
	kfree(dmn->send_ring);

	return ret;
}

void mlx5dr_send_ring_free(struct mlx5dr_domain *dmn,
			   struct mlx5dr_send_ring *send_ring)
{
	dr_destroy_qp(dmn->mdev, send_ring->qp);
	dr_destroy_cq(dmn->mdev, send_ring->cq);
	dr_dereg_mr(dmn->mdev, send_ring->sync_mr);
	dr_dereg_mr(dmn->mdev, send_ring->mr);
	kfree(send_ring->buf);
	kfree(send_ring->sync_buff);
	kfree(send_ring);
}

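/* Flush the send ring: post enough dummy write requests to force signaled
 * completions, then handle the pending completions on the CQ.
 */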
int mlx5dr_send_ring_force_drain(struct mlx5dr_domain *dmn)
{
	struct mlx5dr_send_ring *send_ring = dmn->send_ring;
	struct postsend_info send_info = {};
	u8 data[DR_STE_SIZE];
	int num_of_sends_req;
	int ret;
	int i;

	/* Sending this number of requests makes sure the queue gets drained */
	num_of_sends_req = send_ring->signal_th * TH_NUMS_TO_DRAIN / 2;

	/* Send fake requests forcing the last to be signaled */
	send_info.write.addr = (uintptr_t)data;
	send_info.write.length = DR_STE_SIZE;
	send_info.write.lkey = 0;
	/* Using the sync_mr in order to write/read */
	send_info.remote_addr = (uintptr_t)send_ring->sync_mr->addr;
	send_info.rkey = send_ring->sync_mr->mkey;

	for (i = 0; i < num_of_sends_req; i++) {
		ret = dr_postsend_icm_data(dmn, &send_info);
		if (ret)
			return ret;
	}

	spin_lock(&send_ring->lock);
	ret = dr_handle_pending_wc(dmn, send_ring);
	spin_unlock(&send_ring->lock);

	return ret;
}