// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */

#include <linux/smp.h>
#include "dr_types.h"

#define QUEUE_SIZE 128
#define SIGNAL_PER_DIV_QUEUE 16
#define TH_NUMS_TO_DRAIN 2

enum { CQ_OK = 0, CQ_EMPTY = -1, CQ_POLL_ERR = -2 };

struct dr_data_seg {
	u64 addr;
	u32 length;
	u32 lkey;
	unsigned int send_flags;
};

struct postsend_info {
	struct dr_data_seg write;
	struct dr_data_seg read;
	u64 remote_addr;
	u32 rkey;
};

struct dr_qp_rtr_attr {
	struct mlx5dr_cmd_gid_attr dgid_attr;
	enum ib_mtu mtu;
	u32 qp_num;
	u16 port_num;
	u8 min_rnr_timer;
	u8 sgid_index;
	u16 udp_src_port;
};

struct dr_qp_rts_attr {
	u8 timeout;
	u8 retry_cnt;
	u8 rnr_retry;
};

struct dr_qp_init_attr {
	u32 cqn;
	u32 pdn;
	u32 max_send_wr;
	struct mlx5_uars_page *uar;
};

static int dr_parse_cqe(struct mlx5dr_cq *dr_cq, struct mlx5_cqe64 *cqe64)
{
	unsigned int idx;
	u8 opcode;

	opcode = get_cqe_opcode(cqe64);
	if (opcode == MLX5_CQE_REQ_ERR) {
		idx = be16_to_cpu(cqe64->wqe_counter) &
			(dr_cq->qp->sq.wqe_cnt - 1);
		dr_cq->qp->sq.cc = dr_cq->qp->sq.wqe_head[idx] + 1;
	} else if (opcode == MLX5_CQE_RESP_ERR) {
		++dr_cq->qp->sq.cc;
	} else {
		idx = be16_to_cpu(cqe64->wqe_counter) &
			(dr_cq->qp->sq.wqe_cnt - 1);
		dr_cq->qp->sq.cc = dr_cq->qp->sq.wqe_head[idx] + 1;

		return CQ_OK;
	}

	return CQ_POLL_ERR;
}

static int dr_cq_poll_one(struct mlx5dr_cq *dr_cq)
{
	struct mlx5_cqe64 *cqe64;
	int err;

	cqe64 = mlx5_cqwq_get_cqe(&dr_cq->wq);
	if (!cqe64)
		return CQ_EMPTY;

	mlx5_cqwq_pop(&dr_cq->wq);
	err = dr_parse_cqe(dr_cq, cqe64);
	mlx5_cqwq_update_db_record(&dr_cq->wq);

	return err;
}

static int dr_poll_cq(struct mlx5dr_cq *dr_cq, int ne)
{
	int npolled;
	int err = 0;

	for (npolled = 0; npolled < ne; ++npolled) {
		err = dr_cq_poll_one(dr_cq);
		if (err != CQ_OK)
			break;
	}

	return err == CQ_POLL_ERR ? err : npolled;
}

static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev,
					 struct dr_qp_init_attr *attr)
{
	u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
	u32 temp_qpc[MLX5_ST_SZ_DW(qpc)] = {};
	struct mlx5_wq_param wqp;
	struct mlx5dr_qp *dr_qp;
	int inlen;
	void *qpc;
	void *in;
	int err;

	dr_qp = kzalloc(sizeof(*dr_qp), GFP_KERNEL);
	if (!dr_qp)
		return NULL;

	wqp.buf_numa_node = mdev->priv.numa_node;
	wqp.db_numa_node = mdev->priv.numa_node;

	dr_qp->rq.pc = 0;
	dr_qp->rq.cc = 0;
	dr_qp->rq.wqe_cnt = 4;
	dr_qp->sq.pc = 0;
	dr_qp->sq.cc = 0;
	dr_qp->sq.wqe_cnt = roundup_pow_of_two(attr->max_send_wr);

	MLX5_SET(qpc, temp_qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4);
	MLX5_SET(qpc, temp_qpc, log_rq_size, ilog2(dr_qp->rq.wqe_cnt));
	MLX5_SET(qpc, temp_qpc, log_sq_size, ilog2(dr_qp->sq.wqe_cnt));
	err = mlx5_wq_qp_create(mdev, &wqp, temp_qpc, &dr_qp->wq,
				&dr_qp->wq_ctrl);
	if (err) {
		mlx5_core_warn(mdev, "Can't create QP WQ\n");
		goto err_wq;
	}

	dr_qp->sq.wqe_head = kcalloc(dr_qp->sq.wqe_cnt,
				     sizeof(dr_qp->sq.wqe_head[0]),
				     GFP_KERNEL);

	if (!dr_qp->sq.wqe_head) {
		mlx5_core_warn(mdev, "Can't allocate wqe head\n");
		goto err_wqe_head;
	}

	inlen = MLX5_ST_SZ_BYTES(create_qp_in) +
		MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) *
		dr_qp->wq_ctrl.buf.npages;
	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_in;
	}

	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
	MLX5_SET(qpc, qpc, pd, attr->pdn);
	MLX5_SET(qpc, qpc, uar_page, attr->uar->index);
	MLX5_SET(qpc, qpc, log_page_size,
		 dr_qp->wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
	MLX5_SET(qpc, qpc, fre, 1);
	MLX5_SET(qpc, qpc, rlky, 1);
	MLX5_SET(qpc, qpc, cqn_snd, attr->cqn);
	MLX5_SET(qpc, qpc, cqn_rcv, attr->cqn);
	MLX5_SET(qpc, qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4);
	MLX5_SET(qpc, qpc, log_rq_size, ilog2(dr_qp->rq.wqe_cnt));
	MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
	MLX5_SET(qpc, qpc, log_sq_size, ilog2(dr_qp->sq.wqe_cnt));
	MLX5_SET64(qpc, qpc, dbr_addr, dr_qp->wq_ctrl.db.dma);
	if (MLX5_CAP_GEN(mdev, cqe_version) == 1)
		MLX5_SET(qpc, qpc, user_index, 0xFFFFFF);
	mlx5_fill_page_frag_array(&dr_qp->wq_ctrl.buf,
				  (__be64 *)MLX5_ADDR_OF(create_qp_in,
							 in, pas));

	MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
	err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
	dr_qp->qpn = MLX5_GET(create_qp_out, out, qpn);
	kvfree(in);
	if (err)
		goto err_in;
	dr_qp->uar = attr->uar;

	return dr_qp;

err_in:
	kfree(dr_qp->sq.wqe_head);
err_wqe_head:
	mlx5_wq_destroy(&dr_qp->wq_ctrl);
err_wq:
	kfree(dr_qp);
	return NULL;
}

static void dr_destroy_qp(struct mlx5_core_dev *mdev,
			  struct mlx5dr_qp *dr_qp)
{
	u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};

	MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
	MLX5_SET(destroy_qp_in, in, qpn, dr_qp->qpn);
	mlx5_cmd_exec_in(mdev, destroy_qp, in);

	kfree(dr_qp->sq.wqe_head);
	mlx5_wq_destroy(&dr_qp->wq_ctrl);
	kfree(dr_qp);
}

static void dr_cmd_notify_hw(struct mlx5dr_qp *dr_qp, void *ctrl)
{
	dma_wmb();
	*dr_qp->wq.sq.db = cpu_to_be32(dr_qp->sq.pc & 0xfffff);

	/* After wmb() the HW is aware of the new work */
	wmb();

	mlx5_write64(ctrl, dr_qp->uar->map + MLX5_BF_OFFSET);
}

static void dr_rdma_segments(struct mlx5dr_qp *dr_qp, u64 remote_addr,
			     u32 rkey, struct dr_data_seg *data_seg,
			     u32 opcode, int nreq)
{
	struct mlx5_wqe_raddr_seg *wq_raddr;
	struct mlx5_wqe_ctrl_seg *wq_ctrl;
	struct mlx5_wqe_data_seg *wq_dseg;
	unsigned int size;
	unsigned int idx;

	/* WQE size is expressed in 16-byte units: ctrl + raddr + one data seg */
	size = sizeof(*wq_ctrl) / 16 + sizeof(*wq_dseg) / 16 +
		sizeof(*wq_raddr) / 16;

	idx = dr_qp->sq.pc & (dr_qp->sq.wqe_cnt - 1);

	wq_ctrl = mlx5_wq_cyc_get_wqe(&dr_qp->wq.sq, idx);
	wq_ctrl->imm = 0;
	wq_ctrl->fm_ce_se = (data_seg->send_flags) ?
		MLX5_WQE_CTRL_CQ_UPDATE : 0;
	wq_ctrl->opmod_idx_opcode = cpu_to_be32(((dr_qp->sq.pc & 0xffff) << 8) |
						opcode);
	wq_ctrl->qpn_ds = cpu_to_be32(size | dr_qp->qpn << 8);
	wq_raddr = (void *)(wq_ctrl + 1);
	wq_raddr->raddr = cpu_to_be64(remote_addr);
	wq_raddr->rkey = cpu_to_be32(rkey);
	wq_raddr->reserved = 0;

	wq_dseg = (void *)(wq_raddr + 1);
	wq_dseg->byte_count = cpu_to_be32(data_seg->length);
	wq_dseg->lkey = cpu_to_be32(data_seg->lkey);
	wq_dseg->addr = cpu_to_be64(data_seg->addr);

	dr_qp->sq.wqe_head[idx] = dr_qp->sq.pc++;

	if (nreq)
		dr_cmd_notify_hw(dr_qp, wq_ctrl);
}

static void dr_post_send(struct mlx5dr_qp *dr_qp, struct postsend_info *send_info)
{
	dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey,
			 &send_info->write, MLX5_OPCODE_RDMA_WRITE, 0);
	dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey,
			 &send_info->read, MLX5_OPCODE_RDMA_READ, 1);
}

/**
 * mlx5dr_send_fill_and_append_ste_send_info: Add data to be sent
 * with the send_list
 *
 * @ste:       The STE that the data is attached to
 * @size:      Size of the data to write
 * @offset:    Offset of the data from the start of the hw_ste entry
 * @data:      Data to write
 * @ste_info:  STE info to be appended to send_list
 * @send_list: List to append the STE info to
 * @copy_data: If true, the data is copied and kept, because it is not
 *             backed up anywhere else (e.g. during re-hash).
 *             If false, the data may still be updated after it has been
 *             added to the list.
 */
void mlx5dr_send_fill_and_append_ste_send_info(struct mlx5dr_ste *ste, u16 size,
					       u16 offset, u8 *data,
					       struct mlx5dr_ste_send_info *ste_info,
					       struct list_head *send_list,
					       bool copy_data)
{
	ste_info->size = size;
	ste_info->ste = ste;
	ste_info->offset = offset;

	if (copy_data) {
		memcpy(ste_info->data_cont, data, size);
		ste_info->data = ste_info->data_cont;
	} else {
		ste_info->data = data;
	}

	list_add_tail(&ste_info->send_list, send_list);
}
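
/* Usage sketch (illustrative only; "ste" and "hw_ste" stand for a caller's
 * STE pointer and its host copy of the HW STE, which are not defined in this
 * file): a caller batches STE updates on a local list and flushes it later:
 *
 *	struct mlx5dr_ste_send_info info = {};
 *	LIST_HEAD(send_list);
 *
 *	mlx5dr_send_fill_and_append_ste_send_info(ste, DR_STE_SIZE_REDUCED, 0,
 *						  hw_ste, &info, &send_list,
 *						  false);
 *	// Later, the flushing code posts each entry (e.g. via
 *	// mlx5dr_send_postsend_ste) and removes it from send_list.
 */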

/* The function tries to consume one wc each time, unless the queue is full.
 * In that case, i.e. when the HW is a full queue length behind the SW, the
 * function drains the CQ until it is empty.
 */
static int dr_handle_pending_wc(struct mlx5dr_domain *dmn,
				struct mlx5dr_send_ring *send_ring)
{
	bool is_drain = false;
	int ne;

	if (send_ring->pending_wqe < send_ring->signal_th)
		return 0;

	/* Queue is full, start draining it */
	if (send_ring->pending_wqe >=
	    dmn->send_ring->signal_th * TH_NUMS_TO_DRAIN)
		is_drain = true;

	do {
		ne = dr_poll_cq(send_ring->cq, 1);
		if (ne < 0)
			return ne;
		else if (ne == 1)
			send_ring->pending_wqe -= send_ring->signal_th;
	} while (is_drain && send_ring->pending_wqe);

	return 0;
}

static void dr_fill_data_segs(struct mlx5dr_send_ring *send_ring,
			      struct postsend_info *send_info)
{
	send_ring->pending_wqe++;

	if (send_ring->pending_wqe % send_ring->signal_th == 0)
		send_info->write.send_flags |= IB_SEND_SIGNALED;

	send_ring->pending_wqe++;
	send_info->read.length = send_info->write.length;
	/* Read into the same write area */
	send_info->read.addr = (uintptr_t)send_info->write.addr;
	send_info->read.lkey = send_ring->mr->mkey.key;

	if (send_ring->pending_wqe % send_ring->signal_th == 0)
		send_info->read.send_flags = IB_SEND_SIGNALED;
	else
		send_info->read.send_flags = 0;
}

static int dr_postsend_icm_data(struct mlx5dr_domain *dmn,
				struct postsend_info *send_info)
{
	struct mlx5dr_send_ring *send_ring = dmn->send_ring;
	u32 buff_offset;
	int ret;

	spin_lock(&send_ring->lock);

	ret = dr_handle_pending_wc(dmn, send_ring);
	if (ret)
		goto out_unlock;

	if (send_info->write.length > dmn->info.max_inline_size) {
		buff_offset = (send_ring->tx_head &
			       (dmn->send_ring->signal_th - 1)) *
			send_ring->max_post_send_size;
		/* Copy to ring mr */
		memcpy(send_ring->buf + buff_offset,
		       (void *)(uintptr_t)send_info->write.addr,
		       send_info->write.length);
		send_info->write.addr = (uintptr_t)send_ring->mr->dma_addr + buff_offset;
		send_info->write.lkey = send_ring->mr->mkey.key;
	}

	send_ring->tx_head++;
	dr_fill_data_segs(send_ring, send_info);
	dr_post_send(send_ring->qp, send_info);

out_unlock:
	spin_unlock(&send_ring->lock);
	return ret;
}

static int dr_get_tbl_copy_details(struct mlx5dr_domain *dmn,
				   struct mlx5dr_ste_htbl *htbl,
				   u8 **data,
				   u32 *byte_size,
				   int *iterations,
				   int *num_stes)
{
	int alloc_size;

	if (htbl->chunk->byte_size > dmn->send_ring->max_post_send_size) {
		*iterations = htbl->chunk->byte_size /
			dmn->send_ring->max_post_send_size;
		*byte_size = dmn->send_ring->max_post_send_size;
		alloc_size = *byte_size;
		*num_stes = *byte_size / DR_STE_SIZE;
	} else {
		*iterations = 1;
		*num_stes = htbl->chunk->num_of_entries;
		alloc_size = *num_stes * DR_STE_SIZE;
	}

	*data = kzalloc(alloc_size, GFP_KERNEL);
	if (!*data)
		return -ENOMEM;

	return 0;
}
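
/* Worked example for dr_get_tbl_copy_details() (the numbers below are
 * illustrative assumptions, not values taken from the code above): with an
 * STE size of 64 bytes, a max_post_send_size of 64KB and an htbl chunk of
 * 256KB, the chunk does not fit in one post, so *iterations = 256KB / 64KB
 * = 4, *byte_size = 64KB and *num_stes = 64KB / 64 = 1024 STEs per
 * iteration. A chunk smaller than max_post_send_size is sent in a single
 * iteration that covers all of its entries.
 */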

/**
 * mlx5dr_send_postsend_ste: Write size bytes at offset into the HW ICM.
 *
 * @dmn:    Domain
 * @ste:    The ste struct that contains the data (at
 *          least part of it)
 * @data:   The real data to send
 * @size:   Number of bytes to write
 * @offset: Offset from the start of the ICM-mapped data;
 *          allows writing only part of the buffer.
 *
 * Return: 0 on success.
 */
int mlx5dr_send_postsend_ste(struct mlx5dr_domain *dmn, struct mlx5dr_ste *ste,
			     u8 *data, u16 size, u16 offset)
{
	struct postsend_info send_info = {};

	mlx5dr_ste_prepare_for_postsend(dmn->ste_ctx, data, size);

	send_info.write.addr = (uintptr_t)data;
	send_info.write.length = size;
	send_info.write.lkey = 0;
	send_info.remote_addr = mlx5dr_ste_get_mr_addr(ste) + offset;
	send_info.rkey = ste->htbl->chunk->rkey;

	return dr_postsend_icm_data(dmn, &send_info);
}

int mlx5dr_send_postsend_htbl(struct mlx5dr_domain *dmn,
			      struct mlx5dr_ste_htbl *htbl,
			      u8 *formatted_ste, u8 *mask)
{
	u32 byte_size = htbl->chunk->byte_size;
	int num_stes_per_iter;
	int iterations;
	u8 *data;
	int ret;
	int i;
	int j;

	ret = dr_get_tbl_copy_details(dmn, htbl, &data, &byte_size,
				      &iterations, &num_stes_per_iter);
	if (ret)
		return ret;

	mlx5dr_ste_prepare_for_postsend(dmn->ste_ctx, formatted_ste, DR_STE_SIZE);

	/* Send the data iteration times */
	for (i = 0; i < iterations; i++) {
		u32 ste_index = i * (byte_size / DR_STE_SIZE);
		struct postsend_info send_info = {};

		/* Copy all STEs to the data buffer; the bit_mask needs
		 * to be added for used entries.
		 */
		for (j = 0; j < num_stes_per_iter; j++) {
			struct mlx5dr_ste *ste = &htbl->ste_arr[ste_index + j];
			u32 ste_off = j * DR_STE_SIZE;

			if (mlx5dr_ste_is_not_used(ste)) {
				memcpy(data + ste_off,
				       formatted_ste, DR_STE_SIZE);
			} else {
				/* Copy data */
				memcpy(data + ste_off,
				       htbl->ste_arr[ste_index + j].hw_ste,
				       DR_STE_SIZE_REDUCED);
				/* Copy bit_mask */
				memcpy(data + ste_off + DR_STE_SIZE_REDUCED,
				       mask, DR_STE_SIZE_MASK);
				/* Only when there is a mask do we need to re-arrange the STE */
				mlx5dr_ste_prepare_for_postsend(dmn->ste_ctx,
								data + (j * DR_STE_SIZE),
								DR_STE_SIZE);
			}
		}

		send_info.write.addr = (uintptr_t)data;
		send_info.write.length = byte_size;
		send_info.write.lkey = 0;
		send_info.remote_addr =
			mlx5dr_ste_get_mr_addr(htbl->ste_arr + ste_index);
		send_info.rkey = htbl->chunk->rkey;

		ret = dr_postsend_icm_data(dmn, &send_info);
		if (ret)
			goto out_free;
	}

out_free:
	kfree(data);
	return ret;
}

/* Initialize htbl with default STEs */
int mlx5dr_send_postsend_formatted_htbl(struct mlx5dr_domain *dmn,
					struct mlx5dr_ste_htbl *htbl,
					u8 *ste_init_data,
					bool update_hw_ste)
{
	u32 byte_size = htbl->chunk->byte_size;
	int iterations;
	int num_stes;
	u8 *copy_dst;
	u8 *data;
	int ret;
	int i;

	ret = dr_get_tbl_copy_details(dmn, htbl, &data, &byte_size,
				      &iterations, &num_stes);
	if (ret)
		return ret;

	if (update_hw_ste) {
		/* Copy the reduced STE to hash table ste_arr */
		for (i = 0; i < num_stes; i++) {
			copy_dst = htbl->hw_ste_arr + i * DR_STE_SIZE_REDUCED;
			memcpy(copy_dst, ste_init_data, DR_STE_SIZE_REDUCED);
		}
	}

	mlx5dr_ste_prepare_for_postsend(dmn->ste_ctx, ste_init_data, DR_STE_SIZE);

	/* Copy the same STE on the data buffer */
	for (i = 0; i < num_stes; i++) {
		copy_dst = data + i * DR_STE_SIZE;
		memcpy(copy_dst, ste_init_data, DR_STE_SIZE);
	}

	/* Send the data iteration times */
	for (i = 0; i < iterations; i++) {
		u32 ste_index = i * (byte_size / DR_STE_SIZE);
		struct postsend_info send_info = {};

		send_info.write.addr = (uintptr_t)data;
		send_info.write.length = byte_size;
		send_info.write.lkey = 0;
		send_info.remote_addr =
			mlx5dr_ste_get_mr_addr(htbl->ste_arr + ste_index);
		send_info.rkey = htbl->chunk->rkey;

		ret = dr_postsend_icm_data(dmn, &send_info);
		if (ret)
			goto out_free;
	}

out_free:
	kfree(data);
	return ret;
}

int mlx5dr_send_postsend_action(struct mlx5dr_domain *dmn,
				struct mlx5dr_action *action)
{
	struct postsend_info send_info = {};
	int ret;

	send_info.write.addr = (uintptr_t)action->rewrite.data;
	send_info.write.length = action->rewrite.num_of_actions *
		DR_MODIFY_ACTION_SIZE;
	send_info.write.lkey = 0;
	send_info.remote_addr = action->rewrite.chunk->mr_addr;
	send_info.rkey = action->rewrite.chunk->rkey;

	ret = dr_postsend_icm_data(dmn, &send_info);

	return ret;
}

static int dr_modify_qp_rst2init(struct mlx5_core_dev *mdev,
				 struct mlx5dr_qp *dr_qp,
				 int port)
{
	u32 in[MLX5_ST_SZ_DW(rst2init_qp_in)] = {};
	void *qpc;

	qpc = MLX5_ADDR_OF(rst2init_qp_in, in, qpc);

	MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, port);
	MLX5_SET(qpc, qpc, pm_state, MLX5_QPC_PM_STATE_MIGRATED);
	MLX5_SET(qpc, qpc, rre, 1);
	MLX5_SET(qpc, qpc, rwe, 1);

	MLX5_SET(rst2init_qp_in, in, opcode, MLX5_CMD_OP_RST2INIT_QP);
	MLX5_SET(rst2init_qp_in, in, qpn, dr_qp->qpn);

	return mlx5_cmd_exec_in(mdev, rst2init_qp, in);
}

static int dr_cmd_modify_qp_rtr2rts(struct mlx5_core_dev *mdev,
				    struct mlx5dr_qp *dr_qp,
				    struct dr_qp_rts_attr *attr)
{
	u32 in[MLX5_ST_SZ_DW(rtr2rts_qp_in)] = {};
	void *qpc;

	qpc = MLX5_ADDR_OF(rtr2rts_qp_in, in, qpc);

	MLX5_SET(rtr2rts_qp_in, in, qpn, dr_qp->qpn);

	MLX5_SET(qpc, qpc, retry_count, attr->retry_cnt);
	MLX5_SET(qpc, qpc, rnr_retry, attr->rnr_retry);

	MLX5_SET(rtr2rts_qp_in, in, opcode, MLX5_CMD_OP_RTR2RTS_QP);
	MLX5_SET(rtr2rts_qp_in, in, qpn, dr_qp->qpn);

	return mlx5_cmd_exec_in(mdev, rtr2rts_qp, in);
}

static int dr_cmd_modify_qp_init2rtr(struct mlx5_core_dev *mdev,
				     struct mlx5dr_qp *dr_qp,
				     struct dr_qp_rtr_attr *attr)
{
	u32 in[MLX5_ST_SZ_DW(init2rtr_qp_in)] = {};
	void *qpc;

	qpc = MLX5_ADDR_OF(init2rtr_qp_in, in, qpc);

	MLX5_SET(init2rtr_qp_in, in, qpn, dr_qp->qpn);

	MLX5_SET(qpc, qpc, mtu, attr->mtu);
	MLX5_SET(qpc, qpc, log_msg_max, DR_CHUNK_SIZE_MAX - 1);
	MLX5_SET(qpc, qpc, remote_qpn, attr->qp_num);
	memcpy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rmac_47_32),
	       attr->dgid_attr.mac, sizeof(attr->dgid_attr.mac));
	memcpy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rgid_rip),
	       attr->dgid_attr.gid, sizeof(attr->dgid_attr.gid));
	MLX5_SET(qpc, qpc, primary_address_path.src_addr_index,
		 attr->sgid_index);

	if (attr->dgid_attr.roce_ver == MLX5_ROCE_VERSION_2)
		MLX5_SET(qpc, qpc, primary_address_path.udp_sport,
			 attr->udp_src_port);

	MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, attr->port_num);
	MLX5_SET(qpc, qpc, min_rnr_nak, 1);

	MLX5_SET(init2rtr_qp_in, in, opcode, MLX5_CMD_OP_INIT2RTR_QP);
	MLX5_SET(init2rtr_qp_in, in, qpn, dr_qp->qpn);

	return mlx5_cmd_exec_in(mdev, init2rtr_qp, in);
}

static int dr_prepare_qp_to_rts(struct mlx5dr_domain *dmn)
{
	struct mlx5dr_qp *dr_qp = dmn->send_ring->qp;
	struct dr_qp_rts_attr rts_attr = {};
	struct dr_qp_rtr_attr rtr_attr = {};
	enum ib_mtu mtu = IB_MTU_1024;
	u16 gid_index = 0;
	int port = 1;
	int ret;

	/* Init */
	ret = dr_modify_qp_rst2init(dmn->mdev, dr_qp, port);
	if (ret) {
		mlx5dr_err(dmn, "Failed modify QP rst2init\n");
		return ret;
	}

	/* RTR */
	ret = mlx5dr_cmd_query_gid(dmn->mdev, port, gid_index, &rtr_attr.dgid_attr);
	if (ret)
		return ret;

	rtr_attr.mtu = mtu;
	rtr_attr.qp_num = dr_qp->qpn;
	rtr_attr.min_rnr_timer = 12;
	rtr_attr.port_num = port;
	rtr_attr.sgid_index = gid_index;
	rtr_attr.udp_src_port = dmn->info.caps.roce_min_src_udp;

	ret = dr_cmd_modify_qp_init2rtr(dmn->mdev, dr_qp, &rtr_attr);
	if (ret) {
		mlx5dr_err(dmn, "Failed modify QP init2rtr\n");
		return ret;
	}

	/* RTS */
	rts_attr.timeout = 14;
	rts_attr.retry_cnt = 7;
	rts_attr.rnr_retry = 7;

	ret = dr_cmd_modify_qp_rtr2rts(dmn->mdev, dr_qp, &rts_attr);
	if (ret) {
		mlx5dr_err(dmn, "Failed modify QP rtr2rts\n");
		return ret;
	}

	return 0;
}

static void dr_cq_complete(struct mlx5_core_cq *mcq,
			   struct mlx5_eqe *eqe)
{
	pr_err("CQ completion CQ: #%u\n", mcq->cqn);
}

static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev,
				      struct mlx5_uars_page *uar,
				      size_t ncqe)
{
	u32 temp_cqc[MLX5_ST_SZ_DW(cqc)] = {};
	u32 out[MLX5_ST_SZ_DW(create_cq_out)];
	struct mlx5_wq_param wqp;
	struct mlx5_cqe64 *cqe;
	struct mlx5dr_cq *cq;
	int inlen, err, eqn;
	unsigned int irqn;
	void *cqc, *in;
	__be64 *pas;
	int vector;
	u32 i;

	cq = kzalloc(sizeof(*cq), GFP_KERNEL);
	if (!cq)
		return NULL;

	ncqe = roundup_pow_of_two(ncqe);
	MLX5_SET(cqc, temp_cqc, log_cq_size, ilog2(ncqe));

	wqp.buf_numa_node = mdev->priv.numa_node;
	wqp.db_numa_node = mdev->priv.numa_node;

	err = mlx5_cqwq_create(mdev, &wqp, temp_cqc, &cq->wq,
			       &cq->wq_ctrl);
	if (err)
		goto out;

	for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) {
		cqe = mlx5_cqwq_get_wqe(&cq->wq, i);
		cqe->op_own = MLX5_CQE_INVALID << 4 | MLX5_CQE_OWNER_MASK;
	}

	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
		sizeof(u64) * cq->wq_ctrl.buf.npages;
	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in)
		goto err_cqwq;

	vector = raw_smp_processor_id() % mlx5_comp_vectors_count(mdev);
	err = mlx5_vector2eqn(mdev, vector, &eqn, &irqn);
	if (err) {
		kvfree(in);
		goto err_cqwq;
	}

	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
	MLX5_SET(cqc, cqc, log_cq_size, ilog2(ncqe));
	MLX5_SET(cqc, cqc, c_eqn, eqn);
	MLX5_SET(cqc, cqc, uar_page, uar->index);
	MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
		 MLX5_ADAPTER_PAGE_SHIFT);
	MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma);

	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
	mlx5_fill_page_frag_array(&cq->wq_ctrl.buf, pas);

	cq->mcq.comp = dr_cq_complete;

	err = mlx5_core_create_cq(mdev, &cq->mcq, in, inlen, out, sizeof(out));
	kvfree(in);

	if (err)
		goto err_cqwq;

	cq->mcq.cqe_sz = 64;
	cq->mcq.set_ci_db = cq->wq_ctrl.db.db;
	cq->mcq.arm_db = cq->wq_ctrl.db.db + 1;
	*cq->mcq.set_ci_db = 0;

	/* Set a non-zero value, to avoid the HW running db-recovery on a
	 * CQ that is used in polling mode.
	 */
	*cq->mcq.arm_db = cpu_to_be32(2 << 28);

	cq->mcq.vector = 0;
	cq->mcq.irqn = irqn;
	cq->mcq.uar = uar;

	return cq;

err_cqwq:
	mlx5_wq_destroy(&cq->wq_ctrl);
out:
	kfree(cq);
	return NULL;
}

static void dr_destroy_cq(struct mlx5_core_dev *mdev, struct mlx5dr_cq *cq)
{
	mlx5_core_destroy_cq(mdev, &cq->mcq);
	mlx5_wq_destroy(&cq->wq_ctrl);
	kfree(cq);
}

static int
dr_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, struct mlx5_core_mkey *mkey)
{
	u32 in[MLX5_ST_SZ_DW(create_mkey_in)] = {};
	void *mkc;

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
	MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
	MLX5_SET(mkc, mkc, a, 1);
	MLX5_SET(mkc, mkc, rw, 1);
	MLX5_SET(mkc, mkc, rr, 1);
	MLX5_SET(mkc, mkc, lw, 1);
	MLX5_SET(mkc, mkc, lr, 1);

	MLX5_SET(mkc, mkc, pd, pdn);
	MLX5_SET(mkc, mkc, length64, 1);
	MLX5_SET(mkc, mkc, qpn, 0xffffff);

	return mlx5_core_create_mkey(mdev, mkey, in, sizeof(in));
}

static struct mlx5dr_mr *dr_reg_mr(struct mlx5_core_dev *mdev,
				   u32 pdn, void *buf, size_t size)
{
	struct mlx5dr_mr *mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	struct device *dma_device;
	dma_addr_t dma_addr;
	int err;

	if (!mr)
		return NULL;

	dma_device = mlx5_core_dma_dev(mdev);
	dma_addr = dma_map_single(dma_device, buf, size,
				  DMA_BIDIRECTIONAL);
	err = dma_mapping_error(dma_device, dma_addr);
	if (err) {
		mlx5_core_warn(mdev, "Can't dma buf\n");
		kfree(mr);
		return NULL;
	}

	err = dr_create_mkey(mdev, pdn, &mr->mkey);
	if (err) {
		mlx5_core_warn(mdev, "Can't create mkey\n");
		dma_unmap_single(dma_device, dma_addr, size,
				 DMA_BIDIRECTIONAL);
		kfree(mr);
		return NULL;
	}

	mr->dma_addr = dma_addr;
	mr->size = size;
	mr->addr = buf;

	return mr;
}

static void dr_dereg_mr(struct mlx5_core_dev *mdev, struct mlx5dr_mr *mr)
{
	mlx5_core_destroy_mkey(mdev, &mr->mkey);
	dma_unmap_single(mlx5_core_dma_dev(mdev), mr->dma_addr, mr->size,
			 DMA_BIDIRECTIONAL);
	kfree(mr);
}

int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn)
{
	struct dr_qp_init_attr init_attr = {};
	int cq_size;
	int size;
	int ret;

	dmn->send_ring = kzalloc(sizeof(*dmn->send_ring), GFP_KERNEL);
	if (!dmn->send_ring)
		return -ENOMEM;

	cq_size = QUEUE_SIZE + 1;
	dmn->send_ring->cq = dr_create_cq(dmn->mdev, dmn->uar, cq_size);
	if (!dmn->send_ring->cq) {
		mlx5dr_err(dmn, "Failed creating CQ\n");
		ret = -ENOMEM;
		goto free_send_ring;
	}

	init_attr.cqn = dmn->send_ring->cq->mcq.cqn;
	init_attr.pdn = dmn->pdn;
	init_attr.uar = dmn->uar;
	init_attr.max_send_wr = QUEUE_SIZE;
	spin_lock_init(&dmn->send_ring->lock);

	dmn->send_ring->qp = dr_create_rc_qp(dmn->mdev, &init_attr);
	if (!dmn->send_ring->qp) {
		mlx5dr_err(dmn, "Failed creating QP\n");
		ret = -ENOMEM;
		goto clean_cq;
	}

	dmn->send_ring->cq->qp = dmn->send_ring->qp;

	dmn->info.max_send_wr = QUEUE_SIZE;
	dmn->info.max_inline_size = min(dmn->send_ring->qp->max_inline_data,
					DR_STE_SIZE);

	dmn->send_ring->signal_th = dmn->info.max_send_wr /
		SIGNAL_PER_DIV_QUEUE;

	/* Prepare qp to be used */
	ret = dr_prepare_qp_to_rts(dmn);
	if (ret)
		goto clean_qp;

	dmn->send_ring->max_post_send_size =
		mlx5dr_icm_pool_chunk_size_to_byte(DR_CHUNK_SIZE_1K,
						   DR_ICM_TYPE_STE);

	/* Allocating the max size as a buffer for writing */
	size = dmn->send_ring->signal_th * dmn->send_ring->max_post_send_size;
	dmn->send_ring->buf = kzalloc(size, GFP_KERNEL);
	if (!dmn->send_ring->buf) {
		ret = -ENOMEM;
		goto clean_qp;
	}

	dmn->send_ring->buf_size = size;

	dmn->send_ring->mr = dr_reg_mr(dmn->mdev,
				       dmn->pdn, dmn->send_ring->buf, size);
	if (!dmn->send_ring->mr) {
		ret = -ENOMEM;
		goto free_mem;
	}

	dmn->send_ring->sync_mr = dr_reg_mr(dmn->mdev,
					    dmn->pdn, dmn->send_ring->sync_buff,
					    MIN_READ_SYNC);
	if (!dmn->send_ring->sync_mr) {
		ret = -ENOMEM;
		goto clean_mr;
	}

	return 0;

clean_mr:
	dr_dereg_mr(dmn->mdev, dmn->send_ring->mr);
free_mem:
	kfree(dmn->send_ring->buf);
clean_qp:
	dr_destroy_qp(dmn->mdev, dmn->send_ring->qp);
clean_cq:
	dr_destroy_cq(dmn->mdev, dmn->send_ring->cq);
free_send_ring:
	kfree(dmn->send_ring);

	return ret;
}

void mlx5dr_send_ring_free(struct mlx5dr_domain *dmn,
			   struct mlx5dr_send_ring *send_ring)
{
	dr_destroy_qp(dmn->mdev, send_ring->qp);
	dr_destroy_cq(dmn->mdev, send_ring->cq);
	dr_dereg_mr(dmn->mdev, send_ring->sync_mr);
	dr_dereg_mr(dmn->mdev, send_ring->mr);
	kfree(send_ring->buf);
	kfree(send_ring);
}

int mlx5dr_send_ring_force_drain(struct mlx5dr_domain *dmn)
{
	struct mlx5dr_send_ring *send_ring = dmn->send_ring;
	struct postsend_info send_info = {};
	u8 data[DR_STE_SIZE];
	int num_of_sends_req;
	int ret;
	int i;

	/* Sending this number of requests makes sure the queue gets drained */
	num_of_sends_req = send_ring->signal_th * TH_NUMS_TO_DRAIN / 2;

	/* Send fake requests forcing the last to be signaled */
	send_info.write.addr = (uintptr_t)data;
	send_info.write.length = DR_STE_SIZE;
	send_info.write.lkey = 0;
	/* Using the sync_mr in order to write/read */
	send_info.remote_addr = (uintptr_t)send_ring->sync_mr->addr;
	send_info.rkey = send_ring->sync_mr->mkey.key;

	for (i = 0; i < num_of_sends_req; i++) {
		ret = dr_postsend_icm_data(dmn, &send_info);
		if (ret)
			return ret;
	}

	spin_lock(&send_ring->lock);
	ret = dr_handle_pending_wc(dmn, send_ring);
	spin_unlock(&send_ring->lock);

	return ret;
}
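
/* Send-ring lifecycle sketch (illustrative only; error handling is omitted
 * and "dmn" stands for a caller-owned domain): the ring is created once per
 * domain and reused for all ICM writes:
 *
 *	ret = mlx5dr_send_ring_alloc(dmn);           // CQ + RC QP + buffers
 *	ret = mlx5dr_send_postsend_ste(dmn, ste, data, size, 0);
 *	ret = mlx5dr_send_ring_force_drain(dmn);     // flush outstanding work
 *	mlx5dr_send_ring_free(dmn, dmn->send_ring);
 */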