// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */

#include <linux/smp.h>
#include "dr_types.h"

#define QUEUE_SIZE 128
#define SIGNAL_PER_DIV_QUEUE 16
#define TH_NUMS_TO_DRAIN 2

enum { CQ_OK = 0, CQ_EMPTY = -1, CQ_POLL_ERR = -2 };

struct dr_data_seg {
	u64 addr;
	u32 length;
	u32 lkey;
	unsigned int send_flags;
};

struct postsend_info {
	struct dr_data_seg write;
	struct dr_data_seg read;
	u64 remote_addr;
	u32 rkey;
};

struct dr_qp_rtr_attr {
	struct mlx5dr_cmd_gid_attr dgid_attr;
	enum ib_mtu mtu;
	u32 qp_num;
	u16 port_num;
	u8 min_rnr_timer;
	u8 sgid_index;
	u16 udp_src_port;
};

struct dr_qp_rts_attr {
	u8 timeout;
	u8 retry_cnt;
	u8 rnr_retry;
};

struct dr_qp_init_attr {
	u32 cqn;
	u32 pdn;
	u32 max_send_wr;
	struct mlx5_uars_page *uar;
};

static int dr_parse_cqe(struct mlx5dr_cq *dr_cq, struct mlx5_cqe64 *cqe64)
{
	unsigned int idx;
	u8 opcode;

	opcode = get_cqe_opcode(cqe64);
	if (opcode == MLX5_CQE_REQ_ERR) {
		idx = be16_to_cpu(cqe64->wqe_counter) &
			(dr_cq->qp->sq.wqe_cnt - 1);
		dr_cq->qp->sq.cc = dr_cq->qp->sq.wqe_head[idx] + 1;
	} else if (opcode == MLX5_CQE_RESP_ERR) {
		++dr_cq->qp->sq.cc;
	} else {
		idx = be16_to_cpu(cqe64->wqe_counter) &
			(dr_cq->qp->sq.wqe_cnt - 1);
		dr_cq->qp->sq.cc = dr_cq->qp->sq.wqe_head[idx] + 1;

		return CQ_OK;
	}

	return CQ_POLL_ERR;
}

static int dr_cq_poll_one(struct mlx5dr_cq *dr_cq)
{
	struct mlx5_cqe64 *cqe64;
	int err;

	cqe64 = mlx5_cqwq_get_cqe(&dr_cq->wq);
	if (!cqe64)
		return CQ_EMPTY;

	mlx5_cqwq_pop(&dr_cq->wq);
	err = dr_parse_cqe(dr_cq, cqe64);
	mlx5_cqwq_update_db_record(&dr_cq->wq);

	return err;
}

static int dr_poll_cq(struct mlx5dr_cq *dr_cq, int ne)
{
	int npolled;
	int err = 0;

	for (npolled = 0; npolled < ne; ++npolled) {
		err = dr_cq_poll_one(dr_cq);
		if (err != CQ_OK)
			break;
	}

	return err == CQ_POLL_ERR ? err : npolled;
}
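/* Polling convention used throughout this file: dr_poll_cq() returns the
 * number of completions consumed (0 when the CQ is empty) or CQ_POLL_ERR on a
 * completion with an error opcode. Callers poll a single CQE at a time, so a
 * return value of 1 means one signaled WQE (and, implicitly, the batch of
 * signal_th WQEs posted before it) has completed.
 */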
static void dr_qp_event(struct mlx5_core_qp *mqp, int event)
{
	pr_info("DR QP event %u on QP #%u\n", event, mqp->qpn);
}

static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev,
					 struct dr_qp_init_attr *attr)
{
	u32 temp_qpc[MLX5_ST_SZ_DW(qpc)] = {};
	struct mlx5_wq_param wqp;
	struct mlx5dr_qp *dr_qp;
	int inlen;
	void *qpc;
	void *in;
	int err;

	dr_qp = kzalloc(sizeof(*dr_qp), GFP_KERNEL);
	if (!dr_qp)
		return NULL;

	wqp.buf_numa_node = mdev->priv.numa_node;
	wqp.db_numa_node = mdev->priv.numa_node;

	dr_qp->rq.pc = 0;
	dr_qp->rq.cc = 0;
	dr_qp->rq.wqe_cnt = 4;
	dr_qp->sq.pc = 0;
	dr_qp->sq.cc = 0;
	dr_qp->sq.wqe_cnt = roundup_pow_of_two(attr->max_send_wr);

	MLX5_SET(qpc, temp_qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4);
	MLX5_SET(qpc, temp_qpc, log_rq_size, ilog2(dr_qp->rq.wqe_cnt));
	MLX5_SET(qpc, temp_qpc, log_sq_size, ilog2(dr_qp->sq.wqe_cnt));
	err = mlx5_wq_qp_create(mdev, &wqp, temp_qpc, &dr_qp->wq,
				&dr_qp->wq_ctrl);
	if (err) {
		mlx5_core_warn(mdev, "Can't create QP WQ\n");
		goto err_wq;
	}

	dr_qp->sq.wqe_head = kcalloc(dr_qp->sq.wqe_cnt,
				     sizeof(dr_qp->sq.wqe_head[0]),
				     GFP_KERNEL);
	if (!dr_qp->sq.wqe_head) {
		mlx5_core_warn(mdev, "Can't allocate wqe head\n");
		goto err_wqe_head;
	}

	inlen = MLX5_ST_SZ_BYTES(create_qp_in) +
		MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) *
		dr_qp->wq_ctrl.buf.npages;
	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_in;
	}

	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
	MLX5_SET(qpc, qpc, pd, attr->pdn);
	MLX5_SET(qpc, qpc, uar_page, attr->uar->index);
	MLX5_SET(qpc, qpc, log_page_size,
		 dr_qp->wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
	MLX5_SET(qpc, qpc, fre, 1);
	MLX5_SET(qpc, qpc, rlky, 1);
	MLX5_SET(qpc, qpc, cqn_snd, attr->cqn);
	MLX5_SET(qpc, qpc, cqn_rcv, attr->cqn);
	MLX5_SET(qpc, qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4);
	MLX5_SET(qpc, qpc, log_rq_size, ilog2(dr_qp->rq.wqe_cnt));
	MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
	MLX5_SET(qpc, qpc, log_sq_size, ilog2(dr_qp->sq.wqe_cnt));
	MLX5_SET64(qpc, qpc, dbr_addr, dr_qp->wq_ctrl.db.dma);
	if (MLX5_CAP_GEN(mdev, cqe_version) == 1)
		MLX5_SET(qpc, qpc, user_index, 0xFFFFFF);
	mlx5_fill_page_frag_array(&dr_qp->wq_ctrl.buf,
				  (__be64 *)MLX5_ADDR_OF(create_qp_in,
							 in, pas));

	err = mlx5_core_create_qp(mdev, &dr_qp->mqp, in, inlen);
	kvfree(in);
	if (err) {
		mlx5_core_warn(mdev, "Can't create QP\n");
		goto err_in;
	}
	dr_qp->mqp.event = dr_qp_event;
	dr_qp->uar = attr->uar;

	return dr_qp;

err_in:
	kfree(dr_qp->sq.wqe_head);
err_wqe_head:
	mlx5_wq_destroy(&dr_qp->wq_ctrl);
err_wq:
	kfree(dr_qp);
	return NULL;
}

static void dr_destroy_qp(struct mlx5_core_dev *mdev,
			  struct mlx5dr_qp *dr_qp)
{
	mlx5_core_destroy_qp(mdev, &dr_qp->mqp);
	kfree(dr_qp->sq.wqe_head);
	mlx5_wq_destroy(&dr_qp->wq_ctrl);
	kfree(dr_qp);
}

static void dr_cmd_notify_hw(struct mlx5dr_qp *dr_qp, void *ctrl)
{
	/* Make sure the WQE is written before the doorbell record is updated */
	dma_wmb();
	*dr_qp->wq.sq.db = cpu_to_be32(dr_qp->sq.pc & 0xfffff);

	/* After wmb() the hw is aware of the new work */
	wmb();

	mlx5_write64(ctrl, dr_qp->uar->map + MLX5_BF_OFFSET);
}
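/* Each post built by dr_rdma_segments() below is one RDMA WQE composed of a
 * control segment, a remote-address segment and a single data segment. The
 * size written into qpn_ds is counted in 16-byte units, which is why every
 * segment size is divided by 16.
 */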
static void dr_rdma_segments(struct mlx5dr_qp *dr_qp, u64 remote_addr,
			     u32 rkey, struct dr_data_seg *data_seg,
			     u32 opcode, int nreq)
{
	struct mlx5_wqe_raddr_seg *wq_raddr;
	struct mlx5_wqe_ctrl_seg *wq_ctrl;
	struct mlx5_wqe_data_seg *wq_dseg;
	unsigned int size;
	unsigned int idx;

	size = sizeof(*wq_ctrl) / 16 + sizeof(*wq_dseg) / 16 +
		sizeof(*wq_raddr) / 16;

	idx = dr_qp->sq.pc & (dr_qp->sq.wqe_cnt - 1);

	wq_ctrl = mlx5_wq_cyc_get_wqe(&dr_qp->wq.sq, idx);
	wq_ctrl->imm = 0;
	wq_ctrl->fm_ce_se = (data_seg->send_flags) ?
		MLX5_WQE_CTRL_CQ_UPDATE : 0;
	wq_ctrl->opmod_idx_opcode = cpu_to_be32(((dr_qp->sq.pc & 0xffff) << 8) |
						opcode);
	wq_ctrl->qpn_ds = cpu_to_be32(size | dr_qp->mqp.qpn << 8);
	wq_raddr = (void *)(wq_ctrl + 1);
	wq_raddr->raddr = cpu_to_be64(remote_addr);
	wq_raddr->rkey = cpu_to_be32(rkey);
	wq_raddr->reserved = 0;

	wq_dseg = (void *)(wq_raddr + 1);
	wq_dseg->byte_count = cpu_to_be32(data_seg->length);
	wq_dseg->lkey = cpu_to_be32(data_seg->lkey);
	wq_dseg->addr = cpu_to_be64(data_seg->addr);

	dr_qp->sq.wqe_head[idx] = dr_qp->sq.pc++;

	if (nreq)
		dr_cmd_notify_hw(dr_qp, wq_ctrl);
}

static void dr_post_send(struct mlx5dr_qp *dr_qp, struct postsend_info *send_info)
{
	dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey,
			 &send_info->write, MLX5_OPCODE_RDMA_WRITE, 0);
	dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey,
			 &send_info->read, MLX5_OPCODE_RDMA_READ, 1);
}

/**
 * mlx5dr_send_fill_and_append_ste_send_info: Add data to be sent
 * with send_list parameters:
 *
 * @ste:       The STE this data is attached to
 * @size:      Size of the data to write
 * @offset:    Offset of the data from the start of the hw_ste entry
 * @data:      The data
 * @ste_info:  STE info container to be appended to send_list
 * @send_list: The list to append to
 * @copy_data: If true, copy the data into ste_info because it is not
 *             backed up anywhere else (e.g. during re-hash).
 *             If false, keep a pointer so the data can still be
 *             updated after it was added to the list.
 */
void mlx5dr_send_fill_and_append_ste_send_info(struct mlx5dr_ste *ste, u16 size,
					       u16 offset, u8 *data,
					       struct mlx5dr_ste_send_info *ste_info,
					       struct list_head *send_list,
					       bool copy_data)
{
	ste_info->size = size;
	ste_info->ste = ste;
	ste_info->offset = offset;

	if (copy_data) {
		memcpy(ste_info->data_cont, data, size);
		ste_info->data = ste_info->data_cont;
	} else {
		ste_info->data = data;
	}

	list_add_tail(&ste_info->send_list, send_list);
}
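/* Typical usage sketch of the helper above (the exact flow lives in the rule
 * handling code, not in this file): callers fill one mlx5dr_ste_send_info per
 * STE that needs updating, append it to a local send_list, and later flush
 * each entry with mlx5dr_send_postsend_ste(). copy_data should be true
 * whenever the source buffer will not outlive the list entry, e.g. during
 * re-hash.
 */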
/* The function consumes at most one completion each time it is called, unless
 * the queue is getting full: once the number of pending WQEs reaches
 * TH_NUMS_TO_DRAIN times the signal threshold (i.e. the HW is that far behind
 * the SW), the CQ is drained until it is empty.
 */
static int dr_handle_pending_wc(struct mlx5dr_domain *dmn,
				struct mlx5dr_send_ring *send_ring)
{
	bool is_drain = false;
	int ne;

	if (send_ring->pending_wqe < send_ring->signal_th)
		return 0;

	/* Queue is full, start draining it */
	if (send_ring->pending_wqe >=
	    dmn->send_ring->signal_th * TH_NUMS_TO_DRAIN)
		is_drain = true;

	do {
		ne = dr_poll_cq(send_ring->cq, 1);
		if (ne < 0)
			return ne;
		else if (ne == 1)
			send_ring->pending_wqe -= send_ring->signal_th;
	} while (is_drain && send_ring->pending_wqe);

	return 0;
}

static void dr_fill_data_segs(struct mlx5dr_send_ring *send_ring,
			      struct postsend_info *send_info)
{
	send_ring->pending_wqe++;

	if (send_ring->pending_wqe % send_ring->signal_th == 0)
		send_info->write.send_flags |= IB_SEND_SIGNALED;

	send_ring->pending_wqe++;
	send_info->read.length = send_info->write.length;
	/* Read into the same write area */
	send_info->read.addr = (uintptr_t)send_info->write.addr;
	send_info->read.lkey = send_ring->mr->mkey.key;

	if (send_ring->pending_wqe % send_ring->signal_th == 0)
		send_info->read.send_flags = IB_SEND_SIGNALED;
	else
		send_info->read.send_flags = 0;
}

static int dr_postsend_icm_data(struct mlx5dr_domain *dmn,
				struct postsend_info *send_info)
{
	struct mlx5dr_send_ring *send_ring = dmn->send_ring;
	u32 buff_offset;
	int ret;

	ret = dr_handle_pending_wc(dmn, send_ring);
	if (ret)
		return ret;

	if (send_info->write.length > dmn->info.max_inline_size) {
		buff_offset = (send_ring->tx_head &
			       (dmn->send_ring->signal_th - 1)) *
			      send_ring->max_post_send_size;
		/* Copy to ring mr */
		memcpy(send_ring->buf + buff_offset,
		       (void *)(uintptr_t)send_info->write.addr,
		       send_info->write.length);
		send_info->write.addr = (uintptr_t)send_ring->mr->dma_addr + buff_offset;
		send_info->write.lkey = send_ring->mr->mkey.key;
	}

	send_ring->tx_head++;
	dr_fill_data_segs(send_ring, send_info);
	dr_post_send(send_ring->qp, send_info);

	return 0;
}

static int dr_get_tbl_copy_details(struct mlx5dr_domain *dmn,
				   struct mlx5dr_ste_htbl *htbl,
				   u8 **data,
				   u32 *byte_size,
				   int *iterations,
				   int *num_stes)
{
	int alloc_size;

	if (htbl->chunk->byte_size > dmn->send_ring->max_post_send_size) {
		*iterations = htbl->chunk->byte_size /
			dmn->send_ring->max_post_send_size;
		*byte_size = dmn->send_ring->max_post_send_size;
		alloc_size = *byte_size;
		*num_stes = *byte_size / DR_STE_SIZE;
	} else {
		*iterations = 1;
		*num_stes = htbl->chunk->num_of_entries;
		alloc_size = *num_stes * DR_STE_SIZE;
	}

	*data = kzalloc(alloc_size, GFP_KERNEL);
	if (!*data)
		return -ENOMEM;

	return 0;
}

/**
 * mlx5dr_send_postsend_ste: write size bytes into offset from the hw icm.
 *
 * @dmn:    Domain
 * @ste:    The STE struct that contains the data (at least part of it)
 * @data:   The actual data to send
 * @size:   Number of bytes to write
 * @offset: Offset from the start of the STE's ICM mapped data; allows
 *          writing only part of the buffer.
 *
 * Return: 0 on success.
 */
int mlx5dr_send_postsend_ste(struct mlx5dr_domain *dmn, struct mlx5dr_ste *ste,
			     u8 *data, u16 size, u16 offset)
{
	struct postsend_info send_info = {};

	send_info.write.addr = (uintptr_t)data;
	send_info.write.length = size;
	send_info.write.lkey = 0;
	send_info.remote_addr = mlx5dr_ste_get_mr_addr(ste) + offset;
	send_info.rkey = ste->htbl->chunk->rkey;

	return dr_postsend_icm_data(dmn, &send_info);
}
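/* Rough arithmetic, assuming the defaults set up in mlx5dr_send_ring_alloc():
 * QUEUE_SIZE = 128 and SIGNAL_PER_DIV_QUEUE = 16 give signal_th = 8, so only
 * every 8th WQE is posted with IB_SEND_SIGNALED and each polled completion
 * retires 8 pending WQEs in dr_handle_pending_wc(). Writes larger than
 * max_inline_size are first staged in one of signal_th slots of
 * max_post_send_size bytes inside send_ring->buf, selected by
 * tx_head & (signal_th - 1).
 */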
int mlx5dr_send_postsend_htbl(struct mlx5dr_domain *dmn,
			      struct mlx5dr_ste_htbl *htbl,
			      u8 *formatted_ste, u8 *mask)
{
	u32 byte_size = htbl->chunk->byte_size;
	int num_stes_per_iter;
	int iterations;
	u8 *data;
	int ret;
	int i;
	int j;

	ret = dr_get_tbl_copy_details(dmn, htbl, &data, &byte_size,
				      &iterations, &num_stes_per_iter);
	if (ret)
		return ret;

	/* Send the data "iterations" times */
	for (i = 0; i < iterations; i++) {
		u32 ste_index = i * (byte_size / DR_STE_SIZE);
		struct postsend_info send_info = {};

		/* Copy all the STEs into the data buffer;
		 * the bit_mask needs to be added as well.
		 */
		for (j = 0; j < num_stes_per_iter; j++) {
			u8 *hw_ste = htbl->ste_arr[ste_index + j].hw_ste;
			u32 ste_off = j * DR_STE_SIZE;

			if (mlx5dr_ste_is_not_valid_entry(hw_ste)) {
				memcpy(data + ste_off,
				       formatted_ste, DR_STE_SIZE);
			} else {
				/* Copy data */
				memcpy(data + ste_off,
				       htbl->ste_arr[ste_index + j].hw_ste,
				       DR_STE_SIZE_REDUCED);
				/* Copy bit_mask */
				memcpy(data + ste_off + DR_STE_SIZE_REDUCED,
				       mask, DR_STE_SIZE_MASK);
			}
		}

		send_info.write.addr = (uintptr_t)data;
		send_info.write.length = byte_size;
		send_info.write.lkey = 0;
		send_info.remote_addr =
			mlx5dr_ste_get_mr_addr(htbl->ste_arr + ste_index);
		send_info.rkey = htbl->chunk->rkey;

		ret = dr_postsend_icm_data(dmn, &send_info);
		if (ret)
			goto out_free;
	}

out_free:
	kfree(data);
	return ret;
}

/* Initialize htbl with default STEs */
int mlx5dr_send_postsend_formatted_htbl(struct mlx5dr_domain *dmn,
					struct mlx5dr_ste_htbl *htbl,
					u8 *ste_init_data,
					bool update_hw_ste)
{
	u32 byte_size = htbl->chunk->byte_size;
	int iterations;
	int num_stes;
	u8 *data;
	int ret;
	int i;

	ret = dr_get_tbl_copy_details(dmn, htbl, &data, &byte_size,
				      &iterations, &num_stes);
	if (ret)
		return ret;

	for (i = 0; i < num_stes; i++) {
		u8 *copy_dst;

		/* Copy the same STE over the whole data buffer */
		copy_dst = data + i * DR_STE_SIZE;
		memcpy(copy_dst, ste_init_data, DR_STE_SIZE);

		if (update_hw_ste) {
			/* Copy the reduced STE to the hash table ste_arr */
			copy_dst = htbl->hw_ste_arr + i * DR_STE_SIZE_REDUCED;
			memcpy(copy_dst, ste_init_data, DR_STE_SIZE_REDUCED);
		}
	}

	/* Send the data "iterations" times */
	for (i = 0; i < iterations; i++) {
		u32 ste_index = i * (byte_size / DR_STE_SIZE);
		struct postsend_info send_info = {};

		send_info.write.addr = (uintptr_t)data;
		send_info.write.length = byte_size;
		send_info.write.lkey = 0;
		send_info.remote_addr =
			mlx5dr_ste_get_mr_addr(htbl->ste_arr + ste_index);
		send_info.rkey = htbl->chunk->rkey;

		ret = dr_postsend_icm_data(dmn, &send_info);
		if (ret)
			goto out_free;
	}

out_free:
	kfree(data);
	return ret;
}
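/* Copy-details example (assuming a 64-byte DR_STE_SIZE and a 64KB
 * max_post_send_size): a 256KB table chunk is pushed in
 * iterations = 256KB / 64KB = 4 posts of byte_size = 64KB, i.e. 1024 STEs per
 * iteration, while a 4KB chunk fits in a single iteration covering its
 * 64 entries.
 */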
int mlx5dr_send_postsend_action(struct mlx5dr_domain *dmn,
				struct mlx5dr_action *action)
{
	struct postsend_info send_info = {};
	int ret;

	send_info.write.addr = (uintptr_t)action->rewrite.data;
	send_info.write.length = action->rewrite.chunk->byte_size;
	send_info.write.lkey = 0;
	send_info.remote_addr = action->rewrite.chunk->mr_addr;
	send_info.rkey = action->rewrite.chunk->rkey;

	mutex_lock(&dmn->mutex);
	ret = dr_postsend_icm_data(dmn, &send_info);
	mutex_unlock(&dmn->mutex);

	return ret;
}

static int dr_modify_qp_rst2init(struct mlx5_core_dev *mdev,
				 struct mlx5dr_qp *dr_qp,
				 int port)
{
	u32 in[MLX5_ST_SZ_DW(rst2init_qp_in)] = {};
	void *qpc;

	qpc = MLX5_ADDR_OF(rst2init_qp_in, in, qpc);

	MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, port);
	MLX5_SET(qpc, qpc, pm_state, MLX5_QPC_PM_STATE_MIGRATED);
	MLX5_SET(qpc, qpc, rre, 1);
	MLX5_SET(qpc, qpc, rwe, 1);

	return mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RST2INIT_QP, 0, qpc,
				   &dr_qp->mqp);
}

static int dr_cmd_modify_qp_rtr2rts(struct mlx5_core_dev *mdev,
				    struct mlx5dr_qp *dr_qp,
				    struct dr_qp_rts_attr *attr)
{
	u32 in[MLX5_ST_SZ_DW(rtr2rts_qp_in)] = {};
	void *qpc;

	qpc = MLX5_ADDR_OF(rtr2rts_qp_in, in, qpc);

	MLX5_SET(rtr2rts_qp_in, in, qpn, dr_qp->mqp.qpn);

	MLX5_SET(qpc, qpc, log_ack_req_freq, 0);
	MLX5_SET(qpc, qpc, retry_count, attr->retry_cnt);
	MLX5_SET(qpc, qpc, rnr_retry, attr->rnr_retry);

	return mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RTR2RTS_QP, 0, qpc,
				   &dr_qp->mqp);
}

static int dr_cmd_modify_qp_init2rtr(struct mlx5_core_dev *mdev,
				     struct mlx5dr_qp *dr_qp,
				     struct dr_qp_rtr_attr *attr)
{
	u32 in[MLX5_ST_SZ_DW(init2rtr_qp_in)] = {};
	void *qpc;

	qpc = MLX5_ADDR_OF(init2rtr_qp_in, in, qpc);

	MLX5_SET(init2rtr_qp_in, in, qpn, dr_qp->mqp.qpn);

	MLX5_SET(qpc, qpc, mtu, attr->mtu);
	MLX5_SET(qpc, qpc, log_msg_max, DR_CHUNK_SIZE_MAX - 1);
	MLX5_SET(qpc, qpc, remote_qpn, attr->qp_num);
	memcpy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rmac_47_32),
	       attr->dgid_attr.mac, sizeof(attr->dgid_attr.mac));
	memcpy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rgid_rip),
	       attr->dgid_attr.gid, sizeof(attr->dgid_attr.gid));
	MLX5_SET(qpc, qpc, primary_address_path.src_addr_index,
		 attr->sgid_index);

	if (attr->dgid_attr.roce_ver == MLX5_ROCE_VERSION_2)
		MLX5_SET(qpc, qpc, primary_address_path.udp_sport,
			 attr->udp_src_port);

	MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, attr->port_num);
	MLX5_SET(qpc, qpc, min_rnr_nak, 1);

	return mlx5_core_qp_modify(mdev, MLX5_CMD_OP_INIT2RTR_QP, 0, qpc,
				   &dr_qp->mqp);
}
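/* dr_prepare_qp_to_rts() below walks the send-ring QP through the standard RC
 * state machine (RESET -> INIT -> RTR -> RTS). Note that rtr_attr.qp_num is
 * set to the QP's own number, i.e. the QP is connected to itself so that RDMA
 * WRITE/READ towards ICM can be posted over loopback.
 */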
static int dr_prepare_qp_to_rts(struct mlx5dr_domain *dmn)
{
	struct mlx5dr_qp *dr_qp = dmn->send_ring->qp;
	struct dr_qp_rts_attr rts_attr = {};
	struct dr_qp_rtr_attr rtr_attr = {};
	enum ib_mtu mtu = IB_MTU_1024;
	u16 gid_index = 0;
	int port = 1;
	int ret;

	/* Init */
	ret = dr_modify_qp_rst2init(dmn->mdev, dr_qp, port);
	if (ret) {
		mlx5dr_err(dmn, "Failed modify QP rst2init\n");
		return ret;
	}

	/* RTR */
	ret = mlx5dr_cmd_query_gid(dmn->mdev, port, gid_index, &rtr_attr.dgid_attr);
	if (ret)
		return ret;

	rtr_attr.mtu = mtu;
	rtr_attr.qp_num = dr_qp->mqp.qpn;
	rtr_attr.min_rnr_timer = 12;
	rtr_attr.port_num = port;
	rtr_attr.sgid_index = gid_index;
	rtr_attr.udp_src_port = dmn->info.caps.roce_min_src_udp;

	ret = dr_cmd_modify_qp_init2rtr(dmn->mdev, dr_qp, &rtr_attr);
	if (ret) {
		mlx5dr_err(dmn, "Failed modify QP init2rtr\n");
		return ret;
	}

	/* RTS */
	rts_attr.timeout = 14;
	rts_attr.retry_cnt = 7;
	rts_attr.rnr_retry = 7;

	ret = dr_cmd_modify_qp_rtr2rts(dmn->mdev, dr_qp, &rts_attr);
	if (ret) {
		mlx5dr_err(dmn, "Failed modify QP rtr2rts\n");
		return ret;
	}

	return 0;
}

static void dr_cq_event(struct mlx5_core_cq *mcq,
			enum mlx5_event event)
{
	pr_info("CQ event %u on CQ #%u\n", event, mcq->cqn);
}

static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev,
				      struct mlx5_uars_page *uar,
				      size_t ncqe)
{
	u32 temp_cqc[MLX5_ST_SZ_DW(cqc)] = {};
	u32 out[MLX5_ST_SZ_DW(create_cq_out)];
	struct mlx5_wq_param wqp;
	struct mlx5_cqe64 *cqe;
	struct mlx5dr_cq *cq;
	int inlen, err, eqn;
	unsigned int irqn;
	void *cqc, *in;
	__be64 *pas;
	int vector;
	u32 i;

	cq = kzalloc(sizeof(*cq), GFP_KERNEL);
	if (!cq)
		return NULL;

	ncqe = roundup_pow_of_two(ncqe);
	MLX5_SET(cqc, temp_cqc, log_cq_size, ilog2(ncqe));

	wqp.buf_numa_node = mdev->priv.numa_node;
	wqp.db_numa_node = mdev->priv.numa_node;

	err = mlx5_cqwq_create(mdev, &wqp, temp_cqc, &cq->wq,
			       &cq->wq_ctrl);
	if (err)
		goto out;

	for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) {
		cqe = mlx5_cqwq_get_wqe(&cq->wq, i);
		cqe->op_own = MLX5_CQE_INVALID << 4 | MLX5_CQE_OWNER_MASK;
	}

	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
		sizeof(u64) * cq->wq_ctrl.buf.npages;
	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in)
		goto err_cqwq;

	vector = raw_smp_processor_id() % mlx5_comp_vectors_count(mdev);
	err = mlx5_vector2eqn(mdev, vector, &eqn, &irqn);
	if (err) {
		kvfree(in);
		goto err_cqwq;
	}

	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
	MLX5_SET(cqc, cqc, log_cq_size, ilog2(ncqe));
	MLX5_SET(cqc, cqc, c_eqn, eqn);
	MLX5_SET(cqc, cqc, uar_page, uar->index);
	MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
		 MLX5_ADAPTER_PAGE_SHIFT);
	MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma);

	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
	mlx5_fill_page_frag_array(&cq->wq_ctrl.buf, pas);

	cq->mcq.event = dr_cq_event;

	err = mlx5_core_create_cq(mdev, &cq->mcq, in, inlen, out, sizeof(out));
	kvfree(in);

	if (err)
		goto err_cqwq;

	cq->mcq.cqe_sz = 64;
	cq->mcq.set_ci_db = cq->wq_ctrl.db.db;
	cq->mcq.arm_db = cq->wq_ctrl.db.db + 1;
	*cq->mcq.set_ci_db = 0;
	*cq->mcq.arm_db = 0;
	cq->mcq.vector = 0;
	cq->mcq.irqn = irqn;
	cq->mcq.uar = uar;

	return cq;

err_cqwq:
	mlx5_wq_destroy(&cq->wq_ctrl);
out:
	kfree(cq);
	return NULL;
}

static void dr_destroy_cq(struct mlx5_core_dev *mdev, struct mlx5dr_cq *cq)
{
	mlx5_core_destroy_cq(mdev, &cq->mcq);
	mlx5_wq_destroy(&cq->wq_ctrl);
	kfree(cq);
}
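/* The single mkey created below uses MLX5_MKC_ACCESS_MODE_PA with length64
 * set, so within this PD it effectively spans the whole physical address
 * space; the same key then serves as lkey for both the staging buffer and
 * the sync buffer that are DMA-mapped in dr_reg_mr().
 */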
static int
dr_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, struct mlx5_core_mkey *mkey)
{
	u32 in[MLX5_ST_SZ_DW(create_mkey_in)] = {};
	void *mkc;

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
	MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
	MLX5_SET(mkc, mkc, a, 1);
	MLX5_SET(mkc, mkc, rw, 1);
	MLX5_SET(mkc, mkc, rr, 1);
	MLX5_SET(mkc, mkc, lw, 1);
	MLX5_SET(mkc, mkc, lr, 1);

	MLX5_SET(mkc, mkc, pd, pdn);
	MLX5_SET(mkc, mkc, length64, 1);
	MLX5_SET(mkc, mkc, qpn, 0xffffff);

	return mlx5_core_create_mkey(mdev, mkey, in, sizeof(in));
}

static struct mlx5dr_mr *dr_reg_mr(struct mlx5_core_dev *mdev,
				   u32 pdn, void *buf, size_t size)
{
	struct mlx5dr_mr *mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	struct device *dma_device;
	dma_addr_t dma_addr;
	int err;

	if (!mr)
		return NULL;

	dma_device = &mdev->pdev->dev;
	dma_addr = dma_map_single(dma_device, buf, size,
				  DMA_BIDIRECTIONAL);
	err = dma_mapping_error(dma_device, dma_addr);
	if (err) {
		mlx5_core_warn(mdev, "Can't dma buf\n");
		kfree(mr);
		return NULL;
	}

	err = dr_create_mkey(mdev, pdn, &mr->mkey);
	if (err) {
		mlx5_core_warn(mdev, "Can't create mkey\n");
		dma_unmap_single(dma_device, dma_addr, size,
				 DMA_BIDIRECTIONAL);
		kfree(mr);
		return NULL;
	}

	mr->dma_addr = dma_addr;
	mr->size = size;
	mr->addr = buf;

	return mr;
}

static void dr_dereg_mr(struct mlx5_core_dev *mdev, struct mlx5dr_mr *mr)
{
	mlx5_core_destroy_mkey(mdev, &mr->mkey);
	dma_unmap_single(&mdev->pdev->dev, mr->dma_addr, mr->size,
			 DMA_BIDIRECTIONAL);
	kfree(mr);
}

int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn)
{
	struct dr_qp_init_attr init_attr = {};
	int cq_size;
	int size;
	int ret;

	dmn->send_ring = kzalloc(sizeof(*dmn->send_ring), GFP_KERNEL);
	if (!dmn->send_ring)
		return -ENOMEM;

	cq_size = QUEUE_SIZE + 1;
	dmn->send_ring->cq = dr_create_cq(dmn->mdev, dmn->uar, cq_size);
	if (!dmn->send_ring->cq) {
		mlx5dr_err(dmn, "Failed creating CQ\n");
		ret = -ENOMEM;
		goto free_send_ring;
	}

	init_attr.cqn = dmn->send_ring->cq->mcq.cqn;
	init_attr.pdn = dmn->pdn;
	init_attr.uar = dmn->uar;
	init_attr.max_send_wr = QUEUE_SIZE;

	dmn->send_ring->qp = dr_create_rc_qp(dmn->mdev, &init_attr);
	if (!dmn->send_ring->qp) {
		mlx5dr_err(dmn, "Failed creating QP\n");
		ret = -ENOMEM;
		goto clean_cq;
	}

	dmn->send_ring->cq->qp = dmn->send_ring->qp;

	dmn->info.max_send_wr = QUEUE_SIZE;
	dmn->info.max_inline_size = min(dmn->send_ring->qp->max_inline_data,
					DR_STE_SIZE);

	dmn->send_ring->signal_th = dmn->info.max_send_wr /
		SIGNAL_PER_DIV_QUEUE;

	/* Prepare qp to be used */
	ret = dr_prepare_qp_to_rts(dmn);
	if (ret)
		goto clean_qp;

	dmn->send_ring->max_post_send_size =
		mlx5dr_icm_pool_chunk_size_to_byte(DR_CHUNK_SIZE_1K,
						   DR_ICM_TYPE_STE);

	/* Allocating the max size as a buffer for writing */
	size = dmn->send_ring->signal_th * dmn->send_ring->max_post_send_size;
	dmn->send_ring->buf = kzalloc(size, GFP_KERNEL);
	if (!dmn->send_ring->buf) {
		ret = -ENOMEM;
		goto clean_qp;
	}

	dmn->send_ring->buf_size = size;

	dmn->send_ring->mr = dr_reg_mr(dmn->mdev,
				       dmn->pdn, dmn->send_ring->buf, size);
	if (!dmn->send_ring->mr) {
		ret = -ENOMEM;
		goto free_mem;
	}

	dmn->send_ring->sync_mr = dr_reg_mr(dmn->mdev,
					    dmn->pdn, dmn->send_ring->sync_buff,
					    MIN_READ_SYNC);
	if (!dmn->send_ring->sync_mr) {
		ret = -ENOMEM;
		goto clean_mr;
	}

	return 0;

clean_mr:
	dr_dereg_mr(dmn->mdev, dmn->send_ring->mr);
free_mem:
	kfree(dmn->send_ring->buf);
clean_qp:
	dr_destroy_qp(dmn->mdev, dmn->send_ring->qp);
clean_cq:
	dr_destroy_cq(dmn->mdev, dmn->send_ring->cq);
free_send_ring:
	kfree(dmn->send_ring);

	return ret;
}
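/* Sizing note (under the defaults above): signal_th = QUEUE_SIZE /
 * SIGNAL_PER_DIV_QUEUE = 128 / 16 = 8, and max_post_send_size is the byte
 * size of a 1K-STE chunk, so the staging buffer holds signal_th slots of that
 * size (with 64-byte STEs this is 8 * 64KB = 512KB). The small sync buffer is
 * the target of the dummy posts issued by mlx5dr_send_ring_force_drain().
 */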
void mlx5dr_send_ring_free(struct mlx5dr_domain *dmn,
			   struct mlx5dr_send_ring *send_ring)
{
	dr_destroy_qp(dmn->mdev, send_ring->qp);
	dr_destroy_cq(dmn->mdev, send_ring->cq);
	dr_dereg_mr(dmn->mdev, send_ring->sync_mr);
	dr_dereg_mr(dmn->mdev, send_ring->mr);
	kfree(send_ring->buf);
	kfree(send_ring);
}

int mlx5dr_send_ring_force_drain(struct mlx5dr_domain *dmn)
{
	struct mlx5dr_send_ring *send_ring = dmn->send_ring;
	struct postsend_info send_info = {};
	u8 data[DR_STE_SIZE];
	int num_of_sends_req;
	int ret;
	int i;

	/* Sending this number of requests makes sure we will get a drain */
	num_of_sends_req = send_ring->signal_th * TH_NUMS_TO_DRAIN / 2;

	/* Send fake requests forcing the last one to be signaled */
	send_info.write.addr = (uintptr_t)data;
	send_info.write.length = DR_STE_SIZE;
	send_info.write.lkey = 0;
	/* Using the sync_mr in order to write/read */
	send_info.remote_addr = (uintptr_t)send_ring->sync_mr->addr;
	send_info.rkey = send_ring->sync_mr->mkey.key;

	for (i = 0; i < num_of_sends_req; i++) {
		ret = dr_postsend_icm_data(dmn, &send_info);
		if (ret)
			return ret;
	}

	ret = dr_handle_pending_wc(dmn, send_ring);

	return ret;
}
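/* Drain arithmetic: each dr_postsend_icm_data() call posts two WQEs (a WRITE
 * and a READ), so num_of_sends_req = signal_th * TH_NUMS_TO_DRAIN / 2 fake
 * posts add signal_th * TH_NUMS_TO_DRAIN pending WQEs. That is enough to
 * cross the drain threshold in dr_handle_pending_wc(), which then keeps
 * polling until the pending-WQE counter is exhausted.
 */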