1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB 2 /* Copyright (c) 2019 Mellanox Technologies. */ 3 4 #include <linux/smp.h> 5 #include "dr_types.h" 6 7 #define QUEUE_SIZE 128 8 #define SIGNAL_PER_DIV_QUEUE 16 9 #define TH_NUMS_TO_DRAIN 2 10 11 enum { CQ_OK = 0, CQ_EMPTY = -1, CQ_POLL_ERR = -2 }; 12 13 struct dr_data_seg { 14 u64 addr; 15 u32 length; 16 u32 lkey; 17 unsigned int send_flags; 18 }; 19 20 struct postsend_info { 21 struct dr_data_seg write; 22 struct dr_data_seg read; 23 u64 remote_addr; 24 u32 rkey; 25 }; 26 27 struct dr_qp_rtr_attr { 28 struct mlx5dr_cmd_gid_attr dgid_attr; 29 enum ib_mtu mtu; 30 u32 qp_num; 31 u16 port_num; 32 u8 min_rnr_timer; 33 u8 sgid_index; 34 u16 udp_src_port; 35 }; 36 37 struct dr_qp_rts_attr { 38 u8 timeout; 39 u8 retry_cnt; 40 u8 rnr_retry; 41 }; 42 43 struct dr_qp_init_attr { 44 u32 cqn; 45 u32 pdn; 46 u32 max_send_wr; 47 struct mlx5_uars_page *uar; 48 }; 49 50 static int dr_parse_cqe(struct mlx5dr_cq *dr_cq, struct mlx5_cqe64 *cqe64) 51 { 52 unsigned int idx; 53 u8 opcode; 54 55 opcode = get_cqe_opcode(cqe64); 56 if (opcode == MLX5_CQE_REQ_ERR) { 57 idx = be16_to_cpu(cqe64->wqe_counter) & 58 (dr_cq->qp->sq.wqe_cnt - 1); 59 dr_cq->qp->sq.cc = dr_cq->qp->sq.wqe_head[idx] + 1; 60 } else if (opcode == MLX5_CQE_RESP_ERR) { 61 ++dr_cq->qp->sq.cc; 62 } else { 63 idx = be16_to_cpu(cqe64->wqe_counter) & 64 (dr_cq->qp->sq.wqe_cnt - 1); 65 dr_cq->qp->sq.cc = dr_cq->qp->sq.wqe_head[idx] + 1; 66 67 return CQ_OK; 68 } 69 70 return CQ_POLL_ERR; 71 } 72 73 static int dr_cq_poll_one(struct mlx5dr_cq *dr_cq) 74 { 75 struct mlx5_cqe64 *cqe64; 76 int err; 77 78 cqe64 = mlx5_cqwq_get_cqe(&dr_cq->wq); 79 if (!cqe64) 80 return CQ_EMPTY; 81 82 mlx5_cqwq_pop(&dr_cq->wq); 83 err = dr_parse_cqe(dr_cq, cqe64); 84 mlx5_cqwq_update_db_record(&dr_cq->wq); 85 86 return err; 87 } 88 89 static int dr_poll_cq(struct mlx5dr_cq *dr_cq, int ne) 90 { 91 int npolled; 92 int err = 0; 93 94 for (npolled = 0; npolled < ne; ++npolled) { 95 err = dr_cq_poll_one(dr_cq); 96 if (err != CQ_OK) 97 break; 98 } 99 100 return err == CQ_POLL_ERR ? err : npolled; 101 } 102 103 static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev, 104 struct dr_qp_init_attr *attr) 105 { 106 u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {}; 107 u32 temp_qpc[MLX5_ST_SZ_DW(qpc)] = {}; 108 struct mlx5_wq_param wqp; 109 struct mlx5dr_qp *dr_qp; 110 int inlen; 111 void *qpc; 112 void *in; 113 int err; 114 115 dr_qp = kzalloc(sizeof(*dr_qp), GFP_KERNEL); 116 if (!dr_qp) 117 return NULL; 118 119 wqp.buf_numa_node = mdev->priv.numa_node; 120 wqp.db_numa_node = mdev->priv.numa_node; 121 122 dr_qp->rq.pc = 0; 123 dr_qp->rq.cc = 0; 124 dr_qp->rq.wqe_cnt = 4; 125 dr_qp->sq.pc = 0; 126 dr_qp->sq.cc = 0; 127 dr_qp->sq.wqe_cnt = roundup_pow_of_two(attr->max_send_wr); 128 129 MLX5_SET(qpc, temp_qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4); 130 MLX5_SET(qpc, temp_qpc, log_rq_size, ilog2(dr_qp->rq.wqe_cnt)); 131 MLX5_SET(qpc, temp_qpc, log_sq_size, ilog2(dr_qp->sq.wqe_cnt)); 132 err = mlx5_wq_qp_create(mdev, &wqp, temp_qpc, &dr_qp->wq, 133 &dr_qp->wq_ctrl); 134 if (err) { 135 mlx5_core_warn(mdev, "Can't create QP WQ\n"); 136 goto err_wq; 137 } 138 139 dr_qp->sq.wqe_head = kcalloc(dr_qp->sq.wqe_cnt, 140 sizeof(dr_qp->sq.wqe_head[0]), 141 GFP_KERNEL); 142 143 if (!dr_qp->sq.wqe_head) { 144 mlx5_core_warn(mdev, "Can't allocate wqe head\n"); 145 goto err_wqe_head; 146 } 147 148 inlen = MLX5_ST_SZ_BYTES(create_qp_in) + 149 MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) * 150 dr_qp->wq_ctrl.buf.npages; 151 in = kvzalloc(inlen, GFP_KERNEL); 152 if (!in) { 153 err = -ENOMEM; 154 goto err_in; 155 } 156 157 qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); 158 MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC); 159 MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); 160 MLX5_SET(qpc, qpc, pd, attr->pdn); 161 MLX5_SET(qpc, qpc, uar_page, attr->uar->index); 162 MLX5_SET(qpc, qpc, log_page_size, 163 dr_qp->wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); 164 MLX5_SET(qpc, qpc, fre, 1); 165 MLX5_SET(qpc, qpc, rlky, 1); 166 MLX5_SET(qpc, qpc, cqn_snd, attr->cqn); 167 MLX5_SET(qpc, qpc, cqn_rcv, attr->cqn); 168 MLX5_SET(qpc, qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4); 169 MLX5_SET(qpc, qpc, log_rq_size, ilog2(dr_qp->rq.wqe_cnt)); 170 MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ); 171 MLX5_SET(qpc, qpc, log_sq_size, ilog2(dr_qp->sq.wqe_cnt)); 172 MLX5_SET64(qpc, qpc, dbr_addr, dr_qp->wq_ctrl.db.dma); 173 if (MLX5_CAP_GEN(mdev, cqe_version) == 1) 174 MLX5_SET(qpc, qpc, user_index, 0xFFFFFF); 175 mlx5_fill_page_frag_array(&dr_qp->wq_ctrl.buf, 176 (__be64 *)MLX5_ADDR_OF(create_qp_in, 177 in, pas)); 178 179 MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP); 180 err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out)); 181 dr_qp->qpn = MLX5_GET(create_qp_out, out, qpn); 182 kvfree(in); 183 if (err) 184 goto err_in; 185 dr_qp->uar = attr->uar; 186 187 return dr_qp; 188 189 err_in: 190 kfree(dr_qp->sq.wqe_head); 191 err_wqe_head: 192 mlx5_wq_destroy(&dr_qp->wq_ctrl); 193 err_wq: 194 kfree(dr_qp); 195 return NULL; 196 } 197 198 static void dr_destroy_qp(struct mlx5_core_dev *mdev, 199 struct mlx5dr_qp *dr_qp) 200 { 201 u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {}; 202 203 MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP); 204 MLX5_SET(destroy_qp_in, in, qpn, dr_qp->qpn); 205 mlx5_cmd_exec_in(mdev, destroy_qp, in); 206 207 kfree(dr_qp->sq.wqe_head); 208 mlx5_wq_destroy(&dr_qp->wq_ctrl); 209 kfree(dr_qp); 210 } 211 212 static void dr_cmd_notify_hw(struct mlx5dr_qp *dr_qp, void *ctrl) 213 { 214 dma_wmb(); 215 *dr_qp->wq.sq.db = cpu_to_be32(dr_qp->sq.pc & 0xfffff); 216 217 /* After wmb() the hw aware of new work */ 218 wmb(); 219 220 mlx5_write64(ctrl, dr_qp->uar->map + MLX5_BF_OFFSET); 221 } 222 223 static void dr_rdma_segments(struct mlx5dr_qp *dr_qp, u64 remote_addr, 224 u32 rkey, struct dr_data_seg *data_seg, 225 u32 opcode, int nreq) 226 { 227 struct mlx5_wqe_raddr_seg *wq_raddr; 228 struct mlx5_wqe_ctrl_seg *wq_ctrl; 229 struct mlx5_wqe_data_seg *wq_dseg; 230 unsigned int size; 231 unsigned int idx; 232 233 size = sizeof(*wq_ctrl) / 16 + sizeof(*wq_dseg) / 16 + 234 sizeof(*wq_raddr) / 16; 235 236 idx = dr_qp->sq.pc & (dr_qp->sq.wqe_cnt - 1); 237 238 wq_ctrl = mlx5_wq_cyc_get_wqe(&dr_qp->wq.sq, idx); 239 wq_ctrl->imm = 0; 240 wq_ctrl->fm_ce_se = (data_seg->send_flags) ? 241 MLX5_WQE_CTRL_CQ_UPDATE : 0; 242 wq_ctrl->opmod_idx_opcode = cpu_to_be32(((dr_qp->sq.pc & 0xffff) << 8) | 243 opcode); 244 wq_ctrl->qpn_ds = cpu_to_be32(size | dr_qp->qpn << 8); 245 wq_raddr = (void *)(wq_ctrl + 1); 246 wq_raddr->raddr = cpu_to_be64(remote_addr); 247 wq_raddr->rkey = cpu_to_be32(rkey); 248 wq_raddr->reserved = 0; 249 250 wq_dseg = (void *)(wq_raddr + 1); 251 wq_dseg->byte_count = cpu_to_be32(data_seg->length); 252 wq_dseg->lkey = cpu_to_be32(data_seg->lkey); 253 wq_dseg->addr = cpu_to_be64(data_seg->addr); 254 255 dr_qp->sq.wqe_head[idx] = dr_qp->sq.pc++; 256 257 if (nreq) 258 dr_cmd_notify_hw(dr_qp, wq_ctrl); 259 } 260 261 static void dr_post_send(struct mlx5dr_qp *dr_qp, struct postsend_info *send_info) 262 { 263 dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey, 264 &send_info->write, MLX5_OPCODE_RDMA_WRITE, 0); 265 dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey, 266 &send_info->read, MLX5_OPCODE_RDMA_READ, 1); 267 } 268 269 /** 270 * mlx5dr_send_fill_and_append_ste_send_info: Add data to be sent 271 * with send_list parameters: 272 * 273 * @ste: The data that attached to this specific ste 274 * @size: of data to write 275 * @offset: of the data from start of the hw_ste entry 276 * @data: data 277 * @ste_info: ste to be sent with send_list 278 * @send_list: to append into it 279 * @copy_data: if true indicates that the data should be kept because 280 * it's not backuped any where (like in re-hash). 281 * if false, it lets the data to be updated after 282 * it was added to the list. 283 */ 284 void mlx5dr_send_fill_and_append_ste_send_info(struct mlx5dr_ste *ste, u16 size, 285 u16 offset, u8 *data, 286 struct mlx5dr_ste_send_info *ste_info, 287 struct list_head *send_list, 288 bool copy_data) 289 { 290 ste_info->size = size; 291 ste_info->ste = ste; 292 ste_info->offset = offset; 293 294 if (copy_data) { 295 memcpy(ste_info->data_cont, data, size); 296 ste_info->data = ste_info->data_cont; 297 } else { 298 ste_info->data = data; 299 } 300 301 list_add_tail(&ste_info->send_list, send_list); 302 } 303 304 /* The function tries to consume one wc each time, unless the queue is full, in 305 * that case, which means that the hw is behind the sw in a full queue len 306 * the function will drain the cq till it empty. 307 */ 308 static int dr_handle_pending_wc(struct mlx5dr_domain *dmn, 309 struct mlx5dr_send_ring *send_ring) 310 { 311 bool is_drain = false; 312 int ne; 313 314 if (send_ring->pending_wqe < send_ring->signal_th) 315 return 0; 316 317 /* Queue is full start drain it */ 318 if (send_ring->pending_wqe >= 319 dmn->send_ring->signal_th * TH_NUMS_TO_DRAIN) 320 is_drain = true; 321 322 do { 323 ne = dr_poll_cq(send_ring->cq, 1); 324 if (ne < 0) 325 return ne; 326 else if (ne == 1) 327 send_ring->pending_wqe -= send_ring->signal_th; 328 } while (is_drain && send_ring->pending_wqe); 329 330 return 0; 331 } 332 333 static void dr_fill_data_segs(struct mlx5dr_send_ring *send_ring, 334 struct postsend_info *send_info) 335 { 336 send_ring->pending_wqe++; 337 338 if (send_ring->pending_wqe % send_ring->signal_th == 0) 339 send_info->write.send_flags |= IB_SEND_SIGNALED; 340 341 send_ring->pending_wqe++; 342 send_info->read.length = send_info->write.length; 343 /* Read into the same write area */ 344 send_info->read.addr = (uintptr_t)send_info->write.addr; 345 send_info->read.lkey = send_ring->mr->mkey.key; 346 347 if (send_ring->pending_wqe % send_ring->signal_th == 0) 348 send_info->read.send_flags = IB_SEND_SIGNALED; 349 else 350 send_info->read.send_flags = 0; 351 } 352 353 static int dr_postsend_icm_data(struct mlx5dr_domain *dmn, 354 struct postsend_info *send_info) 355 { 356 struct mlx5dr_send_ring *send_ring = dmn->send_ring; 357 u32 buff_offset; 358 int ret; 359 360 spin_lock(&send_ring->lock); 361 362 ret = dr_handle_pending_wc(dmn, send_ring); 363 if (ret) 364 goto out_unlock; 365 366 if (send_info->write.length > dmn->info.max_inline_size) { 367 buff_offset = (send_ring->tx_head & 368 (dmn->send_ring->signal_th - 1)) * 369 send_ring->max_post_send_size; 370 /* Copy to ring mr */ 371 memcpy(send_ring->buf + buff_offset, 372 (void *)(uintptr_t)send_info->write.addr, 373 send_info->write.length); 374 send_info->write.addr = (uintptr_t)send_ring->mr->dma_addr + buff_offset; 375 send_info->write.lkey = send_ring->mr->mkey.key; 376 } 377 378 send_ring->tx_head++; 379 dr_fill_data_segs(send_ring, send_info); 380 dr_post_send(send_ring->qp, send_info); 381 382 out_unlock: 383 spin_unlock(&send_ring->lock); 384 return ret; 385 } 386 387 static int dr_get_tbl_copy_details(struct mlx5dr_domain *dmn, 388 struct mlx5dr_ste_htbl *htbl, 389 u8 **data, 390 u32 *byte_size, 391 int *iterations, 392 int *num_stes) 393 { 394 int alloc_size; 395 396 if (htbl->chunk->byte_size > dmn->send_ring->max_post_send_size) { 397 *iterations = htbl->chunk->byte_size / 398 dmn->send_ring->max_post_send_size; 399 *byte_size = dmn->send_ring->max_post_send_size; 400 alloc_size = *byte_size; 401 *num_stes = *byte_size / DR_STE_SIZE; 402 } else { 403 *iterations = 1; 404 *num_stes = htbl->chunk->num_of_entries; 405 alloc_size = *num_stes * DR_STE_SIZE; 406 } 407 408 *data = kzalloc(alloc_size, GFP_KERNEL); 409 if (!*data) 410 return -ENOMEM; 411 412 return 0; 413 } 414 415 /** 416 * mlx5dr_send_postsend_ste: write size bytes into offset from the hw cm. 417 * 418 * @dmn: Domain 419 * @ste: The ste struct that contains the data (at 420 * least part of it) 421 * @data: The real data to send size data 422 * @size: for writing. 423 * @offset: The offset from the icm mapped data to 424 * start write to this for write only part of the 425 * buffer. 426 * 427 * Return: 0 on success. 428 */ 429 int mlx5dr_send_postsend_ste(struct mlx5dr_domain *dmn, struct mlx5dr_ste *ste, 430 u8 *data, u16 size, u16 offset) 431 { 432 struct postsend_info send_info = {}; 433 434 send_info.write.addr = (uintptr_t)data; 435 send_info.write.length = size; 436 send_info.write.lkey = 0; 437 send_info.remote_addr = mlx5dr_ste_get_mr_addr(ste) + offset; 438 send_info.rkey = ste->htbl->chunk->rkey; 439 440 return dr_postsend_icm_data(dmn, &send_info); 441 } 442 443 int mlx5dr_send_postsend_htbl(struct mlx5dr_domain *dmn, 444 struct mlx5dr_ste_htbl *htbl, 445 u8 *formatted_ste, u8 *mask) 446 { 447 u32 byte_size = htbl->chunk->byte_size; 448 int num_stes_per_iter; 449 int iterations; 450 u8 *data; 451 int ret; 452 int i; 453 int j; 454 455 ret = dr_get_tbl_copy_details(dmn, htbl, &data, &byte_size, 456 &iterations, &num_stes_per_iter); 457 if (ret) 458 return ret; 459 460 /* Send the data iteration times */ 461 for (i = 0; i < iterations; i++) { 462 u32 ste_index = i * (byte_size / DR_STE_SIZE); 463 struct postsend_info send_info = {}; 464 465 /* Copy all ste's on the data buffer 466 * need to add the bit_mask 467 */ 468 for (j = 0; j < num_stes_per_iter; j++) { 469 u8 *hw_ste = htbl->ste_arr[ste_index + j].hw_ste; 470 u32 ste_off = j * DR_STE_SIZE; 471 472 if (mlx5dr_ste_is_not_valid_entry(hw_ste)) { 473 memcpy(data + ste_off, 474 formatted_ste, DR_STE_SIZE); 475 } else { 476 /* Copy data */ 477 memcpy(data + ste_off, 478 htbl->ste_arr[ste_index + j].hw_ste, 479 DR_STE_SIZE_REDUCED); 480 /* Copy bit_mask */ 481 memcpy(data + ste_off + DR_STE_SIZE_REDUCED, 482 mask, DR_STE_SIZE_MASK); 483 } 484 } 485 486 send_info.write.addr = (uintptr_t)data; 487 send_info.write.length = byte_size; 488 send_info.write.lkey = 0; 489 send_info.remote_addr = 490 mlx5dr_ste_get_mr_addr(htbl->ste_arr + ste_index); 491 send_info.rkey = htbl->chunk->rkey; 492 493 ret = dr_postsend_icm_data(dmn, &send_info); 494 if (ret) 495 goto out_free; 496 } 497 498 out_free: 499 kfree(data); 500 return ret; 501 } 502 503 /* Initialize htble with default STEs */ 504 int mlx5dr_send_postsend_formatted_htbl(struct mlx5dr_domain *dmn, 505 struct mlx5dr_ste_htbl *htbl, 506 u8 *ste_init_data, 507 bool update_hw_ste) 508 { 509 u32 byte_size = htbl->chunk->byte_size; 510 int iterations; 511 int num_stes; 512 u8 *data; 513 int ret; 514 int i; 515 516 ret = dr_get_tbl_copy_details(dmn, htbl, &data, &byte_size, 517 &iterations, &num_stes); 518 if (ret) 519 return ret; 520 521 for (i = 0; i < num_stes; i++) { 522 u8 *copy_dst; 523 524 /* Copy the same ste on the data buffer */ 525 copy_dst = data + i * DR_STE_SIZE; 526 memcpy(copy_dst, ste_init_data, DR_STE_SIZE); 527 528 if (update_hw_ste) { 529 /* Copy the reduced ste to hash table ste_arr */ 530 copy_dst = htbl->hw_ste_arr + i * DR_STE_SIZE_REDUCED; 531 memcpy(copy_dst, ste_init_data, DR_STE_SIZE_REDUCED); 532 } 533 } 534 535 /* Send the data iteration times */ 536 for (i = 0; i < iterations; i++) { 537 u8 ste_index = i * (byte_size / DR_STE_SIZE); 538 struct postsend_info send_info = {}; 539 540 send_info.write.addr = (uintptr_t)data; 541 send_info.write.length = byte_size; 542 send_info.write.lkey = 0; 543 send_info.remote_addr = 544 mlx5dr_ste_get_mr_addr(htbl->ste_arr + ste_index); 545 send_info.rkey = htbl->chunk->rkey; 546 547 ret = dr_postsend_icm_data(dmn, &send_info); 548 if (ret) 549 goto out_free; 550 } 551 552 out_free: 553 kfree(data); 554 return ret; 555 } 556 557 int mlx5dr_send_postsend_action(struct mlx5dr_domain *dmn, 558 struct mlx5dr_action *action) 559 { 560 struct postsend_info send_info = {}; 561 int ret; 562 563 send_info.write.addr = (uintptr_t)action->rewrite.data; 564 send_info.write.length = action->rewrite.num_of_actions * 565 DR_MODIFY_ACTION_SIZE; 566 send_info.write.lkey = 0; 567 send_info.remote_addr = action->rewrite.chunk->mr_addr; 568 send_info.rkey = action->rewrite.chunk->rkey; 569 570 ret = dr_postsend_icm_data(dmn, &send_info); 571 572 return ret; 573 } 574 575 static int dr_modify_qp_rst2init(struct mlx5_core_dev *mdev, 576 struct mlx5dr_qp *dr_qp, 577 int port) 578 { 579 u32 in[MLX5_ST_SZ_DW(rst2init_qp_in)] = {}; 580 void *qpc; 581 582 qpc = MLX5_ADDR_OF(rst2init_qp_in, in, qpc); 583 584 MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, port); 585 MLX5_SET(qpc, qpc, pm_state, MLX5_QPC_PM_STATE_MIGRATED); 586 MLX5_SET(qpc, qpc, rre, 1); 587 MLX5_SET(qpc, qpc, rwe, 1); 588 589 MLX5_SET(rst2init_qp_in, in, opcode, MLX5_CMD_OP_RST2INIT_QP); 590 MLX5_SET(rst2init_qp_in, in, qpn, dr_qp->qpn); 591 592 return mlx5_cmd_exec_in(mdev, rst2init_qp, in); 593 } 594 595 static int dr_cmd_modify_qp_rtr2rts(struct mlx5_core_dev *mdev, 596 struct mlx5dr_qp *dr_qp, 597 struct dr_qp_rts_attr *attr) 598 { 599 u32 in[MLX5_ST_SZ_DW(rtr2rts_qp_in)] = {}; 600 void *qpc; 601 602 qpc = MLX5_ADDR_OF(rtr2rts_qp_in, in, qpc); 603 604 MLX5_SET(rtr2rts_qp_in, in, qpn, dr_qp->qpn); 605 606 MLX5_SET(qpc, qpc, retry_count, attr->retry_cnt); 607 MLX5_SET(qpc, qpc, rnr_retry, attr->rnr_retry); 608 609 MLX5_SET(rtr2rts_qp_in, in, opcode, MLX5_CMD_OP_RTR2RTS_QP); 610 MLX5_SET(rtr2rts_qp_in, in, qpn, dr_qp->qpn); 611 612 return mlx5_cmd_exec_in(mdev, rtr2rts_qp, in); 613 } 614 615 static int dr_cmd_modify_qp_init2rtr(struct mlx5_core_dev *mdev, 616 struct mlx5dr_qp *dr_qp, 617 struct dr_qp_rtr_attr *attr) 618 { 619 u32 in[MLX5_ST_SZ_DW(init2rtr_qp_in)] = {}; 620 void *qpc; 621 622 qpc = MLX5_ADDR_OF(init2rtr_qp_in, in, qpc); 623 624 MLX5_SET(init2rtr_qp_in, in, qpn, dr_qp->qpn); 625 626 MLX5_SET(qpc, qpc, mtu, attr->mtu); 627 MLX5_SET(qpc, qpc, log_msg_max, DR_CHUNK_SIZE_MAX - 1); 628 MLX5_SET(qpc, qpc, remote_qpn, attr->qp_num); 629 memcpy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rmac_47_32), 630 attr->dgid_attr.mac, sizeof(attr->dgid_attr.mac)); 631 memcpy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rgid_rip), 632 attr->dgid_attr.gid, sizeof(attr->dgid_attr.gid)); 633 MLX5_SET(qpc, qpc, primary_address_path.src_addr_index, 634 attr->sgid_index); 635 636 if (attr->dgid_attr.roce_ver == MLX5_ROCE_VERSION_2) 637 MLX5_SET(qpc, qpc, primary_address_path.udp_sport, 638 attr->udp_src_port); 639 640 MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, attr->port_num); 641 MLX5_SET(qpc, qpc, min_rnr_nak, 1); 642 643 MLX5_SET(init2rtr_qp_in, in, opcode, MLX5_CMD_OP_INIT2RTR_QP); 644 MLX5_SET(init2rtr_qp_in, in, qpn, dr_qp->qpn); 645 646 return mlx5_cmd_exec_in(mdev, init2rtr_qp, in); 647 } 648 649 static int dr_prepare_qp_to_rts(struct mlx5dr_domain *dmn) 650 { 651 struct mlx5dr_qp *dr_qp = dmn->send_ring->qp; 652 struct dr_qp_rts_attr rts_attr = {}; 653 struct dr_qp_rtr_attr rtr_attr = {}; 654 enum ib_mtu mtu = IB_MTU_1024; 655 u16 gid_index = 0; 656 int port = 1; 657 int ret; 658 659 /* Init */ 660 ret = dr_modify_qp_rst2init(dmn->mdev, dr_qp, port); 661 if (ret) { 662 mlx5dr_err(dmn, "Failed modify QP rst2init\n"); 663 return ret; 664 } 665 666 /* RTR */ 667 ret = mlx5dr_cmd_query_gid(dmn->mdev, port, gid_index, &rtr_attr.dgid_attr); 668 if (ret) 669 return ret; 670 671 rtr_attr.mtu = mtu; 672 rtr_attr.qp_num = dr_qp->qpn; 673 rtr_attr.min_rnr_timer = 12; 674 rtr_attr.port_num = port; 675 rtr_attr.sgid_index = gid_index; 676 rtr_attr.udp_src_port = dmn->info.caps.roce_min_src_udp; 677 678 ret = dr_cmd_modify_qp_init2rtr(dmn->mdev, dr_qp, &rtr_attr); 679 if (ret) { 680 mlx5dr_err(dmn, "Failed modify QP init2rtr\n"); 681 return ret; 682 } 683 684 /* RTS */ 685 rts_attr.timeout = 14; 686 rts_attr.retry_cnt = 7; 687 rts_attr.rnr_retry = 7; 688 689 ret = dr_cmd_modify_qp_rtr2rts(dmn->mdev, dr_qp, &rts_attr); 690 if (ret) { 691 mlx5dr_err(dmn, "Failed modify QP rtr2rts\n"); 692 return ret; 693 } 694 695 return 0; 696 } 697 698 static void dr_cq_complete(struct mlx5_core_cq *mcq, 699 struct mlx5_eqe *eqe) 700 { 701 pr_err("CQ completion CQ: #%u\n", mcq->cqn); 702 } 703 704 static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev, 705 struct mlx5_uars_page *uar, 706 size_t ncqe) 707 { 708 u32 temp_cqc[MLX5_ST_SZ_DW(cqc)] = {}; 709 u32 out[MLX5_ST_SZ_DW(create_cq_out)]; 710 struct mlx5_wq_param wqp; 711 struct mlx5_cqe64 *cqe; 712 struct mlx5dr_cq *cq; 713 int inlen, err, eqn; 714 unsigned int irqn; 715 void *cqc, *in; 716 __be64 *pas; 717 int vector; 718 u32 i; 719 720 cq = kzalloc(sizeof(*cq), GFP_KERNEL); 721 if (!cq) 722 return NULL; 723 724 ncqe = roundup_pow_of_two(ncqe); 725 MLX5_SET(cqc, temp_cqc, log_cq_size, ilog2(ncqe)); 726 727 wqp.buf_numa_node = mdev->priv.numa_node; 728 wqp.db_numa_node = mdev->priv.numa_node; 729 730 err = mlx5_cqwq_create(mdev, &wqp, temp_cqc, &cq->wq, 731 &cq->wq_ctrl); 732 if (err) 733 goto out; 734 735 for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) { 736 cqe = mlx5_cqwq_get_wqe(&cq->wq, i); 737 cqe->op_own = MLX5_CQE_INVALID << 4 | MLX5_CQE_OWNER_MASK; 738 } 739 740 inlen = MLX5_ST_SZ_BYTES(create_cq_in) + 741 sizeof(u64) * cq->wq_ctrl.buf.npages; 742 in = kvzalloc(inlen, GFP_KERNEL); 743 if (!in) 744 goto err_cqwq; 745 746 vector = raw_smp_processor_id() % mlx5_comp_vectors_count(mdev); 747 err = mlx5_vector2eqn(mdev, vector, &eqn, &irqn); 748 if (err) { 749 kvfree(in); 750 goto err_cqwq; 751 } 752 753 cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context); 754 MLX5_SET(cqc, cqc, log_cq_size, ilog2(ncqe)); 755 MLX5_SET(cqc, cqc, c_eqn, eqn); 756 MLX5_SET(cqc, cqc, uar_page, uar->index); 757 MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift - 758 MLX5_ADAPTER_PAGE_SHIFT); 759 MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma); 760 761 pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas); 762 mlx5_fill_page_frag_array(&cq->wq_ctrl.buf, pas); 763 764 cq->mcq.comp = dr_cq_complete; 765 766 err = mlx5_core_create_cq(mdev, &cq->mcq, in, inlen, out, sizeof(out)); 767 kvfree(in); 768 769 if (err) 770 goto err_cqwq; 771 772 cq->mcq.cqe_sz = 64; 773 cq->mcq.set_ci_db = cq->wq_ctrl.db.db; 774 cq->mcq.arm_db = cq->wq_ctrl.db.db + 1; 775 *cq->mcq.set_ci_db = 0; 776 777 /* set no-zero value, in order to avoid the HW to run db-recovery on 778 * CQ that used in polling mode. 779 */ 780 *cq->mcq.arm_db = cpu_to_be32(2 << 28); 781 782 cq->mcq.vector = 0; 783 cq->mcq.irqn = irqn; 784 cq->mcq.uar = uar; 785 786 return cq; 787 788 err_cqwq: 789 mlx5_wq_destroy(&cq->wq_ctrl); 790 out: 791 kfree(cq); 792 return NULL; 793 } 794 795 static void dr_destroy_cq(struct mlx5_core_dev *mdev, struct mlx5dr_cq *cq) 796 { 797 mlx5_core_destroy_cq(mdev, &cq->mcq); 798 mlx5_wq_destroy(&cq->wq_ctrl); 799 kfree(cq); 800 } 801 802 static int 803 dr_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, struct mlx5_core_mkey *mkey) 804 { 805 u32 in[MLX5_ST_SZ_DW(create_mkey_in)] = {}; 806 void *mkc; 807 808 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); 809 MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA); 810 MLX5_SET(mkc, mkc, a, 1); 811 MLX5_SET(mkc, mkc, rw, 1); 812 MLX5_SET(mkc, mkc, rr, 1); 813 MLX5_SET(mkc, mkc, lw, 1); 814 MLX5_SET(mkc, mkc, lr, 1); 815 816 MLX5_SET(mkc, mkc, pd, pdn); 817 MLX5_SET(mkc, mkc, length64, 1); 818 MLX5_SET(mkc, mkc, qpn, 0xffffff); 819 820 return mlx5_core_create_mkey(mdev, mkey, in, sizeof(in)); 821 } 822 823 static struct mlx5dr_mr *dr_reg_mr(struct mlx5_core_dev *mdev, 824 u32 pdn, void *buf, size_t size) 825 { 826 struct mlx5dr_mr *mr = kzalloc(sizeof(*mr), GFP_KERNEL); 827 struct device *dma_device; 828 dma_addr_t dma_addr; 829 int err; 830 831 if (!mr) 832 return NULL; 833 834 dma_device = &mdev->pdev->dev; 835 dma_addr = dma_map_single(dma_device, buf, size, 836 DMA_BIDIRECTIONAL); 837 err = dma_mapping_error(dma_device, dma_addr); 838 if (err) { 839 mlx5_core_warn(mdev, "Can't dma buf\n"); 840 kfree(mr); 841 return NULL; 842 } 843 844 err = dr_create_mkey(mdev, pdn, &mr->mkey); 845 if (err) { 846 mlx5_core_warn(mdev, "Can't create mkey\n"); 847 dma_unmap_single(dma_device, dma_addr, size, 848 DMA_BIDIRECTIONAL); 849 kfree(mr); 850 return NULL; 851 } 852 853 mr->dma_addr = dma_addr; 854 mr->size = size; 855 mr->addr = buf; 856 857 return mr; 858 } 859 860 static void dr_dereg_mr(struct mlx5_core_dev *mdev, struct mlx5dr_mr *mr) 861 { 862 mlx5_core_destroy_mkey(mdev, &mr->mkey); 863 dma_unmap_single(&mdev->pdev->dev, mr->dma_addr, mr->size, 864 DMA_BIDIRECTIONAL); 865 kfree(mr); 866 } 867 868 int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn) 869 { 870 struct dr_qp_init_attr init_attr = {}; 871 int cq_size; 872 int size; 873 int ret; 874 875 dmn->send_ring = kzalloc(sizeof(*dmn->send_ring), GFP_KERNEL); 876 if (!dmn->send_ring) 877 return -ENOMEM; 878 879 cq_size = QUEUE_SIZE + 1; 880 dmn->send_ring->cq = dr_create_cq(dmn->mdev, dmn->uar, cq_size); 881 if (!dmn->send_ring->cq) { 882 mlx5dr_err(dmn, "Failed creating CQ\n"); 883 ret = -ENOMEM; 884 goto free_send_ring; 885 } 886 887 init_attr.cqn = dmn->send_ring->cq->mcq.cqn; 888 init_attr.pdn = dmn->pdn; 889 init_attr.uar = dmn->uar; 890 init_attr.max_send_wr = QUEUE_SIZE; 891 spin_lock_init(&dmn->send_ring->lock); 892 893 dmn->send_ring->qp = dr_create_rc_qp(dmn->mdev, &init_attr); 894 if (!dmn->send_ring->qp) { 895 mlx5dr_err(dmn, "Failed creating QP\n"); 896 ret = -ENOMEM; 897 goto clean_cq; 898 } 899 900 dmn->send_ring->cq->qp = dmn->send_ring->qp; 901 902 dmn->info.max_send_wr = QUEUE_SIZE; 903 dmn->info.max_inline_size = min(dmn->send_ring->qp->max_inline_data, 904 DR_STE_SIZE); 905 906 dmn->send_ring->signal_th = dmn->info.max_send_wr / 907 SIGNAL_PER_DIV_QUEUE; 908 909 /* Prepare qp to be used */ 910 ret = dr_prepare_qp_to_rts(dmn); 911 if (ret) 912 goto clean_qp; 913 914 dmn->send_ring->max_post_send_size = 915 mlx5dr_icm_pool_chunk_size_to_byte(DR_CHUNK_SIZE_1K, 916 DR_ICM_TYPE_STE); 917 918 /* Allocating the max size as a buffer for writing */ 919 size = dmn->send_ring->signal_th * dmn->send_ring->max_post_send_size; 920 dmn->send_ring->buf = kzalloc(size, GFP_KERNEL); 921 if (!dmn->send_ring->buf) { 922 ret = -ENOMEM; 923 goto clean_qp; 924 } 925 926 dmn->send_ring->buf_size = size; 927 928 dmn->send_ring->mr = dr_reg_mr(dmn->mdev, 929 dmn->pdn, dmn->send_ring->buf, size); 930 if (!dmn->send_ring->mr) { 931 ret = -ENOMEM; 932 goto free_mem; 933 } 934 935 dmn->send_ring->sync_mr = dr_reg_mr(dmn->mdev, 936 dmn->pdn, dmn->send_ring->sync_buff, 937 MIN_READ_SYNC); 938 if (!dmn->send_ring->sync_mr) { 939 ret = -ENOMEM; 940 goto clean_mr; 941 } 942 943 return 0; 944 945 clean_mr: 946 dr_dereg_mr(dmn->mdev, dmn->send_ring->mr); 947 free_mem: 948 kfree(dmn->send_ring->buf); 949 clean_qp: 950 dr_destroy_qp(dmn->mdev, dmn->send_ring->qp); 951 clean_cq: 952 dr_destroy_cq(dmn->mdev, dmn->send_ring->cq); 953 free_send_ring: 954 kfree(dmn->send_ring); 955 956 return ret; 957 } 958 959 void mlx5dr_send_ring_free(struct mlx5dr_domain *dmn, 960 struct mlx5dr_send_ring *send_ring) 961 { 962 dr_destroy_qp(dmn->mdev, send_ring->qp); 963 dr_destroy_cq(dmn->mdev, send_ring->cq); 964 dr_dereg_mr(dmn->mdev, send_ring->sync_mr); 965 dr_dereg_mr(dmn->mdev, send_ring->mr); 966 kfree(send_ring->buf); 967 kfree(send_ring); 968 } 969 970 int mlx5dr_send_ring_force_drain(struct mlx5dr_domain *dmn) 971 { 972 struct mlx5dr_send_ring *send_ring = dmn->send_ring; 973 struct postsend_info send_info = {}; 974 u8 data[DR_STE_SIZE]; 975 int num_of_sends_req; 976 int ret; 977 int i; 978 979 /* Sending this amount of requests makes sure we will get drain */ 980 num_of_sends_req = send_ring->signal_th * TH_NUMS_TO_DRAIN / 2; 981 982 /* Send fake requests forcing the last to be signaled */ 983 send_info.write.addr = (uintptr_t)data; 984 send_info.write.length = DR_STE_SIZE; 985 send_info.write.lkey = 0; 986 /* Using the sync_mr in order to write/read */ 987 send_info.remote_addr = (uintptr_t)send_ring->sync_mr->addr; 988 send_info.rkey = send_ring->sync_mr->mkey.key; 989 990 for (i = 0; i < num_of_sends_req; i++) { 991 ret = dr_postsend_icm_data(dmn, &send_info); 992 if (ret) 993 return ret; 994 } 995 996 spin_lock(&send_ring->lock); 997 ret = dr_handle_pending_wc(dmn, send_ring); 998 spin_unlock(&send_ring->lock); 999 1000 return ret; 1001 } 1002