// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause

/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
/*          Kai Shen <kaishen@linux.alibaba.com> */
/* Copyright (c) 2020-2021, Alibaba Group */
/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
/* Copyright (c) 2008-2019, IBM Corporation */

#include "erdma_cm.h"
#include "erdma_verbs.h"

void erdma_qp_llp_close(struct erdma_qp *qp)
{
	struct erdma_qp_attrs qp_attrs;

	down_write(&qp->state_lock);

	switch (qp->attrs.state) {
	case ERDMA_QP_STATE_RTS:
	case ERDMA_QP_STATE_RTR:
	case ERDMA_QP_STATE_IDLE:
	case ERDMA_QP_STATE_TERMINATE:
		qp_attrs.state = ERDMA_QP_STATE_CLOSING;
		erdma_modify_qp_internal(qp, &qp_attrs, ERDMA_QP_ATTR_STATE);
		break;
	case ERDMA_QP_STATE_CLOSING:
		qp->attrs.state = ERDMA_QP_STATE_IDLE;
		break;
	default:
		break;
	}

	if (qp->cep) {
		erdma_cep_put(qp->cep);
		qp->cep = NULL;
	}

	up_write(&qp->state_lock);
}

struct ib_qp *erdma_get_ibqp(struct ib_device *ibdev, int id)
{
	struct erdma_qp *qp = find_qp_by_qpn(to_edev(ibdev), id);

	if (qp)
		return &qp->ibqp;

	return NULL;
}

static int erdma_modify_qp_state_to_rts(struct erdma_qp *qp,
					struct erdma_qp_attrs *attrs,
					enum erdma_qp_attr_mask mask)
{
	int ret;
	struct erdma_dev *dev = qp->dev;
	struct erdma_cmdq_modify_qp_req req;
	struct tcp_sock *tp;
	struct erdma_cep *cep = qp->cep;
	struct sockaddr_storage local_addr, remote_addr;

	if (!(mask & ERDMA_QP_ATTR_LLP_HANDLE))
		return -EINVAL;

	if (!(mask & ERDMA_QP_ATTR_MPA))
		return -EINVAL;

	ret = getname_local(cep->sock, &local_addr);
	if (ret < 0)
		return ret;

	ret = getname_peer(cep->sock, &remote_addr);
	if (ret < 0)
		return ret;

	qp->attrs.state = ERDMA_QP_STATE_RTS;

	tp = tcp_sk(qp->cep->sock->sk);

	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
				CMDQ_OPCODE_MODIFY_QP);

	req.cfg = FIELD_PREP(ERDMA_CMD_MODIFY_QP_STATE_MASK, qp->attrs.state) |
		  FIELD_PREP(ERDMA_CMD_MODIFY_QP_CC_MASK, qp->attrs.cc) |
		  FIELD_PREP(ERDMA_CMD_MODIFY_QP_QPN_MASK, QP_ID(qp));

	req.cookie = be32_to_cpu(qp->cep->mpa.ext_data.cookie);
	req.dip = to_sockaddr_in(remote_addr).sin_addr.s_addr;
	req.sip = to_sockaddr_in(local_addr).sin_addr.s_addr;
	req.dport = to_sockaddr_in(remote_addr).sin_port;
	req.sport = to_sockaddr_in(local_addr).sin_port;

	req.send_nxt = tp->snd_nxt;
	/* rsvd tcp seq for mpa-rsp in server. */
	if (qp->attrs.qp_type == ERDMA_QP_PASSIVE)
		req.send_nxt += MPA_DEFAULT_HDR_LEN + qp->attrs.pd_len;
	req.recv_nxt = tp->rcv_nxt;

	return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
}

static int erdma_modify_qp_state_to_stop(struct erdma_qp *qp,
					 struct erdma_qp_attrs *attrs,
					 enum erdma_qp_attr_mask mask)
{
	struct erdma_dev *dev = qp->dev;
	struct erdma_cmdq_modify_qp_req req;

	qp->attrs.state = attrs->state;

	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
				CMDQ_OPCODE_MODIFY_QP);

	req.cfg = FIELD_PREP(ERDMA_CMD_MODIFY_QP_STATE_MASK, attrs->state) |
		  FIELD_PREP(ERDMA_CMD_MODIFY_QP_QPN_MASK, QP_ID(qp));

	return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
}

int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs,
			     enum erdma_qp_attr_mask mask)
{
	int drop_conn, ret = 0;

	if (!mask)
		return 0;

	if (!(mask & ERDMA_QP_ATTR_STATE))
		return 0;

	switch (qp->attrs.state) {
	case ERDMA_QP_STATE_IDLE:
	case ERDMA_QP_STATE_RTR:
		if (attrs->state == ERDMA_QP_STATE_RTS) {
			ret = erdma_modify_qp_state_to_rts(qp, attrs, mask);
		} else if (attrs->state == ERDMA_QP_STATE_ERROR) {
			qp->attrs.state = ERDMA_QP_STATE_ERROR;
			if (qp->cep) {
				erdma_cep_put(qp->cep);
				qp->cep = NULL;
			}
			ret = erdma_modify_qp_state_to_stop(qp, attrs, mask);
		}
		break;
	case ERDMA_QP_STATE_RTS:
		drop_conn = 0;

		if (attrs->state == ERDMA_QP_STATE_CLOSING) {
			ret = erdma_modify_qp_state_to_stop(qp, attrs, mask);
			drop_conn = 1;
		} else if (attrs->state == ERDMA_QP_STATE_TERMINATE) {
			qp->attrs.state = ERDMA_QP_STATE_TERMINATE;
			ret = erdma_modify_qp_state_to_stop(qp, attrs, mask);
			drop_conn = 1;
		} else if (attrs->state == ERDMA_QP_STATE_ERROR) {
			ret = erdma_modify_qp_state_to_stop(qp, attrs, mask);
			qp->attrs.state = ERDMA_QP_STATE_ERROR;
			drop_conn = 1;
		}

		if (drop_conn)
			erdma_qp_cm_drop(qp);

		break;
	case ERDMA_QP_STATE_TERMINATE:
		if (attrs->state == ERDMA_QP_STATE_ERROR)
			qp->attrs.state = ERDMA_QP_STATE_ERROR;
		break;
	case ERDMA_QP_STATE_CLOSING:
		if (attrs->state == ERDMA_QP_STATE_IDLE) {
			qp->attrs.state = ERDMA_QP_STATE_IDLE;
		} else if (attrs->state == ERDMA_QP_STATE_ERROR) {
			ret = erdma_modify_qp_state_to_stop(qp, attrs, mask);
			qp->attrs.state = ERDMA_QP_STATE_ERROR;
		} else if (attrs->state != ERDMA_QP_STATE_CLOSING) {
			return -ECONNABORTED;
		}
		break;
	default:
		break;
	}

	return ret;
}

static void erdma_qp_safe_free(struct kref *ref)
{
	struct erdma_qp *qp = container_of(ref, struct erdma_qp, ref);

	complete(&qp->safe_free);
}

void erdma_qp_put(struct erdma_qp *qp)
{
	WARN_ON(kref_read(&qp->ref) < 1);
	kref_put(&qp->ref, erdma_qp_safe_free);
}

void erdma_qp_get(struct erdma_qp *qp)
{
	kref_get(&qp->ref);
}

static int fill_inline_data(struct erdma_qp *qp,
			    const struct ib_send_wr *send_wr, u16 wqe_idx,
			    u32 sgl_offset, __le32 *length_field)
{
	u32 remain_size, copy_size, data_off, bytes = 0;
	char *data;
	int i = 0;

	wqe_idx += (sgl_offset >> SQEBB_SHIFT);
	sgl_offset &= (SQEBB_SIZE - 1);
	data = get_queue_entry(qp->kern_qp.sq_buf, wqe_idx, qp->attrs.sq_size,
			       SQEBB_SHIFT);

	while (i < send_wr->num_sge) {
		bytes += send_wr->sg_list[i].length;
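		/* The total inline payload across all SGEs must not
		 * exceed ERDMA_MAX_INLINE bytes.
		 */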
		if (bytes > (int)ERDMA_MAX_INLINE)
			return -EINVAL;

		remain_size = send_wr->sg_list[i].length;
		data_off = 0;

		while (1) {
			copy_size = min(remain_size, SQEBB_SIZE - sgl_offset);

			memcpy(data + sgl_offset,
			       (void *)(uintptr_t)send_wr->sg_list[i].addr +
				       data_off,
			       copy_size);
			remain_size -= copy_size;
			data_off += copy_size;
			sgl_offset += copy_size;
			wqe_idx += (sgl_offset >> SQEBB_SHIFT);
			sgl_offset &= (SQEBB_SIZE - 1);

			data = get_queue_entry(qp->kern_qp.sq_buf, wqe_idx,
					       qp->attrs.sq_size, SQEBB_SHIFT);
			if (!remain_size)
				break;
		}

		i++;
	}
	*length_field = cpu_to_le32(bytes);

	return bytes;
}

static int fill_sgl(struct erdma_qp *qp, const struct ib_send_wr *send_wr,
		    u16 wqe_idx, u32 sgl_offset, __le32 *length_field)
{
	int i = 0;
	u32 bytes = 0;
	char *sgl;

	if (send_wr->num_sge > qp->dev->attrs.max_send_sge)
		return -EINVAL;

	if (sgl_offset & 0xF)
		return -EINVAL;

	while (i < send_wr->num_sge) {
		wqe_idx += (sgl_offset >> SQEBB_SHIFT);
		sgl_offset &= (SQEBB_SIZE - 1);
		sgl = get_queue_entry(qp->kern_qp.sq_buf, wqe_idx,
				      qp->attrs.sq_size, SQEBB_SHIFT);

		bytes += send_wr->sg_list[i].length;
		memcpy(sgl + sgl_offset, &send_wr->sg_list[i],
		       sizeof(struct ib_sge));

		sgl_offset += sizeof(struct ib_sge);
		i++;
	}

	*length_field = cpu_to_le32(bytes);
	return 0;
}

static int erdma_push_one_sqe(struct erdma_qp *qp, u16 *pi,
			      const struct ib_send_wr *send_wr)
{
	u32 wqe_size, wqebb_cnt, hw_op, flags, sgl_offset;
	u32 idx = *pi & (qp->attrs.sq_size - 1);
	enum ib_wr_opcode op = send_wr->opcode;
	struct erdma_readreq_sqe *read_sqe;
	struct erdma_reg_mr_sqe *regmr_sge;
	struct erdma_write_sqe *write_sqe;
	struct erdma_send_sqe *send_sqe;
	struct ib_rdma_wr *rdma_wr;
	struct erdma_mr *mr;
	__le32 *length_field;
	u64 wqe_hdr, *entry;
	struct ib_sge *sge;
	u32 attrs;
	int ret;

	entry = get_queue_entry(qp->kern_qp.sq_buf, idx, qp->attrs.sq_size,
				SQEBB_SHIFT);

	/* Clear the SQE header section. */
	*entry = 0;

	qp->kern_qp.swr_tbl[idx] = send_wr->wr_id;
	flags = send_wr->send_flags;
	wqe_hdr = FIELD_PREP(
		ERDMA_SQE_HDR_CE_MASK,
		((flags & IB_SEND_SIGNALED) || qp->kern_qp.sig_all) ? 1 : 0);
	wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_SE_MASK,
			      flags & IB_SEND_SOLICITED ? 1 : 0);
	wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_FENCE_MASK,
			      flags & IB_SEND_FENCE ? 1 : 0);
	wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_INLINE_MASK,
			      flags & IB_SEND_INLINE ? 1 : 0);
	wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_QPN_MASK, QP_ID(qp));

	switch (op) {
	case IB_WR_RDMA_WRITE:
	case IB_WR_RDMA_WRITE_WITH_IMM:
		hw_op = ERDMA_OP_WRITE;
		if (op == IB_WR_RDMA_WRITE_WITH_IMM)
			hw_op = ERDMA_OP_WRITE_WITH_IMM;
		wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, hw_op);
		rdma_wr = container_of(send_wr, struct ib_rdma_wr, wr);
		write_sqe = (struct erdma_write_sqe *)entry;

		write_sqe->imm_data = send_wr->ex.imm_data;
		write_sqe->sink_stag = cpu_to_le32(rdma_wr->rkey);
		write_sqe->sink_to_h =
			cpu_to_le32(upper_32_bits(rdma_wr->remote_addr));
		write_sqe->sink_to_l =
			cpu_to_le32(lower_32_bits(rdma_wr->remote_addr));

		length_field = &write_sqe->length;
		wqe_size = sizeof(struct erdma_write_sqe);
		sgl_offset = wqe_size;
		break;
	case IB_WR_RDMA_READ:
	case IB_WR_RDMA_READ_WITH_INV:
		read_sqe = (struct erdma_readreq_sqe *)entry;
		if (unlikely(send_wr->num_sge != 1))
			return -EINVAL;
		hw_op = ERDMA_OP_READ;
		if (op == IB_WR_RDMA_READ_WITH_INV) {
			hw_op = ERDMA_OP_READ_WITH_INV;
			read_sqe->invalid_stag =
				cpu_to_le32(send_wr->ex.invalidate_rkey);
		}

		wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, hw_op);
		rdma_wr = container_of(send_wr, struct ib_rdma_wr, wr);
		read_sqe->length = cpu_to_le32(send_wr->sg_list[0].length);
		read_sqe->sink_stag = cpu_to_le32(send_wr->sg_list[0].lkey);
		read_sqe->sink_to_l =
			cpu_to_le32(lower_32_bits(send_wr->sg_list[0].addr));
		read_sqe->sink_to_h =
			cpu_to_le32(upper_32_bits(send_wr->sg_list[0].addr));

		sge = get_queue_entry(qp->kern_qp.sq_buf, idx + 1,
				      qp->attrs.sq_size, SQEBB_SHIFT);
		sge->addr = rdma_wr->remote_addr;
		sge->lkey = rdma_wr->rkey;
		sge->length = send_wr->sg_list[0].length;
		wqe_size = sizeof(struct erdma_readreq_sqe) +
			   send_wr->num_sge * sizeof(struct ib_sge);

		goto out;
	case IB_WR_SEND:
	case IB_WR_SEND_WITH_IMM:
	case IB_WR_SEND_WITH_INV:
		send_sqe = (struct erdma_send_sqe *)entry;
		hw_op = ERDMA_OP_SEND;
		if (op == IB_WR_SEND_WITH_IMM) {
			hw_op = ERDMA_OP_SEND_WITH_IMM;
			send_sqe->imm_data = send_wr->ex.imm_data;
		} else if (op == IB_WR_SEND_WITH_INV) {
			hw_op = ERDMA_OP_SEND_WITH_INV;
			send_sqe->invalid_stag =
				cpu_to_le32(send_wr->ex.invalidate_rkey);
		}
		wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, hw_op);
		length_field = &send_sqe->length;
		wqe_size = sizeof(struct erdma_send_sqe);
		sgl_offset = wqe_size;

		break;
	case IB_WR_REG_MR:
		wqe_hdr |=
			FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, ERDMA_OP_REG_MR);
		regmr_sge = (struct erdma_reg_mr_sqe *)entry;
		mr = to_emr(reg_wr(send_wr)->mr);

		mr->access = ERDMA_MR_ACC_LR |
			     to_erdma_access_flags(reg_wr(send_wr)->access);
		regmr_sge->addr = cpu_to_le64(mr->ibmr.iova);
		regmr_sge->length = cpu_to_le32(mr->ibmr.length);
		regmr_sge->stag = cpu_to_le32(reg_wr(send_wr)->key);
		attrs = FIELD_PREP(ERDMA_SQE_MR_MODE_MASK, 0) |
			FIELD_PREP(ERDMA_SQE_MR_ACCESS_MASK, mr->access) |
			FIELD_PREP(ERDMA_SQE_MR_MTT_CNT_MASK,
				   mr->mem.mtt_nents);

		if (mr->mem.mtt_nents < ERDMA_MAX_INLINE_MTT_ENTRIES) {
			attrs |= FIELD_PREP(ERDMA_SQE_MR_MTT_TYPE_MASK, 0);
			/* Copy SGLs to SQE content to accelerate */
			memcpy(get_queue_entry(qp->kern_qp.sq_buf, idx + 1,
					       qp->attrs.sq_size, SQEBB_SHIFT),
			       mr->mem.mtt_buf, MTT_SIZE(mr->mem.mtt_nents));
			wqe_size = sizeof(struct erdma_reg_mr_sqe) +
				   MTT_SIZE(mr->mem.mtt_nents);
		} else {
			attrs |= FIELD_PREP(ERDMA_SQE_MR_MTT_TYPE_MASK, 1);
			wqe_size = sizeof(struct erdma_reg_mr_sqe);
		}

		regmr_sge->attrs = cpu_to_le32(attrs);
		goto out;
	case IB_WR_LOCAL_INV:
		wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK,
				      ERDMA_OP_LOCAL_INV);
		regmr_sge = (struct erdma_reg_mr_sqe *)entry;
		regmr_sge->stag = cpu_to_le32(send_wr->ex.invalidate_rkey);
		wqe_size = sizeof(struct erdma_reg_mr_sqe);
		goto out;
	default:
		return -EOPNOTSUPP;
	}

	if (flags & IB_SEND_INLINE) {
		ret = fill_inline_data(qp, send_wr, idx, sgl_offset,
				       length_field);
		if (ret < 0)
			return -EINVAL;
		wqe_size += ret;
		wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_SGL_LEN_MASK, ret);
	} else {
		ret = fill_sgl(qp, send_wr, idx, sgl_offset, length_field);
		if (ret)
			return -EINVAL;
		wqe_size += send_wr->num_sge * sizeof(struct ib_sge);
		wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_SGL_LEN_MASK,
				      send_wr->num_sge);
	}

out:
	wqebb_cnt = SQEBB_COUNT(wqe_size);
	wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_WQEBB_CNT_MASK, wqebb_cnt - 1);
	*pi += wqebb_cnt;
	wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_WQEBB_INDEX_MASK, *pi);

	*entry = wqe_hdr;

	return 0;
}

static void kick_sq_db(struct erdma_qp *qp, u16 pi)
{
	u64 db_data = FIELD_PREP(ERDMA_SQE_HDR_QPN_MASK, QP_ID(qp)) |
		      FIELD_PREP(ERDMA_SQE_HDR_WQEBB_INDEX_MASK, pi);

	*(u64 *)qp->kern_qp.sq_db_info = db_data;
	writeq(db_data, qp->kern_qp.hw_sq_db);
}

int erdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *send_wr,
		    const struct ib_send_wr **bad_send_wr)
{
	struct erdma_qp *qp = to_eqp(ibqp);
	int ret = 0;
	const struct ib_send_wr *wr = send_wr;
	unsigned long flags;
	u16 sq_pi;

	if (!send_wr)
		return -EINVAL;

	spin_lock_irqsave(&qp->lock, flags);
	sq_pi = qp->kern_qp.sq_pi;

	while (wr) {
		if ((u16)(sq_pi - qp->kern_qp.sq_ci) >= qp->attrs.sq_size) {
			ret = -ENOMEM;
			*bad_send_wr = send_wr;
			break;
		}

		ret = erdma_push_one_sqe(qp, &sq_pi, wr);
		if (ret) {
			*bad_send_wr = wr;
			break;
		}
		qp->kern_qp.sq_pi = sq_pi;
		kick_sq_db(qp, sq_pi);

		wr = wr->next;
	}
	spin_unlock_irqrestore(&qp->lock, flags);

	return ret;
}

static int erdma_post_recv_one(struct erdma_qp *qp,
			       const struct ib_recv_wr *recv_wr)
{
	struct erdma_rqe *rqe =
		get_queue_entry(qp->kern_qp.rq_buf, qp->kern_qp.rq_pi,
				qp->attrs.rq_size, RQE_SHIFT);

	rqe->qe_idx = cpu_to_le16(qp->kern_qp.rq_pi + 1);
	rqe->qpn = cpu_to_le32(QP_ID(qp));

	if (recv_wr->num_sge == 0) {
		rqe->length = 0;
	} else if (recv_wr->num_sge == 1) {
		rqe->stag = cpu_to_le32(recv_wr->sg_list[0].lkey);
		rqe->to = cpu_to_le64(recv_wr->sg_list[0].addr);
		rqe->length = cpu_to_le32(recv_wr->sg_list[0].length);
	} else {
		return -EINVAL;
	}

	*(u64 *)qp->kern_qp.rq_db_info = *(u64 *)rqe;
	writeq(*(u64 *)rqe, qp->kern_qp.hw_rq_db);

	qp->kern_qp.rwr_tbl[qp->kern_qp.rq_pi & (qp->attrs.rq_size - 1)] =
		recv_wr->wr_id;
	qp->kern_qp.rq_pi++;

	return 0;
}

int erdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *recv_wr,
		    const struct ib_recv_wr **bad_recv_wr)
{
	const struct ib_recv_wr *wr = recv_wr;
	struct erdma_qp *qp = to_eqp(ibqp);
	unsigned long flags;
	int ret = 0;

	spin_lock_irqsave(&qp->lock, flags);

	while (wr) {
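		/* Post WRs in order; stop at the first failure and report
		 * the offending WR through bad_recv_wr.
		 */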
		ret = erdma_post_recv_one(qp, wr);
		if (ret) {
			*bad_recv_wr = wr;
			break;
		}
		wr = wr->next;
	}

	spin_unlock_irqrestore(&qp->lock, flags);
	return ret;
}