1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB 2 /* 3 * Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. 4 */ 5 6 #include <linux/gfp.h> 7 #include <linux/mlx5/qp.h> 8 #include <linux/mlx5/driver.h> 9 #include "wr.h" 10 11 static const u32 mlx5_ib_opcode[] = { 12 [IB_WR_SEND] = MLX5_OPCODE_SEND, 13 [IB_WR_LSO] = MLX5_OPCODE_LSO, 14 [IB_WR_SEND_WITH_IMM] = MLX5_OPCODE_SEND_IMM, 15 [IB_WR_RDMA_WRITE] = MLX5_OPCODE_RDMA_WRITE, 16 [IB_WR_RDMA_WRITE_WITH_IMM] = MLX5_OPCODE_RDMA_WRITE_IMM, 17 [IB_WR_RDMA_READ] = MLX5_OPCODE_RDMA_READ, 18 [IB_WR_ATOMIC_CMP_AND_SWP] = MLX5_OPCODE_ATOMIC_CS, 19 [IB_WR_ATOMIC_FETCH_AND_ADD] = MLX5_OPCODE_ATOMIC_FA, 20 [IB_WR_SEND_WITH_INV] = MLX5_OPCODE_SEND_INVAL, 21 [IB_WR_LOCAL_INV] = MLX5_OPCODE_UMR, 22 [IB_WR_REG_MR] = MLX5_OPCODE_UMR, 23 [IB_WR_MASKED_ATOMIC_CMP_AND_SWP] = MLX5_OPCODE_ATOMIC_MASKED_CS, 24 [IB_WR_MASKED_ATOMIC_FETCH_AND_ADD] = MLX5_OPCODE_ATOMIC_MASKED_FA, 25 [MLX5_IB_WR_UMR] = MLX5_OPCODE_UMR, 26 }; 27 28 /* handle_post_send_edge - Check if we get to SQ edge. If yes, update to the 29 * next nearby edge and get new address translation for current WQE position. 30 * @sq - SQ buffer. 31 * @seg: Current WQE position (16B aligned). 32 * @wqe_sz: Total current WQE size [16B]. 33 * @cur_edge: Updated current edge. 34 */ 35 static inline void handle_post_send_edge(struct mlx5_ib_wq *sq, void **seg, 36 u32 wqe_sz, void **cur_edge) 37 { 38 u32 idx; 39 40 if (likely(*seg != *cur_edge)) 41 return; 42 43 idx = (sq->cur_post + (wqe_sz >> 2)) & (sq->wqe_cnt - 1); 44 *cur_edge = get_sq_edge(sq, idx); 45 46 *seg = mlx5_frag_buf_get_wqe(&sq->fbc, idx); 47 } 48 49 /* memcpy_send_wqe - copy data from src to WQE and update the relevant WQ's 50 * pointers. At the end @seg is aligned to 16B regardless the copied size. 51 * @sq - SQ buffer. 52 * @cur_edge: Updated current edge. 53 * @seg: Current WQE position (16B aligned). 54 * @wqe_sz: Total current WQE size [16B]. 55 * @src: Pointer to copy from. 56 * @n: Number of bytes to copy. 57 */ 58 static inline void memcpy_send_wqe(struct mlx5_ib_wq *sq, void **cur_edge, 59 void **seg, u32 *wqe_sz, const void *src, 60 size_t n) 61 { 62 while (likely(n)) { 63 size_t leftlen = *cur_edge - *seg; 64 size_t copysz = min_t(size_t, leftlen, n); 65 size_t stride; 66 67 memcpy(*seg, src, copysz); 68 69 n -= copysz; 70 src += copysz; 71 stride = !n ? ALIGN(copysz, 16) : copysz; 72 *seg += stride; 73 *wqe_sz += stride >> 4; 74 handle_post_send_edge(sq, seg, *wqe_sz, cur_edge); 75 } 76 } 77 78 static int mlx5_wq_overflow(struct mlx5_ib_wq *wq, int nreq, 79 struct ib_cq *ib_cq) 80 { 81 struct mlx5_ib_cq *cq; 82 unsigned int cur; 83 84 cur = wq->head - wq->tail; 85 if (likely(cur + nreq < wq->max_post)) 86 return 0; 87 88 cq = to_mcq(ib_cq); 89 spin_lock(&cq->lock); 90 cur = wq->head - wq->tail; 91 spin_unlock(&cq->lock); 92 93 return cur + nreq >= wq->max_post; 94 } 95 96 static __always_inline void set_raddr_seg(struct mlx5_wqe_raddr_seg *rseg, 97 u64 remote_addr, u32 rkey) 98 { 99 rseg->raddr = cpu_to_be64(remote_addr); 100 rseg->rkey = cpu_to_be32(rkey); 101 rseg->reserved = 0; 102 } 103 104 static void set_eth_seg(const struct ib_send_wr *wr, struct mlx5_ib_qp *qp, 105 void **seg, int *size, void **cur_edge) 106 { 107 struct mlx5_wqe_eth_seg *eseg = *seg; 108 109 memset(eseg, 0, sizeof(struct mlx5_wqe_eth_seg)); 110 111 if (wr->send_flags & IB_SEND_IP_CSUM) 112 eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM | 113 MLX5_ETH_WQE_L4_CSUM; 114 115 if (wr->opcode == IB_WR_LSO) { 116 struct ib_ud_wr *ud_wr = container_of(wr, struct ib_ud_wr, wr); 117 size_t left, copysz; 118 void *pdata = ud_wr->header; 119 size_t stride; 120 121 left = ud_wr->hlen; 122 eseg->mss = cpu_to_be16(ud_wr->mss); 123 eseg->inline_hdr.sz = cpu_to_be16(left); 124 125 /* memcpy_send_wqe should get a 16B align address. Hence, we 126 * first copy up to the current edge and then, if needed, 127 * continue to memcpy_send_wqe. 128 */ 129 copysz = min_t(u64, *cur_edge - (void *)eseg->inline_hdr.start, 130 left); 131 memcpy(eseg->inline_hdr.start, pdata, copysz); 132 stride = ALIGN(sizeof(struct mlx5_wqe_eth_seg) - 133 sizeof(eseg->inline_hdr.start) + copysz, 16); 134 *size += stride / 16; 135 *seg += stride; 136 137 if (copysz < left) { 138 handle_post_send_edge(&qp->sq, seg, *size, cur_edge); 139 left -= copysz; 140 pdata += copysz; 141 memcpy_send_wqe(&qp->sq, cur_edge, seg, size, pdata, 142 left); 143 } 144 145 return; 146 } 147 148 *seg += sizeof(struct mlx5_wqe_eth_seg); 149 *size += sizeof(struct mlx5_wqe_eth_seg) / 16; 150 } 151 152 static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg, 153 const struct ib_send_wr *wr) 154 { 155 memcpy(&dseg->av, &to_mah(ud_wr(wr)->ah)->av, sizeof(struct mlx5_av)); 156 dseg->av.dqp_dct = 157 cpu_to_be32(ud_wr(wr)->remote_qpn | MLX5_EXTENDED_UD_AV); 158 dseg->av.key.qkey.qkey = cpu_to_be32(ud_wr(wr)->remote_qkey); 159 } 160 161 static void set_data_ptr_seg(struct mlx5_wqe_data_seg *dseg, struct ib_sge *sg) 162 { 163 dseg->byte_count = cpu_to_be32(sg->length); 164 dseg->lkey = cpu_to_be32(sg->lkey); 165 dseg->addr = cpu_to_be64(sg->addr); 166 } 167 168 static u64 get_xlt_octo(u64 bytes) 169 { 170 return ALIGN(bytes, MLX5_IB_UMR_XLT_ALIGNMENT) / 171 MLX5_IB_UMR_OCTOWORD; 172 } 173 174 static __be64 frwr_mkey_mask(bool atomic) 175 { 176 u64 result; 177 178 result = MLX5_MKEY_MASK_LEN | 179 MLX5_MKEY_MASK_PAGE_SIZE | 180 MLX5_MKEY_MASK_START_ADDR | 181 MLX5_MKEY_MASK_EN_RINVAL | 182 MLX5_MKEY_MASK_KEY | 183 MLX5_MKEY_MASK_LR | 184 MLX5_MKEY_MASK_LW | 185 MLX5_MKEY_MASK_RR | 186 MLX5_MKEY_MASK_RW | 187 MLX5_MKEY_MASK_SMALL_FENCE | 188 MLX5_MKEY_MASK_FREE; 189 190 if (atomic) 191 result |= MLX5_MKEY_MASK_A; 192 193 return cpu_to_be64(result); 194 } 195 196 static __be64 sig_mkey_mask(void) 197 { 198 u64 result; 199 200 result = MLX5_MKEY_MASK_LEN | 201 MLX5_MKEY_MASK_PAGE_SIZE | 202 MLX5_MKEY_MASK_START_ADDR | 203 MLX5_MKEY_MASK_EN_SIGERR | 204 MLX5_MKEY_MASK_EN_RINVAL | 205 MLX5_MKEY_MASK_KEY | 206 MLX5_MKEY_MASK_LR | 207 MLX5_MKEY_MASK_LW | 208 MLX5_MKEY_MASK_RR | 209 MLX5_MKEY_MASK_RW | 210 MLX5_MKEY_MASK_SMALL_FENCE | 211 MLX5_MKEY_MASK_FREE | 212 MLX5_MKEY_MASK_BSF_EN; 213 214 return cpu_to_be64(result); 215 } 216 217 static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr, 218 struct mlx5_ib_mr *mr, u8 flags, bool atomic) 219 { 220 int size = (mr->mmkey.ndescs + mr->meta_ndescs) * mr->desc_size; 221 222 memset(umr, 0, sizeof(*umr)); 223 224 umr->flags = flags; 225 umr->xlt_octowords = cpu_to_be16(get_xlt_octo(size)); 226 umr->mkey_mask = frwr_mkey_mask(atomic); 227 } 228 229 static void set_linv_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr) 230 { 231 memset(umr, 0, sizeof(*umr)); 232 umr->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE); 233 umr->flags = MLX5_UMR_INLINE; 234 } 235 236 static __be64 get_umr_enable_mr_mask(void) 237 { 238 u64 result; 239 240 result = MLX5_MKEY_MASK_KEY | 241 MLX5_MKEY_MASK_FREE; 242 243 return cpu_to_be64(result); 244 } 245 246 static __be64 get_umr_disable_mr_mask(void) 247 { 248 u64 result; 249 250 result = MLX5_MKEY_MASK_FREE; 251 252 return cpu_to_be64(result); 253 } 254 255 static __be64 get_umr_update_translation_mask(void) 256 { 257 u64 result; 258 259 result = MLX5_MKEY_MASK_LEN | 260 MLX5_MKEY_MASK_PAGE_SIZE | 261 MLX5_MKEY_MASK_START_ADDR; 262 263 return cpu_to_be64(result); 264 } 265 266 static __be64 get_umr_update_access_mask(int atomic, 267 int relaxed_ordering_write, 268 int relaxed_ordering_read) 269 { 270 u64 result; 271 272 result = MLX5_MKEY_MASK_LR | 273 MLX5_MKEY_MASK_LW | 274 MLX5_MKEY_MASK_RR | 275 MLX5_MKEY_MASK_RW; 276 277 if (atomic) 278 result |= MLX5_MKEY_MASK_A; 279 280 if (relaxed_ordering_write) 281 result |= MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE; 282 283 if (relaxed_ordering_read) 284 result |= MLX5_MKEY_MASK_RELAXED_ORDERING_READ; 285 286 return cpu_to_be64(result); 287 } 288 289 static __be64 get_umr_update_pd_mask(void) 290 { 291 u64 result; 292 293 result = MLX5_MKEY_MASK_PD; 294 295 return cpu_to_be64(result); 296 } 297 298 static int umr_check_mkey_mask(struct mlx5_ib_dev *dev, u64 mask) 299 { 300 if (mask & MLX5_MKEY_MASK_PAGE_SIZE && 301 MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled)) 302 return -EPERM; 303 304 if (mask & MLX5_MKEY_MASK_A && 305 MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled)) 306 return -EPERM; 307 308 if (mask & MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE && 309 !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr)) 310 return -EPERM; 311 312 if (mask & MLX5_MKEY_MASK_RELAXED_ORDERING_READ && 313 !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr)) 314 return -EPERM; 315 316 return 0; 317 } 318 319 static int set_reg_umr_segment(struct mlx5_ib_dev *dev, 320 struct mlx5_wqe_umr_ctrl_seg *umr, 321 const struct ib_send_wr *wr) 322 { 323 const struct mlx5_umr_wr *umrwr = umr_wr(wr); 324 325 memset(umr, 0, sizeof(*umr)); 326 327 if (!umrwr->ignore_free_state) { 328 if (wr->send_flags & MLX5_IB_SEND_UMR_FAIL_IF_FREE) 329 /* fail if free */ 330 umr->flags = MLX5_UMR_CHECK_FREE; 331 else 332 /* fail if not free */ 333 umr->flags = MLX5_UMR_CHECK_NOT_FREE; 334 } 335 336 umr->xlt_octowords = cpu_to_be16(get_xlt_octo(umrwr->xlt_size)); 337 if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_XLT) { 338 u64 offset = get_xlt_octo(umrwr->offset); 339 340 umr->xlt_offset = cpu_to_be16(offset & 0xffff); 341 umr->xlt_offset_47_16 = cpu_to_be32(offset >> 16); 342 umr->flags |= MLX5_UMR_TRANSLATION_OFFSET_EN; 343 } 344 if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION) 345 umr->mkey_mask |= get_umr_update_translation_mask(); 346 if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS) { 347 umr->mkey_mask |= get_umr_update_access_mask( 348 !!(MLX5_CAP_GEN(dev->mdev, atomic)), 349 !!(MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr)), 350 !!(MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))); 351 umr->mkey_mask |= get_umr_update_pd_mask(); 352 } 353 if (wr->send_flags & MLX5_IB_SEND_UMR_ENABLE_MR) 354 umr->mkey_mask |= get_umr_enable_mr_mask(); 355 if (wr->send_flags & MLX5_IB_SEND_UMR_DISABLE_MR) 356 umr->mkey_mask |= get_umr_disable_mr_mask(); 357 358 if (!wr->num_sge) 359 umr->flags |= MLX5_UMR_INLINE; 360 361 return umr_check_mkey_mask(dev, be64_to_cpu(umr->mkey_mask)); 362 } 363 364 static u8 get_umr_flags(int acc) 365 { 366 return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX5_PERM_ATOMIC : 0) | 367 (acc & IB_ACCESS_REMOTE_WRITE ? MLX5_PERM_REMOTE_WRITE : 0) | 368 (acc & IB_ACCESS_REMOTE_READ ? MLX5_PERM_REMOTE_READ : 0) | 369 (acc & IB_ACCESS_LOCAL_WRITE ? MLX5_PERM_LOCAL_WRITE : 0) | 370 MLX5_PERM_LOCAL_READ | MLX5_PERM_UMR_EN; 371 } 372 373 static void set_reg_mkey_seg(struct mlx5_mkey_seg *seg, 374 struct mlx5_ib_mr *mr, 375 u32 key, int access) 376 { 377 int ndescs = ALIGN(mr->mmkey.ndescs + mr->meta_ndescs, 8) >> 1; 378 379 memset(seg, 0, sizeof(*seg)); 380 381 if (mr->access_mode == MLX5_MKC_ACCESS_MODE_MTT) 382 seg->log2_page_size = ilog2(mr->ibmr.page_size); 383 else if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS) 384 /* KLMs take twice the size of MTTs */ 385 ndescs *= 2; 386 387 seg->flags = get_umr_flags(access) | mr->access_mode; 388 seg->qpn_mkey7_0 = cpu_to_be32((key & 0xff) | 0xffffff00); 389 seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL); 390 seg->start_addr = cpu_to_be64(mr->ibmr.iova); 391 seg->len = cpu_to_be64(mr->ibmr.length); 392 seg->xlt_oct_size = cpu_to_be32(ndescs); 393 } 394 395 static void set_linv_mkey_seg(struct mlx5_mkey_seg *seg) 396 { 397 memset(seg, 0, sizeof(*seg)); 398 seg->status = MLX5_MKEY_STATUS_FREE; 399 } 400 401 static void set_reg_mkey_segment(struct mlx5_ib_dev *dev, 402 struct mlx5_mkey_seg *seg, 403 const struct ib_send_wr *wr) 404 { 405 const struct mlx5_umr_wr *umrwr = umr_wr(wr); 406 407 memset(seg, 0, sizeof(*seg)); 408 if (wr->send_flags & MLX5_IB_SEND_UMR_DISABLE_MR) 409 MLX5_SET(mkc, seg, free, 1); 410 411 MLX5_SET(mkc, seg, a, 412 !!(umrwr->access_flags & IB_ACCESS_REMOTE_ATOMIC)); 413 MLX5_SET(mkc, seg, rw, 414 !!(umrwr->access_flags & IB_ACCESS_REMOTE_WRITE)); 415 MLX5_SET(mkc, seg, rr, !!(umrwr->access_flags & IB_ACCESS_REMOTE_READ)); 416 MLX5_SET(mkc, seg, lw, !!(umrwr->access_flags & IB_ACCESS_LOCAL_WRITE)); 417 MLX5_SET(mkc, seg, lr, 1); 418 if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr)) 419 MLX5_SET(mkc, seg, relaxed_ordering_write, 420 !!(umrwr->access_flags & IB_ACCESS_RELAXED_ORDERING)); 421 if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr)) 422 MLX5_SET(mkc, seg, relaxed_ordering_read, 423 !!(umrwr->access_flags & IB_ACCESS_RELAXED_ORDERING)); 424 425 if (umrwr->pd) 426 MLX5_SET(mkc, seg, pd, to_mpd(umrwr->pd)->pdn); 427 if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION && 428 !umrwr->length) 429 MLX5_SET(mkc, seg, length64, 1); 430 431 MLX5_SET64(mkc, seg, start_addr, umrwr->virt_addr); 432 MLX5_SET64(mkc, seg, len, umrwr->length); 433 MLX5_SET(mkc, seg, log_page_size, umrwr->page_shift); 434 MLX5_SET(mkc, seg, qpn, 0xffffff); 435 MLX5_SET(mkc, seg, mkey_7_0, mlx5_mkey_variant(umrwr->mkey)); 436 } 437 438 static void set_reg_data_seg(struct mlx5_wqe_data_seg *dseg, 439 struct mlx5_ib_mr *mr, 440 struct mlx5_ib_pd *pd) 441 { 442 int bcount = mr->desc_size * (mr->mmkey.ndescs + mr->meta_ndescs); 443 444 dseg->addr = cpu_to_be64(mr->desc_map); 445 dseg->byte_count = cpu_to_be32(ALIGN(bcount, 64)); 446 dseg->lkey = cpu_to_be32(pd->ibpd.local_dma_lkey); 447 } 448 449 static __be32 send_ieth(const struct ib_send_wr *wr) 450 { 451 switch (wr->opcode) { 452 case IB_WR_SEND_WITH_IMM: 453 case IB_WR_RDMA_WRITE_WITH_IMM: 454 return wr->ex.imm_data; 455 456 case IB_WR_SEND_WITH_INV: 457 return cpu_to_be32(wr->ex.invalidate_rkey); 458 459 default: 460 return 0; 461 } 462 } 463 464 static u8 calc_sig(void *wqe, int size) 465 { 466 u8 *p = wqe; 467 u8 res = 0; 468 int i; 469 470 for (i = 0; i < size; i++) 471 res ^= p[i]; 472 473 return ~res; 474 } 475 476 static u8 wq_sig(void *wqe) 477 { 478 return calc_sig(wqe, (*((u8 *)wqe + 8) & 0x3f) << 4); 479 } 480 481 static int set_data_inl_seg(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr, 482 void **wqe, int *wqe_sz, void **cur_edge) 483 { 484 struct mlx5_wqe_inline_seg *seg; 485 size_t offset; 486 int inl = 0; 487 int i; 488 489 seg = *wqe; 490 *wqe += sizeof(*seg); 491 offset = sizeof(*seg); 492 493 for (i = 0; i < wr->num_sge; i++) { 494 size_t len = wr->sg_list[i].length; 495 void *addr = (void *)(unsigned long)(wr->sg_list[i].addr); 496 497 inl += len; 498 499 if (unlikely(inl > qp->max_inline_data)) 500 return -ENOMEM; 501 502 while (likely(len)) { 503 size_t leftlen; 504 size_t copysz; 505 506 handle_post_send_edge(&qp->sq, wqe, 507 *wqe_sz + (offset >> 4), 508 cur_edge); 509 510 leftlen = *cur_edge - *wqe; 511 copysz = min_t(size_t, leftlen, len); 512 513 memcpy(*wqe, addr, copysz); 514 len -= copysz; 515 addr += copysz; 516 *wqe += copysz; 517 offset += copysz; 518 } 519 } 520 521 seg->byte_count = cpu_to_be32(inl | MLX5_INLINE_SEG); 522 523 *wqe_sz += ALIGN(inl + sizeof(seg->byte_count), 16) / 16; 524 525 return 0; 526 } 527 528 static u16 prot_field_size(enum ib_signature_type type) 529 { 530 switch (type) { 531 case IB_SIG_TYPE_T10_DIF: 532 return MLX5_DIF_SIZE; 533 default: 534 return 0; 535 } 536 } 537 538 static u8 bs_selector(int block_size) 539 { 540 switch (block_size) { 541 case 512: return 0x1; 542 case 520: return 0x2; 543 case 4096: return 0x3; 544 case 4160: return 0x4; 545 case 1073741824: return 0x5; 546 default: return 0; 547 } 548 } 549 550 static void mlx5_fill_inl_bsf(struct ib_sig_domain *domain, 551 struct mlx5_bsf_inl *inl) 552 { 553 /* Valid inline section and allow BSF refresh */ 554 inl->vld_refresh = cpu_to_be16(MLX5_BSF_INL_VALID | 555 MLX5_BSF_REFRESH_DIF); 556 inl->dif_apptag = cpu_to_be16(domain->sig.dif.app_tag); 557 inl->dif_reftag = cpu_to_be32(domain->sig.dif.ref_tag); 558 /* repeating block */ 559 inl->rp_inv_seed = MLX5_BSF_REPEAT_BLOCK; 560 inl->sig_type = domain->sig.dif.bg_type == IB_T10DIF_CRC ? 561 MLX5_DIF_CRC : MLX5_DIF_IPCS; 562 563 if (domain->sig.dif.ref_remap) 564 inl->dif_inc_ref_guard_check |= MLX5_BSF_INC_REFTAG; 565 566 if (domain->sig.dif.app_escape) { 567 if (domain->sig.dif.ref_escape) 568 inl->dif_inc_ref_guard_check |= MLX5_BSF_APPREF_ESCAPE; 569 else 570 inl->dif_inc_ref_guard_check |= MLX5_BSF_APPTAG_ESCAPE; 571 } 572 573 inl->dif_app_bitmask_check = 574 cpu_to_be16(domain->sig.dif.apptag_check_mask); 575 } 576 577 static int mlx5_set_bsf(struct ib_mr *sig_mr, 578 struct ib_sig_attrs *sig_attrs, 579 struct mlx5_bsf *bsf, u32 data_size) 580 { 581 struct mlx5_core_sig_ctx *msig = to_mmr(sig_mr)->sig; 582 struct mlx5_bsf_basic *basic = &bsf->basic; 583 struct ib_sig_domain *mem = &sig_attrs->mem; 584 struct ib_sig_domain *wire = &sig_attrs->wire; 585 586 memset(bsf, 0, sizeof(*bsf)); 587 588 /* Basic + Extended + Inline */ 589 basic->bsf_size_sbs = 1 << 7; 590 /* Input domain check byte mask */ 591 basic->check_byte_mask = sig_attrs->check_mask; 592 basic->raw_data_size = cpu_to_be32(data_size); 593 594 /* Memory domain */ 595 switch (sig_attrs->mem.sig_type) { 596 case IB_SIG_TYPE_NONE: 597 break; 598 case IB_SIG_TYPE_T10_DIF: 599 basic->mem.bs_selector = bs_selector(mem->sig.dif.pi_interval); 600 basic->m_bfs_psv = cpu_to_be32(msig->psv_memory.psv_idx); 601 mlx5_fill_inl_bsf(mem, &bsf->m_inl); 602 break; 603 default: 604 return -EINVAL; 605 } 606 607 /* Wire domain */ 608 switch (sig_attrs->wire.sig_type) { 609 case IB_SIG_TYPE_NONE: 610 break; 611 case IB_SIG_TYPE_T10_DIF: 612 if (mem->sig.dif.pi_interval == wire->sig.dif.pi_interval && 613 mem->sig_type == wire->sig_type) { 614 /* Same block structure */ 615 basic->bsf_size_sbs |= 1 << 4; 616 if (mem->sig.dif.bg_type == wire->sig.dif.bg_type) 617 basic->wire.copy_byte_mask |= MLX5_CPY_GRD_MASK; 618 if (mem->sig.dif.app_tag == wire->sig.dif.app_tag) 619 basic->wire.copy_byte_mask |= MLX5_CPY_APP_MASK; 620 if (mem->sig.dif.ref_tag == wire->sig.dif.ref_tag) 621 basic->wire.copy_byte_mask |= MLX5_CPY_REF_MASK; 622 } else 623 basic->wire.bs_selector = 624 bs_selector(wire->sig.dif.pi_interval); 625 626 basic->w_bfs_psv = cpu_to_be32(msig->psv_wire.psv_idx); 627 mlx5_fill_inl_bsf(wire, &bsf->w_inl); 628 break; 629 default: 630 return -EINVAL; 631 } 632 633 return 0; 634 } 635 636 637 static int set_sig_data_segment(const struct ib_send_wr *send_wr, 638 struct ib_mr *sig_mr, 639 struct ib_sig_attrs *sig_attrs, 640 struct mlx5_ib_qp *qp, void **seg, int *size, 641 void **cur_edge) 642 { 643 struct mlx5_bsf *bsf; 644 u32 data_len; 645 u32 data_key; 646 u64 data_va; 647 u32 prot_len = 0; 648 u32 prot_key = 0; 649 u64 prot_va = 0; 650 bool prot = false; 651 int ret; 652 int wqe_size; 653 struct mlx5_ib_mr *mr = to_mmr(sig_mr); 654 struct mlx5_ib_mr *pi_mr = mr->pi_mr; 655 656 data_len = pi_mr->data_length; 657 data_key = pi_mr->ibmr.lkey; 658 data_va = pi_mr->data_iova; 659 if (pi_mr->meta_ndescs) { 660 prot_len = pi_mr->meta_length; 661 prot_key = pi_mr->ibmr.lkey; 662 prot_va = pi_mr->pi_iova; 663 prot = true; 664 } 665 666 if (!prot || (data_key == prot_key && data_va == prot_va && 667 data_len == prot_len)) { 668 /** 669 * Source domain doesn't contain signature information 670 * or data and protection are interleaved in memory. 671 * So need construct: 672 * ------------------ 673 * | data_klm | 674 * ------------------ 675 * | BSF | 676 * ------------------ 677 **/ 678 struct mlx5_klm *data_klm = *seg; 679 680 data_klm->bcount = cpu_to_be32(data_len); 681 data_klm->key = cpu_to_be32(data_key); 682 data_klm->va = cpu_to_be64(data_va); 683 wqe_size = ALIGN(sizeof(*data_klm), 64); 684 } else { 685 /** 686 * Source domain contains signature information 687 * So need construct a strided block format: 688 * --------------------------- 689 * | stride_block_ctrl | 690 * --------------------------- 691 * | data_klm | 692 * --------------------------- 693 * | prot_klm | 694 * --------------------------- 695 * | BSF | 696 * --------------------------- 697 **/ 698 struct mlx5_stride_block_ctrl_seg *sblock_ctrl; 699 struct mlx5_stride_block_entry *data_sentry; 700 struct mlx5_stride_block_entry *prot_sentry; 701 u16 block_size = sig_attrs->mem.sig.dif.pi_interval; 702 int prot_size; 703 704 sblock_ctrl = *seg; 705 data_sentry = (void *)sblock_ctrl + sizeof(*sblock_ctrl); 706 prot_sentry = (void *)data_sentry + sizeof(*data_sentry); 707 708 prot_size = prot_field_size(sig_attrs->mem.sig_type); 709 if (!prot_size) { 710 pr_err("Bad block size given: %u\n", block_size); 711 return -EINVAL; 712 } 713 sblock_ctrl->bcount_per_cycle = cpu_to_be32(block_size + 714 prot_size); 715 sblock_ctrl->op = cpu_to_be32(MLX5_STRIDE_BLOCK_OP); 716 sblock_ctrl->repeat_count = cpu_to_be32(data_len / block_size); 717 sblock_ctrl->num_entries = cpu_to_be16(2); 718 719 data_sentry->bcount = cpu_to_be16(block_size); 720 data_sentry->key = cpu_to_be32(data_key); 721 data_sentry->va = cpu_to_be64(data_va); 722 data_sentry->stride = cpu_to_be16(block_size); 723 724 prot_sentry->bcount = cpu_to_be16(prot_size); 725 prot_sentry->key = cpu_to_be32(prot_key); 726 prot_sentry->va = cpu_to_be64(prot_va); 727 prot_sentry->stride = cpu_to_be16(prot_size); 728 729 wqe_size = ALIGN(sizeof(*sblock_ctrl) + sizeof(*data_sentry) + 730 sizeof(*prot_sentry), 64); 731 } 732 733 *seg += wqe_size; 734 *size += wqe_size / 16; 735 handle_post_send_edge(&qp->sq, seg, *size, cur_edge); 736 737 bsf = *seg; 738 ret = mlx5_set_bsf(sig_mr, sig_attrs, bsf, data_len); 739 if (ret) 740 return -EINVAL; 741 742 *seg += sizeof(*bsf); 743 *size += sizeof(*bsf) / 16; 744 handle_post_send_edge(&qp->sq, seg, *size, cur_edge); 745 746 return 0; 747 } 748 749 static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg, 750 struct ib_mr *sig_mr, int access_flags, 751 u32 size, u32 length, u32 pdn) 752 { 753 u32 sig_key = sig_mr->rkey; 754 u8 sigerr = to_mmr(sig_mr)->sig->sigerr_count & 1; 755 756 memset(seg, 0, sizeof(*seg)); 757 758 seg->flags = get_umr_flags(access_flags) | MLX5_MKC_ACCESS_MODE_KLMS; 759 seg->qpn_mkey7_0 = cpu_to_be32((sig_key & 0xff) | 0xffffff00); 760 seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | sigerr << 26 | 761 MLX5_MKEY_BSF_EN | pdn); 762 seg->len = cpu_to_be64(length); 763 seg->xlt_oct_size = cpu_to_be32(get_xlt_octo(size)); 764 seg->bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE); 765 } 766 767 static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, 768 u32 size) 769 { 770 memset(umr, 0, sizeof(*umr)); 771 772 umr->flags = MLX5_FLAGS_INLINE | MLX5_FLAGS_CHECK_FREE; 773 umr->xlt_octowords = cpu_to_be16(get_xlt_octo(size)); 774 umr->bsf_octowords = cpu_to_be16(MLX5_MKEY_BSF_OCTO_SIZE); 775 umr->mkey_mask = sig_mkey_mask(); 776 } 777 778 static int set_pi_umr_wr(const struct ib_send_wr *send_wr, 779 struct mlx5_ib_qp *qp, void **seg, int *size, 780 void **cur_edge) 781 { 782 const struct ib_reg_wr *wr = reg_wr(send_wr); 783 struct mlx5_ib_mr *sig_mr = to_mmr(wr->mr); 784 struct mlx5_ib_mr *pi_mr = sig_mr->pi_mr; 785 struct ib_sig_attrs *sig_attrs = sig_mr->ibmr.sig_attrs; 786 u32 pdn = to_mpd(qp->ibqp.pd)->pdn; 787 u32 xlt_size; 788 int region_len, ret; 789 790 if (unlikely(send_wr->num_sge != 0) || 791 unlikely(wr->access & IB_ACCESS_REMOTE_ATOMIC) || 792 unlikely(!sig_mr->sig) || unlikely(!qp->ibqp.integrity_en) || 793 unlikely(!sig_mr->sig->sig_status_checked)) 794 return -EINVAL; 795 796 /* length of the protected region, data + protection */ 797 region_len = pi_mr->ibmr.length; 798 799 /** 800 * KLM octoword size - if protection was provided 801 * then we use strided block format (3 octowords), 802 * else we use single KLM (1 octoword) 803 **/ 804 if (sig_attrs->mem.sig_type != IB_SIG_TYPE_NONE) 805 xlt_size = 0x30; 806 else 807 xlt_size = sizeof(struct mlx5_klm); 808 809 set_sig_umr_segment(*seg, xlt_size); 810 *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); 811 *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; 812 handle_post_send_edge(&qp->sq, seg, *size, cur_edge); 813 814 set_sig_mkey_segment(*seg, wr->mr, wr->access, xlt_size, region_len, 815 pdn); 816 *seg += sizeof(struct mlx5_mkey_seg); 817 *size += sizeof(struct mlx5_mkey_seg) / 16; 818 handle_post_send_edge(&qp->sq, seg, *size, cur_edge); 819 820 ret = set_sig_data_segment(send_wr, wr->mr, sig_attrs, qp, seg, size, 821 cur_edge); 822 if (ret) 823 return ret; 824 825 sig_mr->sig->sig_status_checked = false; 826 return 0; 827 } 828 829 static int set_psv_wr(struct ib_sig_domain *domain, 830 u32 psv_idx, void **seg, int *size) 831 { 832 struct mlx5_seg_set_psv *psv_seg = *seg; 833 834 memset(psv_seg, 0, sizeof(*psv_seg)); 835 psv_seg->psv_num = cpu_to_be32(psv_idx); 836 switch (domain->sig_type) { 837 case IB_SIG_TYPE_NONE: 838 break; 839 case IB_SIG_TYPE_T10_DIF: 840 psv_seg->transient_sig = cpu_to_be32(domain->sig.dif.bg << 16 | 841 domain->sig.dif.app_tag); 842 psv_seg->ref_tag = cpu_to_be32(domain->sig.dif.ref_tag); 843 break; 844 default: 845 pr_err("Bad signature type (%d) is given.\n", 846 domain->sig_type); 847 return -EINVAL; 848 } 849 850 *seg += sizeof(*psv_seg); 851 *size += sizeof(*psv_seg) / 16; 852 853 return 0; 854 } 855 856 static int set_reg_wr(struct mlx5_ib_qp *qp, 857 const struct ib_reg_wr *wr, 858 void **seg, int *size, void **cur_edge, 859 bool check_not_free) 860 { 861 struct mlx5_ib_mr *mr = to_mmr(wr->mr); 862 struct mlx5_ib_pd *pd = to_mpd(qp->ibqp.pd); 863 struct mlx5_ib_dev *dev = to_mdev(pd->ibpd.device); 864 int mr_list_size = (mr->mmkey.ndescs + mr->meta_ndescs) * mr->desc_size; 865 bool umr_inline = mr_list_size <= MLX5_IB_SQ_UMR_INLINE_THRESHOLD; 866 bool atomic = wr->access & IB_ACCESS_REMOTE_ATOMIC; 867 u8 flags = 0; 868 869 /* Matches access in mlx5_set_umr_free_mkey(). 870 * Relaxed Ordering is set implicitly in mlx5_set_umr_free_mkey() and 871 * kernel ULPs are not aware of it, so we don't set it here. 872 */ 873 if (!mlx5_ib_can_reconfig_with_umr(dev, 0, wr->access)) { 874 mlx5_ib_warn( 875 to_mdev(qp->ibqp.device), 876 "Fast update for MR access flags is not possible\n"); 877 return -EINVAL; 878 } 879 880 if (unlikely(wr->wr.send_flags & IB_SEND_INLINE)) { 881 mlx5_ib_warn(to_mdev(qp->ibqp.device), 882 "Invalid IB_SEND_INLINE send flag\n"); 883 return -EINVAL; 884 } 885 886 if (check_not_free) 887 flags |= MLX5_UMR_CHECK_NOT_FREE; 888 if (umr_inline) 889 flags |= MLX5_UMR_INLINE; 890 891 set_reg_umr_seg(*seg, mr, flags, atomic); 892 *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); 893 *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; 894 handle_post_send_edge(&qp->sq, seg, *size, cur_edge); 895 896 set_reg_mkey_seg(*seg, mr, wr->key, wr->access); 897 *seg += sizeof(struct mlx5_mkey_seg); 898 *size += sizeof(struct mlx5_mkey_seg) / 16; 899 handle_post_send_edge(&qp->sq, seg, *size, cur_edge); 900 901 if (umr_inline) { 902 memcpy_send_wqe(&qp->sq, cur_edge, seg, size, mr->descs, 903 mr_list_size); 904 *size = ALIGN(*size, MLX5_SEND_WQE_BB >> 4); 905 } else { 906 set_reg_data_seg(*seg, mr, pd); 907 *seg += sizeof(struct mlx5_wqe_data_seg); 908 *size += (sizeof(struct mlx5_wqe_data_seg) / 16); 909 } 910 return 0; 911 } 912 913 static void set_linv_wr(struct mlx5_ib_qp *qp, void **seg, int *size, 914 void **cur_edge) 915 { 916 set_linv_umr_seg(*seg); 917 *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); 918 *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; 919 handle_post_send_edge(&qp->sq, seg, *size, cur_edge); 920 set_linv_mkey_seg(*seg); 921 *seg += sizeof(struct mlx5_mkey_seg); 922 *size += sizeof(struct mlx5_mkey_seg) / 16; 923 handle_post_send_edge(&qp->sq, seg, *size, cur_edge); 924 } 925 926 static void dump_wqe(struct mlx5_ib_qp *qp, u32 idx, int size_16) 927 { 928 __be32 *p = NULL; 929 int i, j; 930 931 pr_debug("dump WQE index %u:\n", idx); 932 for (i = 0, j = 0; i < size_16 * 4; i += 4, j += 4) { 933 if ((i & 0xf) == 0) { 934 p = mlx5_frag_buf_get_wqe(&qp->sq.fbc, idx); 935 pr_debug("WQBB at %p:\n", (void *)p); 936 j = 0; 937 idx = (idx + 1) & (qp->sq.wqe_cnt - 1); 938 } 939 pr_debug("%08x %08x %08x %08x\n", be32_to_cpu(p[j]), 940 be32_to_cpu(p[j + 1]), be32_to_cpu(p[j + 2]), 941 be32_to_cpu(p[j + 3])); 942 } 943 } 944 945 static int __begin_wqe(struct mlx5_ib_qp *qp, void **seg, 946 struct mlx5_wqe_ctrl_seg **ctrl, 947 const struct ib_send_wr *wr, unsigned int *idx, 948 int *size, void **cur_edge, int nreq, 949 bool send_signaled, bool solicited) 950 { 951 if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq))) 952 return -ENOMEM; 953 954 *idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1); 955 *seg = mlx5_frag_buf_get_wqe(&qp->sq.fbc, *idx); 956 *ctrl = *seg; 957 *(uint32_t *)(*seg + 8) = 0; 958 (*ctrl)->imm = send_ieth(wr); 959 (*ctrl)->fm_ce_se = qp->sq_signal_bits | 960 (send_signaled ? MLX5_WQE_CTRL_CQ_UPDATE : 0) | 961 (solicited ? MLX5_WQE_CTRL_SOLICITED : 0); 962 963 *seg += sizeof(**ctrl); 964 *size = sizeof(**ctrl) / 16; 965 *cur_edge = qp->sq.cur_edge; 966 967 return 0; 968 } 969 970 static int begin_wqe(struct mlx5_ib_qp *qp, void **seg, 971 struct mlx5_wqe_ctrl_seg **ctrl, 972 const struct ib_send_wr *wr, unsigned int *idx, int *size, 973 void **cur_edge, int nreq) 974 { 975 return __begin_wqe(qp, seg, ctrl, wr, idx, size, cur_edge, nreq, 976 wr->send_flags & IB_SEND_SIGNALED, 977 wr->send_flags & IB_SEND_SOLICITED); 978 } 979 980 static void finish_wqe(struct mlx5_ib_qp *qp, 981 struct mlx5_wqe_ctrl_seg *ctrl, 982 void *seg, u8 size, void *cur_edge, 983 unsigned int idx, u64 wr_id, int nreq, u8 fence, 984 u32 mlx5_opcode) 985 { 986 u8 opmod = 0; 987 988 ctrl->opmod_idx_opcode = cpu_to_be32(((u32)(qp->sq.cur_post) << 8) | 989 mlx5_opcode | ((u32)opmod << 24)); 990 ctrl->qpn_ds = cpu_to_be32(size | (qp->trans_qp.base.mqp.qpn << 8)); 991 ctrl->fm_ce_se |= fence; 992 if (unlikely(qp->flags_en & MLX5_QP_FLAG_SIGNATURE)) 993 ctrl->signature = wq_sig(ctrl); 994 995 qp->sq.wrid[idx] = wr_id; 996 qp->sq.w_list[idx].opcode = mlx5_opcode; 997 qp->sq.wqe_head[idx] = qp->sq.head + nreq; 998 qp->sq.cur_post += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB); 999 qp->sq.w_list[idx].next = qp->sq.cur_post; 1000 1001 /* We save the edge which was possibly updated during the WQE 1002 * construction, into SQ's cache. 1003 */ 1004 seg = PTR_ALIGN(seg, MLX5_SEND_WQE_BB); 1005 qp->sq.cur_edge = (unlikely(seg == cur_edge)) ? 1006 get_sq_edge(&qp->sq, qp->sq.cur_post & 1007 (qp->sq.wqe_cnt - 1)) : 1008 cur_edge; 1009 } 1010 1011 static void handle_rdma_op(const struct ib_send_wr *wr, void **seg, int *size) 1012 { 1013 set_raddr_seg(*seg, rdma_wr(wr)->remote_addr, rdma_wr(wr)->rkey); 1014 *seg += sizeof(struct mlx5_wqe_raddr_seg); 1015 *size += sizeof(struct mlx5_wqe_raddr_seg) / 16; 1016 } 1017 1018 static void handle_local_inv(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr, 1019 struct mlx5_wqe_ctrl_seg **ctrl, void **seg, 1020 int *size, void **cur_edge, unsigned int idx) 1021 { 1022 qp->sq.wr_data[idx] = IB_WR_LOCAL_INV; 1023 (*ctrl)->imm = cpu_to_be32(wr->ex.invalidate_rkey); 1024 set_linv_wr(qp, seg, size, cur_edge); 1025 } 1026 1027 static int handle_reg_mr(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr, 1028 struct mlx5_wqe_ctrl_seg **ctrl, void **seg, int *size, 1029 void **cur_edge, unsigned int idx) 1030 { 1031 qp->sq.wr_data[idx] = IB_WR_REG_MR; 1032 (*ctrl)->imm = cpu_to_be32(reg_wr(wr)->key); 1033 return set_reg_wr(qp, reg_wr(wr), seg, size, cur_edge, true); 1034 } 1035 1036 static int handle_psv(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, 1037 const struct ib_send_wr *wr, 1038 struct mlx5_wqe_ctrl_seg **ctrl, void **seg, int *size, 1039 void **cur_edge, unsigned int *idx, int nreq, 1040 struct ib_sig_domain *domain, u32 psv_index, 1041 u8 next_fence) 1042 { 1043 int err; 1044 1045 /* 1046 * SET_PSV WQEs are not signaled and solicited on error. 1047 */ 1048 err = __begin_wqe(qp, seg, ctrl, wr, idx, size, cur_edge, nreq, 1049 false, true); 1050 if (unlikely(err)) { 1051 mlx5_ib_warn(dev, "\n"); 1052 err = -ENOMEM; 1053 goto out; 1054 } 1055 err = set_psv_wr(domain, psv_index, seg, size); 1056 if (unlikely(err)) { 1057 mlx5_ib_warn(dev, "\n"); 1058 goto out; 1059 } 1060 finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, wr->wr_id, nreq, 1061 next_fence, MLX5_OPCODE_SET_PSV); 1062 1063 out: 1064 return err; 1065 } 1066 1067 static int handle_reg_mr_integrity(struct mlx5_ib_dev *dev, 1068 struct mlx5_ib_qp *qp, 1069 const struct ib_send_wr *wr, 1070 struct mlx5_wqe_ctrl_seg **ctrl, void **seg, 1071 int *size, void **cur_edge, 1072 unsigned int *idx, int nreq, u8 fence, 1073 u8 next_fence) 1074 { 1075 struct mlx5_ib_mr *mr; 1076 struct mlx5_ib_mr *pi_mr; 1077 struct mlx5_ib_mr pa_pi_mr; 1078 struct ib_sig_attrs *sig_attrs; 1079 struct ib_reg_wr reg_pi_wr; 1080 int err; 1081 1082 qp->sq.wr_data[*idx] = IB_WR_REG_MR_INTEGRITY; 1083 1084 mr = to_mmr(reg_wr(wr)->mr); 1085 pi_mr = mr->pi_mr; 1086 1087 if (pi_mr) { 1088 memset(®_pi_wr, 0, 1089 sizeof(struct ib_reg_wr)); 1090 1091 reg_pi_wr.mr = &pi_mr->ibmr; 1092 reg_pi_wr.access = reg_wr(wr)->access; 1093 reg_pi_wr.key = pi_mr->ibmr.rkey; 1094 1095 (*ctrl)->imm = cpu_to_be32(reg_pi_wr.key); 1096 /* UMR for data + prot registration */ 1097 err = set_reg_wr(qp, ®_pi_wr, seg, size, cur_edge, false); 1098 if (unlikely(err)) 1099 goto out; 1100 1101 finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, wr->wr_id, 1102 nreq, fence, MLX5_OPCODE_UMR); 1103 1104 err = begin_wqe(qp, seg, ctrl, wr, idx, size, cur_edge, nreq); 1105 if (unlikely(err)) { 1106 mlx5_ib_warn(dev, "\n"); 1107 err = -ENOMEM; 1108 goto out; 1109 } 1110 } else { 1111 memset(&pa_pi_mr, 0, sizeof(struct mlx5_ib_mr)); 1112 /* No UMR, use local_dma_lkey */ 1113 pa_pi_mr.ibmr.lkey = mr->ibmr.pd->local_dma_lkey; 1114 pa_pi_mr.mmkey.ndescs = mr->mmkey.ndescs; 1115 pa_pi_mr.data_length = mr->data_length; 1116 pa_pi_mr.data_iova = mr->data_iova; 1117 if (mr->meta_ndescs) { 1118 pa_pi_mr.meta_ndescs = mr->meta_ndescs; 1119 pa_pi_mr.meta_length = mr->meta_length; 1120 pa_pi_mr.pi_iova = mr->pi_iova; 1121 } 1122 1123 pa_pi_mr.ibmr.length = mr->ibmr.length; 1124 mr->pi_mr = &pa_pi_mr; 1125 } 1126 (*ctrl)->imm = cpu_to_be32(mr->ibmr.rkey); 1127 /* UMR for sig MR */ 1128 err = set_pi_umr_wr(wr, qp, seg, size, cur_edge); 1129 if (unlikely(err)) { 1130 mlx5_ib_warn(dev, "\n"); 1131 goto out; 1132 } 1133 finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, wr->wr_id, nreq, 1134 fence, MLX5_OPCODE_UMR); 1135 1136 sig_attrs = mr->ibmr.sig_attrs; 1137 err = handle_psv(dev, qp, wr, ctrl, seg, size, cur_edge, idx, nreq, 1138 &sig_attrs->mem, mr->sig->psv_memory.psv_idx, 1139 next_fence); 1140 if (unlikely(err)) 1141 goto out; 1142 1143 err = handle_psv(dev, qp, wr, ctrl, seg, size, cur_edge, idx, nreq, 1144 &sig_attrs->wire, mr->sig->psv_wire.psv_idx, 1145 next_fence); 1146 if (unlikely(err)) 1147 goto out; 1148 1149 qp->next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; 1150 1151 out: 1152 return err; 1153 } 1154 1155 static int handle_qpt_rc(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, 1156 const struct ib_send_wr *wr, 1157 struct mlx5_wqe_ctrl_seg **ctrl, void **seg, int *size, 1158 void **cur_edge, unsigned int *idx, int nreq, u8 fence, 1159 u8 next_fence, int *num_sge) 1160 { 1161 int err = 0; 1162 1163 switch (wr->opcode) { 1164 case IB_WR_RDMA_READ: 1165 case IB_WR_RDMA_WRITE: 1166 case IB_WR_RDMA_WRITE_WITH_IMM: 1167 handle_rdma_op(wr, seg, size); 1168 break; 1169 1170 case IB_WR_ATOMIC_CMP_AND_SWP: 1171 case IB_WR_ATOMIC_FETCH_AND_ADD: 1172 case IB_WR_MASKED_ATOMIC_CMP_AND_SWP: 1173 mlx5_ib_warn(dev, "Atomic operations are not supported yet\n"); 1174 err = -EOPNOTSUPP; 1175 goto out; 1176 1177 case IB_WR_LOCAL_INV: 1178 handle_local_inv(qp, wr, ctrl, seg, size, cur_edge, *idx); 1179 *num_sge = 0; 1180 break; 1181 1182 case IB_WR_REG_MR: 1183 err = handle_reg_mr(qp, wr, ctrl, seg, size, cur_edge, *idx); 1184 if (unlikely(err)) 1185 goto out; 1186 *num_sge = 0; 1187 break; 1188 1189 case IB_WR_REG_MR_INTEGRITY: 1190 err = handle_reg_mr_integrity(dev, qp, wr, ctrl, seg, size, 1191 cur_edge, idx, nreq, fence, 1192 next_fence); 1193 if (unlikely(err)) 1194 goto out; 1195 *num_sge = 0; 1196 break; 1197 1198 default: 1199 break; 1200 } 1201 1202 out: 1203 return err; 1204 } 1205 1206 static void handle_qpt_uc(const struct ib_send_wr *wr, void **seg, int *size) 1207 { 1208 switch (wr->opcode) { 1209 case IB_WR_RDMA_WRITE: 1210 case IB_WR_RDMA_WRITE_WITH_IMM: 1211 handle_rdma_op(wr, seg, size); 1212 break; 1213 default: 1214 break; 1215 } 1216 } 1217 1218 static void handle_qpt_hw_gsi(struct mlx5_ib_qp *qp, 1219 const struct ib_send_wr *wr, void **seg, 1220 int *size, void **cur_edge) 1221 { 1222 set_datagram_seg(*seg, wr); 1223 *seg += sizeof(struct mlx5_wqe_datagram_seg); 1224 *size += sizeof(struct mlx5_wqe_datagram_seg) / 16; 1225 handle_post_send_edge(&qp->sq, seg, *size, cur_edge); 1226 } 1227 1228 static void handle_qpt_ud(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr, 1229 void **seg, int *size, void **cur_edge) 1230 { 1231 set_datagram_seg(*seg, wr); 1232 *seg += sizeof(struct mlx5_wqe_datagram_seg); 1233 *size += sizeof(struct mlx5_wqe_datagram_seg) / 16; 1234 handle_post_send_edge(&qp->sq, seg, *size, cur_edge); 1235 1236 /* handle qp that supports ud offload */ 1237 if (qp->flags & IB_QP_CREATE_IPOIB_UD_LSO) { 1238 struct mlx5_wqe_eth_pad *pad; 1239 1240 pad = *seg; 1241 memset(pad, 0, sizeof(struct mlx5_wqe_eth_pad)); 1242 *seg += sizeof(struct mlx5_wqe_eth_pad); 1243 *size += sizeof(struct mlx5_wqe_eth_pad) / 16; 1244 set_eth_seg(wr, qp, seg, size, cur_edge); 1245 handle_post_send_edge(&qp->sq, seg, *size, cur_edge); 1246 } 1247 } 1248 1249 static int handle_qpt_reg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, 1250 const struct ib_send_wr *wr, 1251 struct mlx5_wqe_ctrl_seg **ctrl, void **seg, 1252 int *size, void **cur_edge, unsigned int idx) 1253 { 1254 int err = 0; 1255 1256 if (unlikely(wr->opcode != MLX5_IB_WR_UMR)) { 1257 err = -EINVAL; 1258 mlx5_ib_warn(dev, "bad opcode %d\n", wr->opcode); 1259 goto out; 1260 } 1261 1262 qp->sq.wr_data[idx] = MLX5_IB_WR_UMR; 1263 (*ctrl)->imm = cpu_to_be32(umr_wr(wr)->mkey); 1264 err = set_reg_umr_segment(dev, *seg, wr); 1265 if (unlikely(err)) 1266 goto out; 1267 *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); 1268 *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; 1269 handle_post_send_edge(&qp->sq, seg, *size, cur_edge); 1270 set_reg_mkey_segment(dev, *seg, wr); 1271 *seg += sizeof(struct mlx5_mkey_seg); 1272 *size += sizeof(struct mlx5_mkey_seg) / 16; 1273 handle_post_send_edge(&qp->sq, seg, *size, cur_edge); 1274 out: 1275 return err; 1276 } 1277 1278 int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, 1279 const struct ib_send_wr **bad_wr, bool drain) 1280 { 1281 struct mlx5_wqe_ctrl_seg *ctrl = NULL; /* compiler warning */ 1282 struct mlx5_ib_dev *dev = to_mdev(ibqp->device); 1283 struct mlx5_core_dev *mdev = dev->mdev; 1284 struct mlx5_ib_qp *qp = to_mqp(ibqp); 1285 struct mlx5_wqe_xrc_seg *xrc; 1286 struct mlx5_bf *bf; 1287 void *cur_edge; 1288 int size; 1289 unsigned long flags; 1290 unsigned int idx; 1291 int err = 0; 1292 int num_sge; 1293 void *seg; 1294 int nreq; 1295 int i; 1296 u8 next_fence = 0; 1297 u8 fence; 1298 1299 if (unlikely(mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR && 1300 !drain)) { 1301 *bad_wr = wr; 1302 return -EIO; 1303 } 1304 1305 if (qp->type == IB_QPT_GSI) 1306 return mlx5_ib_gsi_post_send(ibqp, wr, bad_wr); 1307 1308 bf = &qp->bf; 1309 1310 spin_lock_irqsave(&qp->sq.lock, flags); 1311 1312 for (nreq = 0; wr; nreq++, wr = wr->next) { 1313 if (unlikely(wr->opcode >= ARRAY_SIZE(mlx5_ib_opcode))) { 1314 mlx5_ib_warn(dev, "\n"); 1315 err = -EINVAL; 1316 *bad_wr = wr; 1317 goto out; 1318 } 1319 1320 num_sge = wr->num_sge; 1321 if (unlikely(num_sge > qp->sq.max_gs)) { 1322 mlx5_ib_warn(dev, "\n"); 1323 err = -EINVAL; 1324 *bad_wr = wr; 1325 goto out; 1326 } 1327 1328 err = begin_wqe(qp, &seg, &ctrl, wr, &idx, &size, &cur_edge, 1329 nreq); 1330 if (err) { 1331 mlx5_ib_warn(dev, "\n"); 1332 err = -ENOMEM; 1333 *bad_wr = wr; 1334 goto out; 1335 } 1336 1337 if (wr->opcode == IB_WR_REG_MR || 1338 wr->opcode == IB_WR_REG_MR_INTEGRITY) { 1339 fence = dev->umr_fence; 1340 next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; 1341 } else { 1342 if (wr->send_flags & IB_SEND_FENCE) { 1343 if (qp->next_fence) 1344 fence = MLX5_FENCE_MODE_SMALL_AND_FENCE; 1345 else 1346 fence = MLX5_FENCE_MODE_FENCE; 1347 } else { 1348 fence = qp->next_fence; 1349 } 1350 } 1351 1352 switch (qp->type) { 1353 case IB_QPT_XRC_INI: 1354 xrc = seg; 1355 seg += sizeof(*xrc); 1356 size += sizeof(*xrc) / 16; 1357 fallthrough; 1358 case IB_QPT_RC: 1359 err = handle_qpt_rc(dev, qp, wr, &ctrl, &seg, &size, 1360 &cur_edge, &idx, nreq, fence, 1361 next_fence, &num_sge); 1362 if (unlikely(err)) { 1363 *bad_wr = wr; 1364 goto out; 1365 } else if (wr->opcode == IB_WR_REG_MR_INTEGRITY) { 1366 goto skip_psv; 1367 } 1368 break; 1369 1370 case IB_QPT_UC: 1371 handle_qpt_uc(wr, &seg, &size); 1372 break; 1373 case IB_QPT_SMI: 1374 if (unlikely(!dev->port_caps[qp->port - 1].has_smi)) { 1375 mlx5_ib_warn(dev, "Send SMP MADs is not allowed\n"); 1376 err = -EPERM; 1377 *bad_wr = wr; 1378 goto out; 1379 } 1380 fallthrough; 1381 case MLX5_IB_QPT_HW_GSI: 1382 handle_qpt_hw_gsi(qp, wr, &seg, &size, &cur_edge); 1383 break; 1384 case IB_QPT_UD: 1385 handle_qpt_ud(qp, wr, &seg, &size, &cur_edge); 1386 break; 1387 case MLX5_IB_QPT_REG_UMR: 1388 err = handle_qpt_reg_umr(dev, qp, wr, &ctrl, &seg, 1389 &size, &cur_edge, idx); 1390 if (unlikely(err)) 1391 goto out; 1392 break; 1393 1394 default: 1395 break; 1396 } 1397 1398 if (wr->send_flags & IB_SEND_INLINE && num_sge) { 1399 err = set_data_inl_seg(qp, wr, &seg, &size, &cur_edge); 1400 if (unlikely(err)) { 1401 mlx5_ib_warn(dev, "\n"); 1402 *bad_wr = wr; 1403 goto out; 1404 } 1405 } else { 1406 for (i = 0; i < num_sge; i++) { 1407 handle_post_send_edge(&qp->sq, &seg, size, 1408 &cur_edge); 1409 if (unlikely(!wr->sg_list[i].length)) 1410 continue; 1411 1412 set_data_ptr_seg( 1413 (struct mlx5_wqe_data_seg *)seg, 1414 wr->sg_list + i); 1415 size += sizeof(struct mlx5_wqe_data_seg) / 16; 1416 seg += sizeof(struct mlx5_wqe_data_seg); 1417 } 1418 } 1419 1420 qp->next_fence = next_fence; 1421 finish_wqe(qp, ctrl, seg, size, cur_edge, idx, wr->wr_id, nreq, 1422 fence, mlx5_ib_opcode[wr->opcode]); 1423 skip_psv: 1424 if (0) 1425 dump_wqe(qp, idx, size); 1426 } 1427 1428 out: 1429 if (likely(nreq)) { 1430 qp->sq.head += nreq; 1431 1432 /* Make sure that descriptors are written before 1433 * updating doorbell record and ringing the doorbell 1434 */ 1435 wmb(); 1436 1437 qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post); 1438 1439 /* Make sure doorbell record is visible to the HCA before 1440 * we hit doorbell. 1441 */ 1442 wmb(); 1443 1444 mlx5_write64((__be32 *)ctrl, bf->bfreg->map + bf->offset); 1445 /* Make sure doorbells don't leak out of SQ spinlock 1446 * and reach the HCA out of order. 1447 */ 1448 bf->offset ^= bf->buf_size; 1449 } 1450 1451 spin_unlock_irqrestore(&qp->sq.lock, flags); 1452 1453 return err; 1454 } 1455 1456 static void set_sig_seg(struct mlx5_rwqe_sig *sig, int max_gs) 1457 { 1458 sig->signature = calc_sig(sig, (max_gs + 1) << 2); 1459 } 1460 1461 int mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, 1462 const struct ib_recv_wr **bad_wr, bool drain) 1463 { 1464 struct mlx5_ib_qp *qp = to_mqp(ibqp); 1465 struct mlx5_wqe_data_seg *scat; 1466 struct mlx5_rwqe_sig *sig; 1467 struct mlx5_ib_dev *dev = to_mdev(ibqp->device); 1468 struct mlx5_core_dev *mdev = dev->mdev; 1469 unsigned long flags; 1470 int err = 0; 1471 int nreq; 1472 int ind; 1473 int i; 1474 1475 if (unlikely(mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR && 1476 !drain)) { 1477 *bad_wr = wr; 1478 return -EIO; 1479 } 1480 1481 if (qp->type == IB_QPT_GSI) 1482 return mlx5_ib_gsi_post_recv(ibqp, wr, bad_wr); 1483 1484 spin_lock_irqsave(&qp->rq.lock, flags); 1485 1486 ind = qp->rq.head & (qp->rq.wqe_cnt - 1); 1487 1488 for (nreq = 0; wr; nreq++, wr = wr->next) { 1489 if (mlx5_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) { 1490 err = -ENOMEM; 1491 *bad_wr = wr; 1492 goto out; 1493 } 1494 1495 if (unlikely(wr->num_sge > qp->rq.max_gs)) { 1496 err = -EINVAL; 1497 *bad_wr = wr; 1498 goto out; 1499 } 1500 1501 scat = mlx5_frag_buf_get_wqe(&qp->rq.fbc, ind); 1502 if (qp->flags_en & MLX5_QP_FLAG_SIGNATURE) 1503 scat++; 1504 1505 for (i = 0; i < wr->num_sge; i++) 1506 set_data_ptr_seg(scat + i, wr->sg_list + i); 1507 1508 if (i < qp->rq.max_gs) { 1509 scat[i].byte_count = 0; 1510 scat[i].lkey = cpu_to_be32(MLX5_INVALID_LKEY); 1511 scat[i].addr = 0; 1512 } 1513 1514 if (qp->flags_en & MLX5_QP_FLAG_SIGNATURE) { 1515 sig = (struct mlx5_rwqe_sig *)scat; 1516 set_sig_seg(sig, qp->rq.max_gs); 1517 } 1518 1519 qp->rq.wrid[ind] = wr->wr_id; 1520 1521 ind = (ind + 1) & (qp->rq.wqe_cnt - 1); 1522 } 1523 1524 out: 1525 if (likely(nreq)) { 1526 qp->rq.head += nreq; 1527 1528 /* Make sure that descriptors are written before 1529 * doorbell record. 1530 */ 1531 wmb(); 1532 1533 *qp->db.db = cpu_to_be32(qp->rq.head & 0xffff); 1534 } 1535 1536 spin_unlock_irqrestore(&qp->rq.lock, flags); 1537 1538 return err; 1539 } 1540