1 /* QLogic qedr NIC Driver 2 * Copyright (c) 2015-2016 QLogic Corporation 3 * 4 * This software is available to you under a choice of one of two 5 * licenses. You may choose to be licensed under the terms of the GNU 6 * General Public License (GPL) Version 2, available from the file 7 * COPYING in the main directory of this source tree, or the 8 * OpenIB.org BSD license below: 9 * 10 * Redistribution and use in source and binary forms, with or 11 * without modification, are permitted provided that the following 12 * conditions are met: 13 * 14 * - Redistributions of source code must retain the above 15 * copyright notice, this list of conditions and the following 16 * disclaimer. 17 * 18 * - Redistributions in binary form must reproduce the above 19 * copyright notice, this list of conditions and the following 20 * disclaimer in the documentation and /or other materials 21 * provided with the distribution. 22 * 23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * SOFTWARE. 31 */ 32 #include <linux/dma-mapping.h> 33 #include <linux/crc32.h> 34 #include <net/ip.h> 35 #include <net/ipv6.h> 36 #include <net/udp.h> 37 #include <linux/iommu.h> 38 39 #include <rdma/ib_verbs.h> 40 #include <rdma/ib_user_verbs.h> 41 #include <rdma/iw_cm.h> 42 #include <rdma/ib_umem.h> 43 #include <rdma/ib_addr.h> 44 #include <rdma/ib_cache.h> 45 46 #include <linux/qed/common_hsi.h> 47 #include "qedr_hsi_rdma.h" 48 #include <linux/qed/qed_if.h> 49 #include "qedr.h" 50 #include "verbs.h" 51 #include <rdma/qedr-abi.h> 52 #include "qedr_roce_cm.h" 53 54 #define DB_ADDR_SHIFT(addr) ((addr) << DB_PWM_ADDR_OFFSET_SHIFT) 55 56 static inline int qedr_ib_copy_to_udata(struct ib_udata *udata, void *src, 57 size_t len) 58 { 59 size_t min_len = min_t(size_t, len, udata->outlen); 60 61 return ib_copy_to_udata(udata, src, min_len); 62 } 63 64 int qedr_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) 65 { 66 if (index > QEDR_ROCE_PKEY_TABLE_LEN) 67 return -EINVAL; 68 69 *pkey = QEDR_ROCE_PKEY_DEFAULT; 70 return 0; 71 } 72 73 int qedr_iw_query_gid(struct ib_device *ibdev, u8 port, 74 int index, union ib_gid *sgid) 75 { 76 struct qedr_dev *dev = get_qedr_dev(ibdev); 77 78 memset(sgid->raw, 0, sizeof(sgid->raw)); 79 ether_addr_copy(sgid->raw, dev->ndev->dev_addr); 80 81 DP_DEBUG(dev, QEDR_MSG_INIT, "QUERY sgid[%d]=%llx:%llx\n", index, 82 sgid->global.interface_id, sgid->global.subnet_prefix); 83 84 return 0; 85 } 86 87 int qedr_query_device(struct ib_device *ibdev, 88 struct ib_device_attr *attr, struct ib_udata *udata) 89 { 90 struct qedr_dev *dev = get_qedr_dev(ibdev); 91 struct qedr_device_attr *qattr = &dev->attr; 92 93 if (!dev->rdma_ctx) { 94 DP_ERR(dev, 95 "qedr_query_device called with invalid params rdma_ctx=%p\n", 96 dev->rdma_ctx); 97 return -EINVAL; 98 } 99 100 memset(attr, 0, sizeof(*attr)); 101 102 attr->fw_ver = qattr->fw_ver; 103 attr->sys_image_guid = qattr->sys_image_guid; 104 attr->max_mr_size = qattr->max_mr_size; 105 attr->page_size_cap = qattr->page_size_caps; 106 attr->vendor_id = qattr->vendor_id; 107 attr->vendor_part_id = qattr->vendor_part_id; 108 
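	/* The remaining limits are reported verbatim from the attributes the
	 * driver cached in dev->attr (qattr) when the device was registered.
	 */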
attr->hw_ver = qattr->hw_ver; 109 attr->max_qp = qattr->max_qp; 110 attr->max_qp_wr = max_t(u32, qattr->max_sqe, qattr->max_rqe); 111 attr->device_cap_flags = IB_DEVICE_CURR_QP_STATE_MOD | 112 IB_DEVICE_RC_RNR_NAK_GEN | 113 IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS; 114 115 attr->max_sge = qattr->max_sge; 116 attr->max_sge_rd = qattr->max_sge; 117 attr->max_cq = qattr->max_cq; 118 attr->max_cqe = qattr->max_cqe; 119 attr->max_mr = qattr->max_mr; 120 attr->max_mw = qattr->max_mw; 121 attr->max_pd = qattr->max_pd; 122 attr->atomic_cap = dev->atomic_cap; 123 attr->max_fmr = qattr->max_fmr; 124 attr->max_map_per_fmr = 16; 125 attr->max_qp_init_rd_atom = 126 1 << (fls(qattr->max_qp_req_rd_atomic_resc) - 1); 127 attr->max_qp_rd_atom = 128 min(1 << (fls(qattr->max_qp_resp_rd_atomic_resc) - 1), 129 attr->max_qp_init_rd_atom); 130 131 attr->max_srq = qattr->max_srq; 132 attr->max_srq_sge = qattr->max_srq_sge; 133 attr->max_srq_wr = qattr->max_srq_wr; 134 135 attr->local_ca_ack_delay = qattr->dev_ack_delay; 136 attr->max_fast_reg_page_list_len = qattr->max_mr / 8; 137 attr->max_pkeys = QEDR_ROCE_PKEY_MAX; 138 attr->max_ah = qattr->max_ah; 139 140 return 0; 141 } 142 143 #define QEDR_SPEED_SDR (1) 144 #define QEDR_SPEED_DDR (2) 145 #define QEDR_SPEED_QDR (4) 146 #define QEDR_SPEED_FDR10 (8) 147 #define QEDR_SPEED_FDR (16) 148 #define QEDR_SPEED_EDR (32) 149 150 static inline void get_link_speed_and_width(int speed, u8 *ib_speed, 151 u8 *ib_width) 152 { 153 switch (speed) { 154 case 1000: 155 *ib_speed = QEDR_SPEED_SDR; 156 *ib_width = IB_WIDTH_1X; 157 break; 158 case 10000: 159 *ib_speed = QEDR_SPEED_QDR; 160 *ib_width = IB_WIDTH_1X; 161 break; 162 163 case 20000: 164 *ib_speed = QEDR_SPEED_DDR; 165 *ib_width = IB_WIDTH_4X; 166 break; 167 168 case 25000: 169 *ib_speed = QEDR_SPEED_EDR; 170 *ib_width = IB_WIDTH_1X; 171 break; 172 173 case 40000: 174 *ib_speed = QEDR_SPEED_QDR; 175 *ib_width = IB_WIDTH_4X; 176 break; 177 178 case 50000: 179 *ib_speed = QEDR_SPEED_QDR; 180 *ib_width = IB_WIDTH_4X; 181 break; 182 183 case 100000: 184 *ib_speed = QEDR_SPEED_EDR; 185 *ib_width = IB_WIDTH_4X; 186 break; 187 188 default: 189 /* Unsupported */ 190 *ib_speed = QEDR_SPEED_SDR; 191 *ib_width = IB_WIDTH_1X; 192 } 193 } 194 195 int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr) 196 { 197 struct qedr_dev *dev; 198 struct qed_rdma_port *rdma_port; 199 200 dev = get_qedr_dev(ibdev); 201 if (port > 1) { 202 DP_ERR(dev, "invalid_port=0x%x\n", port); 203 return -EINVAL; 204 } 205 206 if (!dev->rdma_ctx) { 207 DP_ERR(dev, "rdma_ctx is NULL\n"); 208 return -EINVAL; 209 } 210 211 rdma_port = dev->ops->rdma_query_port(dev->rdma_ctx); 212 213 /* *attr being zeroed by the caller, avoid zeroing it here */ 214 if (rdma_port->port_state == QED_RDMA_PORT_UP) { 215 attr->state = IB_PORT_ACTIVE; 216 attr->phys_state = 5; 217 } else { 218 attr->state = IB_PORT_DOWN; 219 attr->phys_state = 3; 220 } 221 attr->max_mtu = IB_MTU_4096; 222 attr->active_mtu = iboe_get_mtu(dev->ndev->mtu); 223 attr->lid = 0; 224 attr->lmc = 0; 225 attr->sm_lid = 0; 226 attr->sm_sl = 0; 227 attr->port_cap_flags = IB_PORT_IP_BASED_GIDS; 228 if (rdma_protocol_iwarp(&dev->ibdev, 1)) { 229 attr->gid_tbl_len = 1; 230 attr->pkey_tbl_len = 1; 231 } else { 232 attr->gid_tbl_len = QEDR_MAX_SGID; 233 attr->pkey_tbl_len = QEDR_ROCE_PKEY_TABLE_LEN; 234 } 235 attr->bad_pkey_cntr = rdma_port->pkey_bad_counter; 236 attr->qkey_viol_cntr = 0; 237 get_link_speed_and_width(rdma_port->link_speed, 238 &attr->active_speed, 
&attr->active_width); 239 attr->max_msg_sz = rdma_port->max_msg_size; 240 attr->max_vl_num = 4; 241 242 return 0; 243 } 244 245 int qedr_modify_port(struct ib_device *ibdev, u8 port, int mask, 246 struct ib_port_modify *props) 247 { 248 struct qedr_dev *dev; 249 250 dev = get_qedr_dev(ibdev); 251 if (port > 1) { 252 DP_ERR(dev, "invalid_port=0x%x\n", port); 253 return -EINVAL; 254 } 255 256 return 0; 257 } 258 259 static int qedr_add_mmap(struct qedr_ucontext *uctx, u64 phy_addr, 260 unsigned long len) 261 { 262 struct qedr_mm *mm; 263 264 mm = kzalloc(sizeof(*mm), GFP_KERNEL); 265 if (!mm) 266 return -ENOMEM; 267 268 mm->key.phy_addr = phy_addr; 269 /* This function might be called with a length which is not a multiple 270 * of PAGE_SIZE, while the mapping is PAGE_SIZE grained and the kernel 271 * forces this granularity by increasing the requested size if needed. 272 * When qedr_mmap is called, it will search the list with the updated 273 * length as a key. To prevent search failures, the length is rounded up 274 * in advance to PAGE_SIZE. 275 */ 276 mm->key.len = roundup(len, PAGE_SIZE); 277 INIT_LIST_HEAD(&mm->entry); 278 279 mutex_lock(&uctx->mm_list_lock); 280 list_add(&mm->entry, &uctx->mm_head); 281 mutex_unlock(&uctx->mm_list_lock); 282 283 DP_DEBUG(uctx->dev, QEDR_MSG_MISC, 284 "added (addr=0x%llx,len=0x%lx) for ctx=%p\n", 285 (unsigned long long)mm->key.phy_addr, 286 (unsigned long)mm->key.len, uctx); 287 288 return 0; 289 } 290 291 static bool qedr_search_mmap(struct qedr_ucontext *uctx, u64 phy_addr, 292 unsigned long len) 293 { 294 bool found = false; 295 struct qedr_mm *mm; 296 297 mutex_lock(&uctx->mm_list_lock); 298 list_for_each_entry(mm, &uctx->mm_head, entry) { 299 if (len != mm->key.len || phy_addr != mm->key.phy_addr) 300 continue; 301 302 found = true; 303 break; 304 } 305 mutex_unlock(&uctx->mm_list_lock); 306 DP_DEBUG(uctx->dev, QEDR_MSG_MISC, 307 "searched for (addr=0x%llx,len=0x%lx) for ctx=%p, result=%d\n", 308 mm->key.phy_addr, mm->key.len, uctx, found); 309 310 return found; 311 } 312 313 struct ib_ucontext *qedr_alloc_ucontext(struct ib_device *ibdev, 314 struct ib_udata *udata) 315 { 316 int rc; 317 struct qedr_ucontext *ctx; 318 struct qedr_alloc_ucontext_resp uresp; 319 struct qedr_dev *dev = get_qedr_dev(ibdev); 320 struct qed_rdma_add_user_out_params oparams; 321 322 if (!udata) 323 return ERR_PTR(-EFAULT); 324 325 ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); 326 if (!ctx) 327 return ERR_PTR(-ENOMEM); 328 329 rc = dev->ops->rdma_add_user(dev->rdma_ctx, &oparams); 330 if (rc) { 331 DP_ERR(dev, 332 "failed to allocate a DPI for a new RoCE application, rc=%d. To overcome this consider to increase the number of DPIs, increase the doorbell BAR size or just close unnecessary RoCE applications. 
In order to increase the number of DPIs consult the qedr readme\n", 333 rc); 334 goto err; 335 } 336 337 ctx->dpi = oparams.dpi; 338 ctx->dpi_addr = oparams.dpi_addr; 339 ctx->dpi_phys_addr = oparams.dpi_phys_addr; 340 ctx->dpi_size = oparams.dpi_size; 341 INIT_LIST_HEAD(&ctx->mm_head); 342 mutex_init(&ctx->mm_list_lock); 343 344 memset(&uresp, 0, sizeof(uresp)); 345 346 uresp.dpm_enabled = dev->user_dpm_enabled; 347 uresp.wids_enabled = 1; 348 uresp.wid_count = oparams.wid_count; 349 uresp.db_pa = ctx->dpi_phys_addr; 350 uresp.db_size = ctx->dpi_size; 351 uresp.max_send_wr = dev->attr.max_sqe; 352 uresp.max_recv_wr = dev->attr.max_rqe; 353 uresp.max_srq_wr = dev->attr.max_srq_wr; 354 uresp.sges_per_send_wr = QEDR_MAX_SQE_ELEMENTS_PER_SQE; 355 uresp.sges_per_recv_wr = QEDR_MAX_RQE_ELEMENTS_PER_RQE; 356 uresp.sges_per_srq_wr = dev->attr.max_srq_sge; 357 uresp.max_cqes = QEDR_MAX_CQES; 358 359 rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp)); 360 if (rc) 361 goto err; 362 363 ctx->dev = dev; 364 365 rc = qedr_add_mmap(ctx, ctx->dpi_phys_addr, ctx->dpi_size); 366 if (rc) 367 goto err; 368 369 DP_DEBUG(dev, QEDR_MSG_INIT, "Allocating user context %p\n", 370 &ctx->ibucontext); 371 return &ctx->ibucontext; 372 373 err: 374 kfree(ctx); 375 return ERR_PTR(rc); 376 } 377 378 int qedr_dealloc_ucontext(struct ib_ucontext *ibctx) 379 { 380 struct qedr_ucontext *uctx = get_qedr_ucontext(ibctx); 381 struct qedr_mm *mm, *tmp; 382 int status = 0; 383 384 DP_DEBUG(uctx->dev, QEDR_MSG_INIT, "Deallocating user context %p\n", 385 uctx); 386 uctx->dev->ops->rdma_remove_user(uctx->dev->rdma_ctx, uctx->dpi); 387 388 list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) { 389 DP_DEBUG(uctx->dev, QEDR_MSG_MISC, 390 "deleted (addr=0x%llx,len=0x%lx) for ctx=%p\n", 391 mm->key.phy_addr, mm->key.len, uctx); 392 list_del(&mm->entry); 393 kfree(mm); 394 } 395 396 kfree(uctx); 397 return status; 398 } 399 400 int qedr_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) 401 { 402 struct qedr_ucontext *ucontext = get_qedr_ucontext(context); 403 struct qedr_dev *dev = get_qedr_dev(context->device); 404 unsigned long phys_addr = vma->vm_pgoff << PAGE_SHIFT; 405 unsigned long len = (vma->vm_end - vma->vm_start); 406 unsigned long dpi_start; 407 408 dpi_start = dev->db_phys_addr + (ucontext->dpi * ucontext->dpi_size); 409 410 DP_DEBUG(dev, QEDR_MSG_INIT, 411 "mmap invoked with vm_start=0x%pK, vm_end=0x%pK,vm_pgoff=0x%pK; dpi_start=0x%pK dpi_size=0x%x\n", 412 (void *)vma->vm_start, (void *)vma->vm_end, 413 (void *)vma->vm_pgoff, (void *)dpi_start, ucontext->dpi_size); 414 415 if ((vma->vm_start & (PAGE_SIZE - 1)) || (len & (PAGE_SIZE - 1))) { 416 DP_ERR(dev, 417 "failed mmap, addresses must be page aligned: start=0x%pK, end=0x%pK\n", 418 (void *)vma->vm_start, (void *)vma->vm_end); 419 return -EINVAL; 420 } 421 422 if (!qedr_search_mmap(ucontext, phys_addr, len)) { 423 DP_ERR(dev, "failed mmap, vm_pgoff=0x%lx is not authorized\n", 424 vma->vm_pgoff); 425 return -EINVAL; 426 } 427 428 if (phys_addr < dpi_start || 429 ((phys_addr + len) > (dpi_start + ucontext->dpi_size))) { 430 DP_ERR(dev, 431 "failed mmap, pages are outside of dpi; page address=0x%pK, dpi_start=0x%pK, dpi_size=0x%x\n", 432 (void *)phys_addr, (void *)dpi_start, 433 ucontext->dpi_size); 434 return -EINVAL; 435 } 436 437 if (vma->vm_flags & VM_READ) { 438 DP_ERR(dev, "failed mmap, cannot map doorbell bar for read\n"); 439 return -EINVAL; 440 } 441 442 vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); 443 return 
io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, len, 444 vma->vm_page_prot); 445 } 446 447 struct ib_pd *qedr_alloc_pd(struct ib_device *ibdev, 448 struct ib_ucontext *context, struct ib_udata *udata) 449 { 450 struct qedr_dev *dev = get_qedr_dev(ibdev); 451 struct qedr_pd *pd; 452 u16 pd_id; 453 int rc; 454 455 DP_DEBUG(dev, QEDR_MSG_INIT, "Function called from: %s\n", 456 (udata && context) ? "User Lib" : "Kernel"); 457 458 if (!dev->rdma_ctx) { 459 DP_ERR(dev, "invalid RDMA context\n"); 460 return ERR_PTR(-EINVAL); 461 } 462 463 pd = kzalloc(sizeof(*pd), GFP_KERNEL); 464 if (!pd) 465 return ERR_PTR(-ENOMEM); 466 467 rc = dev->ops->rdma_alloc_pd(dev->rdma_ctx, &pd_id); 468 if (rc) 469 goto err; 470 471 pd->pd_id = pd_id; 472 473 if (udata && context) { 474 struct qedr_alloc_pd_uresp uresp = { 475 .pd_id = pd_id, 476 }; 477 478 rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp)); 479 if (rc) { 480 DP_ERR(dev, "copy error pd_id=0x%x.\n", pd_id); 481 dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd_id); 482 goto err; 483 } 484 485 pd->uctx = get_qedr_ucontext(context); 486 pd->uctx->pd = pd; 487 } 488 489 return &pd->ibpd; 490 491 err: 492 kfree(pd); 493 return ERR_PTR(rc); 494 } 495 496 int qedr_dealloc_pd(struct ib_pd *ibpd) 497 { 498 struct qedr_dev *dev = get_qedr_dev(ibpd->device); 499 struct qedr_pd *pd = get_qedr_pd(ibpd); 500 501 if (!pd) { 502 pr_err("Invalid PD received in dealloc_pd\n"); 503 return -EINVAL; 504 } 505 506 DP_DEBUG(dev, QEDR_MSG_INIT, "Deallocating PD %d\n", pd->pd_id); 507 dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd->pd_id); 508 509 kfree(pd); 510 511 return 0; 512 } 513 514 static void qedr_free_pbl(struct qedr_dev *dev, 515 struct qedr_pbl_info *pbl_info, struct qedr_pbl *pbl) 516 { 517 struct pci_dev *pdev = dev->pdev; 518 int i; 519 520 for (i = 0; i < pbl_info->num_pbls; i++) { 521 if (!pbl[i].va) 522 continue; 523 dma_free_coherent(&pdev->dev, pbl_info->pbl_size, 524 pbl[i].va, pbl[i].pa); 525 } 526 527 kfree(pbl); 528 } 529 530 #define MIN_FW_PBL_PAGE_SIZE (4 * 1024) 531 #define MAX_FW_PBL_PAGE_SIZE (64 * 1024) 532 533 #define NUM_PBES_ON_PAGE(_page_size) (_page_size / sizeof(u64)) 534 #define MAX_PBES_ON_PAGE NUM_PBES_ON_PAGE(MAX_FW_PBL_PAGE_SIZE) 535 #define MAX_PBES_TWO_LAYER (MAX_PBES_ON_PAGE * MAX_PBES_ON_PAGE) 536 537 static struct qedr_pbl *qedr_alloc_pbl_tbl(struct qedr_dev *dev, 538 struct qedr_pbl_info *pbl_info, 539 gfp_t flags) 540 { 541 struct pci_dev *pdev = dev->pdev; 542 struct qedr_pbl *pbl_table; 543 dma_addr_t *pbl_main_tbl; 544 dma_addr_t pa; 545 void *va; 546 int i; 547 548 pbl_table = kcalloc(pbl_info->num_pbls, sizeof(*pbl_table), flags); 549 if (!pbl_table) 550 return ERR_PTR(-ENOMEM); 551 552 for (i = 0; i < pbl_info->num_pbls; i++) { 553 va = dma_zalloc_coherent(&pdev->dev, pbl_info->pbl_size, 554 &pa, flags); 555 if (!va) 556 goto err; 557 558 pbl_table[i].va = va; 559 pbl_table[i].pa = pa; 560 } 561 562 /* Two-Layer PBLs, if we have more than one pbl we need to initialize 563 * the first one with physical pointers to all of the rest 564 */ 565 pbl_main_tbl = (dma_addr_t *)pbl_table[0].va; 566 for (i = 0; i < pbl_info->num_pbls - 1; i++) 567 pbl_main_tbl[i] = pbl_table[i + 1].pa; 568 569 return pbl_table; 570 571 err: 572 for (i--; i >= 0; i--) 573 dma_free_coherent(&pdev->dev, pbl_info->pbl_size, 574 pbl_table[i].va, pbl_table[i].pa); 575 576 qedr_free_pbl(dev, pbl_info, pbl_table); 577 578 return ERR_PTR(-ENOMEM); 579 } 580 581 static int qedr_prepare_pbl_tbl(struct qedr_dev *dev, 582 struct qedr_pbl_info *pbl_info, 583 
u32 num_pbes, int two_layer_capable) 584 { 585 u32 pbl_capacity; 586 u32 pbl_size; 587 u32 num_pbls; 588 589 if ((num_pbes > MAX_PBES_ON_PAGE) && two_layer_capable) { 590 if (num_pbes > MAX_PBES_TWO_LAYER) { 591 DP_ERR(dev, "prepare pbl table: too many pages %d\n", 592 num_pbes); 593 return -EINVAL; 594 } 595 596 /* calculate required pbl page size */ 597 pbl_size = MIN_FW_PBL_PAGE_SIZE; 598 pbl_capacity = NUM_PBES_ON_PAGE(pbl_size) * 599 NUM_PBES_ON_PAGE(pbl_size); 600 601 while (pbl_capacity < num_pbes) { 602 pbl_size *= 2; 603 pbl_capacity = pbl_size / sizeof(u64); 604 pbl_capacity = pbl_capacity * pbl_capacity; 605 } 606 607 num_pbls = DIV_ROUND_UP(num_pbes, NUM_PBES_ON_PAGE(pbl_size)); 608 num_pbls++; /* One for the layer0 ( points to the pbls) */ 609 pbl_info->two_layered = true; 610 } else { 611 /* One layered PBL */ 612 num_pbls = 1; 613 pbl_size = max_t(u32, MIN_FW_PBL_PAGE_SIZE, 614 roundup_pow_of_two((num_pbes * sizeof(u64)))); 615 pbl_info->two_layered = false; 616 } 617 618 pbl_info->num_pbls = num_pbls; 619 pbl_info->pbl_size = pbl_size; 620 pbl_info->num_pbes = num_pbes; 621 622 DP_DEBUG(dev, QEDR_MSG_MR, 623 "prepare pbl table: num_pbes=%d, num_pbls=%d, pbl_size=%d\n", 624 pbl_info->num_pbes, pbl_info->num_pbls, pbl_info->pbl_size); 625 626 return 0; 627 } 628 629 static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem, 630 struct qedr_pbl *pbl, 631 struct qedr_pbl_info *pbl_info, u32 pg_shift) 632 { 633 int shift, pg_cnt, pages, pbe_cnt, total_num_pbes = 0; 634 u32 fw_pg_cnt, fw_pg_per_umem_pg; 635 struct qedr_pbl *pbl_tbl; 636 struct scatterlist *sg; 637 struct regpair *pbe; 638 u64 pg_addr; 639 int entry; 640 641 if (!pbl_info->num_pbes) 642 return; 643 644 /* If we have a two layered pbl, the first pbl points to the rest 645 * of the pbls and the first entry lays on the second pbl in the table 646 */ 647 if (pbl_info->two_layered) 648 pbl_tbl = &pbl[1]; 649 else 650 pbl_tbl = pbl; 651 652 pbe = (struct regpair *)pbl_tbl->va; 653 if (!pbe) { 654 DP_ERR(dev, "cannot populate PBL due to a NULL PBE\n"); 655 return; 656 } 657 658 pbe_cnt = 0; 659 660 shift = umem->page_shift; 661 662 fw_pg_per_umem_pg = BIT(umem->page_shift - pg_shift); 663 664 for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { 665 pages = sg_dma_len(sg) >> shift; 666 pg_addr = sg_dma_address(sg); 667 for (pg_cnt = 0; pg_cnt < pages; pg_cnt++) { 668 for (fw_pg_cnt = 0; fw_pg_cnt < fw_pg_per_umem_pg;) { 669 pbe->lo = cpu_to_le32(pg_addr); 670 pbe->hi = cpu_to_le32(upper_32_bits(pg_addr)); 671 672 pg_addr += BIT(pg_shift); 673 pbe_cnt++; 674 total_num_pbes++; 675 pbe++; 676 677 if (total_num_pbes == pbl_info->num_pbes) 678 return; 679 680 /* If the given pbl is full storing the pbes, 681 * move to next pbl. 
682 */ 683 if (pbe_cnt == 684 (pbl_info->pbl_size / sizeof(u64))) { 685 pbl_tbl++; 686 pbe = (struct regpair *)pbl_tbl->va; 687 pbe_cnt = 0; 688 } 689 690 fw_pg_cnt++; 691 } 692 } 693 } 694 } 695 696 static int qedr_copy_cq_uresp(struct qedr_dev *dev, 697 struct qedr_cq *cq, struct ib_udata *udata) 698 { 699 struct qedr_create_cq_uresp uresp; 700 int rc; 701 702 memset(&uresp, 0, sizeof(uresp)); 703 704 uresp.db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT); 705 uresp.icid = cq->icid; 706 707 rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp)); 708 if (rc) 709 DP_ERR(dev, "copy error cqid=0x%x.\n", cq->icid); 710 711 return rc; 712 } 713 714 static void consume_cqe(struct qedr_cq *cq) 715 { 716 if (cq->latest_cqe == cq->toggle_cqe) 717 cq->pbl_toggle ^= RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK; 718 719 cq->latest_cqe = qed_chain_consume(&cq->pbl); 720 } 721 722 static inline int qedr_align_cq_entries(int entries) 723 { 724 u64 size, aligned_size; 725 726 /* We allocate an extra entry that we don't report to the FW. */ 727 size = (entries + 1) * QEDR_CQE_SIZE; 728 aligned_size = ALIGN(size, PAGE_SIZE); 729 730 return aligned_size / QEDR_CQE_SIZE; 731 } 732 733 static inline int qedr_init_user_queue(struct ib_ucontext *ib_ctx, 734 struct qedr_dev *dev, 735 struct qedr_userq *q, 736 u64 buf_addr, size_t buf_len, 737 int access, int dmasync, 738 int alloc_and_init) 739 { 740 u32 fw_pages; 741 int rc; 742 743 q->buf_addr = buf_addr; 744 q->buf_len = buf_len; 745 q->umem = ib_umem_get(ib_ctx, q->buf_addr, q->buf_len, access, dmasync); 746 if (IS_ERR(q->umem)) { 747 DP_ERR(dev, "create user queue: failed ib_umem_get, got %ld\n", 748 PTR_ERR(q->umem)); 749 return PTR_ERR(q->umem); 750 } 751 752 fw_pages = ib_umem_page_count(q->umem) << 753 (q->umem->page_shift - FW_PAGE_SHIFT); 754 755 rc = qedr_prepare_pbl_tbl(dev, &q->pbl_info, fw_pages, 0); 756 if (rc) 757 goto err0; 758 759 if (alloc_and_init) { 760 q->pbl_tbl = qedr_alloc_pbl_tbl(dev, &q->pbl_info, GFP_KERNEL); 761 if (IS_ERR(q->pbl_tbl)) { 762 rc = PTR_ERR(q->pbl_tbl); 763 goto err0; 764 } 765 qedr_populate_pbls(dev, q->umem, q->pbl_tbl, &q->pbl_info, 766 FW_PAGE_SHIFT); 767 } else { 768 q->pbl_tbl = kzalloc(sizeof(*q->pbl_tbl), GFP_KERNEL); 769 if (!q->pbl_tbl) { 770 rc = -ENOMEM; 771 goto err0; 772 } 773 } 774 775 return 0; 776 777 err0: 778 ib_umem_release(q->umem); 779 q->umem = NULL; 780 781 return rc; 782 } 783 784 static inline void qedr_init_cq_params(struct qedr_cq *cq, 785 struct qedr_ucontext *ctx, 786 struct qedr_dev *dev, int vector, 787 int chain_entries, int page_cnt, 788 u64 pbl_ptr, 789 struct qed_rdma_create_cq_in_params 790 *params) 791 { 792 memset(params, 0, sizeof(*params)); 793 params->cq_handle_hi = upper_32_bits((uintptr_t)cq); 794 params->cq_handle_lo = lower_32_bits((uintptr_t)cq); 795 params->cnq_id = vector; 796 params->cq_size = chain_entries - 1; 797 params->dpi = (ctx) ? 
ctx->dpi : dev->dpi; 798 params->pbl_num_pages = page_cnt; 799 params->pbl_ptr = pbl_ptr; 800 params->pbl_two_level = 0; 801 } 802 803 static void doorbell_cq(struct qedr_cq *cq, u32 cons, u8 flags) 804 { 805 cq->db.data.agg_flags = flags; 806 cq->db.data.value = cpu_to_le32(cons); 807 writeq(cq->db.raw, cq->db_addr); 808 809 /* Make sure write would stick */ 810 mmiowb(); 811 } 812 813 int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) 814 { 815 struct qedr_cq *cq = get_qedr_cq(ibcq); 816 unsigned long sflags; 817 struct qedr_dev *dev; 818 819 dev = get_qedr_dev(ibcq->device); 820 821 if (cq->destroyed) { 822 DP_ERR(dev, 823 "warning: arm was invoked after destroy for cq %p (icid=%d)\n", 824 cq, cq->icid); 825 return -EINVAL; 826 } 827 828 829 if (cq->cq_type == QEDR_CQ_TYPE_GSI) 830 return 0; 831 832 spin_lock_irqsave(&cq->cq_lock, sflags); 833 834 cq->arm_flags = 0; 835 836 if (flags & IB_CQ_SOLICITED) 837 cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_SE_CF_CMD; 838 839 if (flags & IB_CQ_NEXT_COMP) 840 cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_CF_CMD; 841 842 doorbell_cq(cq, cq->cq_cons - 1, cq->arm_flags); 843 844 spin_unlock_irqrestore(&cq->cq_lock, sflags); 845 846 return 0; 847 } 848 849 struct ib_cq *qedr_create_cq(struct ib_device *ibdev, 850 const struct ib_cq_init_attr *attr, 851 struct ib_ucontext *ib_ctx, struct ib_udata *udata) 852 { 853 struct qedr_ucontext *ctx = get_qedr_ucontext(ib_ctx); 854 struct qed_rdma_destroy_cq_out_params destroy_oparams; 855 struct qed_rdma_destroy_cq_in_params destroy_iparams; 856 struct qedr_dev *dev = get_qedr_dev(ibdev); 857 struct qed_rdma_create_cq_in_params params; 858 struct qedr_create_cq_ureq ureq; 859 int vector = attr->comp_vector; 860 int entries = attr->cqe; 861 struct qedr_cq *cq; 862 int chain_entries; 863 int page_cnt; 864 u64 pbl_ptr; 865 u16 icid; 866 int rc; 867 868 DP_DEBUG(dev, QEDR_MSG_INIT, 869 "create_cq: called from %s. entries=%d, vector=%d\n", 870 udata ? "User Lib" : "Kernel", entries, vector); 871 872 if (entries > QEDR_MAX_CQES) { 873 DP_ERR(dev, 874 "create cq: the number of entries %d is too high. 
Must be equal or below %d.\n", 875 entries, QEDR_MAX_CQES); 876 return ERR_PTR(-EINVAL); 877 } 878 879 chain_entries = qedr_align_cq_entries(entries); 880 chain_entries = min_t(int, chain_entries, QEDR_MAX_CQES); 881 882 cq = kzalloc(sizeof(*cq), GFP_KERNEL); 883 if (!cq) 884 return ERR_PTR(-ENOMEM); 885 886 if (udata) { 887 memset(&ureq, 0, sizeof(ureq)); 888 if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) { 889 DP_ERR(dev, 890 "create cq: problem copying data from user space\n"); 891 goto err0; 892 } 893 894 if (!ureq.len) { 895 DP_ERR(dev, 896 "create cq: cannot create a cq with 0 entries\n"); 897 goto err0; 898 } 899 900 cq->cq_type = QEDR_CQ_TYPE_USER; 901 902 rc = qedr_init_user_queue(ib_ctx, dev, &cq->q, ureq.addr, 903 ureq.len, IB_ACCESS_LOCAL_WRITE, 904 1, 1); 905 if (rc) 906 goto err0; 907 908 pbl_ptr = cq->q.pbl_tbl->pa; 909 page_cnt = cq->q.pbl_info.num_pbes; 910 911 cq->ibcq.cqe = chain_entries; 912 } else { 913 cq->cq_type = QEDR_CQ_TYPE_KERNEL; 914 915 rc = dev->ops->common->chain_alloc(dev->cdev, 916 QED_CHAIN_USE_TO_CONSUME, 917 QED_CHAIN_MODE_PBL, 918 QED_CHAIN_CNT_TYPE_U32, 919 chain_entries, 920 sizeof(union rdma_cqe), 921 &cq->pbl, NULL); 922 if (rc) 923 goto err1; 924 925 page_cnt = qed_chain_get_page_cnt(&cq->pbl); 926 pbl_ptr = qed_chain_get_pbl_phys(&cq->pbl); 927 cq->ibcq.cqe = cq->pbl.capacity; 928 } 929 930 qedr_init_cq_params(cq, ctx, dev, vector, chain_entries, page_cnt, 931 pbl_ptr, ¶ms); 932 933 rc = dev->ops->rdma_create_cq(dev->rdma_ctx, ¶ms, &icid); 934 if (rc) 935 goto err2; 936 937 cq->icid = icid; 938 cq->sig = QEDR_CQ_MAGIC_NUMBER; 939 spin_lock_init(&cq->cq_lock); 940 941 if (ib_ctx) { 942 rc = qedr_copy_cq_uresp(dev, cq, udata); 943 if (rc) 944 goto err3; 945 } else { 946 /* Generate doorbell address. */ 947 cq->db_addr = dev->db_addr + 948 DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT); 949 cq->db.data.icid = cq->icid; 950 cq->db.data.params = DB_AGG_CMD_SET << 951 RDMA_PWM_VAL32_DATA_AGG_CMD_SHIFT; 952 953 /* point to the very last element, passing it we will toggle */ 954 cq->toggle_cqe = qed_chain_get_last_elem(&cq->pbl); 955 cq->pbl_toggle = RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK; 956 cq->latest_cqe = NULL; 957 consume_cqe(cq); 958 cq->cq_cons = qed_chain_get_cons_idx_u32(&cq->pbl); 959 } 960 961 DP_DEBUG(dev, QEDR_MSG_CQ, 962 "create cq: icid=0x%0x, addr=%p, size(entries)=0x%0x\n", 963 cq->icid, cq, params.cq_size); 964 965 return &cq->ibcq; 966 967 err3: 968 destroy_iparams.icid = cq->icid; 969 dev->ops->rdma_destroy_cq(dev->rdma_ctx, &destroy_iparams, 970 &destroy_oparams); 971 err2: 972 if (udata) 973 qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl); 974 else 975 dev->ops->common->chain_free(dev->cdev, &cq->pbl); 976 err1: 977 if (udata) 978 ib_umem_release(cq->q.umem); 979 err0: 980 kfree(cq); 981 return ERR_PTR(-EINVAL); 982 } 983 984 int qedr_resize_cq(struct ib_cq *ibcq, int new_cnt, struct ib_udata *udata) 985 { 986 struct qedr_dev *dev = get_qedr_dev(ibcq->device); 987 struct qedr_cq *cq = get_qedr_cq(ibcq); 988 989 DP_ERR(dev, "cq %p RESIZE NOT SUPPORTED\n", cq); 990 991 return 0; 992 } 993 994 #define QEDR_DESTROY_CQ_MAX_ITERATIONS (10) 995 #define QEDR_DESTROY_CQ_ITER_DURATION (10) 996 997 int qedr_destroy_cq(struct ib_cq *ibcq) 998 { 999 struct qedr_dev *dev = get_qedr_dev(ibcq->device); 1000 struct qed_rdma_destroy_cq_out_params oparams; 1001 struct qed_rdma_destroy_cq_in_params iparams; 1002 struct qedr_cq *cq = get_qedr_cq(ibcq); 1003 int iter; 1004 int rc; 1005 1006 DP_DEBUG(dev, QEDR_MSG_CQ, "destroy cq %p 
(icid=%d)\n", cq, cq->icid); 1007 1008 cq->destroyed = 1; 1009 1010 /* GSIs CQs are handled by driver, so they don't exist in the FW */ 1011 if (cq->cq_type == QEDR_CQ_TYPE_GSI) 1012 goto done; 1013 1014 iparams.icid = cq->icid; 1015 rc = dev->ops->rdma_destroy_cq(dev->rdma_ctx, &iparams, &oparams); 1016 if (rc) 1017 return rc; 1018 1019 dev->ops->common->chain_free(dev->cdev, &cq->pbl); 1020 1021 if (ibcq->uobject && ibcq->uobject->context) { 1022 qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl); 1023 ib_umem_release(cq->q.umem); 1024 } 1025 1026 /* We don't want the IRQ handler to handle a non-existing CQ so we 1027 * wait until all CNQ interrupts, if any, are received. This will always 1028 * happen and will always happen very fast. If not, then a serious error 1029 * has occured. That is why we can use a long delay. 1030 * We spin for a short time so we don’t lose time on context switching 1031 * in case all the completions are handled in that span. Otherwise 1032 * we sleep for a while and check again. Since the CNQ may be 1033 * associated with (only) the current CPU we use msleep to allow the 1034 * current CPU to be freed. 1035 * The CNQ notification is increased in qedr_irq_handler(). 1036 */ 1037 iter = QEDR_DESTROY_CQ_MAX_ITERATIONS; 1038 while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) { 1039 udelay(QEDR_DESTROY_CQ_ITER_DURATION); 1040 iter--; 1041 } 1042 1043 iter = QEDR_DESTROY_CQ_MAX_ITERATIONS; 1044 while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) { 1045 msleep(QEDR_DESTROY_CQ_ITER_DURATION); 1046 iter--; 1047 } 1048 1049 if (oparams.num_cq_notif != cq->cnq_notif) 1050 goto err; 1051 1052 /* Note that we don't need to have explicit code to wait for the 1053 * completion of the event handler because it is invoked from the EQ. 1054 * Since the destroy CQ ramrod has also been received on the EQ we can 1055 * be certain that there's no event handler in process. 
1056 */ 1057 done: 1058 cq->sig = ~cq->sig; 1059 1060 kfree(cq); 1061 1062 return 0; 1063 1064 err: 1065 DP_ERR(dev, 1066 "CQ %p (icid=%d) not freed, expecting %d ints but got %d ints\n", 1067 cq, cq->icid, oparams.num_cq_notif, cq->cnq_notif); 1068 1069 return -EINVAL; 1070 } 1071 1072 static inline int get_gid_info_from_table(struct ib_qp *ibqp, 1073 struct ib_qp_attr *attr, 1074 int attr_mask, 1075 struct qed_rdma_modify_qp_in_params 1076 *qp_params) 1077 { 1078 enum rdma_network_type nw_type; 1079 struct ib_gid_attr gid_attr; 1080 const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr); 1081 union ib_gid gid; 1082 u32 ipv4_addr; 1083 int rc = 0; 1084 int i; 1085 1086 rc = ib_get_cached_gid(ibqp->device, 1087 rdma_ah_get_port_num(&attr->ah_attr), 1088 grh->sgid_index, &gid, &gid_attr); 1089 if (rc) 1090 return rc; 1091 1092 qp_params->vlan_id = rdma_vlan_dev_vlan_id(gid_attr.ndev); 1093 1094 dev_put(gid_attr.ndev); 1095 nw_type = ib_gid_to_network_type(gid_attr.gid_type, &gid); 1096 switch (nw_type) { 1097 case RDMA_NETWORK_IPV6: 1098 memcpy(&qp_params->sgid.bytes[0], &gid.raw[0], 1099 sizeof(qp_params->sgid)); 1100 memcpy(&qp_params->dgid.bytes[0], 1101 &grh->dgid, 1102 sizeof(qp_params->dgid)); 1103 qp_params->roce_mode = ROCE_V2_IPV6; 1104 SET_FIELD(qp_params->modify_flags, 1105 QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1); 1106 break; 1107 case RDMA_NETWORK_IB: 1108 memcpy(&qp_params->sgid.bytes[0], &gid.raw[0], 1109 sizeof(qp_params->sgid)); 1110 memcpy(&qp_params->dgid.bytes[0], 1111 &grh->dgid, 1112 sizeof(qp_params->dgid)); 1113 qp_params->roce_mode = ROCE_V1; 1114 break; 1115 case RDMA_NETWORK_IPV4: 1116 memset(&qp_params->sgid, 0, sizeof(qp_params->sgid)); 1117 memset(&qp_params->dgid, 0, sizeof(qp_params->dgid)); 1118 ipv4_addr = qedr_get_ipv4_from_gid(gid.raw); 1119 qp_params->sgid.ipv4_addr = ipv4_addr; 1120 ipv4_addr = 1121 qedr_get_ipv4_from_gid(grh->dgid.raw); 1122 qp_params->dgid.ipv4_addr = ipv4_addr; 1123 SET_FIELD(qp_params->modify_flags, 1124 QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1); 1125 qp_params->roce_mode = ROCE_V2_IPV4; 1126 break; 1127 } 1128 1129 for (i = 0; i < 4; i++) { 1130 qp_params->sgid.dwords[i] = ntohl(qp_params->sgid.dwords[i]); 1131 qp_params->dgid.dwords[i] = ntohl(qp_params->dgid.dwords[i]); 1132 } 1133 1134 if (qp_params->vlan_id >= VLAN_CFI_MASK) 1135 qp_params->vlan_id = 0; 1136 1137 return 0; 1138 } 1139 1140 static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev, 1141 struct ib_qp_init_attr *attrs) 1142 { 1143 struct qedr_device_attr *qattr = &dev->attr; 1144 1145 /* QP0... 
attrs->qp_type == IB_QPT_GSI */ 1146 if (attrs->qp_type != IB_QPT_RC && attrs->qp_type != IB_QPT_GSI) { 1147 DP_DEBUG(dev, QEDR_MSG_QP, 1148 "create qp: unsupported qp type=0x%x requested\n", 1149 attrs->qp_type); 1150 return -EINVAL; 1151 } 1152 1153 if (attrs->cap.max_send_wr > qattr->max_sqe) { 1154 DP_ERR(dev, 1155 "create qp: cannot create a SQ with %d elements (max_send_wr=0x%x)\n", 1156 attrs->cap.max_send_wr, qattr->max_sqe); 1157 return -EINVAL; 1158 } 1159 1160 if (attrs->cap.max_inline_data > qattr->max_inline) { 1161 DP_ERR(dev, 1162 "create qp: unsupported inline data size=0x%x requested (max_inline=0x%x)\n", 1163 attrs->cap.max_inline_data, qattr->max_inline); 1164 return -EINVAL; 1165 } 1166 1167 if (attrs->cap.max_send_sge > qattr->max_sge) { 1168 DP_ERR(dev, 1169 "create qp: unsupported send_sge=0x%x requested (max_send_sge=0x%x)\n", 1170 attrs->cap.max_send_sge, qattr->max_sge); 1171 return -EINVAL; 1172 } 1173 1174 if (attrs->cap.max_recv_sge > qattr->max_sge) { 1175 DP_ERR(dev, 1176 "create qp: unsupported recv_sge=0x%x requested (max_recv_sge=0x%x)\n", 1177 attrs->cap.max_recv_sge, qattr->max_sge); 1178 return -EINVAL; 1179 } 1180 1181 /* Unprivileged user space cannot create special QP */ 1182 if (ibpd->uobject && attrs->qp_type == IB_QPT_GSI) { 1183 DP_ERR(dev, 1184 "create qp: userspace can't create special QPs of type=0x%x\n", 1185 attrs->qp_type); 1186 return -EINVAL; 1187 } 1188 1189 return 0; 1190 } 1191 1192 static void qedr_copy_rq_uresp(struct qedr_dev *dev, 1193 struct qedr_create_qp_uresp *uresp, 1194 struct qedr_qp *qp) 1195 { 1196 /* iWARP requires two doorbells per RQ. */ 1197 if (rdma_protocol_iwarp(&dev->ibdev, 1)) { 1198 uresp->rq_db_offset = 1199 DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD); 1200 uresp->rq_db2_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_FLAGS); 1201 } else { 1202 uresp->rq_db_offset = 1203 DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD); 1204 } 1205 1206 uresp->rq_icid = qp->icid; 1207 } 1208 1209 static void qedr_copy_sq_uresp(struct qedr_dev *dev, 1210 struct qedr_create_qp_uresp *uresp, 1211 struct qedr_qp *qp) 1212 { 1213 uresp->sq_db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD); 1214 1215 /* iWARP uses the same cid for rq and sq */ 1216 if (rdma_protocol_iwarp(&dev->ibdev, 1)) 1217 uresp->sq_icid = qp->icid; 1218 else 1219 uresp->sq_icid = qp->icid + 1; 1220 } 1221 1222 static int qedr_copy_qp_uresp(struct qedr_dev *dev, 1223 struct qedr_qp *qp, struct ib_udata *udata) 1224 { 1225 struct qedr_create_qp_uresp uresp; 1226 int rc; 1227 1228 memset(&uresp, 0, sizeof(uresp)); 1229 qedr_copy_sq_uresp(dev, &uresp, qp); 1230 qedr_copy_rq_uresp(dev, &uresp, qp); 1231 1232 uresp.atomic_supported = dev->atomic_cap != IB_ATOMIC_NONE; 1233 uresp.qp_id = qp->qp_id; 1234 1235 rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp)); 1236 if (rc) 1237 DP_ERR(dev, 1238 "create qp: failed a copy to user space with qp icid=0x%x.\n", 1239 qp->icid); 1240 1241 return rc; 1242 } 1243 1244 static void qedr_set_common_qp_params(struct qedr_dev *dev, 1245 struct qedr_qp *qp, 1246 struct qedr_pd *pd, 1247 struct ib_qp_init_attr *attrs) 1248 { 1249 spin_lock_init(&qp->q_lock); 1250 atomic_set(&qp->refcnt, 1); 1251 qp->pd = pd; 1252 qp->qp_type = attrs->qp_type; 1253 qp->max_inline_data = attrs->cap.max_inline_data; 1254 qp->sq.max_sges = attrs->cap.max_send_sge; 1255 qp->state = QED_ROCE_QP_STATE_RESET; 1256 qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? 
true : false; 1257 qp->sq_cq = get_qedr_cq(attrs->send_cq); 1258 qp->rq_cq = get_qedr_cq(attrs->recv_cq); 1259 qp->dev = dev; 1260 qp->rq.max_sges = attrs->cap.max_recv_sge; 1261 1262 DP_DEBUG(dev, QEDR_MSG_QP, 1263 "RQ params:\trq_max_sges = %d, rq_cq_id = %d\n", 1264 qp->rq.max_sges, qp->rq_cq->icid); 1265 DP_DEBUG(dev, QEDR_MSG_QP, 1266 "QP params:\tpd = %d, qp_type = %d, max_inline_data = %d, state = %d, signaled = %d, use_srq=%d\n", 1267 pd->pd_id, qp->qp_type, qp->max_inline_data, 1268 qp->state, qp->signaled, (attrs->srq) ? 1 : 0); 1269 DP_DEBUG(dev, QEDR_MSG_QP, 1270 "SQ params:\tsq_max_sges = %d, sq_cq_id = %d\n", 1271 qp->sq.max_sges, qp->sq_cq->icid); 1272 } 1273 1274 static void qedr_set_roce_db_info(struct qedr_dev *dev, struct qedr_qp *qp) 1275 { 1276 qp->sq.db = dev->db_addr + 1277 DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD); 1278 qp->sq.db_data.data.icid = qp->icid + 1; 1279 qp->rq.db = dev->db_addr + 1280 DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD); 1281 qp->rq.db_data.data.icid = qp->icid; 1282 } 1283 1284 static inline void 1285 qedr_init_common_qp_in_params(struct qedr_dev *dev, 1286 struct qedr_pd *pd, 1287 struct qedr_qp *qp, 1288 struct ib_qp_init_attr *attrs, 1289 bool fmr_and_reserved_lkey, 1290 struct qed_rdma_create_qp_in_params *params) 1291 { 1292 /* QP handle to be written in an async event */ 1293 params->qp_handle_async_lo = lower_32_bits((uintptr_t) qp); 1294 params->qp_handle_async_hi = upper_32_bits((uintptr_t) qp); 1295 1296 params->signal_all = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR); 1297 params->fmr_and_reserved_lkey = fmr_and_reserved_lkey; 1298 params->pd = pd->pd_id; 1299 params->dpi = pd->uctx ? pd->uctx->dpi : dev->dpi; 1300 params->sq_cq_id = get_qedr_cq(attrs->send_cq)->icid; 1301 params->stats_queue = 0; 1302 params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid; 1303 params->srq_id = 0; 1304 params->use_srq = false; 1305 } 1306 1307 static inline void qedr_qp_user_print(struct qedr_dev *dev, struct qedr_qp *qp) 1308 { 1309 DP_DEBUG(dev, QEDR_MSG_QP, "create qp: successfully created user QP. " 1310 "qp=%p. " 1311 "sq_addr=0x%llx, " 1312 "sq_len=%zd, " 1313 "rq_addr=0x%llx, " 1314 "rq_len=%zd" 1315 "\n", 1316 qp, 1317 qp->usq.buf_addr, 1318 qp->usq.buf_len, qp->urq.buf_addr, qp->urq.buf_len); 1319 } 1320 1321 static int qedr_idr_add(struct qedr_dev *dev, void *ptr, u32 id) 1322 { 1323 int rc; 1324 1325 if (!rdma_protocol_iwarp(&dev->ibdev, 1)) 1326 return 0; 1327 1328 idr_preload(GFP_KERNEL); 1329 spin_lock_irq(&dev->idr_lock); 1330 1331 rc = idr_alloc(&dev->qpidr, ptr, id, id + 1, GFP_ATOMIC); 1332 1333 spin_unlock_irq(&dev->idr_lock); 1334 idr_preload_end(); 1335 1336 return rc < 0 ? 
rc : 0; 1337 } 1338 1339 static void qedr_idr_remove(struct qedr_dev *dev, u32 id) 1340 { 1341 if (!rdma_protocol_iwarp(&dev->ibdev, 1)) 1342 return; 1343 1344 spin_lock_irq(&dev->idr_lock); 1345 idr_remove(&dev->qpidr, id); 1346 spin_unlock_irq(&dev->idr_lock); 1347 } 1348 1349 static inline void 1350 qedr_iwarp_populate_user_qp(struct qedr_dev *dev, 1351 struct qedr_qp *qp, 1352 struct qed_rdma_create_qp_out_params *out_params) 1353 { 1354 qp->usq.pbl_tbl->va = out_params->sq_pbl_virt; 1355 qp->usq.pbl_tbl->pa = out_params->sq_pbl_phys; 1356 1357 qedr_populate_pbls(dev, qp->usq.umem, qp->usq.pbl_tbl, 1358 &qp->usq.pbl_info, FW_PAGE_SHIFT); 1359 1360 qp->urq.pbl_tbl->va = out_params->rq_pbl_virt; 1361 qp->urq.pbl_tbl->pa = out_params->rq_pbl_phys; 1362 1363 qedr_populate_pbls(dev, qp->urq.umem, qp->urq.pbl_tbl, 1364 &qp->urq.pbl_info, FW_PAGE_SHIFT); 1365 } 1366 1367 static void qedr_cleanup_user(struct qedr_dev *dev, struct qedr_qp *qp) 1368 { 1369 if (qp->usq.umem) 1370 ib_umem_release(qp->usq.umem); 1371 qp->usq.umem = NULL; 1372 1373 if (qp->urq.umem) 1374 ib_umem_release(qp->urq.umem); 1375 qp->urq.umem = NULL; 1376 } 1377 1378 static int qedr_create_user_qp(struct qedr_dev *dev, 1379 struct qedr_qp *qp, 1380 struct ib_pd *ibpd, 1381 struct ib_udata *udata, 1382 struct ib_qp_init_attr *attrs) 1383 { 1384 struct qed_rdma_create_qp_in_params in_params; 1385 struct qed_rdma_create_qp_out_params out_params; 1386 struct qedr_pd *pd = get_qedr_pd(ibpd); 1387 struct ib_ucontext *ib_ctx = NULL; 1388 struct qedr_create_qp_ureq ureq; 1389 int alloc_and_init = rdma_protocol_roce(&dev->ibdev, 1); 1390 int rc = -EINVAL; 1391 1392 ib_ctx = ibpd->uobject->context; 1393 1394 memset(&ureq, 0, sizeof(ureq)); 1395 rc = ib_copy_from_udata(&ureq, udata, sizeof(ureq)); 1396 if (rc) { 1397 DP_ERR(dev, "Problem copying data from user space\n"); 1398 return rc; 1399 } 1400 1401 /* SQ - read access only (0), dma sync not required (0) */ 1402 rc = qedr_init_user_queue(ib_ctx, dev, &qp->usq, ureq.sq_addr, 1403 ureq.sq_len, 0, 0, alloc_and_init); 1404 if (rc) 1405 return rc; 1406 1407 /* RQ - read access only (0), dma sync not required (0) */ 1408 rc = qedr_init_user_queue(ib_ctx, dev, &qp->urq, ureq.rq_addr, 1409 ureq.rq_len, 0, 0, alloc_and_init); 1410 if (rc) 1411 return rc; 1412 1413 memset(&in_params, 0, sizeof(in_params)); 1414 qedr_init_common_qp_in_params(dev, pd, qp, attrs, false, &in_params); 1415 in_params.qp_handle_lo = ureq.qp_handle_lo; 1416 in_params.qp_handle_hi = ureq.qp_handle_hi; 1417 in_params.sq_num_pages = qp->usq.pbl_info.num_pbes; 1418 in_params.sq_pbl_ptr = qp->usq.pbl_tbl->pa; 1419 in_params.rq_num_pages = qp->urq.pbl_info.num_pbes; 1420 in_params.rq_pbl_ptr = qp->urq.pbl_tbl->pa; 1421 1422 qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx, 1423 &in_params, &out_params); 1424 1425 if (!qp->qed_qp) { 1426 rc = -ENOMEM; 1427 goto err1; 1428 } 1429 1430 if (rdma_protocol_iwarp(&dev->ibdev, 1)) 1431 qedr_iwarp_populate_user_qp(dev, qp, &out_params); 1432 1433 qp->qp_id = out_params.qp_id; 1434 qp->icid = out_params.icid; 1435 1436 rc = qedr_copy_qp_uresp(dev, qp, udata); 1437 if (rc) 1438 goto err; 1439 1440 qedr_qp_user_print(dev, qp); 1441 1442 return 0; 1443 err: 1444 rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp); 1445 if (rc) 1446 DP_ERR(dev, "create qp: fatal fault. 
rc=%d", rc); 1447 1448 err1: 1449 qedr_cleanup_user(dev, qp); 1450 return rc; 1451 } 1452 1453 static void qedr_set_iwarp_db_info(struct qedr_dev *dev, struct qedr_qp *qp) 1454 { 1455 qp->sq.db = dev->db_addr + 1456 DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD); 1457 qp->sq.db_data.data.icid = qp->icid; 1458 1459 qp->rq.db = dev->db_addr + 1460 DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD); 1461 qp->rq.db_data.data.icid = qp->icid; 1462 qp->rq.iwarp_db2 = dev->db_addr + 1463 DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_FLAGS); 1464 qp->rq.iwarp_db2_data.data.icid = qp->icid; 1465 qp->rq.iwarp_db2_data.data.value = DQ_TCM_IWARP_POST_RQ_CF_CMD; 1466 } 1467 1468 static int 1469 qedr_roce_create_kernel_qp(struct qedr_dev *dev, 1470 struct qedr_qp *qp, 1471 struct qed_rdma_create_qp_in_params *in_params, 1472 u32 n_sq_elems, u32 n_rq_elems) 1473 { 1474 struct qed_rdma_create_qp_out_params out_params; 1475 int rc; 1476 1477 rc = dev->ops->common->chain_alloc(dev->cdev, 1478 QED_CHAIN_USE_TO_PRODUCE, 1479 QED_CHAIN_MODE_PBL, 1480 QED_CHAIN_CNT_TYPE_U32, 1481 n_sq_elems, 1482 QEDR_SQE_ELEMENT_SIZE, 1483 &qp->sq.pbl, NULL); 1484 1485 if (rc) 1486 return rc; 1487 1488 in_params->sq_num_pages = qed_chain_get_page_cnt(&qp->sq.pbl); 1489 in_params->sq_pbl_ptr = qed_chain_get_pbl_phys(&qp->sq.pbl); 1490 1491 rc = dev->ops->common->chain_alloc(dev->cdev, 1492 QED_CHAIN_USE_TO_CONSUME_PRODUCE, 1493 QED_CHAIN_MODE_PBL, 1494 QED_CHAIN_CNT_TYPE_U32, 1495 n_rq_elems, 1496 QEDR_RQE_ELEMENT_SIZE, 1497 &qp->rq.pbl, NULL); 1498 if (rc) 1499 return rc; 1500 1501 in_params->rq_num_pages = qed_chain_get_page_cnt(&qp->rq.pbl); 1502 in_params->rq_pbl_ptr = qed_chain_get_pbl_phys(&qp->rq.pbl); 1503 1504 qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx, 1505 in_params, &out_params); 1506 1507 if (!qp->qed_qp) 1508 return -EINVAL; 1509 1510 qp->qp_id = out_params.qp_id; 1511 qp->icid = out_params.icid; 1512 1513 qedr_set_roce_db_info(dev, qp); 1514 return rc; 1515 } 1516 1517 static int 1518 qedr_iwarp_create_kernel_qp(struct qedr_dev *dev, 1519 struct qedr_qp *qp, 1520 struct qed_rdma_create_qp_in_params *in_params, 1521 u32 n_sq_elems, u32 n_rq_elems) 1522 { 1523 struct qed_rdma_create_qp_out_params out_params; 1524 struct qed_chain_ext_pbl ext_pbl; 1525 int rc; 1526 1527 in_params->sq_num_pages = QED_CHAIN_PAGE_CNT(n_sq_elems, 1528 QEDR_SQE_ELEMENT_SIZE, 1529 QED_CHAIN_MODE_PBL); 1530 in_params->rq_num_pages = QED_CHAIN_PAGE_CNT(n_rq_elems, 1531 QEDR_RQE_ELEMENT_SIZE, 1532 QED_CHAIN_MODE_PBL); 1533 1534 qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx, 1535 in_params, &out_params); 1536 1537 if (!qp->qed_qp) 1538 return -EINVAL; 1539 1540 /* Now we allocate the chain */ 1541 ext_pbl.p_pbl_virt = out_params.sq_pbl_virt; 1542 ext_pbl.p_pbl_phys = out_params.sq_pbl_phys; 1543 1544 rc = dev->ops->common->chain_alloc(dev->cdev, 1545 QED_CHAIN_USE_TO_PRODUCE, 1546 QED_CHAIN_MODE_PBL, 1547 QED_CHAIN_CNT_TYPE_U32, 1548 n_sq_elems, 1549 QEDR_SQE_ELEMENT_SIZE, 1550 &qp->sq.pbl, &ext_pbl); 1551 1552 if (rc) 1553 goto err; 1554 1555 ext_pbl.p_pbl_virt = out_params.rq_pbl_virt; 1556 ext_pbl.p_pbl_phys = out_params.rq_pbl_phys; 1557 1558 rc = dev->ops->common->chain_alloc(dev->cdev, 1559 QED_CHAIN_USE_TO_CONSUME_PRODUCE, 1560 QED_CHAIN_MODE_PBL, 1561 QED_CHAIN_CNT_TYPE_U32, 1562 n_rq_elems, 1563 QEDR_RQE_ELEMENT_SIZE, 1564 &qp->rq.pbl, &ext_pbl); 1565 1566 if (rc) 1567 goto err; 1568 1569 qp->qp_id = out_params.qp_id; 1570 qp->icid = out_params.icid; 1571 1572 qedr_set_iwarp_db_info(dev, qp); 1573 return rc; 1574 1575 err: 1576 
	dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);

	return rc;
}

static void qedr_cleanup_kernel(struct qedr_dev *dev, struct qedr_qp *qp)
{
	dev->ops->common->chain_free(dev->cdev, &qp->sq.pbl);
	kfree(qp->wqe_wr_id);

	dev->ops->common->chain_free(dev->cdev, &qp->rq.pbl);
	kfree(qp->rqe_wr_id);
}

static int qedr_create_kernel_qp(struct qedr_dev *dev,
				 struct qedr_qp *qp,
				 struct ib_pd *ibpd,
				 struct ib_qp_init_attr *attrs)
{
	struct qed_rdma_create_qp_in_params in_params;
	struct qedr_pd *pd = get_qedr_pd(ibpd);
	int rc = -EINVAL;
	u32 n_rq_elems;
	u32 n_sq_elems;
	u32 n_sq_entries;

	memset(&in_params, 0, sizeof(in_params));

	/* A single work request may take up to QEDR_MAX_SQ_WQE_SIZE elements in
	 * the ring. The ring should allow at least a single WR, even if the
	 * user requested none, due to allocation issues.
	 * We should add an extra WR since the prod and cons indices of
	 * wqe_wr_id are managed in such a way that the WQ is considered full
	 * when (prod+1)%max_wr==cons. We currently don't do that because we
	 * double the number of entries due to an iSER issue that pushes far
	 * more WRs than indicated. If we declined its ib_post_send() we would
	 * get error prints in dmesg that we'd like to avoid.
	 */
	qp->sq.max_wr = min_t(u32, attrs->cap.max_send_wr * dev->wq_multiplier,
			      dev->attr.max_sqe);

	qp->wqe_wr_id = kcalloc(qp->sq.max_wr, sizeof(*qp->wqe_wr_id),
				GFP_KERNEL);
	if (!qp->wqe_wr_id) {
		DP_ERR(dev, "create qp: failed SQ shadow memory allocation\n");
		return -ENOMEM;
	}

	/* QP handle to be written in CQE */
	in_params.qp_handle_lo = lower_32_bits((uintptr_t) qp);
	in_params.qp_handle_hi = upper_32_bits((uintptr_t) qp);

	/* A single work request may take up to QEDR_MAX_RQ_WQE_SIZE elements in
	 * the ring. The ring should allow at least a single WR, even if the
	 * user requested none, due to allocation issues.
	 */
	qp->rq.max_wr = (u16) max_t(u32, attrs->cap.max_recv_wr, 1);

	/* Allocate driver internal RQ array */
	qp->rqe_wr_id = kcalloc(qp->rq.max_wr, sizeof(*qp->rqe_wr_id),
				GFP_KERNEL);
	if (!qp->rqe_wr_id) {
		DP_ERR(dev,
		       "create qp: failed RQ shadow memory allocation\n");
		kfree(qp->wqe_wr_id);
		return -ENOMEM;
	}

	qedr_init_common_qp_in_params(dev, pd, qp, attrs, true, &in_params);

	n_sq_entries = attrs->cap.max_send_wr;
	n_sq_entries = min_t(u32, n_sq_entries, dev->attr.max_sqe);
	n_sq_entries = max_t(u32, n_sq_entries, 1);
	n_sq_elems = n_sq_entries * QEDR_MAX_SQE_ELEMENTS_PER_SQE;

	n_rq_elems = qp->rq.max_wr * QEDR_MAX_RQE_ELEMENTS_PER_RQE;

	if (rdma_protocol_iwarp(&dev->ibdev, 1))
		rc = qedr_iwarp_create_kernel_qp(dev, qp, &in_params,
						 n_sq_elems, n_rq_elems);
	else
		rc = qedr_roce_create_kernel_qp(dev, qp, &in_params,
						n_sq_elems, n_rq_elems);
	if (rc)
		qedr_cleanup_kernel(dev, qp);

	return rc;
}

struct ib_qp *qedr_create_qp(struct ib_pd *ibpd,
			     struct ib_qp_init_attr *attrs,
			     struct ib_udata *udata)
{
	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
	struct qedr_pd *pd = get_qedr_pd(ibpd);
	struct qedr_qp *qp;
	struct ib_qp *ibqp;
	int rc = 0;

	DP_DEBUG(dev, QEDR_MSG_QP, "create qp: called from %s, pd=%p\n",
		 udata ?
"user library" : "kernel", pd); 1677 1678 rc = qedr_check_qp_attrs(ibpd, dev, attrs); 1679 if (rc) 1680 return ERR_PTR(rc); 1681 1682 if (attrs->srq) 1683 return ERR_PTR(-EINVAL); 1684 1685 DP_DEBUG(dev, QEDR_MSG_QP, 1686 "create qp: called from %s, event_handler=%p, eepd=%p sq_cq=%p, sq_icid=%d, rq_cq=%p, rq_icid=%d\n", 1687 udata ? "user library" : "kernel", attrs->event_handler, pd, 1688 get_qedr_cq(attrs->send_cq), 1689 get_qedr_cq(attrs->send_cq)->icid, 1690 get_qedr_cq(attrs->recv_cq), 1691 get_qedr_cq(attrs->recv_cq)->icid); 1692 1693 qp = kzalloc(sizeof(*qp), GFP_KERNEL); 1694 if (!qp) { 1695 DP_ERR(dev, "create qp: failed allocating memory\n"); 1696 return ERR_PTR(-ENOMEM); 1697 } 1698 1699 qedr_set_common_qp_params(dev, qp, pd, attrs); 1700 1701 if (attrs->qp_type == IB_QPT_GSI) { 1702 ibqp = qedr_create_gsi_qp(dev, attrs, qp); 1703 if (IS_ERR(ibqp)) 1704 kfree(qp); 1705 return ibqp; 1706 } 1707 1708 if (udata) 1709 rc = qedr_create_user_qp(dev, qp, ibpd, udata, attrs); 1710 else 1711 rc = qedr_create_kernel_qp(dev, qp, ibpd, attrs); 1712 1713 if (rc) 1714 goto err; 1715 1716 qp->ibqp.qp_num = qp->qp_id; 1717 1718 rc = qedr_idr_add(dev, qp, qp->qp_id); 1719 if (rc) 1720 goto err; 1721 1722 return &qp->ibqp; 1723 1724 err: 1725 kfree(qp); 1726 1727 return ERR_PTR(-EFAULT); 1728 } 1729 1730 static enum ib_qp_state qedr_get_ibqp_state(enum qed_roce_qp_state qp_state) 1731 { 1732 switch (qp_state) { 1733 case QED_ROCE_QP_STATE_RESET: 1734 return IB_QPS_RESET; 1735 case QED_ROCE_QP_STATE_INIT: 1736 return IB_QPS_INIT; 1737 case QED_ROCE_QP_STATE_RTR: 1738 return IB_QPS_RTR; 1739 case QED_ROCE_QP_STATE_RTS: 1740 return IB_QPS_RTS; 1741 case QED_ROCE_QP_STATE_SQD: 1742 return IB_QPS_SQD; 1743 case QED_ROCE_QP_STATE_ERR: 1744 return IB_QPS_ERR; 1745 case QED_ROCE_QP_STATE_SQE: 1746 return IB_QPS_SQE; 1747 } 1748 return IB_QPS_ERR; 1749 } 1750 1751 static enum qed_roce_qp_state qedr_get_state_from_ibqp( 1752 enum ib_qp_state qp_state) 1753 { 1754 switch (qp_state) { 1755 case IB_QPS_RESET: 1756 return QED_ROCE_QP_STATE_RESET; 1757 case IB_QPS_INIT: 1758 return QED_ROCE_QP_STATE_INIT; 1759 case IB_QPS_RTR: 1760 return QED_ROCE_QP_STATE_RTR; 1761 case IB_QPS_RTS: 1762 return QED_ROCE_QP_STATE_RTS; 1763 case IB_QPS_SQD: 1764 return QED_ROCE_QP_STATE_SQD; 1765 case IB_QPS_ERR: 1766 return QED_ROCE_QP_STATE_ERR; 1767 default: 1768 return QED_ROCE_QP_STATE_ERR; 1769 } 1770 } 1771 1772 static void qedr_reset_qp_hwq_info(struct qedr_qp_hwq_info *qph) 1773 { 1774 qed_chain_reset(&qph->pbl); 1775 qph->prod = 0; 1776 qph->cons = 0; 1777 qph->wqe_cons = 0; 1778 qph->db_data.data.value = cpu_to_le16(0); 1779 } 1780 1781 static int qedr_update_qp_state(struct qedr_dev *dev, 1782 struct qedr_qp *qp, 1783 enum qed_roce_qp_state cur_state, 1784 enum qed_roce_qp_state new_state) 1785 { 1786 int status = 0; 1787 1788 if (new_state == cur_state) 1789 return 0; 1790 1791 switch (cur_state) { 1792 case QED_ROCE_QP_STATE_RESET: 1793 switch (new_state) { 1794 case QED_ROCE_QP_STATE_INIT: 1795 qp->prev_wqe_size = 0; 1796 qedr_reset_qp_hwq_info(&qp->sq); 1797 qedr_reset_qp_hwq_info(&qp->rq); 1798 break; 1799 default: 1800 status = -EINVAL; 1801 break; 1802 }; 1803 break; 1804 case QED_ROCE_QP_STATE_INIT: 1805 switch (new_state) { 1806 case QED_ROCE_QP_STATE_RTR: 1807 /* Update doorbell (in case post_recv was 1808 * done before move to RTR) 1809 */ 1810 1811 if (rdma_protocol_roce(&dev->ibdev, 1)) { 1812 writel(qp->rq.db_data.raw, qp->rq.db); 1813 /* Make sure write takes effect */ 1814 mmiowb(); 1815 } 1816 break; 
1817 case QED_ROCE_QP_STATE_ERR: 1818 break; 1819 default: 1820 /* Invalid state change. */ 1821 status = -EINVAL; 1822 break; 1823 }; 1824 break; 1825 case QED_ROCE_QP_STATE_RTR: 1826 /* RTR->XXX */ 1827 switch (new_state) { 1828 case QED_ROCE_QP_STATE_RTS: 1829 break; 1830 case QED_ROCE_QP_STATE_ERR: 1831 break; 1832 default: 1833 /* Invalid state change. */ 1834 status = -EINVAL; 1835 break; 1836 }; 1837 break; 1838 case QED_ROCE_QP_STATE_RTS: 1839 /* RTS->XXX */ 1840 switch (new_state) { 1841 case QED_ROCE_QP_STATE_SQD: 1842 break; 1843 case QED_ROCE_QP_STATE_ERR: 1844 break; 1845 default: 1846 /* Invalid state change. */ 1847 status = -EINVAL; 1848 break; 1849 }; 1850 break; 1851 case QED_ROCE_QP_STATE_SQD: 1852 /* SQD->XXX */ 1853 switch (new_state) { 1854 case QED_ROCE_QP_STATE_RTS: 1855 case QED_ROCE_QP_STATE_ERR: 1856 break; 1857 default: 1858 /* Invalid state change. */ 1859 status = -EINVAL; 1860 break; 1861 }; 1862 break; 1863 case QED_ROCE_QP_STATE_ERR: 1864 /* ERR->XXX */ 1865 switch (new_state) { 1866 case QED_ROCE_QP_STATE_RESET: 1867 if ((qp->rq.prod != qp->rq.cons) || 1868 (qp->sq.prod != qp->sq.cons)) { 1869 DP_NOTICE(dev, 1870 "Error->Reset with rq/sq not empty rq.prod=%x rq.cons=%x sq.prod=%x sq.cons=%x\n", 1871 qp->rq.prod, qp->rq.cons, qp->sq.prod, 1872 qp->sq.cons); 1873 status = -EINVAL; 1874 } 1875 break; 1876 default: 1877 status = -EINVAL; 1878 break; 1879 }; 1880 break; 1881 default: 1882 status = -EINVAL; 1883 break; 1884 }; 1885 1886 return status; 1887 } 1888 1889 int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, 1890 int attr_mask, struct ib_udata *udata) 1891 { 1892 struct qedr_qp *qp = get_qedr_qp(ibqp); 1893 struct qed_rdma_modify_qp_in_params qp_params = { 0 }; 1894 struct qedr_dev *dev = get_qedr_dev(&qp->dev->ibdev); 1895 const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr); 1896 enum ib_qp_state old_qp_state, new_qp_state; 1897 enum qed_roce_qp_state cur_state; 1898 int rc = 0; 1899 1900 DP_DEBUG(dev, QEDR_MSG_QP, 1901 "modify qp: qp %p attr_mask=0x%x, state=%d", qp, attr_mask, 1902 attr->qp_state); 1903 1904 old_qp_state = qedr_get_ibqp_state(qp->state); 1905 if (attr_mask & IB_QP_STATE) 1906 new_qp_state = attr->qp_state; 1907 else 1908 new_qp_state = old_qp_state; 1909 1910 if (rdma_protocol_roce(&dev->ibdev, 1)) { 1911 if (!ib_modify_qp_is_ok(old_qp_state, new_qp_state, 1912 ibqp->qp_type, attr_mask, 1913 IB_LINK_LAYER_ETHERNET)) { 1914 DP_ERR(dev, 1915 "modify qp: invalid attribute mask=0x%x specified for\n" 1916 "qpn=0x%x of type=0x%x old_qp_state=0x%x, new_qp_state=0x%x\n", 1917 attr_mask, qp->qp_id, ibqp->qp_type, 1918 old_qp_state, new_qp_state); 1919 rc = -EINVAL; 1920 goto err; 1921 } 1922 } 1923 1924 /* Translate the masks... 
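	 * supplied in attr_mask into the qed modify_flags and qp_params fields
	 * expected by the firmware interface.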
*/ 1925 if (attr_mask & IB_QP_STATE) { 1926 SET_FIELD(qp_params.modify_flags, 1927 QED_RDMA_MODIFY_QP_VALID_NEW_STATE, 1); 1928 qp_params.new_state = qedr_get_state_from_ibqp(attr->qp_state); 1929 } 1930 1931 if (attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY) 1932 qp_params.sqd_async = true; 1933 1934 if (attr_mask & IB_QP_PKEY_INDEX) { 1935 SET_FIELD(qp_params.modify_flags, 1936 QED_ROCE_MODIFY_QP_VALID_PKEY, 1); 1937 if (attr->pkey_index >= QEDR_ROCE_PKEY_TABLE_LEN) { 1938 rc = -EINVAL; 1939 goto err; 1940 } 1941 1942 qp_params.pkey = QEDR_ROCE_PKEY_DEFAULT; 1943 } 1944 1945 if (attr_mask & IB_QP_QKEY) 1946 qp->qkey = attr->qkey; 1947 1948 if (attr_mask & IB_QP_ACCESS_FLAGS) { 1949 SET_FIELD(qp_params.modify_flags, 1950 QED_RDMA_MODIFY_QP_VALID_RDMA_OPS_EN, 1); 1951 qp_params.incoming_rdma_read_en = attr->qp_access_flags & 1952 IB_ACCESS_REMOTE_READ; 1953 qp_params.incoming_rdma_write_en = attr->qp_access_flags & 1954 IB_ACCESS_REMOTE_WRITE; 1955 qp_params.incoming_atomic_en = attr->qp_access_flags & 1956 IB_ACCESS_REMOTE_ATOMIC; 1957 } 1958 1959 if (attr_mask & (IB_QP_AV | IB_QP_PATH_MTU)) { 1960 if (rdma_protocol_iwarp(&dev->ibdev, 1)) 1961 return -EINVAL; 1962 1963 if (attr_mask & IB_QP_PATH_MTU) { 1964 if (attr->path_mtu < IB_MTU_256 || 1965 attr->path_mtu > IB_MTU_4096) { 1966 pr_err("error: Only MTU sizes of 256, 512, 1024, 2048 and 4096 are supported by RoCE\n"); 1967 rc = -EINVAL; 1968 goto err; 1969 } 1970 qp->mtu = min(ib_mtu_enum_to_int(attr->path_mtu), 1971 ib_mtu_enum_to_int(iboe_get_mtu 1972 (dev->ndev->mtu))); 1973 } 1974 1975 if (!qp->mtu) { 1976 qp->mtu = 1977 ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu)); 1978 pr_err("Fixing zeroed MTU to qp->mtu = %d\n", qp->mtu); 1979 } 1980 1981 SET_FIELD(qp_params.modify_flags, 1982 QED_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR, 1); 1983 1984 qp_params.traffic_class_tos = grh->traffic_class; 1985 qp_params.flow_label = grh->flow_label; 1986 qp_params.hop_limit_ttl = grh->hop_limit; 1987 1988 qp->sgid_idx = grh->sgid_index; 1989 1990 rc = get_gid_info_from_table(ibqp, attr, attr_mask, &qp_params); 1991 if (rc) { 1992 DP_ERR(dev, 1993 "modify qp: problems with GID index %d (rc=%d)\n", 1994 grh->sgid_index, rc); 1995 return rc; 1996 } 1997 1998 rc = qedr_get_dmac(dev, &attr->ah_attr, 1999 qp_params.remote_mac_addr); 2000 if (rc) 2001 return rc; 2002 2003 qp_params.use_local_mac = true; 2004 ether_addr_copy(qp_params.local_mac_addr, dev->ndev->dev_addr); 2005 2006 DP_DEBUG(dev, QEDR_MSG_QP, "dgid=%x:%x:%x:%x\n", 2007 qp_params.dgid.dwords[0], qp_params.dgid.dwords[1], 2008 qp_params.dgid.dwords[2], qp_params.dgid.dwords[3]); 2009 DP_DEBUG(dev, QEDR_MSG_QP, "sgid=%x:%x:%x:%x\n", 2010 qp_params.sgid.dwords[0], qp_params.sgid.dwords[1], 2011 qp_params.sgid.dwords[2], qp_params.sgid.dwords[3]); 2012 DP_DEBUG(dev, QEDR_MSG_QP, "remote_mac=[%pM]\n", 2013 qp_params.remote_mac_addr); 2014 2015 qp_params.mtu = qp->mtu; 2016 qp_params.lb_indication = false; 2017 } 2018 2019 if (!qp_params.mtu) { 2020 /* Stay with current MTU */ 2021 if (qp->mtu) 2022 qp_params.mtu = qp->mtu; 2023 else 2024 qp_params.mtu = 2025 ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu)); 2026 } 2027 2028 if (attr_mask & IB_QP_TIMEOUT) { 2029 SET_FIELD(qp_params.modify_flags, 2030 QED_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT, 1); 2031 2032 /* The received timeout value is an exponent used like this: 2033 * "12.7.34 LOCAL ACK TIMEOUT 2034 * Value representing the transport (ACK) timeout for use by 2035 * the remote, expressed as: 4.096 * 2^timeout [usec]" 2036 * The FW expects timeout in msec so 
we need to divide the usec 2037 * result by 1000. We'll approximate 1000~2^10, and 4.096 ~ 2^2, 2038 * so we get: 2^2 * 2^timeout / 2^10 = 2^(timeout - 8). 2039 * The value of zero means infinite so we use a 'max_t' to make 2040 * sure that sub 1 msec values will be configured as 1 msec. 2041 */ 2042 if (attr->timeout) 2043 qp_params.ack_timeout = 2044 1 << max_t(int, attr->timeout - 8, 0); 2045 else 2046 qp_params.ack_timeout = 0; 2047 } 2048 2049 if (attr_mask & IB_QP_RETRY_CNT) { 2050 SET_FIELD(qp_params.modify_flags, 2051 QED_ROCE_MODIFY_QP_VALID_RETRY_CNT, 1); 2052 qp_params.retry_cnt = attr->retry_cnt; 2053 } 2054 2055 if (attr_mask & IB_QP_RNR_RETRY) { 2056 SET_FIELD(qp_params.modify_flags, 2057 QED_ROCE_MODIFY_QP_VALID_RNR_RETRY_CNT, 1); 2058 qp_params.rnr_retry_cnt = attr->rnr_retry; 2059 } 2060 2061 if (attr_mask & IB_QP_RQ_PSN) { 2062 SET_FIELD(qp_params.modify_flags, 2063 QED_ROCE_MODIFY_QP_VALID_RQ_PSN, 1); 2064 qp_params.rq_psn = attr->rq_psn; 2065 qp->rq_psn = attr->rq_psn; 2066 } 2067 2068 if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) { 2069 if (attr->max_rd_atomic > dev->attr.max_qp_req_rd_atomic_resc) { 2070 rc = -EINVAL; 2071 DP_ERR(dev, 2072 "unsupported max_rd_atomic=%d, supported=%d\n", 2073 attr->max_rd_atomic, 2074 dev->attr.max_qp_req_rd_atomic_resc); 2075 goto err; 2076 } 2077 2078 SET_FIELD(qp_params.modify_flags, 2079 QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_REQ, 1); 2080 qp_params.max_rd_atomic_req = attr->max_rd_atomic; 2081 } 2082 2083 if (attr_mask & IB_QP_MIN_RNR_TIMER) { 2084 SET_FIELD(qp_params.modify_flags, 2085 QED_ROCE_MODIFY_QP_VALID_MIN_RNR_NAK_TIMER, 1); 2086 qp_params.min_rnr_nak_timer = attr->min_rnr_timer; 2087 } 2088 2089 if (attr_mask & IB_QP_SQ_PSN) { 2090 SET_FIELD(qp_params.modify_flags, 2091 QED_ROCE_MODIFY_QP_VALID_SQ_PSN, 1); 2092 qp_params.sq_psn = attr->sq_psn; 2093 qp->sq_psn = attr->sq_psn; 2094 } 2095 2096 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) { 2097 if (attr->max_dest_rd_atomic > 2098 dev->attr.max_qp_resp_rd_atomic_resc) { 2099 DP_ERR(dev, 2100 "unsupported max_dest_rd_atomic=%d, supported=%d\n", 2101 attr->max_dest_rd_atomic, 2102 dev->attr.max_qp_resp_rd_atomic_resc); 2103 2104 rc = -EINVAL; 2105 goto err; 2106 } 2107 2108 SET_FIELD(qp_params.modify_flags, 2109 QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_RESP, 1); 2110 qp_params.max_rd_atomic_resp = attr->max_dest_rd_atomic; 2111 } 2112 2113 if (attr_mask & IB_QP_DEST_QPN) { 2114 SET_FIELD(qp_params.modify_flags, 2115 QED_ROCE_MODIFY_QP_VALID_DEST_QP, 1); 2116 2117 qp_params.dest_qp = attr->dest_qp_num; 2118 qp->dest_qp_num = attr->dest_qp_num; 2119 } 2120 2121 cur_state = qp->state; 2122 2123 /* Update the QP state before the actual ramrod to prevent a race with 2124 * fast path. Modifying the QP state to error will cause the device to 2125 * flush the CQEs, and polling those flushed CQEs would be considered 2126 * a potential issue if the QP were not already in the error state.
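 *
 * (Illustrative numeric check, added here and not part of the original
 * comment: for attr->timeout = 14 the spec-defined value is
 * 4.096 usec * 2^14, i.e. roughly 67 msec, while the approximation above
 * yields 1 << (14 - 8) = 64 msec, the same order of magnitude; for
 * attr->timeout <= 8 the max_t() clamp below configures the 1 msec
 * minimum.)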
2127 */ 2128 if ((attr_mask & IB_QP_STATE) && qp->qp_type != IB_QPT_GSI && 2129 !udata && qp_params.new_state == QED_ROCE_QP_STATE_ERR) 2130 qp->state = QED_ROCE_QP_STATE_ERR; 2131 2132 if (qp->qp_type != IB_QPT_GSI) 2133 rc = dev->ops->rdma_modify_qp(dev->rdma_ctx, 2134 qp->qed_qp, &qp_params); 2135 2136 if (attr_mask & IB_QP_STATE) { 2137 if ((qp->qp_type != IB_QPT_GSI) && (!udata)) 2138 rc = qedr_update_qp_state(dev, qp, cur_state, 2139 qp_params.new_state); 2140 qp->state = qp_params.new_state; 2141 } 2142 2143 err: 2144 return rc; 2145 } 2146 2147 static int qedr_to_ib_qp_acc_flags(struct qed_rdma_query_qp_out_params *params) 2148 { 2149 int ib_qp_acc_flags = 0; 2150 2151 if (params->incoming_rdma_write_en) 2152 ib_qp_acc_flags |= IB_ACCESS_REMOTE_WRITE; 2153 if (params->incoming_rdma_read_en) 2154 ib_qp_acc_flags |= IB_ACCESS_REMOTE_READ; 2155 if (params->incoming_atomic_en) 2156 ib_qp_acc_flags |= IB_ACCESS_REMOTE_ATOMIC; 2157 ib_qp_acc_flags |= IB_ACCESS_LOCAL_WRITE; 2158 return ib_qp_acc_flags; 2159 } 2160 2161 int qedr_query_qp(struct ib_qp *ibqp, 2162 struct ib_qp_attr *qp_attr, 2163 int attr_mask, struct ib_qp_init_attr *qp_init_attr) 2164 { 2165 struct qed_rdma_query_qp_out_params params; 2166 struct qedr_qp *qp = get_qedr_qp(ibqp); 2167 struct qedr_dev *dev = qp->dev; 2168 int rc = 0; 2169 2170 memset(&params, 0, sizeof(params)); 2171 2172 rc = dev->ops->rdma_query_qp(dev->rdma_ctx, qp->qed_qp, &params); 2173 if (rc) 2174 goto err; 2175 2176 memset(qp_attr, 0, sizeof(*qp_attr)); 2177 memset(qp_init_attr, 0, sizeof(*qp_init_attr)); 2178 2179 qp_attr->qp_state = qedr_get_ibqp_state(params.state); 2180 qp_attr->cur_qp_state = qedr_get_ibqp_state(params.state); 2181 qp_attr->path_mtu = ib_mtu_int_to_enum(params.mtu); 2182 qp_attr->path_mig_state = IB_MIG_MIGRATED; 2183 qp_attr->rq_psn = params.rq_psn; 2184 qp_attr->sq_psn = params.sq_psn; 2185 qp_attr->dest_qp_num = params.dest_qp; 2186 2187 qp_attr->qp_access_flags = qedr_to_ib_qp_acc_flags(&params); 2188 2189 qp_attr->cap.max_send_wr = qp->sq.max_wr; 2190 qp_attr->cap.max_recv_wr = qp->rq.max_wr; 2191 qp_attr->cap.max_send_sge = qp->sq.max_sges; 2192 qp_attr->cap.max_recv_sge = qp->rq.max_sges; 2193 qp_attr->cap.max_inline_data = ROCE_REQ_MAX_INLINE_DATA_SIZE; 2194 qp_init_attr->cap = qp_attr->cap; 2195 2196 qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE; 2197 rdma_ah_set_grh(&qp_attr->ah_attr, NULL, 2198 params.flow_label, qp->sgid_idx, 2199 params.hop_limit_ttl, params.traffic_class_tos); 2200 rdma_ah_set_dgid_raw(&qp_attr->ah_attr, &params.dgid.bytes[0]); 2201 rdma_ah_set_port_num(&qp_attr->ah_attr, 1); 2202 rdma_ah_set_sl(&qp_attr->ah_attr, 0); 2203 qp_attr->timeout = params.timeout; 2204 qp_attr->rnr_retry = params.rnr_retry; 2205 qp_attr->retry_cnt = params.retry_cnt; 2206 qp_attr->min_rnr_timer = params.min_rnr_nak_timer; 2207 qp_attr->pkey_index = params.pkey_index; 2208 qp_attr->port_num = 1; 2209 rdma_ah_set_path_bits(&qp_attr->ah_attr, 0); 2210 rdma_ah_set_static_rate(&qp_attr->ah_attr, 0); 2211 qp_attr->alt_pkey_index = 0; 2212 qp_attr->alt_port_num = 0; 2213 qp_attr->alt_timeout = 0; 2214 memset(&qp_attr->alt_ah_attr, 0, sizeof(qp_attr->alt_ah_attr)); 2215 2216 qp_attr->sq_draining = (params.state == QED_ROCE_QP_STATE_SQD) ? 1 : 0; 2217 qp_attr->max_dest_rd_atomic = params.max_dest_rd_atomic; 2218 qp_attr->max_rd_atomic = params.max_rd_atomic; 2219 qp_attr->en_sqd_async_notify = (params.sqd_async) ?
1 : 0; 2220 2221 DP_DEBUG(dev, QEDR_MSG_QP, "QEDR_QUERY_QP: max_inline_data=%d\n", 2222 qp_attr->cap.max_inline_data); 2223 2224 err: 2225 return rc; 2226 } 2227 2228 static int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp) 2229 { 2230 int rc = 0; 2231 2232 if (qp->qp_type != IB_QPT_GSI) { 2233 rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp); 2234 if (rc) 2235 return rc; 2236 } 2237 2238 if (qp->ibqp.uobject && qp->ibqp.uobject->context) 2239 qedr_cleanup_user(dev, qp); 2240 else 2241 qedr_cleanup_kernel(dev, qp); 2242 2243 return 0; 2244 } 2245 2246 int qedr_destroy_qp(struct ib_qp *ibqp) 2247 { 2248 struct qedr_qp *qp = get_qedr_qp(ibqp); 2249 struct qedr_dev *dev = qp->dev; 2250 struct ib_qp_attr attr; 2251 int attr_mask = 0; 2252 int rc = 0; 2253 2254 DP_DEBUG(dev, QEDR_MSG_QP, "destroy qp: destroying %p, qp type=%d\n", 2255 qp, qp->qp_type); 2256 2257 if (rdma_protocol_roce(&dev->ibdev, 1)) { 2258 if ((qp->state != QED_ROCE_QP_STATE_RESET) && 2259 (qp->state != QED_ROCE_QP_STATE_ERR) && 2260 (qp->state != QED_ROCE_QP_STATE_INIT)) { 2261 2262 attr.qp_state = IB_QPS_ERR; 2263 attr_mask |= IB_QP_STATE; 2264 2265 /* Change the QP state to ERROR */ 2266 qedr_modify_qp(ibqp, &attr, attr_mask, NULL); 2267 } 2268 } else { 2269 /* Wait for the connect/accept to complete */ 2270 if (qp->ep) { 2271 int wait_count = 1; 2272 2273 while (qp->ep->during_connect) { 2274 DP_DEBUG(dev, QEDR_MSG_QP, 2275 "Still in during connect/accept\n"); 2276 2277 msleep(100); 2278 if (wait_count++ > 200) { 2279 DP_NOTICE(dev, 2280 "during connect timeout\n"); 2281 break; 2282 } 2283 } 2284 } 2285 } 2286 2287 if (qp->qp_type == IB_QPT_GSI) 2288 qedr_destroy_gsi_qp(dev); 2289 2290 qedr_free_qp_resources(dev, qp); 2291 2292 if (atomic_dec_and_test(&qp->refcnt)) { 2293 qedr_idr_remove(dev, qp->qp_id); 2294 kfree(qp); 2295 } 2296 return rc; 2297 } 2298 2299 struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr, 2300 struct ib_udata *udata) 2301 { 2302 struct qedr_ah *ah; 2303 2304 ah = kzalloc(sizeof(*ah), GFP_ATOMIC); 2305 if (!ah) 2306 return ERR_PTR(-ENOMEM); 2307 2308 ah->attr = *attr; 2309 2310 return &ah->ibah; 2311 } 2312 2313 int qedr_destroy_ah(struct ib_ah *ibah) 2314 { 2315 struct qedr_ah *ah = get_qedr_ah(ibah); 2316 2317 kfree(ah); 2318 return 0; 2319 } 2320 2321 static void free_mr_info(struct qedr_dev *dev, struct mr_info *info) 2322 { 2323 struct qedr_pbl *pbl, *tmp; 2324 2325 if (info->pbl_table) 2326 list_add_tail(&info->pbl_table->list_entry, 2327 &info->free_pbl_list); 2328 2329 if (!list_empty(&info->inuse_pbl_list)) 2330 list_splice(&info->inuse_pbl_list, &info->free_pbl_list); 2331 2332 list_for_each_entry_safe(pbl, tmp, &info->free_pbl_list, list_entry) { 2333 list_del(&pbl->list_entry); 2334 qedr_free_pbl(dev, &info->pbl_info, pbl); 2335 } 2336 } 2337 2338 static int init_mr_info(struct qedr_dev *dev, struct mr_info *info, 2339 size_t page_list_len, bool two_layered) 2340 { 2341 struct qedr_pbl *tmp; 2342 int rc; 2343 2344 INIT_LIST_HEAD(&info->free_pbl_list); 2345 INIT_LIST_HEAD(&info->inuse_pbl_list); 2346 2347 rc = qedr_prepare_pbl_tbl(dev, &info->pbl_info, 2348 page_list_len, two_layered); 2349 if (rc) 2350 goto done; 2351 2352 info->pbl_table = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL); 2353 if (IS_ERR(info->pbl_table)) { 2354 rc = PTR_ERR(info->pbl_table); 2355 goto done; 2356 } 2357 2358 DP_DEBUG(dev, QEDR_MSG_MR, "pbl_table_pa = %pa\n", 2359 &info->pbl_table->pa); 2360 2361 /* in usual case we use 2 PBLs, so we add one to free 
2362 * list and allocating another one 2363 */ 2364 tmp = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL); 2365 if (IS_ERR(tmp)) { 2366 DP_DEBUG(dev, QEDR_MSG_MR, "Extra PBL is not allocated\n"); 2367 goto done; 2368 } 2369 2370 list_add_tail(&tmp->list_entry, &info->free_pbl_list); 2371 2372 DP_DEBUG(dev, QEDR_MSG_MR, "extra pbl_table_pa = %pa\n", &tmp->pa); 2373 2374 done: 2375 if (rc) 2376 free_mr_info(dev, info); 2377 2378 return rc; 2379 } 2380 2381 struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len, 2382 u64 usr_addr, int acc, struct ib_udata *udata) 2383 { 2384 struct qedr_dev *dev = get_qedr_dev(ibpd->device); 2385 struct qedr_mr *mr; 2386 struct qedr_pd *pd; 2387 int rc = -ENOMEM; 2388 2389 pd = get_qedr_pd(ibpd); 2390 DP_DEBUG(dev, QEDR_MSG_MR, 2391 "qedr_register user mr pd = %d start = %lld, len = %lld, usr_addr = %lld, acc = %d\n", 2392 pd->pd_id, start, len, usr_addr, acc); 2393 2394 if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE)) 2395 return ERR_PTR(-EINVAL); 2396 2397 mr = kzalloc(sizeof(*mr), GFP_KERNEL); 2398 if (!mr) 2399 return ERR_PTR(rc); 2400 2401 mr->type = QEDR_MR_USER; 2402 2403 mr->umem = ib_umem_get(ibpd->uobject->context, start, len, acc, 0); 2404 if (IS_ERR(mr->umem)) { 2405 rc = -EFAULT; 2406 goto err0; 2407 } 2408 2409 rc = init_mr_info(dev, &mr->info, ib_umem_page_count(mr->umem), 1); 2410 if (rc) 2411 goto err1; 2412 2413 qedr_populate_pbls(dev, mr->umem, mr->info.pbl_table, 2414 &mr->info.pbl_info, mr->umem->page_shift); 2415 2416 rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid); 2417 if (rc) { 2418 DP_ERR(dev, "roce alloc tid returned an error %d\n", rc); 2419 goto err1; 2420 } 2421 2422 /* Index only, 18 bit long, lkey = itid << 8 | key */ 2423 mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR; 2424 mr->hw_mr.key = 0; 2425 mr->hw_mr.pd = pd->pd_id; 2426 mr->hw_mr.local_read = 1; 2427 mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0; 2428 mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0; 2429 mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0; 2430 mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 
1 : 0; 2431 mr->hw_mr.mw_bind = false; 2432 mr->hw_mr.pbl_ptr = mr->info.pbl_table[0].pa; 2433 mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered; 2434 mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size); 2435 mr->hw_mr.page_size_log = mr->umem->page_shift; 2436 mr->hw_mr.fbo = ib_umem_offset(mr->umem); 2437 mr->hw_mr.length = len; 2438 mr->hw_mr.vaddr = usr_addr; 2439 mr->hw_mr.zbva = false; 2440 mr->hw_mr.phy_mr = false; 2441 mr->hw_mr.dma_mr = false; 2442 2443 rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr); 2444 if (rc) { 2445 DP_ERR(dev, "roce register tid returned an error %d\n", rc); 2446 goto err2; 2447 } 2448 2449 mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key; 2450 if (mr->hw_mr.remote_write || mr->hw_mr.remote_read || 2451 mr->hw_mr.remote_atomic) 2452 mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key; 2453 2454 DP_DEBUG(dev, QEDR_MSG_MR, "register user mr lkey: %x\n", 2455 mr->ibmr.lkey); 2456 return &mr->ibmr; 2457 2458 err2: 2459 dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid); 2460 err1: 2461 qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table); 2462 err0: 2463 kfree(mr); 2464 return ERR_PTR(rc); 2465 } 2466 2467 int qedr_dereg_mr(struct ib_mr *ib_mr) 2468 { 2469 struct qedr_mr *mr = get_qedr_mr(ib_mr); 2470 struct qedr_dev *dev = get_qedr_dev(ib_mr->device); 2471 int rc = 0; 2472 2473 rc = dev->ops->rdma_deregister_tid(dev->rdma_ctx, mr->hw_mr.itid); 2474 if (rc) 2475 return rc; 2476 2477 dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid); 2478 2479 if ((mr->type != QEDR_MR_DMA) && (mr->type != QEDR_MR_FRMR)) 2480 qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table); 2481 2482 /* it could be user registered memory. */ 2483 if (mr->umem) 2484 ib_umem_release(mr->umem); 2485 2486 kfree(mr); 2487 2488 return rc; 2489 } 2490 2491 static struct qedr_mr *__qedr_alloc_mr(struct ib_pd *ibpd, 2492 int max_page_list_len) 2493 { 2494 struct qedr_pd *pd = get_qedr_pd(ibpd); 2495 struct qedr_dev *dev = get_qedr_dev(ibpd->device); 2496 struct qedr_mr *mr; 2497 int rc = -ENOMEM; 2498 2499 DP_DEBUG(dev, QEDR_MSG_MR, 2500 "qedr_alloc_frmr pd = %d max_page_list_len= %d\n", pd->pd_id, 2501 max_page_list_len); 2502 2503 mr = kzalloc(sizeof(*mr), GFP_KERNEL); 2504 if (!mr) 2505 return ERR_PTR(rc); 2506 2507 mr->dev = dev; 2508 mr->type = QEDR_MR_FRMR; 2509 2510 rc = init_mr_info(dev, &mr->info, max_page_list_len, 1); 2511 if (rc) 2512 goto err0; 2513 2514 rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid); 2515 if (rc) { 2516 DP_ERR(dev, "roce alloc tid returned an error %d\n", rc); 2517 goto err0; 2518 } 2519 2520 /* Index only, 18 bit long, lkey = itid << 8 | key */ 2521 mr->hw_mr.tid_type = QED_RDMA_TID_FMR; 2522 mr->hw_mr.key = 0; 2523 mr->hw_mr.pd = pd->pd_id; 2524 mr->hw_mr.local_read = 1; 2525 mr->hw_mr.local_write = 0; 2526 mr->hw_mr.remote_read = 0; 2527 mr->hw_mr.remote_write = 0; 2528 mr->hw_mr.remote_atomic = 0; 2529 mr->hw_mr.mw_bind = false; 2530 mr->hw_mr.pbl_ptr = 0; 2531 mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered; 2532 mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size); 2533 mr->hw_mr.fbo = 0; 2534 mr->hw_mr.length = 0; 2535 mr->hw_mr.vaddr = 0; 2536 mr->hw_mr.zbva = false; 2537 mr->hw_mr.phy_mr = true; 2538 mr->hw_mr.dma_mr = false; 2539 2540 rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr); 2541 if (rc) { 2542 DP_ERR(dev, "roce register tid returned an error %d\n", rc); 2543 goto err1; 2544 } 2545 2546 mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key; 2547 
mr->ibmr.rkey = mr->ibmr.lkey; 2548 2549 DP_DEBUG(dev, QEDR_MSG_MR, "alloc frmr: %x\n", mr->ibmr.lkey); 2550 return mr; 2551 2552 err1: 2553 dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid); 2554 err0: 2555 kfree(mr); 2556 return ERR_PTR(rc); 2557 } 2558 2559 struct ib_mr *qedr_alloc_mr(struct ib_pd *ibpd, 2560 enum ib_mr_type mr_type, u32 max_num_sg) 2561 { 2562 struct qedr_mr *mr; 2563 2564 if (mr_type != IB_MR_TYPE_MEM_REG) 2565 return ERR_PTR(-EINVAL); 2566 2567 mr = __qedr_alloc_mr(ibpd, max_num_sg); 2568 2569 if (IS_ERR(mr)) 2570 return ERR_PTR(-EINVAL); 2571 2572 return &mr->ibmr; 2573 } 2574 2575 static int qedr_set_page(struct ib_mr *ibmr, u64 addr) 2576 { 2577 struct qedr_mr *mr = get_qedr_mr(ibmr); 2578 struct qedr_pbl *pbl_table; 2579 struct regpair *pbe; 2580 u32 pbes_in_page; 2581 2582 if (unlikely(mr->npages == mr->info.pbl_info.num_pbes)) { 2583 DP_ERR(mr->dev, "qedr_set_page fails when %d\n", mr->npages); 2584 return -ENOMEM; 2585 } 2586 2587 DP_DEBUG(mr->dev, QEDR_MSG_MR, "qedr_set_page pages[%d] = 0x%llx\n", 2588 mr->npages, addr); 2589 2590 pbes_in_page = mr->info.pbl_info.pbl_size / sizeof(u64); 2591 pbl_table = mr->info.pbl_table + (mr->npages / pbes_in_page); 2592 pbe = (struct regpair *)pbl_table->va; 2593 pbe += mr->npages % pbes_in_page; 2594 pbe->lo = cpu_to_le32((u32)addr); 2595 pbe->hi = cpu_to_le32((u32)upper_32_bits(addr)); 2596 2597 mr->npages++; 2598 2599 return 0; 2600 } 2601 2602 static void handle_completed_mrs(struct qedr_dev *dev, struct mr_info *info) 2603 { 2604 int work = info->completed - info->completed_handled - 1; 2605 2606 DP_DEBUG(dev, QEDR_MSG_MR, "Special FMR work = %d\n", work); 2607 while (work-- > 0 && !list_empty(&info->inuse_pbl_list)) { 2608 struct qedr_pbl *pbl; 2609 2610 /* Free all the page list that are possible to be freed 2611 * (all the ones that were invalidated), under the assumption 2612 * that if an FMR was completed successfully that means that 2613 * if there was an invalidate operation before it also ended 2614 */ 2615 pbl = list_first_entry(&info->inuse_pbl_list, 2616 struct qedr_pbl, list_entry); 2617 list_move_tail(&pbl->list_entry, &info->free_pbl_list); 2618 info->completed_handled++; 2619 } 2620 } 2621 2622 int qedr_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, 2623 int sg_nents, unsigned int *sg_offset) 2624 { 2625 struct qedr_mr *mr = get_qedr_mr(ibmr); 2626 2627 mr->npages = 0; 2628 2629 handle_completed_mrs(mr->dev, &mr->info); 2630 return ib_sg_to_pages(ibmr, sg, sg_nents, NULL, qedr_set_page); 2631 } 2632 2633 struct ib_mr *qedr_get_dma_mr(struct ib_pd *ibpd, int acc) 2634 { 2635 struct qedr_dev *dev = get_qedr_dev(ibpd->device); 2636 struct qedr_pd *pd = get_qedr_pd(ibpd); 2637 struct qedr_mr *mr; 2638 int rc; 2639 2640 mr = kzalloc(sizeof(*mr), GFP_KERNEL); 2641 if (!mr) 2642 return ERR_PTR(-ENOMEM); 2643 2644 mr->type = QEDR_MR_DMA; 2645 2646 rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid); 2647 if (rc) { 2648 DP_ERR(dev, "roce alloc tid returned an error %d\n", rc); 2649 goto err1; 2650 } 2651 2652 /* index only, 18 bit long, lkey = itid << 8 | key */ 2653 mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR; 2654 mr->hw_mr.pd = pd->pd_id; 2655 mr->hw_mr.local_read = 1; 2656 mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0; 2657 mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0; 2658 mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0; 2659 mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 
1 : 0; 2660 mr->hw_mr.dma_mr = true; 2661 2662 rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr); 2663 if (rc) { 2664 DP_ERR(dev, "roce register tid returned an error %d\n", rc); 2665 goto err2; 2666 } 2667 2668 mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key; 2669 if (mr->hw_mr.remote_write || mr->hw_mr.remote_read || 2670 mr->hw_mr.remote_atomic) 2671 mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key; 2672 2673 DP_DEBUG(dev, QEDR_MSG_MR, "get dma mr: lkey = %x\n", mr->ibmr.lkey); 2674 return &mr->ibmr; 2675 2676 err2: 2677 dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid); 2678 err1: 2679 kfree(mr); 2680 return ERR_PTR(rc); 2681 } 2682 2683 static inline int qedr_wq_is_full(struct qedr_qp_hwq_info *wq) 2684 { 2685 return (((wq->prod + 1) % wq->max_wr) == wq->cons); 2686 } 2687 2688 static int sge_data_len(struct ib_sge *sg_list, int num_sge) 2689 { 2690 int i, len = 0; 2691 2692 for (i = 0; i < num_sge; i++) 2693 len += sg_list[i].length; 2694 2695 return len; 2696 } 2697 2698 static void swap_wqe_data64(u64 *p) 2699 { 2700 int i; 2701 2702 for (i = 0; i < QEDR_SQE_ELEMENT_SIZE / sizeof(u64); i++, p++) 2703 *p = cpu_to_be64(cpu_to_le64(*p)); 2704 } 2705 2706 static u32 qedr_prepare_sq_inline_data(struct qedr_dev *dev, 2707 struct qedr_qp *qp, u8 *wqe_size, 2708 struct ib_send_wr *wr, 2709 struct ib_send_wr **bad_wr, u8 *bits, 2710 u8 bit) 2711 { 2712 u32 data_size = sge_data_len(wr->sg_list, wr->num_sge); 2713 char *seg_prt, *wqe; 2714 int i, seg_siz; 2715 2716 if (data_size > ROCE_REQ_MAX_INLINE_DATA_SIZE) { 2717 DP_ERR(dev, "Too much inline data in WR: %d\n", data_size); 2718 *bad_wr = wr; 2719 return 0; 2720 } 2721 2722 if (!data_size) 2723 return data_size; 2724 2725 *bits |= bit; 2726 2727 seg_prt = NULL; 2728 wqe = NULL; 2729 seg_siz = 0; 2730 2731 /* Copy data inline */ 2732 for (i = 0; i < wr->num_sge; i++) { 2733 u32 len = wr->sg_list[i].length; 2734 void *src = (void *)(uintptr_t)wr->sg_list[i].addr; 2735 2736 while (len > 0) { 2737 u32 cur; 2738 2739 /* New segment required */ 2740 if (!seg_siz) { 2741 wqe = (char *)qed_chain_produce(&qp->sq.pbl); 2742 seg_prt = wqe; 2743 seg_siz = sizeof(struct rdma_sq_common_wqe); 2744 (*wqe_size)++; 2745 } 2746 2747 /* Calculate currently allowed length */ 2748 cur = min_t(u32, len, seg_siz); 2749 memcpy(seg_prt, src, cur); 2750 2751 /* Update segment variables */ 2752 seg_prt += cur; 2753 seg_siz -= cur; 2754 2755 /* Update sge variables */ 2756 src += cur; 2757 len -= cur; 2758 2759 /* Swap fully-completed segments */ 2760 if (!seg_siz) 2761 swap_wqe_data64((u64 *)wqe); 2762 } 2763 } 2764 2765 /* swap last not completed segment */ 2766 if (seg_siz) 2767 swap_wqe_data64((u64 *)wqe); 2768 2769 return data_size; 2770 } 2771 2772 #define RQ_SGE_SET(sge, vaddr, vlength, vflags) \ 2773 do { \ 2774 DMA_REGPAIR_LE(sge->addr, vaddr); \ 2775 (sge)->length = cpu_to_le32(vlength); \ 2776 (sge)->flags = cpu_to_le32(vflags); \ 2777 } while (0) 2778 2779 #define SRQ_HDR_SET(hdr, vwr_id, num_sge) \ 2780 do { \ 2781 DMA_REGPAIR_LE(hdr->wr_id, vwr_id); \ 2782 (hdr)->num_sges = num_sge; \ 2783 } while (0) 2784 2785 #define SRQ_SGE_SET(sge, vaddr, vlength, vlkey) \ 2786 do { \ 2787 DMA_REGPAIR_LE(sge->addr, vaddr); \ 2788 (sge)->length = cpu_to_le32(vlength); \ 2789 (sge)->l_key = cpu_to_le32(vlkey); \ 2790 } while (0) 2791 2792 static u32 qedr_prepare_sq_sges(struct qedr_qp *qp, u8 *wqe_size, 2793 struct ib_send_wr *wr) 2794 { 2795 u32 data_size = 0; 2796 int i; 2797 2798 for (i = 0; i < wr->num_sge; i++) { 2799 struct rdma_sq_sge *sge 
= qed_chain_produce(&qp->sq.pbl); 2800 2801 DMA_REGPAIR_LE(sge->addr, wr->sg_list[i].addr); 2802 sge->l_key = cpu_to_le32(wr->sg_list[i].lkey); 2803 sge->length = cpu_to_le32(wr->sg_list[i].length); 2804 data_size += wr->sg_list[i].length; 2805 } 2806 2807 if (wqe_size) 2808 *wqe_size += wr->num_sge; 2809 2810 return data_size; 2811 } 2812 2813 static u32 qedr_prepare_sq_rdma_data(struct qedr_dev *dev, 2814 struct qedr_qp *qp, 2815 struct rdma_sq_rdma_wqe_1st *rwqe, 2816 struct rdma_sq_rdma_wqe_2nd *rwqe2, 2817 struct ib_send_wr *wr, 2818 struct ib_send_wr **bad_wr) 2819 { 2820 rwqe2->r_key = cpu_to_le32(rdma_wr(wr)->rkey); 2821 DMA_REGPAIR_LE(rwqe2->remote_va, rdma_wr(wr)->remote_addr); 2822 2823 if (wr->send_flags & IB_SEND_INLINE && 2824 (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM || 2825 wr->opcode == IB_WR_RDMA_WRITE)) { 2826 u8 flags = 0; 2827 2828 SET_FIELD2(flags, RDMA_SQ_RDMA_WQE_1ST_INLINE_FLG, 1); 2829 return qedr_prepare_sq_inline_data(dev, qp, &rwqe->wqe_size, wr, 2830 bad_wr, &rwqe->flags, flags); 2831 } 2832 2833 return qedr_prepare_sq_sges(qp, &rwqe->wqe_size, wr); 2834 } 2835 2836 static u32 qedr_prepare_sq_send_data(struct qedr_dev *dev, 2837 struct qedr_qp *qp, 2838 struct rdma_sq_send_wqe_1st *swqe, 2839 struct rdma_sq_send_wqe_2st *swqe2, 2840 struct ib_send_wr *wr, 2841 struct ib_send_wr **bad_wr) 2842 { 2843 memset(swqe2, 0, sizeof(*swqe2)); 2844 if (wr->send_flags & IB_SEND_INLINE) { 2845 u8 flags = 0; 2846 2847 SET_FIELD2(flags, RDMA_SQ_SEND_WQE_INLINE_FLG, 1); 2848 return qedr_prepare_sq_inline_data(dev, qp, &swqe->wqe_size, wr, 2849 bad_wr, &swqe->flags, flags); 2850 } 2851 2852 return qedr_prepare_sq_sges(qp, &swqe->wqe_size, wr); 2853 } 2854 2855 static int qedr_prepare_reg(struct qedr_qp *qp, 2856 struct rdma_sq_fmr_wqe_1st *fwqe1, 2857 struct ib_reg_wr *wr) 2858 { 2859 struct qedr_mr *mr = get_qedr_mr(wr->mr); 2860 struct rdma_sq_fmr_wqe_2nd *fwqe2; 2861 2862 fwqe2 = (struct rdma_sq_fmr_wqe_2nd *)qed_chain_produce(&qp->sq.pbl); 2863 fwqe1->addr.hi = upper_32_bits(mr->ibmr.iova); 2864 fwqe1->addr.lo = lower_32_bits(mr->ibmr.iova); 2865 fwqe1->l_key = wr->key; 2866 2867 fwqe2->access_ctrl = 0; 2868 2869 SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_READ, 2870 !!(wr->access & IB_ACCESS_REMOTE_READ)); 2871 SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_WRITE, 2872 !!(wr->access & IB_ACCESS_REMOTE_WRITE)); 2873 SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_ENABLE_ATOMIC, 2874 !!(wr->access & IB_ACCESS_REMOTE_ATOMIC)); 2875 SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_READ, 1); 2876 SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_WRITE, 2877 !!(wr->access & IB_ACCESS_LOCAL_WRITE)); 2878 fwqe2->fmr_ctrl = 0; 2879 2880 SET_FIELD2(fwqe2->fmr_ctrl, RDMA_SQ_FMR_WQE_2ND_PAGE_SIZE_LOG, 2881 ilog2(mr->ibmr.page_size) - 12); 2882 2883 fwqe2->length_hi = 0; 2884 fwqe2->length_lo = mr->ibmr.length; 2885 fwqe2->pbl_addr.hi = upper_32_bits(mr->info.pbl_table->pa); 2886 fwqe2->pbl_addr.lo = lower_32_bits(mr->info.pbl_table->pa); 2887 2888 qp->wqe_wr_id[qp->sq.prod].mr = mr; 2889 2890 return 0; 2891 } 2892 2893 static enum ib_wc_opcode qedr_ib_to_wc_opcode(enum ib_wr_opcode opcode) 2894 { 2895 switch (opcode) { 2896 case IB_WR_RDMA_WRITE: 2897 case IB_WR_RDMA_WRITE_WITH_IMM: 2898 return IB_WC_RDMA_WRITE; 2899 case IB_WR_SEND_WITH_IMM: 2900 case IB_WR_SEND: 2901 case IB_WR_SEND_WITH_INV: 2902 return IB_WC_SEND; 2903 case IB_WR_RDMA_READ: 2904 case IB_WR_RDMA_READ_WITH_INV: 2905 return IB_WC_RDMA_READ; 2906 case 
IB_WR_ATOMIC_CMP_AND_SWP: 2907 return IB_WC_COMP_SWAP; 2908 case IB_WR_ATOMIC_FETCH_AND_ADD: 2909 return IB_WC_FETCH_ADD; 2910 case IB_WR_REG_MR: 2911 return IB_WC_REG_MR; 2912 case IB_WR_LOCAL_INV: 2913 return IB_WC_LOCAL_INV; 2914 default: 2915 return IB_WC_SEND; 2916 } 2917 } 2918 2919 static inline bool qedr_can_post_send(struct qedr_qp *qp, struct ib_send_wr *wr) 2920 { 2921 int wq_is_full, err_wr, pbl_is_full; 2922 struct qedr_dev *dev = qp->dev; 2923 2924 /* prevent SQ overflow and/or processing of a bad WR */ 2925 err_wr = wr->num_sge > qp->sq.max_sges; 2926 wq_is_full = qedr_wq_is_full(&qp->sq); 2927 pbl_is_full = qed_chain_get_elem_left_u32(&qp->sq.pbl) < 2928 QEDR_MAX_SQE_ELEMENTS_PER_SQE; 2929 if (wq_is_full || err_wr || pbl_is_full) { 2930 if (wq_is_full && !(qp->err_bitmap & QEDR_QP_ERR_SQ_FULL)) { 2931 DP_ERR(dev, 2932 "error: WQ is full. Post send on QP %p failed (this error appears only once)\n", 2933 qp); 2934 qp->err_bitmap |= QEDR_QP_ERR_SQ_FULL; 2935 } 2936 2937 if (err_wr && !(qp->err_bitmap & QEDR_QP_ERR_BAD_SR)) { 2938 DP_ERR(dev, 2939 "error: WR is bad. Post send on QP %p failed (this error appears only once)\n", 2940 qp); 2941 qp->err_bitmap |= QEDR_QP_ERR_BAD_SR; 2942 } 2943 2944 if (pbl_is_full && 2945 !(qp->err_bitmap & QEDR_QP_ERR_SQ_PBL_FULL)) { 2946 DP_ERR(dev, 2947 "error: WQ PBL is full. Post send on QP %p failed (this error appears only once)\n", 2948 qp); 2949 qp->err_bitmap |= QEDR_QP_ERR_SQ_PBL_FULL; 2950 } 2951 return false; 2952 } 2953 return true; 2954 } 2955 2956 static int __qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, 2957 struct ib_send_wr **bad_wr) 2958 { 2959 struct qedr_dev *dev = get_qedr_dev(ibqp->device); 2960 struct qedr_qp *qp = get_qedr_qp(ibqp); 2961 struct rdma_sq_atomic_wqe_1st *awqe1; 2962 struct rdma_sq_atomic_wqe_2nd *awqe2; 2963 struct rdma_sq_atomic_wqe_3rd *awqe3; 2964 struct rdma_sq_send_wqe_2st *swqe2; 2965 struct rdma_sq_local_inv_wqe *iwqe; 2966 struct rdma_sq_rdma_wqe_2nd *rwqe2; 2967 struct rdma_sq_send_wqe_1st *swqe; 2968 struct rdma_sq_rdma_wqe_1st *rwqe; 2969 struct rdma_sq_fmr_wqe_1st *fwqe1; 2970 struct rdma_sq_common_wqe *wqe; 2971 u32 length; 2972 int rc = 0; 2973 bool comp; 2974 2975 if (!qedr_can_post_send(qp, wr)) { 2976 *bad_wr = wr; 2977 return -ENOMEM; 2978 } 2979 2980 wqe = qed_chain_produce(&qp->sq.pbl); 2981 qp->wqe_wr_id[qp->sq.prod].signaled = 2982 !!(wr->send_flags & IB_SEND_SIGNALED) || qp->signaled; 2983 2984 wqe->flags = 0; 2985 SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_SE_FLG, 2986 !!(wr->send_flags & IB_SEND_SOLICITED)); 2987 comp = (!!(wr->send_flags & IB_SEND_SIGNALED)) || qp->signaled; 2988 SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_COMP_FLG, comp); 2989 SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_RD_FENCE_FLG, 2990 !!(wr->send_flags & IB_SEND_FENCE)); 2991 wqe->prev_wqe_size = qp->prev_wqe_size; 2992 2993 qp->wqe_wr_id[qp->sq.prod].opcode = qedr_ib_to_wc_opcode(wr->opcode); 2994 2995 switch (wr->opcode) { 2996 case IB_WR_SEND_WITH_IMM: 2997 if (unlikely(rdma_protocol_iwarp(&dev->ibdev, 1))) { 2998 rc = -EINVAL; 2999 *bad_wr = wr; 3000 break; 3001 } 3002 wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_IMM; 3003 swqe = (struct rdma_sq_send_wqe_1st *)wqe; 3004 swqe->wqe_size = 2; 3005 swqe2 = qed_chain_produce(&qp->sq.pbl); 3006 3007 swqe->inv_key_or_imm_data = cpu_to_le32(be32_to_cpu(wr->ex.imm_data)); 3008 length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2, 3009 wr, bad_wr); 3010 swqe->length = cpu_to_le32(length); 3011 qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size; 3012 
qp->prev_wqe_size = swqe->wqe_size; 3013 qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length; 3014 break; 3015 case IB_WR_SEND: 3016 wqe->req_type = RDMA_SQ_REQ_TYPE_SEND; 3017 swqe = (struct rdma_sq_send_wqe_1st *)wqe; 3018 3019 swqe->wqe_size = 2; 3020 swqe2 = qed_chain_produce(&qp->sq.pbl); 3021 length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2, 3022 wr, bad_wr); 3023 swqe->length = cpu_to_le32(length); 3024 qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size; 3025 qp->prev_wqe_size = swqe->wqe_size; 3026 qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length; 3027 break; 3028 case IB_WR_SEND_WITH_INV: 3029 wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_INVALIDATE; 3030 swqe = (struct rdma_sq_send_wqe_1st *)wqe; 3031 swqe2 = qed_chain_produce(&qp->sq.pbl); 3032 swqe->wqe_size = 2; 3033 swqe->inv_key_or_imm_data = cpu_to_le32(wr->ex.invalidate_rkey); 3034 length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2, 3035 wr, bad_wr); 3036 swqe->length = cpu_to_le32(length); 3037 qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size; 3038 qp->prev_wqe_size = swqe->wqe_size; 3039 qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length; 3040 break; 3041 3042 case IB_WR_RDMA_WRITE_WITH_IMM: 3043 if (unlikely(rdma_protocol_iwarp(&dev->ibdev, 1))) { 3044 rc = -EINVAL; 3045 *bad_wr = wr; 3046 break; 3047 } 3048 wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR_WITH_IMM; 3049 rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe; 3050 3051 rwqe->wqe_size = 2; 3052 rwqe->imm_data = htonl(cpu_to_le32(wr->ex.imm_data)); 3053 rwqe2 = qed_chain_produce(&qp->sq.pbl); 3054 length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2, 3055 wr, bad_wr); 3056 rwqe->length = cpu_to_le32(length); 3057 qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size; 3058 qp->prev_wqe_size = rwqe->wqe_size; 3059 qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length; 3060 break; 3061 case IB_WR_RDMA_WRITE: 3062 wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR; 3063 rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe; 3064 3065 rwqe->wqe_size = 2; 3066 rwqe2 = qed_chain_produce(&qp->sq.pbl); 3067 length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2, 3068 wr, bad_wr); 3069 rwqe->length = cpu_to_le32(length); 3070 qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size; 3071 qp->prev_wqe_size = rwqe->wqe_size; 3072 qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length; 3073 break; 3074 case IB_WR_RDMA_READ_WITH_INV: 3075 SET_FIELD2(wqe->flags, RDMA_SQ_RDMA_WQE_1ST_READ_INV_FLG, 1); 3076 /* fallthrough -- same is identical to RDMA READ */ 3077 3078 case IB_WR_RDMA_READ: 3079 wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_RD; 3080 rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe; 3081 3082 rwqe->wqe_size = 2; 3083 rwqe2 = qed_chain_produce(&qp->sq.pbl); 3084 length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2, 3085 wr, bad_wr); 3086 rwqe->length = cpu_to_le32(length); 3087 qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size; 3088 qp->prev_wqe_size = rwqe->wqe_size; 3089 qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length; 3090 break; 3091 3092 case IB_WR_ATOMIC_CMP_AND_SWP: 3093 case IB_WR_ATOMIC_FETCH_AND_ADD: 3094 awqe1 = (struct rdma_sq_atomic_wqe_1st *)wqe; 3095 awqe1->wqe_size = 4; 3096 3097 awqe2 = qed_chain_produce(&qp->sq.pbl); 3098 DMA_REGPAIR_LE(awqe2->remote_va, atomic_wr(wr)->remote_addr); 3099 awqe2->r_key = cpu_to_le32(atomic_wr(wr)->rkey); 3100 3101 awqe3 = qed_chain_produce(&qp->sq.pbl); 3102 3103 if (wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) { 3104 wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_ADD; 3105 DMA_REGPAIR_LE(awqe3->swap_data, 3106 atomic_wr(wr)->compare_add); 3107 } else { 
3108 wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_CMP_AND_SWAP; 3109 DMA_REGPAIR_LE(awqe3->swap_data, 3110 atomic_wr(wr)->swap); 3111 DMA_REGPAIR_LE(awqe3->cmp_data, 3112 atomic_wr(wr)->compare_add); 3113 } 3114 3115 qedr_prepare_sq_sges(qp, NULL, wr); 3116 3117 qp->wqe_wr_id[qp->sq.prod].wqe_size = awqe1->wqe_size; 3118 qp->prev_wqe_size = awqe1->wqe_size; 3119 break; 3120 3121 case IB_WR_LOCAL_INV: 3122 iwqe = (struct rdma_sq_local_inv_wqe *)wqe; 3123 iwqe->wqe_size = 1; 3124 3125 iwqe->req_type = RDMA_SQ_REQ_TYPE_LOCAL_INVALIDATE; 3126 iwqe->inv_l_key = wr->ex.invalidate_rkey; 3127 qp->wqe_wr_id[qp->sq.prod].wqe_size = iwqe->wqe_size; 3128 qp->prev_wqe_size = iwqe->wqe_size; 3129 break; 3130 case IB_WR_REG_MR: 3131 DP_DEBUG(dev, QEDR_MSG_CQ, "REG_MR\n"); 3132 wqe->req_type = RDMA_SQ_REQ_TYPE_FAST_MR; 3133 fwqe1 = (struct rdma_sq_fmr_wqe_1st *)wqe; 3134 fwqe1->wqe_size = 2; 3135 3136 rc = qedr_prepare_reg(qp, fwqe1, reg_wr(wr)); 3137 if (rc) { 3138 DP_ERR(dev, "IB_REG_MR failed rc=%d\n", rc); 3139 *bad_wr = wr; 3140 break; 3141 } 3142 3143 qp->wqe_wr_id[qp->sq.prod].wqe_size = fwqe1->wqe_size; 3144 qp->prev_wqe_size = fwqe1->wqe_size; 3145 break; 3146 default: 3147 DP_ERR(dev, "invalid opcode 0x%x!\n", wr->opcode); 3148 rc = -EINVAL; 3149 *bad_wr = wr; 3150 break; 3151 } 3152 3153 if (*bad_wr) { 3154 u16 value; 3155 3156 /* Restore prod to its position before 3157 * this WR was processed 3158 */ 3159 value = le16_to_cpu(qp->sq.db_data.data.value); 3160 qed_chain_set_prod(&qp->sq.pbl, value, wqe); 3161 3162 /* Restore prev_wqe_size */ 3163 qp->prev_wqe_size = wqe->prev_wqe_size; 3164 rc = -EINVAL; 3165 DP_ERR(dev, "POST SEND FAILED\n"); 3166 } 3167 3168 return rc; 3169 } 3170 3171 int qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, 3172 struct ib_send_wr **bad_wr) 3173 { 3174 struct qedr_dev *dev = get_qedr_dev(ibqp->device); 3175 struct qedr_qp *qp = get_qedr_qp(ibqp); 3176 unsigned long flags; 3177 int rc = 0; 3178 3179 *bad_wr = NULL; 3180 3181 if (qp->qp_type == IB_QPT_GSI) 3182 return qedr_gsi_post_send(ibqp, wr, bad_wr); 3183 3184 spin_lock_irqsave(&qp->q_lock, flags); 3185 3186 if (rdma_protocol_roce(&dev->ibdev, 1)) { 3187 if ((qp->state != QED_ROCE_QP_STATE_RTS) && 3188 (qp->state != QED_ROCE_QP_STATE_ERR) && 3189 (qp->state != QED_ROCE_QP_STATE_SQD)) { 3190 spin_unlock_irqrestore(&qp->q_lock, flags); 3191 *bad_wr = wr; 3192 DP_DEBUG(dev, QEDR_MSG_CQ, 3193 "QP in wrong state! QP icid=0x%x state %d\n", 3194 qp->icid, qp->state); 3195 return -EINVAL; 3196 } 3197 } 3198 3199 while (wr) { 3200 rc = __qedr_post_send(ibqp, wr, bad_wr); 3201 if (rc) 3202 break; 3203 3204 qp->wqe_wr_id[qp->sq.prod].wr_id = wr->wr_id; 3205 3206 qedr_inc_sw_prod(&qp->sq); 3207 3208 qp->sq.db_data.data.value++; 3209 3210 wr = wr->next; 3211 } 3212 3213 /* Trigger doorbell 3214 * If there was a failure in the first WR then it will be triggered in 3215 * vain. However, this is not harmful (as long as the producer value is 3216 * unchanged). For performance reasons we avoid checking for this 3217 * redundant doorbell. 3218 * 3219 * qp->wqe_wr_id is accessed during qedr_poll_cq, as 3220 * soon as we give the doorbell, we could get a completion 3221 * for this wr, therefore we need to make sure that the 3222 * memory is updated before giving the doorbell. 3223 * During qedr_poll_cq, rmb is called before accessing the 3224 * cqe. This covers for the smp_rmb as well.
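 *
 * (Illustrative ordering sketch, added for clarity and not part of the
 * original comment: posting CPU -- write wqe_wr_id[prod] -> smp_wmb() ->
 * writel(doorbell); polling CPU -- read valid CQE -> rmb() -> read
 * wqe_wr_id[cons]. The smp_wmb() below pairs with the rmb() issued in
 * qedr_poll_cq() before the CQE fields are consumed.)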
3225 */ 3226 smp_wmb(); 3227 writel(qp->sq.db_data.raw, qp->sq.db); 3228 3229 /* Make sure write sticks */ 3230 mmiowb(); 3231 3232 spin_unlock_irqrestore(&qp->q_lock, flags); 3233 3234 return rc; 3235 } 3236 3237 int qedr_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, 3238 struct ib_recv_wr **bad_wr) 3239 { 3240 struct qedr_qp *qp = get_qedr_qp(ibqp); 3241 struct qedr_dev *dev = qp->dev; 3242 unsigned long flags; 3243 int status = 0; 3244 3245 if (qp->qp_type == IB_QPT_GSI) 3246 return qedr_gsi_post_recv(ibqp, wr, bad_wr); 3247 3248 spin_lock_irqsave(&qp->q_lock, flags); 3249 3250 if (qp->state == QED_ROCE_QP_STATE_RESET) { 3251 spin_unlock_irqrestore(&qp->q_lock, flags); 3252 *bad_wr = wr; 3253 return -EINVAL; 3254 } 3255 3256 while (wr) { 3257 int i; 3258 3259 if (qed_chain_get_elem_left_u32(&qp->rq.pbl) < 3260 QEDR_MAX_RQE_ELEMENTS_PER_RQE || 3261 wr->num_sge > qp->rq.max_sges) { 3262 DP_ERR(dev, "Can't post WR (%d < %d) || (%d > %d)\n", 3263 qed_chain_get_elem_left_u32(&qp->rq.pbl), 3264 QEDR_MAX_RQE_ELEMENTS_PER_RQE, wr->num_sge, 3265 qp->rq.max_sges); 3266 status = -ENOMEM; 3267 *bad_wr = wr; 3268 break; 3269 } 3270 for (i = 0; i < wr->num_sge; i++) { 3271 u32 flags = 0; 3272 struct rdma_rq_sge *rqe = 3273 qed_chain_produce(&qp->rq.pbl); 3274 3275 /* First one must include the number 3276 * of SGE in the list 3277 */ 3278 if (!i) 3279 SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES, 3280 wr->num_sge); 3281 3282 SET_FIELD(flags, RDMA_RQ_SGE_L_KEY_LO, 3283 wr->sg_list[i].lkey); 3284 3285 RQ_SGE_SET(rqe, wr->sg_list[i].addr, 3286 wr->sg_list[i].length, flags); 3287 } 3288 3289 /* Special case of no sges. FW requires between 1-4 sges... 3290 * in this case we need to post 1 sge with length zero. this is 3291 * because rdma write with immediate consumes an RQ. 3292 */ 3293 if (!wr->num_sge) { 3294 u32 flags = 0; 3295 struct rdma_rq_sge *rqe = 3296 qed_chain_produce(&qp->rq.pbl); 3297 3298 /* First one must include the number 3299 * of SGE in the list 3300 */ 3301 SET_FIELD(flags, RDMA_RQ_SGE_L_KEY_LO, 0); 3302 SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES, 1); 3303 3304 RQ_SGE_SET(rqe, 0, 0, flags); 3305 i = 1; 3306 } 3307 3308 qp->rqe_wr_id[qp->rq.prod].wr_id = wr->wr_id; 3309 qp->rqe_wr_id[qp->rq.prod].wqe_size = i; 3310 3311 qedr_inc_sw_prod(&qp->rq); 3312 3313 /* qp->rqe_wr_id is accessed during qedr_poll_cq, as 3314 * soon as we give the doorbell, we could get a completion 3315 * for this wr, therefore we need to make sure that the 3316 * memory is update before giving the doorbell. 3317 * During qedr_poll_cq, rmb is called before accessing the 3318 * cqe. This covers for the smp_rmb as well. 
3319 */ 3320 smp_wmb(); 3321 3322 qp->rq.db_data.data.value++; 3323 3324 writel(qp->rq.db_data.raw, qp->rq.db); 3325 3326 /* Make sure write sticks */ 3327 mmiowb(); 3328 3329 if (rdma_protocol_iwarp(&dev->ibdev, 1)) { 3330 writel(qp->rq.iwarp_db2_data.raw, qp->rq.iwarp_db2); 3331 mmiowb(); /* for second doorbell */ 3332 } 3333 3334 wr = wr->next; 3335 } 3336 3337 spin_unlock_irqrestore(&qp->q_lock, flags); 3338 3339 return status; 3340 } 3341 3342 static int is_valid_cqe(struct qedr_cq *cq, union rdma_cqe *cqe) 3343 { 3344 struct rdma_cqe_requester *resp_cqe = &cqe->req; 3345 3346 return (resp_cqe->flags & RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK) == 3347 cq->pbl_toggle; 3348 } 3349 3350 static struct qedr_qp *cqe_get_qp(union rdma_cqe *cqe) 3351 { 3352 struct rdma_cqe_requester *resp_cqe = &cqe->req; 3353 struct qedr_qp *qp; 3354 3355 qp = (struct qedr_qp *)(uintptr_t)HILO_GEN(resp_cqe->qp_handle.hi, 3356 resp_cqe->qp_handle.lo, 3357 u64); 3358 return qp; 3359 } 3360 3361 static enum rdma_cqe_type cqe_get_type(union rdma_cqe *cqe) 3362 { 3363 struct rdma_cqe_requester *resp_cqe = &cqe->req; 3364 3365 return GET_FIELD(resp_cqe->flags, RDMA_CQE_REQUESTER_TYPE); 3366 } 3367 3368 /* Return latest CQE (needs processing) */ 3369 static union rdma_cqe *get_cqe(struct qedr_cq *cq) 3370 { 3371 return cq->latest_cqe; 3372 } 3373 3374 /* In fmr we need to increase the number of fmr completed counter for the fmr 3375 * algorithm determining whether we can free a pbl or not. 3376 * we need to perform this whether the work request was signaled or not. for 3377 * this purpose we call this function from the condition that checks if a wr 3378 * should be skipped, to make sure we don't miss it ( possibly this fmr 3379 * operation was not signalted) 3380 */ 3381 static inline void qedr_chk_if_fmr(struct qedr_qp *qp) 3382 { 3383 if (qp->wqe_wr_id[qp->sq.cons].opcode == IB_WC_REG_MR) 3384 qp->wqe_wr_id[qp->sq.cons].mr->info.completed++; 3385 } 3386 3387 static int process_req(struct qedr_dev *dev, struct qedr_qp *qp, 3388 struct qedr_cq *cq, int num_entries, 3389 struct ib_wc *wc, u16 hw_cons, enum ib_wc_status status, 3390 int force) 3391 { 3392 u16 cnt = 0; 3393 3394 while (num_entries && qp->sq.wqe_cons != hw_cons) { 3395 if (!qp->wqe_wr_id[qp->sq.cons].signaled && !force) { 3396 qedr_chk_if_fmr(qp); 3397 /* skip WC */ 3398 goto next_cqe; 3399 } 3400 3401 /* fill WC */ 3402 wc->status = status; 3403 wc->vendor_err = 0; 3404 wc->wc_flags = 0; 3405 wc->src_qp = qp->id; 3406 wc->qp = &qp->ibqp; 3407 3408 wc->wr_id = qp->wqe_wr_id[qp->sq.cons].wr_id; 3409 wc->opcode = qp->wqe_wr_id[qp->sq.cons].opcode; 3410 3411 switch (wc->opcode) { 3412 case IB_WC_RDMA_WRITE: 3413 wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len; 3414 break; 3415 case IB_WC_COMP_SWAP: 3416 case IB_WC_FETCH_ADD: 3417 wc->byte_len = 8; 3418 break; 3419 case IB_WC_REG_MR: 3420 qp->wqe_wr_id[qp->sq.cons].mr->info.completed++; 3421 break; 3422 case IB_WC_RDMA_READ: 3423 case IB_WC_SEND: 3424 wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len; 3425 break; 3426 default: 3427 break; 3428 } 3429 3430 num_entries--; 3431 wc++; 3432 cnt++; 3433 next_cqe: 3434 while (qp->wqe_wr_id[qp->sq.cons].wqe_size--) 3435 qed_chain_consume(&qp->sq.pbl); 3436 qedr_inc_sw_cons(&qp->sq); 3437 } 3438 3439 return cnt; 3440 } 3441 3442 static int qedr_poll_cq_req(struct qedr_dev *dev, 3443 struct qedr_qp *qp, struct qedr_cq *cq, 3444 int num_entries, struct ib_wc *wc, 3445 struct rdma_cqe_requester *req) 3446 { 3447 int cnt = 0; 3448 3449 switch (req->status) { 3450 case 
RDMA_CQE_REQ_STS_OK: 3451 cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons, 3452 IB_WC_SUCCESS, 0); 3453 break; 3454 case RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR: 3455 if (qp->state != QED_ROCE_QP_STATE_ERR) 3456 DP_DEBUG(dev, QEDR_MSG_CQ, 3457 "Error: POLL CQ with RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR. CQ icid=0x%x, QP icid=0x%x\n", 3458 cq->icid, qp->icid); 3459 cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons, 3460 IB_WC_WR_FLUSH_ERR, 1); 3461 break; 3462 default: 3463 /* process all WQE before the cosumer */ 3464 qp->state = QED_ROCE_QP_STATE_ERR; 3465 cnt = process_req(dev, qp, cq, num_entries, wc, 3466 req->sq_cons - 1, IB_WC_SUCCESS, 0); 3467 wc += cnt; 3468 /* if we have extra WC fill it with actual error info */ 3469 if (cnt < num_entries) { 3470 enum ib_wc_status wc_status; 3471 3472 switch (req->status) { 3473 case RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR: 3474 DP_ERR(dev, 3475 "Error: POLL CQ with RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR. CQ icid=0x%x, QP icid=0x%x\n", 3476 cq->icid, qp->icid); 3477 wc_status = IB_WC_BAD_RESP_ERR; 3478 break; 3479 case RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR: 3480 DP_ERR(dev, 3481 "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR. CQ icid=0x%x, QP icid=0x%x\n", 3482 cq->icid, qp->icid); 3483 wc_status = IB_WC_LOC_LEN_ERR; 3484 break; 3485 case RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR: 3486 DP_ERR(dev, 3487 "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n", 3488 cq->icid, qp->icid); 3489 wc_status = IB_WC_LOC_QP_OP_ERR; 3490 break; 3491 case RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR: 3492 DP_ERR(dev, 3493 "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR. CQ icid=0x%x, QP icid=0x%x\n", 3494 cq->icid, qp->icid); 3495 wc_status = IB_WC_LOC_PROT_ERR; 3496 break; 3497 case RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR: 3498 DP_ERR(dev, 3499 "Error: POLL CQ with RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n", 3500 cq->icid, qp->icid); 3501 wc_status = IB_WC_MW_BIND_ERR; 3502 break; 3503 case RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR: 3504 DP_ERR(dev, 3505 "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR. CQ icid=0x%x, QP icid=0x%x\n", 3506 cq->icid, qp->icid); 3507 wc_status = IB_WC_REM_INV_REQ_ERR; 3508 break; 3509 case RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR: 3510 DP_ERR(dev, 3511 "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR. CQ icid=0x%x, QP icid=0x%x\n", 3512 cq->icid, qp->icid); 3513 wc_status = IB_WC_REM_ACCESS_ERR; 3514 break; 3515 case RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR: 3516 DP_ERR(dev, 3517 "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n", 3518 cq->icid, qp->icid); 3519 wc_status = IB_WC_REM_OP_ERR; 3520 break; 3521 case RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR: 3522 DP_ERR(dev, 3523 "Error: POLL CQ with RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR. CQ icid=0x%x, QP icid=0x%x\n", 3524 cq->icid, qp->icid); 3525 wc_status = IB_WC_RNR_RETRY_EXC_ERR; 3526 break; 3527 case RDMA_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR: 3528 DP_ERR(dev, 3529 "Error: POLL CQ with ROCE_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR. CQ icid=0x%x, QP icid=0x%x\n", 3530 cq->icid, qp->icid); 3531 wc_status = IB_WC_RETRY_EXC_ERR; 3532 break; 3533 default: 3534 DP_ERR(dev, 3535 "Error: POLL CQ with IB_WC_GENERAL_ERR. 
CQ icid=0x%x, QP icid=0x%x\n", 3536 cq->icid, qp->icid); 3537 wc_status = IB_WC_GENERAL_ERR; 3538 } 3539 cnt += process_req(dev, qp, cq, 1, wc, req->sq_cons, 3540 wc_status, 1); 3541 } 3542 } 3543 3544 return cnt; 3545 } 3546 3547 static inline int qedr_cqe_resp_status_to_ib(u8 status) 3548 { 3549 switch (status) { 3550 case RDMA_CQE_RESP_STS_LOCAL_ACCESS_ERR: 3551 return IB_WC_LOC_ACCESS_ERR; 3552 case RDMA_CQE_RESP_STS_LOCAL_LENGTH_ERR: 3553 return IB_WC_LOC_LEN_ERR; 3554 case RDMA_CQE_RESP_STS_LOCAL_QP_OPERATION_ERR: 3555 return IB_WC_LOC_QP_OP_ERR; 3556 case RDMA_CQE_RESP_STS_LOCAL_PROTECTION_ERR: 3557 return IB_WC_LOC_PROT_ERR; 3558 case RDMA_CQE_RESP_STS_MEMORY_MGT_OPERATION_ERR: 3559 return IB_WC_MW_BIND_ERR; 3560 case RDMA_CQE_RESP_STS_REMOTE_INVALID_REQUEST_ERR: 3561 return IB_WC_REM_INV_RD_REQ_ERR; 3562 case RDMA_CQE_RESP_STS_OK: 3563 return IB_WC_SUCCESS; 3564 default: 3565 return IB_WC_GENERAL_ERR; 3566 } 3567 } 3568 3569 static inline int qedr_set_ok_cqe_resp_wc(struct rdma_cqe_responder *resp, 3570 struct ib_wc *wc) 3571 { 3572 wc->status = IB_WC_SUCCESS; 3573 wc->byte_len = le32_to_cpu(resp->length); 3574 3575 if (resp->flags & QEDR_RESP_IMM) { 3576 wc->ex.imm_data = cpu_to_be32(le32_to_cpu(resp->imm_data_or_inv_r_Key)); 3577 wc->wc_flags |= IB_WC_WITH_IMM; 3578 3579 if (resp->flags & QEDR_RESP_RDMA) 3580 wc->opcode = IB_WC_RECV_RDMA_WITH_IMM; 3581 3582 if (resp->flags & QEDR_RESP_INV) 3583 return -EINVAL; 3584 3585 } else if (resp->flags & QEDR_RESP_INV) { 3586 wc->ex.imm_data = le32_to_cpu(resp->imm_data_or_inv_r_Key); 3587 wc->wc_flags |= IB_WC_WITH_INVALIDATE; 3588 3589 if (resp->flags & QEDR_RESP_RDMA) 3590 return -EINVAL; 3591 3592 } else if (resp->flags & QEDR_RESP_RDMA) { 3593 return -EINVAL; 3594 } 3595 3596 return 0; 3597 } 3598 3599 static void __process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp, 3600 struct qedr_cq *cq, struct ib_wc *wc, 3601 struct rdma_cqe_responder *resp, u64 wr_id) 3602 { 3603 /* Must fill fields before qedr_set_ok_cqe_resp_wc() */ 3604 wc->opcode = IB_WC_RECV; 3605 wc->wc_flags = 0; 3606 3607 if (likely(resp->status == RDMA_CQE_RESP_STS_OK)) { 3608 if (qedr_set_ok_cqe_resp_wc(resp, wc)) 3609 DP_ERR(dev, 3610 "CQ %p (icid=%d) has invalid CQE responder flags=0x%x\n", 3611 cq, cq->icid, resp->flags); 3612 3613 } else { 3614 wc->status = qedr_cqe_resp_status_to_ib(resp->status); 3615 if (wc->status == IB_WC_GENERAL_ERR) 3616 DP_ERR(dev, 3617 "CQ %p (icid=%d) contains an invalid CQE status %d\n", 3618 cq, cq->icid, resp->status); 3619 } 3620 3621 /* Fill the rest of the WC */ 3622 wc->vendor_err = 0; 3623 wc->src_qp = qp->id; 3624 wc->qp = &qp->ibqp; 3625 wc->wr_id = wr_id; 3626 } 3627 3628 static int process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp, 3629 struct qedr_cq *cq, struct ib_wc *wc, 3630 struct rdma_cqe_responder *resp) 3631 { 3632 u64 wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id; 3633 3634 __process_resp_one(dev, qp, cq, wc, resp, wr_id); 3635 3636 while (qp->rqe_wr_id[qp->rq.cons].wqe_size--) 3637 qed_chain_consume(&qp->rq.pbl); 3638 qedr_inc_sw_cons(&qp->rq); 3639 3640 return 1; 3641 } 3642 3643 static int process_resp_flush(struct qedr_qp *qp, struct qedr_cq *cq, 3644 int num_entries, struct ib_wc *wc, u16 hw_cons) 3645 { 3646 u16 cnt = 0; 3647 3648 while (num_entries && qp->rq.wqe_cons != hw_cons) { 3649 /* fill WC */ 3650 wc->status = IB_WC_WR_FLUSH_ERR; 3651 wc->vendor_err = 0; 3652 wc->wc_flags = 0; 3653 wc->src_qp = qp->id; 3654 wc->byte_len = 0; 3655 wc->wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id; 3656 wc->qp = 
&qp->ibqp; 3657 num_entries--; 3658 wc++; 3659 cnt++; 3660 while (qp->rqe_wr_id[qp->rq.cons].wqe_size--) 3661 qed_chain_consume(&qp->rq.pbl); 3662 qedr_inc_sw_cons(&qp->rq); 3663 } 3664 3665 return cnt; 3666 } 3667 3668 static void try_consume_resp_cqe(struct qedr_cq *cq, struct qedr_qp *qp, 3669 struct rdma_cqe_responder *resp, int *update) 3670 { 3671 if (le16_to_cpu(resp->rq_cons_or_srq_id) == qp->rq.wqe_cons) { 3672 consume_cqe(cq); 3673 *update |= 1; 3674 } 3675 } 3676 3677 static int qedr_poll_cq_resp(struct qedr_dev *dev, struct qedr_qp *qp, 3678 struct qedr_cq *cq, int num_entries, 3679 struct ib_wc *wc, struct rdma_cqe_responder *resp, 3680 int *update) 3681 { 3682 int cnt; 3683 3684 if (resp->status == RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR) { 3685 cnt = process_resp_flush(qp, cq, num_entries, wc, 3686 resp->rq_cons_or_srq_id); 3687 try_consume_resp_cqe(cq, qp, resp, update); 3688 } else { 3689 cnt = process_resp_one(dev, qp, cq, wc, resp); 3690 consume_cqe(cq); 3691 *update |= 1; 3692 } 3693 3694 return cnt; 3695 } 3696 3697 static void try_consume_req_cqe(struct qedr_cq *cq, struct qedr_qp *qp, 3698 struct rdma_cqe_requester *req, int *update) 3699 { 3700 if (le16_to_cpu(req->sq_cons) == qp->sq.wqe_cons) { 3701 consume_cqe(cq); 3702 *update |= 1; 3703 } 3704 } 3705 3706 int qedr_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) 3707 { 3708 struct qedr_dev *dev = get_qedr_dev(ibcq->device); 3709 struct qedr_cq *cq = get_qedr_cq(ibcq); 3710 union rdma_cqe *cqe; 3711 u32 old_cons, new_cons; 3712 unsigned long flags; 3713 int update = 0; 3714 int done = 0; 3715 3716 if (cq->destroyed) { 3717 DP_ERR(dev, 3718 "warning: poll was invoked after destroy for cq %p (icid=%d)\n", 3719 cq, cq->icid); 3720 return 0; 3721 } 3722 3723 if (cq->cq_type == QEDR_CQ_TYPE_GSI) 3724 return qedr_gsi_poll_cq(ibcq, num_entries, wc); 3725 3726 spin_lock_irqsave(&cq->cq_lock, flags); 3727 cqe = cq->latest_cqe; 3728 old_cons = qed_chain_get_cons_idx_u32(&cq->pbl); 3729 while (num_entries && is_valid_cqe(cq, cqe)) { 3730 struct qedr_qp *qp; 3731 int cnt = 0; 3732 3733 /* prevent speculative reads of any field of CQE */ 3734 rmb(); 3735 3736 qp = cqe_get_qp(cqe); 3737 if (!qp) { 3738 WARN(1, "Error: CQE QP pointer is NULL. 
CQE=%p\n", cqe); 3739 break; 3740 } 3741 3742 wc->qp = &qp->ibqp; 3743 3744 switch (cqe_get_type(cqe)) { 3745 case RDMA_CQE_TYPE_REQUESTER: 3746 cnt = qedr_poll_cq_req(dev, qp, cq, num_entries, wc, 3747 &cqe->req); 3748 try_consume_req_cqe(cq, qp, &cqe->req, &update); 3749 break; 3750 case RDMA_CQE_TYPE_RESPONDER_RQ: 3751 cnt = qedr_poll_cq_resp(dev, qp, cq, num_entries, wc, 3752 &cqe->resp, &update); 3753 break; 3754 case RDMA_CQE_TYPE_INVALID: 3755 default: 3756 DP_ERR(dev, "Error: invalid CQE type = %d\n", 3757 cqe_get_type(cqe)); 3758 } 3759 num_entries -= cnt; 3760 wc += cnt; 3761 done += cnt; 3762 3763 cqe = get_cqe(cq); 3764 } 3765 new_cons = qed_chain_get_cons_idx_u32(&cq->pbl); 3766 3767 cq->cq_cons += new_cons - old_cons; 3768 3769 if (update) 3770 /* the doorbell notifies about the latest VALID entry, 3771 * but the chain already points to the next INVALID one 3772 */ 3773 doorbell_cq(cq, cq->cq_cons - 1, cq->arm_flags); 3774 3775 spin_unlock_irqrestore(&cq->cq_lock, flags); 3776 return done; 3777 } 3778 3779 int qedr_process_mad(struct ib_device *ibdev, int process_mad_flags, 3780 u8 port_num, 3781 const struct ib_wc *in_wc, 3782 const struct ib_grh *in_grh, 3783 const struct ib_mad_hdr *mad_hdr, 3784 size_t in_mad_size, struct ib_mad_hdr *out_mad, 3785 size_t *out_mad_size, u16 *out_mad_pkey_index) 3786 { 3787 struct qedr_dev *dev = get_qedr_dev(ibdev); 3788 3789 DP_DEBUG(dev, QEDR_MSG_GSI, 3790 "QEDR_PROCESS_MAD in_mad %x %x %x %x %x %x %x %x\n", 3791 mad_hdr->attr_id, mad_hdr->base_version, mad_hdr->attr_mod, 3792 mad_hdr->class_specific, mad_hdr->class_version, 3793 mad_hdr->method, mad_hdr->mgmt_class, mad_hdr->status); 3794 return IB_MAD_RESULT_SUCCESS; 3795 } 3796
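
/*
 * Illustrative only, not part of the original driver: a minimal sketch of
 * how a kernel ULP's completion handling reaches qedr_poll_cq() through
 * the verbs layer. The helper name drain_one_cq() is hypothetical; only
 * the standard ib_poll_cq() entry point and struct ib_wc fields are
 * assumed.
 *
 *	static void drain_one_cq(struct ib_cq *cq)
 *	{
 *		struct ib_wc wc;
 *
 *		// ib_poll_cq() dispatches to qedr_poll_cq() on qedr devices
 *		while (ib_poll_cq(cq, 1, &wc) > 0) {
 *			if (wc.status != IB_WC_SUCCESS)
 *				pr_warn("wr_id %llu completed with status %d\n",
 *					(unsigned long long)wc.wr_id,
 *					wc.status);
 *		}
 *	}
 */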