/* QLogic qedr NIC Driver
 * Copyright (c) 2015-2016 QLogic Corporation
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and /or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <linux/dma-mapping.h>
#include <linux/crc32.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/udp.h>
#include <linux/iommu.h>

#include <rdma/ib_verbs.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/iw_cm.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_cache.h>

#include <linux/qed/common_hsi.h>
#include "qedr_hsi_rdma.h"
#include <linux/qed/qed_if.h>
#include "qedr.h"
#include "verbs.h"
#include <rdma/qedr-abi.h>
#include "qedr_roce_cm.h"

#define DB_ADDR_SHIFT(addr)	((addr) << DB_PWM_ADDR_OFFSET_SHIFT)

static inline int qedr_ib_copy_to_udata(struct ib_udata *udata, void *src,
					size_t len)
{
	size_t min_len = min_t(size_t, len, udata->outlen);

	return ib_copy_to_udata(udata, src, min_len);
}

int qedr_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
{
	if (index > QEDR_ROCE_PKEY_TABLE_LEN)
		return -EINVAL;

	*pkey = QEDR_ROCE_PKEY_DEFAULT;
	return 0;
}

int qedr_iw_query_gid(struct ib_device *ibdev, u8 port,
		      int index, union ib_gid *sgid)
{
	struct qedr_dev *dev = get_qedr_dev(ibdev);

	memset(sgid->raw, 0, sizeof(sgid->raw));
	ether_addr_copy(sgid->raw, dev->ndev->dev_addr);

	DP_DEBUG(dev, QEDR_MSG_INIT, "QUERY sgid[%d]=%llx:%llx\n", index,
		 sgid->global.interface_id, sgid->global.subnet_prefix);

	return 0;
}

int qedr_query_gid(struct ib_device *ibdev, u8 port, int index,
		   union ib_gid *sgid)
{
	struct qedr_dev *dev = get_qedr_dev(ibdev);
	int rc = 0;

	if (!rdma_cap_roce_gid_table(ibdev, port))
		return -ENODEV;

	rc = ib_get_cached_gid(ibdev, port, index, sgid, NULL);
	if (rc == -EAGAIN) {
		memcpy(sgid, &zgid, sizeof(*sgid));
		return 0;
	}

	DP_DEBUG(dev, QEDR_MSG_INIT, "query gid: index=%d %llx:%llx\n", index,
		 sgid->global.interface_id, sgid->global.subnet_prefix);

	return rc;
}

int qedr_add_gid(struct ib_device *device, u8 port_num,
		 unsigned int index, const union ib_gid *gid,
		 const struct ib_gid_attr *attr,
		 void **context)
{
	if (!rdma_cap_roce_gid_table(device, port_num))
		return -EINVAL;

	if (port_num > QEDR_MAX_PORT)
		return -EINVAL;

	if (!context)
		return -EINVAL;

	return 0;
}

int qedr_del_gid(struct ib_device *device, u8 port_num,
		 unsigned int index, void **context)
{
	if (!rdma_cap_roce_gid_table(device, port_num))
		return -EINVAL;

	if (port_num > QEDR_MAX_PORT)
		return -EINVAL;

	if (!context)
		return -EINVAL;

	return 0;
}

int qedr_query_device(struct ib_device *ibdev,
		      struct ib_device_attr *attr, struct ib_udata *udata)
{
	struct qedr_dev *dev = get_qedr_dev(ibdev);
	struct qedr_device_attr *qattr = &dev->attr;

	if (!dev->rdma_ctx) {
		DP_ERR(dev,
		       "qedr_query_device called with invalid params rdma_ctx=%p\n",
		       dev->rdma_ctx);
		return -EINVAL;
	}

	memset(attr, 0, sizeof(*attr));

	attr->fw_ver = qattr->fw_ver;
	attr->sys_image_guid = qattr->sys_image_guid;
	attr->max_mr_size = qattr->max_mr_size;
	attr->page_size_cap = qattr->page_size_caps;
	attr->vendor_id = qattr->vendor_id;
	attr->vendor_part_id = qattr->vendor_part_id;
	attr->hw_ver = qattr->hw_ver;
	attr->max_qp = qattr->max_qp;
	attr->max_qp_wr = max_t(u32, qattr->max_sqe, qattr->max_rqe);
	attr->device_cap_flags = IB_DEVICE_CURR_QP_STATE_MOD |
	    IB_DEVICE_RC_RNR_NAK_GEN |
	    IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS;

	attr->max_sge = qattr->max_sge;
	attr->max_sge_rd = qattr->max_sge;
	attr->max_cq = qattr->max_cq;
	attr->max_cqe = qattr->max_cqe;
	attr->max_mr = qattr->max_mr;
	attr->max_mw = qattr->max_mw;
	attr->max_pd = qattr->max_pd;
	attr->atomic_cap = dev->atomic_cap;
	attr->max_fmr = qattr->max_fmr;
	attr->max_map_per_fmr = 16;
	attr->max_qp_init_rd_atom =
	    1 << (fls(qattr->max_qp_req_rd_atomic_resc) - 1);
	attr->max_qp_rd_atom =
	    min(1 << (fls(qattr->max_qp_resp_rd_atomic_resc) - 1),
		attr->max_qp_init_rd_atom);

	attr->max_srq = qattr->max_srq;
	attr->max_srq_sge = qattr->max_srq_sge;
	attr->max_srq_wr = qattr->max_srq_wr;

	attr->local_ca_ack_delay = qattr->dev_ack_delay;
	attr->max_fast_reg_page_list_len = qattr->max_mr / 8;
	attr->max_pkeys = QEDR_ROCE_PKEY_MAX;
	attr->max_ah = qattr->max_ah;

	return 0;
}

#define QEDR_SPEED_SDR		(1)
#define QEDR_SPEED_DDR		(2)
#define QEDR_SPEED_QDR		(4)
#define QEDR_SPEED_FDR10	(8)
#define QEDR_SPEED_FDR		(16)
#define QEDR_SPEED_EDR		(32)

static inline void get_link_speed_and_width(int speed, u8 *ib_speed,
					    u8 *ib_width)
{
	switch (speed) {
	case 1000:
		*ib_speed = QEDR_SPEED_SDR;
		*ib_width = IB_WIDTH_1X;
		break;
	case 10000:
		*ib_speed = QEDR_SPEED_QDR;
		*ib_width = IB_WIDTH_1X;
		break;

	case 20000:
		*ib_speed = QEDR_SPEED_DDR;
		*ib_width = IB_WIDTH_4X;
		break;

	case 25000:
		*ib_speed = QEDR_SPEED_EDR;
		*ib_width = IB_WIDTH_1X;
		break;

	case 40000:
		*ib_speed = QEDR_SPEED_QDR;
		*ib_width = IB_WIDTH_4X;
		break;

	case 50000:
		*ib_speed = QEDR_SPEED_QDR;
		*ib_width = IB_WIDTH_4X;
		break;

	case 100000:
		*ib_speed = QEDR_SPEED_EDR;
		*ib_width = IB_WIDTH_4X;
		break;

	default:
		/* Unsupported */
		*ib_speed = QEDR_SPEED_SDR;
		*ib_width = IB_WIDTH_1X;
	}
}

int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr)
{
	struct qedr_dev *dev;
	struct qed_rdma_port *rdma_port;

	dev = get_qedr_dev(ibdev);
	if (port > 1) {
		DP_ERR(dev, "invalid_port=0x%x\n", port);
		return -EINVAL;
	}

	if (!dev->rdma_ctx) {
		DP_ERR(dev, "rdma_ctx is NULL\n");
		return -EINVAL;
	}

	rdma_port = dev->ops->rdma_query_port(dev->rdma_ctx);

	/* *attr is zeroed by the caller; avoid zeroing it here */
	if (rdma_port->port_state == QED_RDMA_PORT_UP) {
		attr->state = IB_PORT_ACTIVE;
		attr->phys_state = 5;
	} else {
		attr->state = IB_PORT_DOWN;
		attr->phys_state = 3;
	}
	attr->max_mtu = IB_MTU_4096;
	attr->active_mtu = iboe_get_mtu(dev->ndev->mtu);
	attr->lid = 0;
	attr->lmc = 0;
	attr->sm_lid = 0;
	attr->sm_sl = 0;
	attr->port_cap_flags = IB_PORT_IP_BASED_GIDS;
	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
		attr->gid_tbl_len = 1;
		attr->pkey_tbl_len = 1;
	} else {
		attr->gid_tbl_len = QEDR_MAX_SGID;
		attr->pkey_tbl_len = QEDR_ROCE_PKEY_TABLE_LEN;
	}
	attr->bad_pkey_cntr = rdma_port->pkey_bad_counter;
	attr->qkey_viol_cntr = 0;
	get_link_speed_and_width(rdma_port->link_speed,
				 &attr->active_speed, &attr->active_width);
	attr->max_msg_sz = rdma_port->max_msg_size;
	attr->max_vl_num = 4;

	return 0;
}

int qedr_modify_port(struct ib_device *ibdev, u8 port, int mask,
		     struct ib_port_modify *props)
{
	struct qedr_dev *dev;

	dev = get_qedr_dev(ibdev);
	if (port > 1) {
		DP_ERR(dev, "invalid_port=0x%x\n", port);
		return -EINVAL;
	}

	return 0;
}

static int qedr_add_mmap(struct qedr_ucontext *uctx, u64 phy_addr,
			 unsigned long len)
{
	struct qedr_mm *mm;

	mm = kzalloc(sizeof(*mm), GFP_KERNEL);
	if (!mm)
		return -ENOMEM;

	mm->key.phy_addr = phy_addr;
	/* This function might be called with a length which is not a multiple
	 * of PAGE_SIZE, while the mapping is PAGE_SIZE grained and the kernel
	 * forces this granularity by increasing the requested size if needed.
	 * When qedr_mmap is called, it will search the list with the updated
	 * length as a key. To prevent search failures, the length is rounded
	 * up in advance to PAGE_SIZE.
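	 * For example (assuming 4 KiB pages), a request to record 0x1800
	 * bytes is stored, and later looked up, as 0x2000.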
	 */
	mm->key.len = roundup(len, PAGE_SIZE);
	INIT_LIST_HEAD(&mm->entry);

	mutex_lock(&uctx->mm_list_lock);
	list_add(&mm->entry, &uctx->mm_head);
	mutex_unlock(&uctx->mm_list_lock);

	DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
		 "added (addr=0x%llx,len=0x%lx) for ctx=%p\n",
		 (unsigned long long)mm->key.phy_addr,
		 (unsigned long)mm->key.len, uctx);

	return 0;
}

static bool qedr_search_mmap(struct qedr_ucontext *uctx, u64 phy_addr,
			     unsigned long len)
{
	bool found = false;
	struct qedr_mm *mm;

	mutex_lock(&uctx->mm_list_lock);
	list_for_each_entry(mm, &uctx->mm_head, entry) {
		if (len != mm->key.len || phy_addr != mm->key.phy_addr)
			continue;

		found = true;
		break;
	}
	mutex_unlock(&uctx->mm_list_lock);
	DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
		 "searched for (addr=0x%llx,len=0x%lx) for ctx=%p, result=%d\n",
		 mm->key.phy_addr, mm->key.len, uctx, found);

	return found;
}

struct ib_ucontext *qedr_alloc_ucontext(struct ib_device *ibdev,
					struct ib_udata *udata)
{
	int rc;
	struct qedr_ucontext *ctx;
	struct qedr_alloc_ucontext_resp uresp;
	struct qedr_dev *dev = get_qedr_dev(ibdev);
	struct qed_rdma_add_user_out_params oparams;

	if (!udata)
		return ERR_PTR(-EFAULT);

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return ERR_PTR(-ENOMEM);

	rc = dev->ops->rdma_add_user(dev->rdma_ctx, &oparams);
	if (rc) {
		DP_ERR(dev,
		       "failed to allocate a DPI for a new RoCE application, rc=%d. To overcome this, consider increasing the number of DPIs, increasing the doorbell BAR size or just closing unnecessary RoCE applications. In order to increase the number of DPIs consult the qedr readme\n",
		       rc);
		goto err;
	}

	ctx->dpi = oparams.dpi;
	ctx->dpi_addr = oparams.dpi_addr;
	ctx->dpi_phys_addr = oparams.dpi_phys_addr;
	ctx->dpi_size = oparams.dpi_size;
	INIT_LIST_HEAD(&ctx->mm_head);
	mutex_init(&ctx->mm_list_lock);

	memset(&uresp, 0, sizeof(uresp));

	uresp.dpm_enabled = dev->user_dpm_enabled;
	uresp.wids_enabled = 1;
	uresp.wid_count = oparams.wid_count;
	uresp.db_pa = ctx->dpi_phys_addr;
	uresp.db_size = ctx->dpi_size;
	uresp.max_send_wr = dev->attr.max_sqe;
	uresp.max_recv_wr = dev->attr.max_rqe;
	uresp.max_srq_wr = dev->attr.max_srq_wr;
	uresp.sges_per_send_wr = QEDR_MAX_SQE_ELEMENTS_PER_SQE;
	uresp.sges_per_recv_wr = QEDR_MAX_RQE_ELEMENTS_PER_RQE;
	uresp.sges_per_srq_wr = dev->attr.max_srq_sge;
	uresp.max_cqes = QEDR_MAX_CQES;

	rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
	if (rc)
		goto err;

	ctx->dev = dev;

	rc = qedr_add_mmap(ctx, ctx->dpi_phys_addr, ctx->dpi_size);
	if (rc)
		goto err;

	DP_DEBUG(dev, QEDR_MSG_INIT, "Allocating user context %p\n",
		 &ctx->ibucontext);
	return &ctx->ibucontext;

err:
	kfree(ctx);
	return ERR_PTR(rc);
}

int qedr_dealloc_ucontext(struct ib_ucontext *ibctx)
{
	struct qedr_ucontext *uctx = get_qedr_ucontext(ibctx);
	struct qedr_mm *mm, *tmp;
	int status = 0;

	DP_DEBUG(uctx->dev, QEDR_MSG_INIT, "Deallocating user context %p\n",
		 uctx);
	uctx->dev->ops->rdma_remove_user(uctx->dev->rdma_ctx, uctx->dpi);

	list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) {
		DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
			 "deleted (addr=0x%llx,len=0x%lx) for ctx=%p\n",
			 mm->key.phy_addr, mm->key.len, uctx);
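		/* The list is walked without mm_list_lock; at this point the
		 * ucontext is being torn down, so no new qedr_mmap() lookups
		 * are expected to race with this walk.
		 */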
		list_del(&mm->entry);
		kfree(mm);
	}

	kfree(uctx);
	return status;
}

int qedr_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
{
	struct qedr_ucontext *ucontext = get_qedr_ucontext(context);
	struct qedr_dev *dev = get_qedr_dev(context->device);
	unsigned long vm_page = vma->vm_pgoff << PAGE_SHIFT;
	u64 unmapped_db = dev->db_phys_addr;
	unsigned long len = (vma->vm_end - vma->vm_start);
	int rc = 0;
	bool found;

	DP_DEBUG(dev, QEDR_MSG_INIT,
		 "qedr_mmap called vm_page=0x%lx vm_pgoff=0x%lx unmapped_db=0x%llx db_size=%x, len=%lx\n",
		 vm_page, vma->vm_pgoff, unmapped_db, dev->db_size, len);
	if (vma->vm_start & (PAGE_SIZE - 1)) {
		DP_ERR(dev, "Vma_start not page aligned = %ld\n",
		       vma->vm_start);
		return -EINVAL;
	}

	found = qedr_search_mmap(ucontext, vm_page, len);
	if (!found) {
		DP_ERR(dev, "Vma_pgoff not found in mapped array = %ld\n",
		       vma->vm_pgoff);
		return -EINVAL;
	}

	DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping doorbell bar\n");

	if ((vm_page >= unmapped_db) && (vm_page <= (unmapped_db +
						     dev->db_size))) {
		DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping doorbell bar\n");
		if (vma->vm_flags & VM_READ) {
			DP_ERR(dev, "Trying to map doorbell bar for read\n");
			return -EPERM;
		}

		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);

		rc = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
					PAGE_SIZE, vma->vm_page_prot);
	} else {
		DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping chains\n");
		rc = remap_pfn_range(vma, vma->vm_start,
				     vma->vm_pgoff, len, vma->vm_page_prot);
	}
	DP_DEBUG(dev, QEDR_MSG_INIT, "qedr_mmap return code: %d\n", rc);
	return rc;
}

struct ib_pd *qedr_alloc_pd(struct ib_device *ibdev,
			    struct ib_ucontext *context, struct ib_udata *udata)
{
	struct qedr_dev *dev = get_qedr_dev(ibdev);
	struct qedr_pd *pd;
	u16 pd_id;
	int rc;

	DP_DEBUG(dev, QEDR_MSG_INIT, "Function called from: %s\n",
		 (udata && context) ? "User Lib" : "Kernel");

	if (!dev->rdma_ctx) {
		DP_ERR(dev, "invalid RDMA context\n");
		return ERR_PTR(-EINVAL);
	}

	pd = kzalloc(sizeof(*pd), GFP_KERNEL);
	if (!pd)
		return ERR_PTR(-ENOMEM);

	rc = dev->ops->rdma_alloc_pd(dev->rdma_ctx, &pd_id);
	if (rc)
		goto err;

	pd->pd_id = pd_id;

	if (udata && context) {
		struct qedr_alloc_pd_uresp uresp;

		uresp.pd_id = pd_id;

		rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
		if (rc) {
			DP_ERR(dev, "copy error pd_id=0x%x.\n", pd_id);
			dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd_id);
			goto err;
		}

		pd->uctx = get_qedr_ucontext(context);
		pd->uctx->pd = pd;
	}

	return &pd->ibpd;

err:
	kfree(pd);
	return ERR_PTR(rc);
}

int qedr_dealloc_pd(struct ib_pd *ibpd)
{
	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
	struct qedr_pd *pd = get_qedr_pd(ibpd);

	if (!pd) {
		pr_err("Invalid PD received in dealloc_pd\n");
		return -EINVAL;
	}

	DP_DEBUG(dev, QEDR_MSG_INIT, "Deallocating PD %d\n", pd->pd_id);
	dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd->pd_id);

	kfree(pd);

	return 0;
}

static void qedr_free_pbl(struct qedr_dev *dev,
			  struct qedr_pbl_info *pbl_info, struct qedr_pbl *pbl)
{
	struct pci_dev *pdev = dev->pdev;
	int i;

	for (i = 0; i < pbl_info->num_pbls; i++) {
		if (!pbl[i].va)
			continue;
		dma_free_coherent(&pdev->dev, pbl_info->pbl_size,
				  pbl[i].va, pbl[i].pa);
	}

	kfree(pbl);
}

#define MIN_FW_PBL_PAGE_SIZE (4 * 1024)
#define MAX_FW_PBL_PAGE_SIZE (64 * 1024)

#define NUM_PBES_ON_PAGE(_page_size) (_page_size / sizeof(u64))
#define MAX_PBES_ON_PAGE NUM_PBES_ON_PAGE(MAX_FW_PBL_PAGE_SIZE)
#define MAX_PBES_TWO_LAYER (MAX_PBES_ON_PAGE * MAX_PBES_ON_PAGE)

static struct qedr_pbl *qedr_alloc_pbl_tbl(struct qedr_dev *dev,
					   struct qedr_pbl_info *pbl_info,
					   gfp_t flags)
{
	struct pci_dev *pdev = dev->pdev;
	struct qedr_pbl *pbl_table;
	dma_addr_t *pbl_main_tbl;
	dma_addr_t pa;
	void *va;
	int i;

	pbl_table = kcalloc(pbl_info->num_pbls, sizeof(*pbl_table), flags);
	if (!pbl_table)
		return ERR_PTR(-ENOMEM);

	for (i = 0; i < pbl_info->num_pbls; i++) {
		va = dma_zalloc_coherent(&pdev->dev, pbl_info->pbl_size,
					 &pa, flags);
		if (!va)
			goto err;

		pbl_table[i].va = va;
		pbl_table[i].pa = pa;
	}

	/* Two-layered PBLs: if we have more than one PBL we need to initialize
	 * the first one with physical pointers to all of the rest.
	 */
	pbl_main_tbl = (dma_addr_t *)pbl_table[0].va;
	for (i = 0; i < pbl_info->num_pbls - 1; i++)
		pbl_main_tbl[i] = pbl_table[i + 1].pa;

	return pbl_table;

err:
	for (i--; i >= 0; i--)
		dma_free_coherent(&pdev->dev, pbl_info->pbl_size,
				  pbl_table[i].va, pbl_table[i].pa);

	qedr_free_pbl(dev, pbl_info, pbl_table);

	return ERR_PTR(-ENOMEM);
}

static int qedr_prepare_pbl_tbl(struct qedr_dev *dev,
				struct qedr_pbl_info *pbl_info,
				u32 num_pbes, int two_layer_capable)
{
	u32 pbl_capacity;
	u32 pbl_size;
	u32 num_pbls;

	if ((num_pbes > MAX_PBES_ON_PAGE) && two_layer_capable) {
		if (num_pbes > MAX_PBES_TWO_LAYER) {
			DP_ERR(dev, "prepare pbl table: too many pages %d\n",
			       num_pbes);
			return -EINVAL;
		}

		/* calculate required pbl page size */
		pbl_size = MIN_FW_PBL_PAGE_SIZE;
		pbl_capacity
			    = NUM_PBES_ON_PAGE(pbl_size) *
			      NUM_PBES_ON_PAGE(pbl_size);

		while (pbl_capacity < num_pbes) {
			pbl_size *= 2;
			pbl_capacity = pbl_size / sizeof(u64);
			pbl_capacity = pbl_capacity * pbl_capacity;
		}

		num_pbls = DIV_ROUND_UP(num_pbes, NUM_PBES_ON_PAGE(pbl_size));
		num_pbls++;	/* One for the layer0 (points to the PBLs) */
		pbl_info->two_layered = true;
	} else {
		/* One layered PBL */
		num_pbls = 1;
		pbl_size = max_t(u32, MIN_FW_PBL_PAGE_SIZE,
				 roundup_pow_of_two((num_pbes * sizeof(u64))));
		pbl_info->two_layered = false;
	}

	pbl_info->num_pbls = num_pbls;
	pbl_info->pbl_size = pbl_size;
	pbl_info->num_pbes = num_pbes;

	DP_DEBUG(dev, QEDR_MSG_MR,
		 "prepare pbl table: num_pbes=%d, num_pbls=%d, pbl_size=%d\n",
		 pbl_info->num_pbes, pbl_info->num_pbls, pbl_info->pbl_size);

	return 0;
}

static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem,
			       struct qedr_pbl *pbl,
			       struct qedr_pbl_info *pbl_info, u32 pg_shift)
{
	int shift, pg_cnt, pages, pbe_cnt, total_num_pbes = 0;
	u32 fw_pg_cnt, fw_pg_per_umem_pg;
	struct qedr_pbl *pbl_tbl;
	struct scatterlist *sg;
	struct regpair *pbe;
	u64 pg_addr;
	int entry;

	if (!pbl_info->num_pbes)
		return;

	/* If we have a two layered pbl, the first pbl points to the rest
	 * of the pbls and the first entry lies in the second pbl of the table.
	 */
	if (pbl_info->two_layered)
		pbl_tbl = &pbl[1];
	else
		pbl_tbl = pbl;

	pbe = (struct regpair *)pbl_tbl->va;
	if (!pbe) {
		DP_ERR(dev, "cannot populate PBL due to a NULL PBE\n");
		return;
	}

	pbe_cnt = 0;

	shift = umem->page_shift;

	fw_pg_per_umem_pg = BIT(umem->page_shift - pg_shift);

	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
		pages = sg_dma_len(sg) >> shift;
		pg_addr = sg_dma_address(sg);
		for (pg_cnt = 0; pg_cnt < pages; pg_cnt++) {
			for (fw_pg_cnt = 0; fw_pg_cnt < fw_pg_per_umem_pg;) {
				pbe->lo = cpu_to_le32(pg_addr);
				pbe->hi = cpu_to_le32(upper_32_bits(pg_addr));

				pg_addr += BIT(pg_shift);
				pbe_cnt++;
				total_num_pbes++;
				pbe++;

				if (total_num_pbes == pbl_info->num_pbes)
					return;

				/* If the given pbl is full storing the pbes,
				 * move to next pbl.
				 */
				if (pbe_cnt ==
				    (pbl_info->pbl_size / sizeof(u64))) {
					pbl_tbl++;
					pbe = (struct regpair *)pbl_tbl->va;
					pbe_cnt = 0;
				}

				fw_pg_cnt++;
			}
		}
	}
}

static int qedr_copy_cq_uresp(struct qedr_dev *dev,
			      struct qedr_cq *cq, struct ib_udata *udata)
{
	struct qedr_create_cq_uresp uresp;
	int rc;

	memset(&uresp, 0, sizeof(uresp));

	uresp.db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
	uresp.icid = cq->icid;

	rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
	if (rc)
		DP_ERR(dev, "copy error cqid=0x%x.\n", cq->icid);

	return rc;
}

static void consume_cqe(struct qedr_cq *cq)
{
	if (cq->latest_cqe == cq->toggle_cqe)
		cq->pbl_toggle ^= RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;

	cq->latest_cqe = qed_chain_consume(&cq->pbl);
}

static inline int qedr_align_cq_entries(int entries)
{
	u64 size, aligned_size;

	/* We allocate an extra entry that we don't report to the FW.
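	 * The size is then rounded up so the CQE array fills whole pages;
	 * the FW is later told about one entry less (see
	 * qedr_init_cq_params()).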
	 */
	size = (entries + 1) * QEDR_CQE_SIZE;
	aligned_size = ALIGN(size, PAGE_SIZE);

	return aligned_size / QEDR_CQE_SIZE;
}

static inline int qedr_init_user_queue(struct ib_ucontext *ib_ctx,
				       struct qedr_dev *dev,
				       struct qedr_userq *q,
				       u64 buf_addr, size_t buf_len,
				       int access, int dmasync,
				       int alloc_and_init)
{
	u32 fw_pages;
	int rc;

	q->buf_addr = buf_addr;
	q->buf_len = buf_len;
	q->umem = ib_umem_get(ib_ctx, q->buf_addr, q->buf_len, access, dmasync);
	if (IS_ERR(q->umem)) {
		DP_ERR(dev, "create user queue: failed ib_umem_get, got %ld\n",
		       PTR_ERR(q->umem));
		return PTR_ERR(q->umem);
	}

	fw_pages = ib_umem_page_count(q->umem) <<
	    (q->umem->page_shift - FW_PAGE_SHIFT);

	rc = qedr_prepare_pbl_tbl(dev, &q->pbl_info, fw_pages, 0);
	if (rc)
		goto err0;

	if (alloc_and_init) {
		q->pbl_tbl = qedr_alloc_pbl_tbl(dev, &q->pbl_info, GFP_KERNEL);
		if (IS_ERR(q->pbl_tbl)) {
			rc = PTR_ERR(q->pbl_tbl);
			goto err0;
		}
		qedr_populate_pbls(dev, q->umem, q->pbl_tbl, &q->pbl_info,
				   FW_PAGE_SHIFT);
	} else {
		q->pbl_tbl = kzalloc(sizeof(*q->pbl_tbl), GFP_KERNEL);
		if (!q->pbl_tbl) {
			rc = -ENOMEM;
			goto err0;
		}
	}

	return 0;

err0:
	ib_umem_release(q->umem);
	q->umem = NULL;

	return rc;
}

static inline void qedr_init_cq_params(struct qedr_cq *cq,
				       struct qedr_ucontext *ctx,
				       struct qedr_dev *dev, int vector,
				       int chain_entries, int page_cnt,
				       u64 pbl_ptr,
				       struct qed_rdma_create_cq_in_params
				       *params)
{
	memset(params, 0, sizeof(*params));
	params->cq_handle_hi = upper_32_bits((uintptr_t)cq);
	params->cq_handle_lo = lower_32_bits((uintptr_t)cq);
	params->cnq_id = vector;
	params->cq_size = chain_entries - 1;
	params->dpi = (ctx) ?
		      ctx->dpi : dev->dpi;
	params->pbl_num_pages = page_cnt;
	params->pbl_ptr = pbl_ptr;
	params->pbl_two_level = 0;
}

static void doorbell_cq(struct qedr_cq *cq, u32 cons, u8 flags)
{
	/* Flush data before signalling doorbell */
	wmb();
	cq->db.data.agg_flags = flags;
	cq->db.data.value = cpu_to_le32(cons);
	writeq(cq->db.raw, cq->db_addr);

	/* Make sure write would stick */
	mmiowb();
}

int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
	struct qedr_cq *cq = get_qedr_cq(ibcq);
	unsigned long sflags;
	struct qedr_dev *dev;

	dev = get_qedr_dev(ibcq->device);

	if (cq->destroyed) {
		DP_ERR(dev,
		       "warning: arm was invoked after destroy for cq %p (icid=%d)\n",
		       cq, cq->icid);
		return -EINVAL;
	}

	if (cq->cq_type == QEDR_CQ_TYPE_GSI)
		return 0;

	spin_lock_irqsave(&cq->cq_lock, sflags);

	cq->arm_flags = 0;

	if (flags & IB_CQ_SOLICITED)
		cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_SE_CF_CMD;

	if (flags & IB_CQ_NEXT_COMP)
		cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_CF_CMD;

	doorbell_cq(cq, cq->cq_cons - 1, cq->arm_flags);

	spin_unlock_irqrestore(&cq->cq_lock, sflags);

	return 0;
}

struct ib_cq *qedr_create_cq(struct ib_device *ibdev,
			     const struct ib_cq_init_attr *attr,
			     struct ib_ucontext *ib_ctx, struct ib_udata *udata)
{
	struct qedr_ucontext *ctx = get_qedr_ucontext(ib_ctx);
	struct qed_rdma_destroy_cq_out_params destroy_oparams;
	struct qed_rdma_destroy_cq_in_params destroy_iparams;
	struct qedr_dev *dev = get_qedr_dev(ibdev);
	struct qed_rdma_create_cq_in_params params;
	struct qedr_create_cq_ureq ureq;
	int vector = attr->comp_vector;
	int entries = attr->cqe;
	struct qedr_cq *cq;
	int chain_entries;
	int page_cnt;
	u64 pbl_ptr;
	u16 icid;
	int rc;

	DP_DEBUG(dev, QEDR_MSG_INIT,
		 "create_cq: called from %s. entries=%d, vector=%d\n",
		 udata ? "User Lib" : "Kernel", entries, vector);

	if (entries > QEDR_MAX_CQES) {
		DP_ERR(dev,
		       "create cq: the number of entries %d is too high. "
		       "Must be equal or below %d.\n",
		       entries, QEDR_MAX_CQES);
		return ERR_PTR(-EINVAL);
	}

	chain_entries = qedr_align_cq_entries(entries);
	chain_entries = min_t(int, chain_entries, QEDR_MAX_CQES);

	cq = kzalloc(sizeof(*cq), GFP_KERNEL);
	if (!cq)
		return ERR_PTR(-ENOMEM);

	if (udata) {
		memset(&ureq, 0, sizeof(ureq));
		if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) {
			DP_ERR(dev,
			       "create cq: problem copying data from user space\n");
			goto err0;
		}

		if (!ureq.len) {
			DP_ERR(dev,
			       "create cq: cannot create a cq with 0 entries\n");
			goto err0;
		}

		cq->cq_type = QEDR_CQ_TYPE_USER;

		rc = qedr_init_user_queue(ib_ctx, dev, &cq->q, ureq.addr,
					  ureq.len, IB_ACCESS_LOCAL_WRITE,
					  1, 1);
		if (rc)
			goto err0;

		pbl_ptr = cq->q.pbl_tbl->pa;
		page_cnt = cq->q.pbl_info.num_pbes;

		cq->ibcq.cqe = chain_entries;
	} else {
		cq->cq_type = QEDR_CQ_TYPE_KERNEL;

		rc = dev->ops->common->chain_alloc(dev->cdev,
						   QED_CHAIN_USE_TO_CONSUME,
						   QED_CHAIN_MODE_PBL,
						   QED_CHAIN_CNT_TYPE_U32,
						   chain_entries,
						   sizeof(union rdma_cqe),
						   &cq->pbl, NULL);
		if (rc)
			goto err1;

		page_cnt = qed_chain_get_page_cnt(&cq->pbl);
		pbl_ptr = qed_chain_get_pbl_phys(&cq->pbl);
		cq->ibcq.cqe = cq->pbl.capacity;
	}

	qedr_init_cq_params(cq, ctx, dev, vector, chain_entries, page_cnt,
			    pbl_ptr, &params);

	rc = dev->ops->rdma_create_cq(dev->rdma_ctx, &params, &icid);
	if (rc)
		goto err2;

	cq->icid = icid;
	cq->sig = QEDR_CQ_MAGIC_NUMBER;
	spin_lock_init(&cq->cq_lock);

	if (ib_ctx) {
		rc = qedr_copy_cq_uresp(dev, cq, udata);
		if (rc)
			goto err3;
	} else {
		/* Generate doorbell address.
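		 * The doorbell data (icid and aggregation command) is set up
		 * once here; doorbell_cq() later only updates the consumer
		 * value and the arm flags.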
		 */
		cq->db_addr = dev->db_addr +
		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
		cq->db.data.icid = cq->icid;
		cq->db.data.params = DB_AGG_CMD_SET <<
		    RDMA_PWM_VAL32_DATA_AGG_CMD_SHIFT;

		/* point to the very last element, passing it we will toggle */
		cq->toggle_cqe = qed_chain_get_last_elem(&cq->pbl);
		cq->pbl_toggle = RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;
		cq->latest_cqe = NULL;
		consume_cqe(cq);
		cq->cq_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
	}

	DP_DEBUG(dev, QEDR_MSG_CQ,
		 "create cq: icid=0x%0x, addr=%p, size(entries)=0x%0x\n",
		 cq->icid, cq, params.cq_size);

	return &cq->ibcq;

err3:
	destroy_iparams.icid = cq->icid;
	dev->ops->rdma_destroy_cq(dev->rdma_ctx, &destroy_iparams,
				  &destroy_oparams);
err2:
	if (udata)
		qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
	else
		dev->ops->common->chain_free(dev->cdev, &cq->pbl);
err1:
	if (udata)
		ib_umem_release(cq->q.umem);
err0:
	kfree(cq);
	return ERR_PTR(-EINVAL);
}

int qedr_resize_cq(struct ib_cq *ibcq, int new_cnt, struct ib_udata *udata)
{
	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
	struct qedr_cq *cq = get_qedr_cq(ibcq);

	DP_ERR(dev, "cq %p RESIZE NOT SUPPORTED\n", cq);

	return 0;
}

#define QEDR_DESTROY_CQ_MAX_ITERATIONS		(10)
#define QEDR_DESTROY_CQ_ITER_DURATION		(10)

int qedr_destroy_cq(struct ib_cq *ibcq)
{
	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
	struct qed_rdma_destroy_cq_out_params oparams;
	struct qed_rdma_destroy_cq_in_params iparams;
	struct qedr_cq *cq = get_qedr_cq(ibcq);
	int iter;
	int rc;

	DP_DEBUG(dev, QEDR_MSG_CQ, "destroy cq %p (icid=%d)\n", cq, cq->icid);

	cq->destroyed = 1;

	/* GSI CQs are handled by the driver, so they don't exist in the FW */
	if (cq->cq_type == QEDR_CQ_TYPE_GSI)
		goto done;

	iparams.icid = cq->icid;
	rc = dev->ops->rdma_destroy_cq(dev->rdma_ctx, &iparams, &oparams);
	if (rc)
		return rc;

	dev->ops->common->chain_free(dev->cdev, &cq->pbl);

	if (ibcq->uobject && ibcq->uobject->context) {
		qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
		ib_umem_release(cq->q.umem);
	}

	/* We don't want the IRQ handler to handle a non-existing CQ so we
	 * wait until all CNQ interrupts, if any, are received. This will
	 * always happen and will always happen very fast. If not, then a
	 * serious error has occurred. That is why we can use a long delay.
	 * We spin for a short time so we don't lose time on context switching
	 * in case all the completions are handled in that span. Otherwise
	 * we sleep for a while and check again. Since the CNQ may be
	 * associated with (only) the current CPU we use msleep to allow the
	 * current CPU to be freed.
	 * The CNQ notification is increased in qedr_irq_handler().
1092 */ 1093 iter = QEDR_DESTROY_CQ_MAX_ITERATIONS; 1094 while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) { 1095 udelay(QEDR_DESTROY_CQ_ITER_DURATION); 1096 iter--; 1097 } 1098 1099 iter = QEDR_DESTROY_CQ_MAX_ITERATIONS; 1100 while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) { 1101 msleep(QEDR_DESTROY_CQ_ITER_DURATION); 1102 iter--; 1103 } 1104 1105 if (oparams.num_cq_notif != cq->cnq_notif) 1106 goto err; 1107 1108 /* Note that we don't need to have explicit code to wait for the 1109 * completion of the event handler because it is invoked from the EQ. 1110 * Since the destroy CQ ramrod has also been received on the EQ we can 1111 * be certain that there's no event handler in process. 1112 */ 1113 done: 1114 cq->sig = ~cq->sig; 1115 1116 kfree(cq); 1117 1118 return 0; 1119 1120 err: 1121 DP_ERR(dev, 1122 "CQ %p (icid=%d) not freed, expecting %d ints but got %d ints\n", 1123 cq, cq->icid, oparams.num_cq_notif, cq->cnq_notif); 1124 1125 return -EINVAL; 1126 } 1127 1128 static inline int get_gid_info_from_table(struct ib_qp *ibqp, 1129 struct ib_qp_attr *attr, 1130 int attr_mask, 1131 struct qed_rdma_modify_qp_in_params 1132 *qp_params) 1133 { 1134 enum rdma_network_type nw_type; 1135 struct ib_gid_attr gid_attr; 1136 const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr); 1137 union ib_gid gid; 1138 u32 ipv4_addr; 1139 int rc = 0; 1140 int i; 1141 1142 rc = ib_get_cached_gid(ibqp->device, 1143 rdma_ah_get_port_num(&attr->ah_attr), 1144 grh->sgid_index, &gid, &gid_attr); 1145 if (rc) 1146 return rc; 1147 1148 if (!memcmp(&gid, &zgid, sizeof(gid))) 1149 return -ENOENT; 1150 1151 if (gid_attr.ndev) { 1152 qp_params->vlan_id = rdma_vlan_dev_vlan_id(gid_attr.ndev); 1153 1154 dev_put(gid_attr.ndev); 1155 nw_type = ib_gid_to_network_type(gid_attr.gid_type, &gid); 1156 switch (nw_type) { 1157 case RDMA_NETWORK_IPV6: 1158 memcpy(&qp_params->sgid.bytes[0], &gid.raw[0], 1159 sizeof(qp_params->sgid)); 1160 memcpy(&qp_params->dgid.bytes[0], 1161 &grh->dgid, 1162 sizeof(qp_params->dgid)); 1163 qp_params->roce_mode = ROCE_V2_IPV6; 1164 SET_FIELD(qp_params->modify_flags, 1165 QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1); 1166 break; 1167 case RDMA_NETWORK_IB: 1168 memcpy(&qp_params->sgid.bytes[0], &gid.raw[0], 1169 sizeof(qp_params->sgid)); 1170 memcpy(&qp_params->dgid.bytes[0], 1171 &grh->dgid, 1172 sizeof(qp_params->dgid)); 1173 qp_params->roce_mode = ROCE_V1; 1174 break; 1175 case RDMA_NETWORK_IPV4: 1176 memset(&qp_params->sgid, 0, sizeof(qp_params->sgid)); 1177 memset(&qp_params->dgid, 0, sizeof(qp_params->dgid)); 1178 ipv4_addr = qedr_get_ipv4_from_gid(gid.raw); 1179 qp_params->sgid.ipv4_addr = ipv4_addr; 1180 ipv4_addr = 1181 qedr_get_ipv4_from_gid(grh->dgid.raw); 1182 qp_params->dgid.ipv4_addr = ipv4_addr; 1183 SET_FIELD(qp_params->modify_flags, 1184 QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1); 1185 qp_params->roce_mode = ROCE_V2_IPV4; 1186 break; 1187 } 1188 } 1189 1190 for (i = 0; i < 4; i++) { 1191 qp_params->sgid.dwords[i] = ntohl(qp_params->sgid.dwords[i]); 1192 qp_params->dgid.dwords[i] = ntohl(qp_params->dgid.dwords[i]); 1193 } 1194 1195 if (qp_params->vlan_id >= VLAN_CFI_MASK) 1196 qp_params->vlan_id = 0; 1197 1198 return 0; 1199 } 1200 1201 static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev, 1202 struct ib_qp_init_attr *attrs) 1203 { 1204 struct qedr_device_attr *qattr = &dev->attr; 1205 1206 /* QP0... 
	 * attrs->qp_type == IB_QPT_GSI */
	if (attrs->qp_type != IB_QPT_RC && attrs->qp_type != IB_QPT_GSI) {
		DP_DEBUG(dev, QEDR_MSG_QP,
			 "create qp: unsupported qp type=0x%x requested\n",
			 attrs->qp_type);
		return -EINVAL;
	}

	if (attrs->cap.max_send_wr > qattr->max_sqe) {
		DP_ERR(dev,
		       "create qp: cannot create a SQ with %d elements (max_send_wr=0x%x)\n",
		       attrs->cap.max_send_wr, qattr->max_sqe);
		return -EINVAL;
	}

	if (attrs->cap.max_inline_data > qattr->max_inline) {
		DP_ERR(dev,
		       "create qp: unsupported inline data size=0x%x requested (max_inline=0x%x)\n",
		       attrs->cap.max_inline_data, qattr->max_inline);
		return -EINVAL;
	}

	if (attrs->cap.max_send_sge > qattr->max_sge) {
		DP_ERR(dev,
		       "create qp: unsupported send_sge=0x%x requested (max_send_sge=0x%x)\n",
		       attrs->cap.max_send_sge, qattr->max_sge);
		return -EINVAL;
	}

	if (attrs->cap.max_recv_sge > qattr->max_sge) {
		DP_ERR(dev,
		       "create qp: unsupported recv_sge=0x%x requested (max_recv_sge=0x%x)\n",
		       attrs->cap.max_recv_sge, qattr->max_sge);
		return -EINVAL;
	}

	/* Unprivileged user space cannot create special QP */
	if (ibpd->uobject && attrs->qp_type == IB_QPT_GSI) {
		DP_ERR(dev,
		       "create qp: userspace can't create special QPs of type=0x%x\n",
		       attrs->qp_type);
		return -EINVAL;
	}

	return 0;
}

static void qedr_copy_rq_uresp(struct qedr_dev *dev,
			       struct qedr_create_qp_uresp *uresp,
			       struct qedr_qp *qp)
{
	/* iWARP requires two doorbells per RQ. */
	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
		uresp->rq_db_offset =
		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD);
		uresp->rq_db2_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_FLAGS);
	} else {
		uresp->rq_db_offset =
		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
	}

	uresp->rq_icid = qp->icid;
}

static void qedr_copy_sq_uresp(struct qedr_dev *dev,
			       struct qedr_create_qp_uresp *uresp,
			       struct qedr_qp *qp)
{
	uresp->sq_db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);

	/* iWARP uses the same cid for rq and sq */
	if (rdma_protocol_iwarp(&dev->ibdev, 1))
		uresp->sq_icid = qp->icid;
	else
		uresp->sq_icid = qp->icid + 1;
}

static int qedr_copy_qp_uresp(struct qedr_dev *dev,
			      struct qedr_qp *qp, struct ib_udata *udata)
{
	struct qedr_create_qp_uresp uresp;
	int rc;

	memset(&uresp, 0, sizeof(uresp));
	qedr_copy_sq_uresp(dev, &uresp, qp);
	qedr_copy_rq_uresp(dev, &uresp, qp);

	uresp.atomic_supported = dev->atomic_cap != IB_ATOMIC_NONE;
	uresp.qp_id = qp->qp_id;

	rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
	if (rc)
		DP_ERR(dev,
		       "create qp: failed a copy to user space with qp icid=0x%x.\n",
		       qp->icid);

	return rc;
}

static void qedr_set_common_qp_params(struct qedr_dev *dev,
				      struct qedr_qp *qp,
				      struct qedr_pd *pd,
				      struct ib_qp_init_attr *attrs)
{
	spin_lock_init(&qp->q_lock);
	atomic_set(&qp->refcnt, 1);
	qp->pd = pd;
	qp->qp_type = attrs->qp_type;
	qp->max_inline_data = attrs->cap.max_inline_data;
	qp->sq.max_sges = attrs->cap.max_send_sge;
	qp->state = QED_ROCE_QP_STATE_RESET;
	qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ?
		       true : false;
	qp->sq_cq = get_qedr_cq(attrs->send_cq);
	qp->rq_cq = get_qedr_cq(attrs->recv_cq);
	qp->dev = dev;
	qp->rq.max_sges = attrs->cap.max_recv_sge;

	DP_DEBUG(dev, QEDR_MSG_QP,
		 "RQ params:\trq_max_sges = %d, rq_cq_id = %d\n",
		 qp->rq.max_sges, qp->rq_cq->icid);
	DP_DEBUG(dev, QEDR_MSG_QP,
		 "QP params:\tpd = %d, qp_type = %d, max_inline_data = %d, state = %d, signaled = %d, use_srq=%d\n",
		 pd->pd_id, qp->qp_type, qp->max_inline_data,
		 qp->state, qp->signaled, (attrs->srq) ? 1 : 0);
	DP_DEBUG(dev, QEDR_MSG_QP,
		 "SQ params:\tsq_max_sges = %d, sq_cq_id = %d\n",
		 qp->sq.max_sges, qp->sq_cq->icid);
}

static void qedr_set_roce_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
{
	qp->sq.db = dev->db_addr +
		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
	qp->sq.db_data.data.icid = qp->icid + 1;
	qp->rq.db = dev->db_addr +
		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
	qp->rq.db_data.data.icid = qp->icid;
}

static inline void
qedr_init_common_qp_in_params(struct qedr_dev *dev,
			      struct qedr_pd *pd,
			      struct qedr_qp *qp,
			      struct ib_qp_init_attr *attrs,
			      bool fmr_and_reserved_lkey,
			      struct qed_rdma_create_qp_in_params *params)
{
	/* QP handle to be written in an async event */
	params->qp_handle_async_lo = lower_32_bits((uintptr_t) qp);
	params->qp_handle_async_hi = upper_32_bits((uintptr_t) qp);

	params->signal_all = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR);
	params->fmr_and_reserved_lkey = fmr_and_reserved_lkey;
	params->pd = pd->pd_id;
	params->dpi = pd->uctx ? pd->uctx->dpi : dev->dpi;
	params->sq_cq_id = get_qedr_cq(attrs->send_cq)->icid;
	params->stats_queue = 0;
	params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid;
	params->srq_id = 0;
	params->use_srq = false;
}

static inline void qedr_qp_user_print(struct qedr_dev *dev, struct qedr_qp *qp)
{
	DP_DEBUG(dev, QEDR_MSG_QP, "create qp: successfully created user QP. "
		 "qp=%p. "
		 "sq_addr=0x%llx, "
		 "sq_len=%zd, "
		 "rq_addr=0x%llx, "
		 "rq_len=%zd"
		 "\n",
		 qp,
		 qp->usq.buf_addr,
		 qp->usq.buf_len, qp->urq.buf_addr, qp->urq.buf_len);
}

static int qedr_idr_add(struct qedr_dev *dev, void *ptr, u32 id)
{
	int rc;

	if (!rdma_protocol_iwarp(&dev->ibdev, 1))
		return 0;

	idr_preload(GFP_KERNEL);
	spin_lock_irq(&dev->idr_lock);

	rc = idr_alloc(&dev->qpidr, ptr, id, id + 1, GFP_ATOMIC);

	spin_unlock_irq(&dev->idr_lock);
	idr_preload_end();

	return rc < 0 ?
		       rc : 0;
}

static void qedr_idr_remove(struct qedr_dev *dev, u32 id)
{
	if (!rdma_protocol_iwarp(&dev->ibdev, 1))
		return;

	spin_lock_irq(&dev->idr_lock);
	idr_remove(&dev->qpidr, id);
	spin_unlock_irq(&dev->idr_lock);
}

static inline void
qedr_iwarp_populate_user_qp(struct qedr_dev *dev,
			    struct qedr_qp *qp,
			    struct qed_rdma_create_qp_out_params *out_params)
{
	qp->usq.pbl_tbl->va = out_params->sq_pbl_virt;
	qp->usq.pbl_tbl->pa = out_params->sq_pbl_phys;

	qedr_populate_pbls(dev, qp->usq.umem, qp->usq.pbl_tbl,
			   &qp->usq.pbl_info, FW_PAGE_SHIFT);

	qp->urq.pbl_tbl->va = out_params->rq_pbl_virt;
	qp->urq.pbl_tbl->pa = out_params->rq_pbl_phys;

	qedr_populate_pbls(dev, qp->urq.umem, qp->urq.pbl_tbl,
			   &qp->urq.pbl_info, FW_PAGE_SHIFT);
}

static void qedr_cleanup_user(struct qedr_dev *dev, struct qedr_qp *qp)
{
	if (qp->usq.umem)
		ib_umem_release(qp->usq.umem);
	qp->usq.umem = NULL;

	if (qp->urq.umem)
		ib_umem_release(qp->urq.umem);
	qp->urq.umem = NULL;
}

static int qedr_create_user_qp(struct qedr_dev *dev,
			       struct qedr_qp *qp,
			       struct ib_pd *ibpd,
			       struct ib_udata *udata,
			       struct ib_qp_init_attr *attrs)
{
	struct qed_rdma_create_qp_in_params in_params;
	struct qed_rdma_create_qp_out_params out_params;
	struct qedr_pd *pd = get_qedr_pd(ibpd);
	struct ib_ucontext *ib_ctx = NULL;
	struct qedr_create_qp_ureq ureq;
	int alloc_and_init = rdma_protocol_roce(&dev->ibdev, 1);
	int rc = -EINVAL;

	ib_ctx = ibpd->uobject->context;

	memset(&ureq, 0, sizeof(ureq));
	rc = ib_copy_from_udata(&ureq, udata, sizeof(ureq));
	if (rc) {
		DP_ERR(dev, "Problem copying data from user space\n");
		return rc;
	}

	/* SQ - read access only (0), dma sync not required (0) */
	rc = qedr_init_user_queue(ib_ctx, dev, &qp->usq, ureq.sq_addr,
				  ureq.sq_len, 0, 0, alloc_and_init);
	if (rc)
		return rc;

	/* RQ - read access only (0), dma sync not required (0) */
	rc = qedr_init_user_queue(ib_ctx, dev, &qp->urq, ureq.rq_addr,
				  ureq.rq_len, 0, 0, alloc_and_init);
	if (rc)
		return rc;

	memset(&in_params, 0, sizeof(in_params));
	qedr_init_common_qp_in_params(dev, pd, qp, attrs, false, &in_params);
	in_params.qp_handle_lo = ureq.qp_handle_lo;
	in_params.qp_handle_hi = ureq.qp_handle_hi;
	in_params.sq_num_pages = qp->usq.pbl_info.num_pbes;
	in_params.sq_pbl_ptr = qp->usq.pbl_tbl->pa;
	in_params.rq_num_pages = qp->urq.pbl_info.num_pbes;
	in_params.rq_pbl_ptr = qp->urq.pbl_tbl->pa;

	qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
					      &in_params, &out_params);

	if (!qp->qed_qp) {
		rc = -ENOMEM;
		goto err1;
	}

	if (rdma_protocol_iwarp(&dev->ibdev, 1))
		qedr_iwarp_populate_user_qp(dev, qp, &out_params);

	qp->qp_id = out_params.qp_id;
	qp->icid = out_params.icid;

	rc = qedr_copy_qp_uresp(dev, qp, udata);
	if (rc)
		goto err;

	qedr_qp_user_print(dev, qp);

	return 0;
err:
	rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
	if (rc)
		DP_ERR(dev, "create qp: fatal fault. "
			       "rc=%d", rc);

err1:
	qedr_cleanup_user(dev, qp);
	return rc;
}

static void qedr_set_iwarp_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
{
	qp->sq.db = dev->db_addr +
		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
	qp->sq.db_data.data.icid = qp->icid;

	qp->rq.db = dev->db_addr +
		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD);
	qp->rq.db_data.data.icid = qp->icid;
	qp->rq.iwarp_db2 = dev->db_addr +
			   DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_FLAGS);
	qp->rq.iwarp_db2_data.data.icid = qp->icid;
	qp->rq.iwarp_db2_data.data.value = DQ_TCM_IWARP_POST_RQ_CF_CMD;
}

static int
qedr_roce_create_kernel_qp(struct qedr_dev *dev,
			   struct qedr_qp *qp,
			   struct qed_rdma_create_qp_in_params *in_params,
			   u32 n_sq_elems, u32 n_rq_elems)
{
	struct qed_rdma_create_qp_out_params out_params;
	int rc;

	rc = dev->ops->common->chain_alloc(dev->cdev,
					   QED_CHAIN_USE_TO_PRODUCE,
					   QED_CHAIN_MODE_PBL,
					   QED_CHAIN_CNT_TYPE_U32,
					   n_sq_elems,
					   QEDR_SQE_ELEMENT_SIZE,
					   &qp->sq.pbl, NULL);

	if (rc)
		return rc;

	in_params->sq_num_pages = qed_chain_get_page_cnt(&qp->sq.pbl);
	in_params->sq_pbl_ptr = qed_chain_get_pbl_phys(&qp->sq.pbl);

	rc = dev->ops->common->chain_alloc(dev->cdev,
					   QED_CHAIN_USE_TO_CONSUME_PRODUCE,
					   QED_CHAIN_MODE_PBL,
					   QED_CHAIN_CNT_TYPE_U32,
					   n_rq_elems,
					   QEDR_RQE_ELEMENT_SIZE,
					   &qp->rq.pbl, NULL);
	if (rc)
		return rc;

	in_params->rq_num_pages = qed_chain_get_page_cnt(&qp->rq.pbl);
	in_params->rq_pbl_ptr = qed_chain_get_pbl_phys(&qp->rq.pbl);

	qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
					      in_params, &out_params);

	if (!qp->qed_qp)
		return -EINVAL;

	qp->qp_id = out_params.qp_id;
	qp->icid = out_params.icid;

	qedr_set_roce_db_info(dev, qp);
	return rc;
}

static int
qedr_iwarp_create_kernel_qp(struct qedr_dev *dev,
			    struct qedr_qp *qp,
			    struct qed_rdma_create_qp_in_params *in_params,
			    u32 n_sq_elems, u32 n_rq_elems)
{
	struct qed_rdma_create_qp_out_params out_params;
	struct qed_chain_ext_pbl ext_pbl;
	int rc;

	in_params->sq_num_pages = QED_CHAIN_PAGE_CNT(n_sq_elems,
						     QEDR_SQE_ELEMENT_SIZE,
						     QED_CHAIN_MODE_PBL);
	in_params->rq_num_pages = QED_CHAIN_PAGE_CNT(n_rq_elems,
						     QEDR_RQE_ELEMENT_SIZE,
						     QED_CHAIN_MODE_PBL);

	qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
					      in_params, &out_params);

	if (!qp->qed_qp)
		return -EINVAL;

	/* Now we allocate the chain */
	ext_pbl.p_pbl_virt = out_params.sq_pbl_virt;
	ext_pbl.p_pbl_phys = out_params.sq_pbl_phys;

	rc = dev->ops->common->chain_alloc(dev->cdev,
					   QED_CHAIN_USE_TO_PRODUCE,
					   QED_CHAIN_MODE_PBL,
					   QED_CHAIN_CNT_TYPE_U32,
					   n_sq_elems,
					   QEDR_SQE_ELEMENT_SIZE,
					   &qp->sq.pbl, &ext_pbl);

	if (rc)
		goto err;

	ext_pbl.p_pbl_virt = out_params.rq_pbl_virt;
	ext_pbl.p_pbl_phys = out_params.rq_pbl_phys;

	rc = dev->ops->common->chain_alloc(dev->cdev,
					   QED_CHAIN_USE_TO_CONSUME_PRODUCE,
					   QED_CHAIN_MODE_PBL,
					   QED_CHAIN_CNT_TYPE_U32,
					   n_rq_elems,
					   QEDR_RQE_ELEMENT_SIZE,
					   &qp->rq.pbl, &ext_pbl);

	if (rc)
		goto err;

	qp->qp_id = out_params.qp_id;
	qp->icid = out_params.icid;

	qedr_set_iwarp_db_info(dev, qp);
	return rc;

err:
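	/* The qed QP was created before the chain allocations above, so it
	 * must be torn down on this error path.
	 */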
	dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);

	return rc;
}

static void qedr_cleanup_kernel(struct qedr_dev *dev, struct qedr_qp *qp)
{
	dev->ops->common->chain_free(dev->cdev, &qp->sq.pbl);
	kfree(qp->wqe_wr_id);

	dev->ops->common->chain_free(dev->cdev, &qp->rq.pbl);
	kfree(qp->rqe_wr_id);
}

static int qedr_create_kernel_qp(struct qedr_dev *dev,
				 struct qedr_qp *qp,
				 struct ib_pd *ibpd,
				 struct ib_qp_init_attr *attrs)
{
	struct qed_rdma_create_qp_in_params in_params;
	struct qedr_pd *pd = get_qedr_pd(ibpd);
	int rc = -EINVAL;
	u32 n_rq_elems;
	u32 n_sq_elems;
	u32 n_sq_entries;

	memset(&in_params, 0, sizeof(in_params));

	/* A single work request may take up to QEDR_MAX_SQ_WQE_SIZE elements
	 * in the ring. The ring should allow at least a single WR, even if
	 * the user requested none, due to allocation issues.
	 * We should add an extra WR since the prod and cons indices of
	 * wqe_wr_id are managed in such a way that the WQ is considered full
	 * when (prod+1)%max_wr==cons. We currently don't do that because we
	 * double the number of entries due to an iSER issue that pushes far
	 * more WRs than indicated. If we decline its ib_post_send() then we
	 * get error prints in the dmesg we'd like to avoid.
	 */
	qp->sq.max_wr = min_t(u32, attrs->cap.max_send_wr * dev->wq_multiplier,
			      dev->attr.max_sqe);

	qp->wqe_wr_id = kzalloc(qp->sq.max_wr * sizeof(*qp->wqe_wr_id),
				GFP_KERNEL);
	if (!qp->wqe_wr_id) {
		DP_ERR(dev, "create qp: failed SQ shadow memory allocation\n");
		return -ENOMEM;
	}

	/* QP handle to be written in CQE */
	in_params.qp_handle_lo = lower_32_bits((uintptr_t) qp);
	in_params.qp_handle_hi = upper_32_bits((uintptr_t) qp);

	/* A single work request may take up to QEDR_MAX_RQ_WQE_SIZE elements
	 * in the ring. The ring should allow at least a single WR, even if
	 * the user requested none, due to allocation issues.
	 */
	qp->rq.max_wr = (u16) max_t(u32, attrs->cap.max_recv_wr, 1);

	/* Allocate driver internal RQ array */
	qp->rqe_wr_id = kzalloc(qp->rq.max_wr * sizeof(*qp->rqe_wr_id),
				GFP_KERNEL);
	if (!qp->rqe_wr_id) {
		DP_ERR(dev,
		       "create qp: failed RQ shadow memory allocation\n");
		kfree(qp->wqe_wr_id);
		return -ENOMEM;
	}

	qedr_init_common_qp_in_params(dev, pd, qp, attrs, true, &in_params);

	n_sq_entries = attrs->cap.max_send_wr;
	n_sq_entries = min_t(u32, n_sq_entries, dev->attr.max_sqe);
	n_sq_entries = max_t(u32, n_sq_entries, 1);
	n_sq_elems = n_sq_entries * QEDR_MAX_SQE_ELEMENTS_PER_SQE;

	n_rq_elems = qp->rq.max_wr * QEDR_MAX_RQE_ELEMENTS_PER_RQE;

	if (rdma_protocol_iwarp(&dev->ibdev, 1))
		rc = qedr_iwarp_create_kernel_qp(dev, qp, &in_params,
						 n_sq_elems, n_rq_elems);
	else
		rc = qedr_roce_create_kernel_qp(dev, qp, &in_params,
						n_sq_elems, n_rq_elems);
	if (rc)
		qedr_cleanup_kernel(dev, qp);

	return rc;
}

struct ib_qp *qedr_create_qp(struct ib_pd *ibpd,
			     struct ib_qp_init_attr *attrs,
			     struct ib_udata *udata)
{
	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
	struct qedr_pd *pd = get_qedr_pd(ibpd);
	struct qedr_qp *qp;
	struct ib_qp *ibqp;
	int rc = 0;

	DP_DEBUG(dev, QEDR_MSG_QP, "create qp: called from %s, pd=%p\n",
		 udata ? "user library" : "kernel", pd);

	rc = qedr_check_qp_attrs(ibpd, dev, attrs);
	if (rc)
		return ERR_PTR(rc);

	if (attrs->srq)
		return ERR_PTR(-EINVAL);

	DP_DEBUG(dev, QEDR_MSG_QP,
		 "create qp: called from %s, event_handler=%p, eepd=%p sq_cq=%p, sq_icid=%d, rq_cq=%p, rq_icid=%d\n",
		 udata ? "user library" : "kernel", attrs->event_handler, pd,
		 get_qedr_cq(attrs->send_cq),
		 get_qedr_cq(attrs->send_cq)->icid,
		 get_qedr_cq(attrs->recv_cq),
		 get_qedr_cq(attrs->recv_cq)->icid);

	qp = kzalloc(sizeof(*qp), GFP_KERNEL);
	if (!qp) {
		DP_ERR(dev, "create qp: failed allocating memory\n");
		return ERR_PTR(-ENOMEM);
	}

	qedr_set_common_qp_params(dev, qp, pd, attrs);

	if (attrs->qp_type == IB_QPT_GSI) {
		ibqp = qedr_create_gsi_qp(dev, attrs, qp);
		if (IS_ERR(ibqp))
			kfree(qp);
		return ibqp;
	}

	if (udata)
		rc = qedr_create_user_qp(dev, qp, ibpd, udata, attrs);
	else
		rc = qedr_create_kernel_qp(dev, qp, ibpd, attrs);

	if (rc)
		goto err;

	qp->ibqp.qp_num = qp->qp_id;

	rc = qedr_idr_add(dev, qp, qp->qp_id);
	if (rc)
		goto err;

	return &qp->ibqp;

err:
	kfree(qp);

	return ERR_PTR(-EFAULT);
}

static enum ib_qp_state qedr_get_ibqp_state(enum qed_roce_qp_state qp_state)
{
	switch (qp_state) {
	case QED_ROCE_QP_STATE_RESET:
		return IB_QPS_RESET;
	case QED_ROCE_QP_STATE_INIT:
		return IB_QPS_INIT;
	case QED_ROCE_QP_STATE_RTR:
		return IB_QPS_RTR;
	case QED_ROCE_QP_STATE_RTS:
		return IB_QPS_RTS;
	case QED_ROCE_QP_STATE_SQD:
		return IB_QPS_SQD;
	case QED_ROCE_QP_STATE_ERR:
		return IB_QPS_ERR;
	case QED_ROCE_QP_STATE_SQE:
		return IB_QPS_SQE;
	}
	return IB_QPS_ERR;
}

static enum qed_roce_qp_state qedr_get_state_from_ibqp(
					enum ib_qp_state qp_state)
{
	switch (qp_state) {
	case IB_QPS_RESET:
		return QED_ROCE_QP_STATE_RESET;
	case IB_QPS_INIT:
		return QED_ROCE_QP_STATE_INIT;
	case IB_QPS_RTR:
		return QED_ROCE_QP_STATE_RTR;
	case IB_QPS_RTS:
		return QED_ROCE_QP_STATE_RTS;
	case IB_QPS_SQD:
		return QED_ROCE_QP_STATE_SQD;
	case IB_QPS_ERR:
		return QED_ROCE_QP_STATE_ERR;
	default:
		return QED_ROCE_QP_STATE_ERR;
	}
}

static void qedr_reset_qp_hwq_info(struct qedr_qp_hwq_info *qph)
{
	qed_chain_reset(&qph->pbl);
	qph->prod = 0;
	qph->cons = 0;
	qph->wqe_cons = 0;
	qph->db_data.data.value = cpu_to_le16(0);
}

static int qedr_update_qp_state(struct qedr_dev *dev,
				struct qedr_qp *qp,
				enum qed_roce_qp_state cur_state,
				enum qed_roce_qp_state new_state)
{
	int status = 0;

	if (new_state == cur_state)
		return 0;

	switch (cur_state) {
	case QED_ROCE_QP_STATE_RESET:
		switch (new_state) {
		case QED_ROCE_QP_STATE_INIT:
			qp->prev_wqe_size = 0;
			qedr_reset_qp_hwq_info(&qp->sq);
			qedr_reset_qp_hwq_info(&qp->rq);
			break;
		default:
			status = -EINVAL;
			break;
		};
		break;
	case QED_ROCE_QP_STATE_INIT:
		switch (new_state) {
		case QED_ROCE_QP_STATE_RTR:
			/* Update doorbell (in case post_recv was
			 * done before move to RTR)
			 */

			if (rdma_protocol_roce(&dev->ibdev, 1)) {
				wmb();
				writel(qp->rq.db_data.raw, qp->rq.db);
				/* Make sure write takes effect */
				mmiowb();
			}
			break;
		case QED_ROCE_QP_STATE_ERR:
			break;
		default:
			/* Invalid state change. */
			status = -EINVAL;
			break;
		};
		break;
	case QED_ROCE_QP_STATE_RTR:
		/* RTR->XXX */
		switch (new_state) {
		case QED_ROCE_QP_STATE_RTS:
			break;
		case QED_ROCE_QP_STATE_ERR:
			break;
		default:
			/* Invalid state change. */
			status = -EINVAL;
			break;
		};
		break;
	case QED_ROCE_QP_STATE_RTS:
		/* RTS->XXX */
		switch (new_state) {
		case QED_ROCE_QP_STATE_SQD:
			break;
		case QED_ROCE_QP_STATE_ERR:
			break;
		default:
			/* Invalid state change. */
			status = -EINVAL;
			break;
		};
		break;
	case QED_ROCE_QP_STATE_SQD:
		/* SQD->XXX */
		switch (new_state) {
		case QED_ROCE_QP_STATE_RTS:
		case QED_ROCE_QP_STATE_ERR:
			break;
		default:
			/* Invalid state change. */
			status = -EINVAL;
			break;
		};
		break;
	case QED_ROCE_QP_STATE_ERR:
		/* ERR->XXX */
		switch (new_state) {
		case QED_ROCE_QP_STATE_RESET:
			if ((qp->rq.prod != qp->rq.cons) ||
			    (qp->sq.prod != qp->sq.cons)) {
				DP_NOTICE(dev,
					  "Error->Reset with rq/sq not empty rq.prod=%x rq.cons=%x sq.prod=%x sq.cons=%x\n",
					  qp->rq.prod, qp->rq.cons, qp->sq.prod,
					  qp->sq.cons);
				status = -EINVAL;
			}
			break;
		default:
			status = -EINVAL;
			break;
		};
		break;
	default:
		status = -EINVAL;
		break;
	};

	return status;
}

int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
		   int attr_mask, struct ib_udata *udata)
{
	struct qedr_qp *qp = get_qedr_qp(ibqp);
	struct qed_rdma_modify_qp_in_params qp_params = { 0 };
	struct qedr_dev *dev = get_qedr_dev(&qp->dev->ibdev);
	const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
	enum ib_qp_state old_qp_state, new_qp_state;
	enum qed_roce_qp_state cur_state;
	int rc = 0;

	DP_DEBUG(dev, QEDR_MSG_QP,
		 "modify qp: qp %p attr_mask=0x%x, state=%d", qp, attr_mask,
		 attr->qp_state);

	old_qp_state = qedr_get_ibqp_state(qp->state);
	if (attr_mask & IB_QP_STATE)
		new_qp_state = attr->qp_state;
	else
		new_qp_state = old_qp_state;

	if (rdma_protocol_roce(&dev->ibdev, 1)) {
		if (!ib_modify_qp_is_ok(old_qp_state, new_qp_state,
					ibqp->qp_type, attr_mask,
					IB_LINK_LAYER_ETHERNET)) {
			DP_ERR(dev,
			       "modify qp: invalid attribute mask=0x%x specified for\n"
			       "qpn=0x%x of type=0x%x old_qp_state=0x%x, new_qp_state=0x%x\n",
			       attr_mask, qp->qp_id, ibqp->qp_type,
			       old_qp_state, new_qp_state);
			rc = -EINVAL;
			goto err;
		}
	}

	/* Translate the masks...
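	 * For each field selected in attr_mask, the value is copied from
	 * ib_qp_attr into qp_params and the matching
	 * QED_*_MODIFY_QP_VALID_* flag is set, so the FW only applies the
	 * chosen fields.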
*/ 1987 if (attr_mask & IB_QP_STATE) { 1988 SET_FIELD(qp_params.modify_flags, 1989 QED_RDMA_MODIFY_QP_VALID_NEW_STATE, 1); 1990 qp_params.new_state = qedr_get_state_from_ibqp(attr->qp_state); 1991 } 1992 1993 if (attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY) 1994 qp_params.sqd_async = true; 1995 1996 if (attr_mask & IB_QP_PKEY_INDEX) { 1997 SET_FIELD(qp_params.modify_flags, 1998 QED_ROCE_MODIFY_QP_VALID_PKEY, 1); 1999 if (attr->pkey_index >= QEDR_ROCE_PKEY_TABLE_LEN) { 2000 rc = -EINVAL; 2001 goto err; 2002 } 2003 2004 qp_params.pkey = QEDR_ROCE_PKEY_DEFAULT; 2005 } 2006 2007 if (attr_mask & IB_QP_QKEY) 2008 qp->qkey = attr->qkey; 2009 2010 if (attr_mask & IB_QP_ACCESS_FLAGS) { 2011 SET_FIELD(qp_params.modify_flags, 2012 QED_RDMA_MODIFY_QP_VALID_RDMA_OPS_EN, 1); 2013 qp_params.incoming_rdma_read_en = attr->qp_access_flags & 2014 IB_ACCESS_REMOTE_READ; 2015 qp_params.incoming_rdma_write_en = attr->qp_access_flags & 2016 IB_ACCESS_REMOTE_WRITE; 2017 qp_params.incoming_atomic_en = attr->qp_access_flags & 2018 IB_ACCESS_REMOTE_ATOMIC; 2019 } 2020 2021 if (attr_mask & (IB_QP_AV | IB_QP_PATH_MTU)) { 2022 if (attr_mask & IB_QP_PATH_MTU) { 2023 if (attr->path_mtu < IB_MTU_256 || 2024 attr->path_mtu > IB_MTU_4096) { 2025 pr_err("error: Only MTU sizes of 256, 512, 1024, 2048 and 4096 are supported by RoCE\n"); 2026 rc = -EINVAL; 2027 goto err; 2028 } 2029 qp->mtu = min(ib_mtu_enum_to_int(attr->path_mtu), 2030 ib_mtu_enum_to_int(iboe_get_mtu 2031 (dev->ndev->mtu))); 2032 } 2033 2034 if (!qp->mtu) { 2035 qp->mtu = 2036 ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu)); 2037 pr_err("Fixing zeroed MTU to qp->mtu = %d\n", qp->mtu); 2038 } 2039 2040 SET_FIELD(qp_params.modify_flags, 2041 QED_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR, 1); 2042 2043 qp_params.traffic_class_tos = grh->traffic_class; 2044 qp_params.flow_label = grh->flow_label; 2045 qp_params.hop_limit_ttl = grh->hop_limit; 2046 2047 qp->sgid_idx = grh->sgid_index; 2048 2049 rc = get_gid_info_from_table(ibqp, attr, attr_mask, &qp_params); 2050 if (rc) { 2051 DP_ERR(dev, 2052 "modify qp: problems with GID index %d (rc=%d)\n", 2053 grh->sgid_index, rc); 2054 return rc; 2055 } 2056 2057 rc = qedr_get_dmac(dev, &attr->ah_attr, 2058 qp_params.remote_mac_addr); 2059 if (rc) 2060 return rc; 2061 2062 qp_params.use_local_mac = true; 2063 ether_addr_copy(qp_params.local_mac_addr, dev->ndev->dev_addr); 2064 2065 DP_DEBUG(dev, QEDR_MSG_QP, "dgid=%x:%x:%x:%x\n", 2066 qp_params.dgid.dwords[0], qp_params.dgid.dwords[1], 2067 qp_params.dgid.dwords[2], qp_params.dgid.dwords[3]); 2068 DP_DEBUG(dev, QEDR_MSG_QP, "sgid=%x:%x:%x:%x\n", 2069 qp_params.sgid.dwords[0], qp_params.sgid.dwords[1], 2070 qp_params.sgid.dwords[2], qp_params.sgid.dwords[3]); 2071 DP_DEBUG(dev, QEDR_MSG_QP, "remote_mac=[%pM]\n", 2072 qp_params.remote_mac_addr); 2073 2074 qp_params.mtu = qp->mtu; 2075 qp_params.lb_indication = false; 2076 } 2077 2078 if (!qp_params.mtu) { 2079 /* Stay with current MTU */ 2080 if (qp->mtu) 2081 qp_params.mtu = qp->mtu; 2082 else 2083 qp_params.mtu = 2084 ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu)); 2085 } 2086 2087 if (attr_mask & IB_QP_TIMEOUT) { 2088 SET_FIELD(qp_params.modify_flags, 2089 QED_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT, 1); 2090 2091 /* The received timeout value is an exponent used like this: 2092 * "12.7.34 LOCAL ACK TIMEOUT 2093 * Value representing the transport (ACK) timeout for use by 2094 * the remote, expressed as: 4.096 * 2^timeout [usec]" 2095 * The FW expects timeout in msec so we need to divide the usec 2096 * result by 1000. 
We'll approximate 1000~2^10, and 4.096 ~ 2^2, 2097 * so we get: 2^2 * 2^timeout / 2^10 = 2^(timeout - 8). 2098 * The value of zero means infinite so we use a 'max_t' to make 2099 * sure that sub 1 msec values will be configured as 1 msec. 2100 */ 2101 if (attr->timeout) 2102 qp_params.ack_timeout = 2103 1 << max_t(int, attr->timeout - 8, 0); 2104 else 2105 qp_params.ack_timeout = 0; 2106 } 2107 2108 if (attr_mask & IB_QP_RETRY_CNT) { 2109 SET_FIELD(qp_params.modify_flags, 2110 QED_ROCE_MODIFY_QP_VALID_RETRY_CNT, 1); 2111 qp_params.retry_cnt = attr->retry_cnt; 2112 } 2113 2114 if (attr_mask & IB_QP_RNR_RETRY) { 2115 SET_FIELD(qp_params.modify_flags, 2116 QED_ROCE_MODIFY_QP_VALID_RNR_RETRY_CNT, 1); 2117 qp_params.rnr_retry_cnt = attr->rnr_retry; 2118 } 2119 2120 if (attr_mask & IB_QP_RQ_PSN) { 2121 SET_FIELD(qp_params.modify_flags, 2122 QED_ROCE_MODIFY_QP_VALID_RQ_PSN, 1); 2123 qp_params.rq_psn = attr->rq_psn; 2124 qp->rq_psn = attr->rq_psn; 2125 } 2126 2127 if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) { 2128 if (attr->max_rd_atomic > dev->attr.max_qp_req_rd_atomic_resc) { 2129 rc = -EINVAL; 2130 DP_ERR(dev, 2131 "unsupported max_rd_atomic=%d, supported=%d\n", 2132 attr->max_rd_atomic, 2133 dev->attr.max_qp_req_rd_atomic_resc); 2134 goto err; 2135 } 2136 2137 SET_FIELD(qp_params.modify_flags, 2138 QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_REQ, 1); 2139 qp_params.max_rd_atomic_req = attr->max_rd_atomic; 2140 } 2141 2142 if (attr_mask & IB_QP_MIN_RNR_TIMER) { 2143 SET_FIELD(qp_params.modify_flags, 2144 QED_ROCE_MODIFY_QP_VALID_MIN_RNR_NAK_TIMER, 1); 2145 qp_params.min_rnr_nak_timer = attr->min_rnr_timer; 2146 } 2147 2148 if (attr_mask & IB_QP_SQ_PSN) { 2149 SET_FIELD(qp_params.modify_flags, 2150 QED_ROCE_MODIFY_QP_VALID_SQ_PSN, 1); 2151 qp_params.sq_psn = attr->sq_psn; 2152 qp->sq_psn = attr->sq_psn; 2153 } 2154 2155 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) { 2156 if (attr->max_dest_rd_atomic > 2157 dev->attr.max_qp_resp_rd_atomic_resc) { 2158 DP_ERR(dev, 2159 "unsupported max_dest_rd_atomic=%d, supported=%d\n", 2160 attr->max_dest_rd_atomic, 2161 dev->attr.max_qp_resp_rd_atomic_resc); 2162 2163 rc = -EINVAL; 2164 goto err; 2165 } 2166 2167 SET_FIELD(qp_params.modify_flags, 2168 QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_RESP, 1); 2169 qp_params.max_rd_atomic_resp = attr->max_dest_rd_atomic; 2170 } 2171 2172 if (attr_mask & IB_QP_DEST_QPN) { 2173 SET_FIELD(qp_params.modify_flags, 2174 QED_ROCE_MODIFY_QP_VALID_DEST_QP, 1); 2175 2176 qp_params.dest_qp = attr->dest_qp_num; 2177 qp->dest_qp_num = attr->dest_qp_num; 2178 } 2179 2180 cur_state = qp->state; 2181 2182 /* Update the QP state before the actual ramrod to prevent a race with 2183 * fast path. Modifying the QP state to error will cause the device to 2184 * flush the CQEs and while polling the flushed CQEs will considered as 2185 * a potential issue if the QP isn't in error state. 
2186 */ 2187 if ((attr_mask & IB_QP_STATE) && qp->qp_type != IB_QPT_GSI && 2188 !udata && qp_params.new_state == QED_ROCE_QP_STATE_ERR) 2189 qp->state = QED_ROCE_QP_STATE_ERR; 2190 2191 if (qp->qp_type != IB_QPT_GSI) 2192 rc = dev->ops->rdma_modify_qp(dev->rdma_ctx, 2193 qp->qed_qp, &qp_params); 2194 2195 if (attr_mask & IB_QP_STATE) { 2196 if ((qp->qp_type != IB_QPT_GSI) && (!udata)) 2197 rc = qedr_update_qp_state(dev, qp, cur_state, 2198 qp_params.new_state); 2199 qp->state = qp_params.new_state; 2200 } 2201 2202 err: 2203 return rc; 2204 } 2205 2206 static int qedr_to_ib_qp_acc_flags(struct qed_rdma_query_qp_out_params *params) 2207 { 2208 int ib_qp_acc_flags = 0; 2209 2210 if (params->incoming_rdma_write_en) 2211 ib_qp_acc_flags |= IB_ACCESS_REMOTE_WRITE; 2212 if (params->incoming_rdma_read_en) 2213 ib_qp_acc_flags |= IB_ACCESS_REMOTE_READ; 2214 if (params->incoming_atomic_en) 2215 ib_qp_acc_flags |= IB_ACCESS_REMOTE_ATOMIC; 2216 ib_qp_acc_flags |= IB_ACCESS_LOCAL_WRITE; 2217 return ib_qp_acc_flags; 2218 } 2219 2220 int qedr_query_qp(struct ib_qp *ibqp, 2221 struct ib_qp_attr *qp_attr, 2222 int attr_mask, struct ib_qp_init_attr *qp_init_attr) 2223 { 2224 struct qed_rdma_query_qp_out_params params; 2225 struct qedr_qp *qp = get_qedr_qp(ibqp); 2226 struct qedr_dev *dev = qp->dev; 2227 int rc = 0; 2228 2229 memset(&params, 0, sizeof(params)); 2230 2231 rc = dev->ops->rdma_query_qp(dev->rdma_ctx, qp->qed_qp, &params); 2232 if (rc) 2233 goto err; 2234 2235 memset(qp_attr, 0, sizeof(*qp_attr)); 2236 memset(qp_init_attr, 0, sizeof(*qp_init_attr)); 2237 2238 qp_attr->qp_state = qedr_get_ibqp_state(params.state); 2239 qp_attr->cur_qp_state = qedr_get_ibqp_state(params.state); 2240 qp_attr->path_mtu = ib_mtu_int_to_enum(params.mtu); 2241 qp_attr->path_mig_state = IB_MIG_MIGRATED; 2242 qp_attr->rq_psn = params.rq_psn; 2243 qp_attr->sq_psn = params.sq_psn; 2244 qp_attr->dest_qp_num = params.dest_qp; 2245 2246 qp_attr->qp_access_flags = qedr_to_ib_qp_acc_flags(&params); 2247 2248 qp_attr->cap.max_send_wr = qp->sq.max_wr; 2249 qp_attr->cap.max_recv_wr = qp->rq.max_wr; 2250 qp_attr->cap.max_send_sge = qp->sq.max_sges; 2251 qp_attr->cap.max_recv_sge = qp->rq.max_sges; 2252 qp_attr->cap.max_inline_data = ROCE_REQ_MAX_INLINE_DATA_SIZE; 2253 qp_init_attr->cap = qp_attr->cap; 2254 2255 qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE; 2256 rdma_ah_set_grh(&qp_attr->ah_attr, NULL, 2257 params.flow_label, qp->sgid_idx, 2258 params.hop_limit_ttl, params.traffic_class_tos); 2259 rdma_ah_set_dgid_raw(&qp_attr->ah_attr, &params.dgid.bytes[0]); 2260 rdma_ah_set_port_num(&qp_attr->ah_attr, 1); 2261 rdma_ah_set_sl(&qp_attr->ah_attr, 0); 2262 qp_attr->timeout = params.timeout; 2263 qp_attr->rnr_retry = params.rnr_retry; 2264 qp_attr->retry_cnt = params.retry_cnt; 2265 qp_attr->min_rnr_timer = params.min_rnr_nak_timer; 2266 qp_attr->pkey_index = params.pkey_index; 2267 qp_attr->port_num = 1; 2268 rdma_ah_set_path_bits(&qp_attr->ah_attr, 0); 2269 rdma_ah_set_static_rate(&qp_attr->ah_attr, 0); 2270 qp_attr->alt_pkey_index = 0; 2271 qp_attr->alt_port_num = 0; 2272 qp_attr->alt_timeout = 0; 2273 memset(&qp_attr->alt_ah_attr, 0, sizeof(qp_attr->alt_ah_attr)); 2274 2275 qp_attr->sq_draining = (params.state == QED_ROCE_QP_STATE_SQD) ? 1 : 0; 2276 qp_attr->max_dest_rd_atomic = params.max_dest_rd_atomic; 2277 qp_attr->max_rd_atomic = params.max_rd_atomic; 2278 qp_attr->en_sqd_async_notify = (params.sqd_async) ?
1 : 0; 2279 2280 DP_DEBUG(dev, QEDR_MSG_QP, "QEDR_QUERY_QP: max_inline_data=%d\n", 2281 qp_attr->cap.max_inline_data); 2282 2283 err: 2284 return rc; 2285 } 2286 2287 static int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp) 2288 { 2289 int rc = 0; 2290 2291 if (qp->qp_type != IB_QPT_GSI) { 2292 rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp); 2293 if (rc) 2294 return rc; 2295 } 2296 2297 if (qp->ibqp.uobject && qp->ibqp.uobject->context) 2298 qedr_cleanup_user(dev, qp); 2299 else 2300 qedr_cleanup_kernel(dev, qp); 2301 2302 return 0; 2303 } 2304 2305 int qedr_destroy_qp(struct ib_qp *ibqp) 2306 { 2307 struct qedr_qp *qp = get_qedr_qp(ibqp); 2308 struct qedr_dev *dev = qp->dev; 2309 struct ib_qp_attr attr; 2310 int attr_mask = 0; 2311 int rc = 0; 2312 2313 DP_DEBUG(dev, QEDR_MSG_QP, "destroy qp: destroying %p, qp type=%d\n", 2314 qp, qp->qp_type); 2315 2316 if (rdma_protocol_roce(&dev->ibdev, 1)) { 2317 if ((qp->state != QED_ROCE_QP_STATE_RESET) && 2318 (qp->state != QED_ROCE_QP_STATE_ERR) && 2319 (qp->state != QED_ROCE_QP_STATE_INIT)) { 2320 2321 attr.qp_state = IB_QPS_ERR; 2322 attr_mask |= IB_QP_STATE; 2323 2324 /* Change the QP state to ERROR */ 2325 qedr_modify_qp(ibqp, &attr, attr_mask, NULL); 2326 } 2327 } else { 2328 /* Wait for the connect/accept to complete */ 2329 if (qp->ep) { 2330 int wait_count = 1; 2331 2332 while (qp->ep->during_connect) { 2333 DP_DEBUG(dev, QEDR_MSG_QP, 2334 "Still in during connect/accept\n"); 2335 2336 msleep(100); 2337 if (wait_count++ > 200) { 2338 DP_NOTICE(dev, 2339 "during connect timeout\n"); 2340 break; 2341 } 2342 } 2343 } 2344 } 2345 2346 if (qp->qp_type == IB_QPT_GSI) 2347 qedr_destroy_gsi_qp(dev); 2348 2349 qedr_free_qp_resources(dev, qp); 2350 2351 if (atomic_dec_and_test(&qp->refcnt)) { 2352 qedr_idr_remove(dev, qp->qp_id); 2353 kfree(qp); 2354 } 2355 return rc; 2356 } 2357 2358 struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr, 2359 struct ib_udata *udata) 2360 { 2361 struct qedr_ah *ah; 2362 2363 ah = kzalloc(sizeof(*ah), GFP_ATOMIC); 2364 if (!ah) 2365 return ERR_PTR(-ENOMEM); 2366 2367 ah->attr = *attr; 2368 2369 return &ah->ibah; 2370 } 2371 2372 int qedr_destroy_ah(struct ib_ah *ibah) 2373 { 2374 struct qedr_ah *ah = get_qedr_ah(ibah); 2375 2376 kfree(ah); 2377 return 0; 2378 } 2379 2380 static void free_mr_info(struct qedr_dev *dev, struct mr_info *info) 2381 { 2382 struct qedr_pbl *pbl, *tmp; 2383 2384 if (info->pbl_table) 2385 list_add_tail(&info->pbl_table->list_entry, 2386 &info->free_pbl_list); 2387 2388 if (!list_empty(&info->inuse_pbl_list)) 2389 list_splice(&info->inuse_pbl_list, &info->free_pbl_list); 2390 2391 list_for_each_entry_safe(pbl, tmp, &info->free_pbl_list, list_entry) { 2392 list_del(&pbl->list_entry); 2393 qedr_free_pbl(dev, &info->pbl_info, pbl); 2394 } 2395 } 2396 2397 static int init_mr_info(struct qedr_dev *dev, struct mr_info *info, 2398 size_t page_list_len, bool two_layered) 2399 { 2400 struct qedr_pbl *tmp; 2401 int rc; 2402 2403 INIT_LIST_HEAD(&info->free_pbl_list); 2404 INIT_LIST_HEAD(&info->inuse_pbl_list); 2405 2406 rc = qedr_prepare_pbl_tbl(dev, &info->pbl_info, 2407 page_list_len, two_layered); 2408 if (rc) 2409 goto done; 2410 2411 info->pbl_table = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL); 2412 if (IS_ERR(info->pbl_table)) { 2413 rc = PTR_ERR(info->pbl_table); 2414 goto done; 2415 } 2416 2417 DP_DEBUG(dev, QEDR_MSG_MR, "pbl_table_pa = %pa\n", 2418 &info->pbl_table->pa); 2419 2420 /* in usual case we use 2 PBLs, so we add one to free 
2421 * list and allocating another one 2422 */ 2423 tmp = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL); 2424 if (IS_ERR(tmp)) { 2425 DP_DEBUG(dev, QEDR_MSG_MR, "Extra PBL is not allocated\n"); 2426 goto done; 2427 } 2428 2429 list_add_tail(&tmp->list_entry, &info->free_pbl_list); 2430 2431 DP_DEBUG(dev, QEDR_MSG_MR, "extra pbl_table_pa = %pa\n", &tmp->pa); 2432 2433 done: 2434 if (rc) 2435 free_mr_info(dev, info); 2436 2437 return rc; 2438 } 2439 2440 struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len, 2441 u64 usr_addr, int acc, struct ib_udata *udata) 2442 { 2443 struct qedr_dev *dev = get_qedr_dev(ibpd->device); 2444 struct qedr_mr *mr; 2445 struct qedr_pd *pd; 2446 int rc = -ENOMEM; 2447 2448 pd = get_qedr_pd(ibpd); 2449 DP_DEBUG(dev, QEDR_MSG_MR, 2450 "qedr_register user mr pd = %d start = %lld, len = %lld, usr_addr = %lld, acc = %d\n", 2451 pd->pd_id, start, len, usr_addr, acc); 2452 2453 if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE)) 2454 return ERR_PTR(-EINVAL); 2455 2456 mr = kzalloc(sizeof(*mr), GFP_KERNEL); 2457 if (!mr) 2458 return ERR_PTR(rc); 2459 2460 mr->type = QEDR_MR_USER; 2461 2462 mr->umem = ib_umem_get(ibpd->uobject->context, start, len, acc, 0); 2463 if (IS_ERR(mr->umem)) { 2464 rc = -EFAULT; 2465 goto err0; 2466 } 2467 2468 rc = init_mr_info(dev, &mr->info, ib_umem_page_count(mr->umem), 1); 2469 if (rc) 2470 goto err1; 2471 2472 qedr_populate_pbls(dev, mr->umem, mr->info.pbl_table, 2473 &mr->info.pbl_info, mr->umem->page_shift); 2474 2475 rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid); 2476 if (rc) { 2477 DP_ERR(dev, "roce alloc tid returned an error %d\n", rc); 2478 goto err1; 2479 } 2480 2481 /* Index only, 18 bit long, lkey = itid << 8 | key */ 2482 mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR; 2483 mr->hw_mr.key = 0; 2484 mr->hw_mr.pd = pd->pd_id; 2485 mr->hw_mr.local_read = 1; 2486 mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0; 2487 mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0; 2488 mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0; 2489 mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 
1 : 0; 2490 mr->hw_mr.mw_bind = false; 2491 mr->hw_mr.pbl_ptr = mr->info.pbl_table[0].pa; 2492 mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered; 2493 mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size); 2494 mr->hw_mr.page_size_log = mr->umem->page_shift; 2495 mr->hw_mr.fbo = ib_umem_offset(mr->umem); 2496 mr->hw_mr.length = len; 2497 mr->hw_mr.vaddr = usr_addr; 2498 mr->hw_mr.zbva = false; 2499 mr->hw_mr.phy_mr = false; 2500 mr->hw_mr.dma_mr = false; 2501 2502 rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr); 2503 if (rc) { 2504 DP_ERR(dev, "roce register tid returned an error %d\n", rc); 2505 goto err2; 2506 } 2507 2508 mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key; 2509 if (mr->hw_mr.remote_write || mr->hw_mr.remote_read || 2510 mr->hw_mr.remote_atomic) 2511 mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key; 2512 2513 DP_DEBUG(dev, QEDR_MSG_MR, "register user mr lkey: %x\n", 2514 mr->ibmr.lkey); 2515 return &mr->ibmr; 2516 2517 err2: 2518 dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid); 2519 err1: 2520 qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table); 2521 err0: 2522 kfree(mr); 2523 return ERR_PTR(rc); 2524 } 2525 2526 int qedr_dereg_mr(struct ib_mr *ib_mr) 2527 { 2528 struct qedr_mr *mr = get_qedr_mr(ib_mr); 2529 struct qedr_dev *dev = get_qedr_dev(ib_mr->device); 2530 int rc = 0; 2531 2532 rc = dev->ops->rdma_deregister_tid(dev->rdma_ctx, mr->hw_mr.itid); 2533 if (rc) 2534 return rc; 2535 2536 dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid); 2537 2538 if ((mr->type != QEDR_MR_DMA) && (mr->type != QEDR_MR_FRMR)) 2539 qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table); 2540 2541 /* it could be user registered memory. */ 2542 if (mr->umem) 2543 ib_umem_release(mr->umem); 2544 2545 kfree(mr); 2546 2547 return rc; 2548 } 2549 2550 static struct qedr_mr *__qedr_alloc_mr(struct ib_pd *ibpd, 2551 int max_page_list_len) 2552 { 2553 struct qedr_pd *pd = get_qedr_pd(ibpd); 2554 struct qedr_dev *dev = get_qedr_dev(ibpd->device); 2555 struct qedr_mr *mr; 2556 int rc = -ENOMEM; 2557 2558 DP_DEBUG(dev, QEDR_MSG_MR, 2559 "qedr_alloc_frmr pd = %d max_page_list_len= %d\n", pd->pd_id, 2560 max_page_list_len); 2561 2562 mr = kzalloc(sizeof(*mr), GFP_KERNEL); 2563 if (!mr) 2564 return ERR_PTR(rc); 2565 2566 mr->dev = dev; 2567 mr->type = QEDR_MR_FRMR; 2568 2569 rc = init_mr_info(dev, &mr->info, max_page_list_len, 1); 2570 if (rc) 2571 goto err0; 2572 2573 rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid); 2574 if (rc) { 2575 DP_ERR(dev, "roce alloc tid returned an error %d\n", rc); 2576 goto err0; 2577 } 2578 2579 /* Index only, 18 bit long, lkey = itid << 8 | key */ 2580 mr->hw_mr.tid_type = QED_RDMA_TID_FMR; 2581 mr->hw_mr.key = 0; 2582 mr->hw_mr.pd = pd->pd_id; 2583 mr->hw_mr.local_read = 1; 2584 mr->hw_mr.local_write = 0; 2585 mr->hw_mr.remote_read = 0; 2586 mr->hw_mr.remote_write = 0; 2587 mr->hw_mr.remote_atomic = 0; 2588 mr->hw_mr.mw_bind = false; 2589 mr->hw_mr.pbl_ptr = 0; 2590 mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered; 2591 mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size); 2592 mr->hw_mr.fbo = 0; 2593 mr->hw_mr.length = 0; 2594 mr->hw_mr.vaddr = 0; 2595 mr->hw_mr.zbva = false; 2596 mr->hw_mr.phy_mr = true; 2597 mr->hw_mr.dma_mr = false; 2598 2599 rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr); 2600 if (rc) { 2601 DP_ERR(dev, "roce register tid returned an error %d\n", rc); 2602 goto err1; 2603 } 2604 2605 mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key; 2606 
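	/* Illustrative example (added commentary, not from the original source):
	 * with the layout noted above - an up-to-18-bit itid shifted left by 8
	 * and OR'd with an 8-bit key - an itid of 0x00345 and key 0 would give
	 * lkey = (0x00345 << 8) | 0 = 0x034500, and rkey is set to the same
	 * value just below.
	 */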
mr->ibmr.rkey = mr->ibmr.lkey; 2607 2608 DP_DEBUG(dev, QEDR_MSG_MR, "alloc frmr: %x\n", mr->ibmr.lkey); 2609 return mr; 2610 2611 err1: 2612 dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid); 2613 err0: 2614 kfree(mr); 2615 return ERR_PTR(rc); 2616 } 2617 2618 struct ib_mr *qedr_alloc_mr(struct ib_pd *ibpd, 2619 enum ib_mr_type mr_type, u32 max_num_sg) 2620 { 2621 struct qedr_mr *mr; 2622 2623 if (mr_type != IB_MR_TYPE_MEM_REG) 2624 return ERR_PTR(-EINVAL); 2625 2626 mr = __qedr_alloc_mr(ibpd, max_num_sg); 2627 2628 if (IS_ERR(mr)) 2629 return ERR_PTR(-EINVAL); 2630 2631 return &mr->ibmr; 2632 } 2633 2634 static int qedr_set_page(struct ib_mr *ibmr, u64 addr) 2635 { 2636 struct qedr_mr *mr = get_qedr_mr(ibmr); 2637 struct qedr_pbl *pbl_table; 2638 struct regpair *pbe; 2639 u32 pbes_in_page; 2640 2641 if (unlikely(mr->npages == mr->info.pbl_info.num_pbes)) { 2642 DP_ERR(mr->dev, "qedr_set_page failes when %d\n", mr->npages); 2643 return -ENOMEM; 2644 } 2645 2646 DP_DEBUG(mr->dev, QEDR_MSG_MR, "qedr_set_page pages[%d] = 0x%llx\n", 2647 mr->npages, addr); 2648 2649 pbes_in_page = mr->info.pbl_info.pbl_size / sizeof(u64); 2650 pbl_table = mr->info.pbl_table + (mr->npages / pbes_in_page); 2651 pbe = (struct regpair *)pbl_table->va; 2652 pbe += mr->npages % pbes_in_page; 2653 pbe->lo = cpu_to_le32((u32)addr); 2654 pbe->hi = cpu_to_le32((u32)upper_32_bits(addr)); 2655 2656 mr->npages++; 2657 2658 return 0; 2659 } 2660 2661 static void handle_completed_mrs(struct qedr_dev *dev, struct mr_info *info) 2662 { 2663 int work = info->completed - info->completed_handled - 1; 2664 2665 DP_DEBUG(dev, QEDR_MSG_MR, "Special FMR work = %d\n", work); 2666 while (work-- > 0 && !list_empty(&info->inuse_pbl_list)) { 2667 struct qedr_pbl *pbl; 2668 2669 /* Free all the page list that are possible to be freed 2670 * (all the ones that were invalidated), under the assumption 2671 * that if an FMR was completed successfully that means that 2672 * if there was an invalidate operation before it also ended 2673 */ 2674 pbl = list_first_entry(&info->inuse_pbl_list, 2675 struct qedr_pbl, list_entry); 2676 list_move_tail(&pbl->list_entry, &info->free_pbl_list); 2677 info->completed_handled++; 2678 } 2679 } 2680 2681 int qedr_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, 2682 int sg_nents, unsigned int *sg_offset) 2683 { 2684 struct qedr_mr *mr = get_qedr_mr(ibmr); 2685 2686 mr->npages = 0; 2687 2688 handle_completed_mrs(mr->dev, &mr->info); 2689 return ib_sg_to_pages(ibmr, sg, sg_nents, NULL, qedr_set_page); 2690 } 2691 2692 struct ib_mr *qedr_get_dma_mr(struct ib_pd *ibpd, int acc) 2693 { 2694 struct qedr_dev *dev = get_qedr_dev(ibpd->device); 2695 struct qedr_pd *pd = get_qedr_pd(ibpd); 2696 struct qedr_mr *mr; 2697 int rc; 2698 2699 mr = kzalloc(sizeof(*mr), GFP_KERNEL); 2700 if (!mr) 2701 return ERR_PTR(-ENOMEM); 2702 2703 mr->type = QEDR_MR_DMA; 2704 2705 rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid); 2706 if (rc) { 2707 DP_ERR(dev, "roce alloc tid returned an error %d\n", rc); 2708 goto err1; 2709 } 2710 2711 /* index only, 18 bit long, lkey = itid << 8 | key */ 2712 mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR; 2713 mr->hw_mr.pd = pd->pd_id; 2714 mr->hw_mr.local_read = 1; 2715 mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0; 2716 mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0; 2717 mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0; 2718 mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 
1 : 0; 2719 mr->hw_mr.dma_mr = true; 2720 2721 rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr); 2722 if (rc) { 2723 DP_ERR(dev, "roce register tid returned an error %d\n", rc); 2724 goto err2; 2725 } 2726 2727 mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key; 2728 if (mr->hw_mr.remote_write || mr->hw_mr.remote_read || 2729 mr->hw_mr.remote_atomic) 2730 mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key; 2731 2732 DP_DEBUG(dev, QEDR_MSG_MR, "get dma mr: lkey = %x\n", mr->ibmr.lkey); 2733 return &mr->ibmr; 2734 2735 err2: 2736 dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid); 2737 err1: 2738 kfree(mr); 2739 return ERR_PTR(rc); 2740 } 2741 2742 static inline int qedr_wq_is_full(struct qedr_qp_hwq_info *wq) 2743 { 2744 return (((wq->prod + 1) % wq->max_wr) == wq->cons); 2745 } 2746 2747 static int sge_data_len(struct ib_sge *sg_list, int num_sge) 2748 { 2749 int i, len = 0; 2750 2751 for (i = 0; i < num_sge; i++) 2752 len += sg_list[i].length; 2753 2754 return len; 2755 } 2756 2757 static void swap_wqe_data64(u64 *p) 2758 { 2759 int i; 2760 2761 for (i = 0; i < QEDR_SQE_ELEMENT_SIZE / sizeof(u64); i++, p++) 2762 *p = cpu_to_be64(cpu_to_le64(*p)); 2763 } 2764 2765 static u32 qedr_prepare_sq_inline_data(struct qedr_dev *dev, 2766 struct qedr_qp *qp, u8 *wqe_size, 2767 struct ib_send_wr *wr, 2768 struct ib_send_wr **bad_wr, u8 *bits, 2769 u8 bit) 2770 { 2771 u32 data_size = sge_data_len(wr->sg_list, wr->num_sge); 2772 char *seg_prt, *wqe; 2773 int i, seg_siz; 2774 2775 if (data_size > ROCE_REQ_MAX_INLINE_DATA_SIZE) { 2776 DP_ERR(dev, "Too much inline data in WR: %d\n", data_size); 2777 *bad_wr = wr; 2778 return 0; 2779 } 2780 2781 if (!data_size) 2782 return data_size; 2783 2784 *bits |= bit; 2785 2786 seg_prt = NULL; 2787 wqe = NULL; 2788 seg_siz = 0; 2789 2790 /* Copy data inline */ 2791 for (i = 0; i < wr->num_sge; i++) { 2792 u32 len = wr->sg_list[i].length; 2793 void *src = (void *)(uintptr_t)wr->sg_list[i].addr; 2794 2795 while (len > 0) { 2796 u32 cur; 2797 2798 /* New segment required */ 2799 if (!seg_siz) { 2800 wqe = (char *)qed_chain_produce(&qp->sq.pbl); 2801 seg_prt = wqe; 2802 seg_siz = sizeof(struct rdma_sq_common_wqe); 2803 (*wqe_size)++; 2804 } 2805 2806 /* Calculate currently allowed length */ 2807 cur = min_t(u32, len, seg_siz); 2808 memcpy(seg_prt, src, cur); 2809 2810 /* Update segment variables */ 2811 seg_prt += cur; 2812 seg_siz -= cur; 2813 2814 /* Update sge variables */ 2815 src += cur; 2816 len -= cur; 2817 2818 /* Swap fully-completed segments */ 2819 if (!seg_siz) 2820 swap_wqe_data64((u64 *)wqe); 2821 } 2822 } 2823 2824 /* swap last not completed segment */ 2825 if (seg_siz) 2826 swap_wqe_data64((u64 *)wqe); 2827 2828 return data_size; 2829 } 2830 2831 #define RQ_SGE_SET(sge, vaddr, vlength, vflags) \ 2832 do { \ 2833 DMA_REGPAIR_LE(sge->addr, vaddr); \ 2834 (sge)->length = cpu_to_le32(vlength); \ 2835 (sge)->flags = cpu_to_le32(vflags); \ 2836 } while (0) 2837 2838 #define SRQ_HDR_SET(hdr, vwr_id, num_sge) \ 2839 do { \ 2840 DMA_REGPAIR_LE(hdr->wr_id, vwr_id); \ 2841 (hdr)->num_sges = num_sge; \ 2842 } while (0) 2843 2844 #define SRQ_SGE_SET(sge, vaddr, vlength, vlkey) \ 2845 do { \ 2846 DMA_REGPAIR_LE(sge->addr, vaddr); \ 2847 (sge)->length = cpu_to_le32(vlength); \ 2848 (sge)->l_key = cpu_to_le32(vlkey); \ 2849 } while (0) 2850 2851 static u32 qedr_prepare_sq_sges(struct qedr_qp *qp, u8 *wqe_size, 2852 struct ib_send_wr *wr) 2853 { 2854 u32 data_size = 0; 2855 int i; 2856 2857 for (i = 0; i < wr->num_sge; i++) { 2858 struct rdma_sq_sge *sge 
= qed_chain_produce(&qp->sq.pbl); 2859 2860 DMA_REGPAIR_LE(sge->addr, wr->sg_list[i].addr); 2861 sge->l_key = cpu_to_le32(wr->sg_list[i].lkey); 2862 sge->length = cpu_to_le32(wr->sg_list[i].length); 2863 data_size += wr->sg_list[i].length; 2864 } 2865 2866 if (wqe_size) 2867 *wqe_size += wr->num_sge; 2868 2869 return data_size; 2870 } 2871 2872 static u32 qedr_prepare_sq_rdma_data(struct qedr_dev *dev, 2873 struct qedr_qp *qp, 2874 struct rdma_sq_rdma_wqe_1st *rwqe, 2875 struct rdma_sq_rdma_wqe_2nd *rwqe2, 2876 struct ib_send_wr *wr, 2877 struct ib_send_wr **bad_wr) 2878 { 2879 rwqe2->r_key = cpu_to_le32(rdma_wr(wr)->rkey); 2880 DMA_REGPAIR_LE(rwqe2->remote_va, rdma_wr(wr)->remote_addr); 2881 2882 if (wr->send_flags & IB_SEND_INLINE && 2883 (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM || 2884 wr->opcode == IB_WR_RDMA_WRITE)) { 2885 u8 flags = 0; 2886 2887 SET_FIELD2(flags, RDMA_SQ_RDMA_WQE_1ST_INLINE_FLG, 1); 2888 return qedr_prepare_sq_inline_data(dev, qp, &rwqe->wqe_size, wr, 2889 bad_wr, &rwqe->flags, flags); 2890 } 2891 2892 return qedr_prepare_sq_sges(qp, &rwqe->wqe_size, wr); 2893 } 2894 2895 static u32 qedr_prepare_sq_send_data(struct qedr_dev *dev, 2896 struct qedr_qp *qp, 2897 struct rdma_sq_send_wqe_1st *swqe, 2898 struct rdma_sq_send_wqe_2st *swqe2, 2899 struct ib_send_wr *wr, 2900 struct ib_send_wr **bad_wr) 2901 { 2902 memset(swqe2, 0, sizeof(*swqe2)); 2903 if (wr->send_flags & IB_SEND_INLINE) { 2904 u8 flags = 0; 2905 2906 SET_FIELD2(flags, RDMA_SQ_SEND_WQE_INLINE_FLG, 1); 2907 return qedr_prepare_sq_inline_data(dev, qp, &swqe->wqe_size, wr, 2908 bad_wr, &swqe->flags, flags); 2909 } 2910 2911 return qedr_prepare_sq_sges(qp, &swqe->wqe_size, wr); 2912 } 2913 2914 static int qedr_prepare_reg(struct qedr_qp *qp, 2915 struct rdma_sq_fmr_wqe_1st *fwqe1, 2916 struct ib_reg_wr *wr) 2917 { 2918 struct qedr_mr *mr = get_qedr_mr(wr->mr); 2919 struct rdma_sq_fmr_wqe_2nd *fwqe2; 2920 2921 fwqe2 = (struct rdma_sq_fmr_wqe_2nd *)qed_chain_produce(&qp->sq.pbl); 2922 fwqe1->addr.hi = upper_32_bits(mr->ibmr.iova); 2923 fwqe1->addr.lo = lower_32_bits(mr->ibmr.iova); 2924 fwqe1->l_key = wr->key; 2925 2926 fwqe2->access_ctrl = 0; 2927 2928 SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_READ, 2929 !!(wr->access & IB_ACCESS_REMOTE_READ)); 2930 SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_WRITE, 2931 !!(wr->access & IB_ACCESS_REMOTE_WRITE)); 2932 SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_ENABLE_ATOMIC, 2933 !!(wr->access & IB_ACCESS_REMOTE_ATOMIC)); 2934 SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_READ, 1); 2935 SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_WRITE, 2936 !!(wr->access & IB_ACCESS_LOCAL_WRITE)); 2937 fwqe2->fmr_ctrl = 0; 2938 2939 SET_FIELD2(fwqe2->fmr_ctrl, RDMA_SQ_FMR_WQE_2ND_PAGE_SIZE_LOG, 2940 ilog2(mr->ibmr.page_size) - 12); 2941 2942 fwqe2->length_hi = 0; 2943 fwqe2->length_lo = mr->ibmr.length; 2944 fwqe2->pbl_addr.hi = upper_32_bits(mr->info.pbl_table->pa); 2945 fwqe2->pbl_addr.lo = lower_32_bits(mr->info.pbl_table->pa); 2946 2947 qp->wqe_wr_id[qp->sq.prod].mr = mr; 2948 2949 return 0; 2950 } 2951 2952 static enum ib_wc_opcode qedr_ib_to_wc_opcode(enum ib_wr_opcode opcode) 2953 { 2954 switch (opcode) { 2955 case IB_WR_RDMA_WRITE: 2956 case IB_WR_RDMA_WRITE_WITH_IMM: 2957 return IB_WC_RDMA_WRITE; 2958 case IB_WR_SEND_WITH_IMM: 2959 case IB_WR_SEND: 2960 case IB_WR_SEND_WITH_INV: 2961 return IB_WC_SEND; 2962 case IB_WR_RDMA_READ: 2963 case IB_WR_RDMA_READ_WITH_INV: 2964 return IB_WC_RDMA_READ; 2965 case 
IB_WR_ATOMIC_CMP_AND_SWP: 2966 return IB_WC_COMP_SWAP; 2967 case IB_WR_ATOMIC_FETCH_AND_ADD: 2968 return IB_WC_FETCH_ADD; 2969 case IB_WR_REG_MR: 2970 return IB_WC_REG_MR; 2971 case IB_WR_LOCAL_INV: 2972 return IB_WC_LOCAL_INV; 2973 default: 2974 return IB_WC_SEND; 2975 } 2976 } 2977 2978 static inline bool qedr_can_post_send(struct qedr_qp *qp, struct ib_send_wr *wr) 2979 { 2980 int wq_is_full, err_wr, pbl_is_full; 2981 struct qedr_dev *dev = qp->dev; 2982 2983 /* prevent SQ overflow and/or processing of a bad WR */ 2984 err_wr = wr->num_sge > qp->sq.max_sges; 2985 wq_is_full = qedr_wq_is_full(&qp->sq); 2986 pbl_is_full = qed_chain_get_elem_left_u32(&qp->sq.pbl) < 2987 QEDR_MAX_SQE_ELEMENTS_PER_SQE; 2988 if (wq_is_full || err_wr || pbl_is_full) { 2989 if (wq_is_full && !(qp->err_bitmap & QEDR_QP_ERR_SQ_FULL)) { 2990 DP_ERR(dev, 2991 "error: WQ is full. Post send on QP %p failed (this error appears only once)\n", 2992 qp); 2993 qp->err_bitmap |= QEDR_QP_ERR_SQ_FULL; 2994 } 2995 2996 if (err_wr && !(qp->err_bitmap & QEDR_QP_ERR_BAD_SR)) { 2997 DP_ERR(dev, 2998 "error: WR is bad. Post send on QP %p failed (this error appears only once)\n", 2999 qp); 3000 qp->err_bitmap |= QEDR_QP_ERR_BAD_SR; 3001 } 3002 3003 if (pbl_is_full && 3004 !(qp->err_bitmap & QEDR_QP_ERR_SQ_PBL_FULL)) { 3005 DP_ERR(dev, 3006 "error: WQ PBL is full. Post send on QP %p failed (this error appears only once)\n", 3007 qp); 3008 qp->err_bitmap |= QEDR_QP_ERR_SQ_PBL_FULL; 3009 } 3010 return false; 3011 } 3012 return true; 3013 } 3014 3015 static int __qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, 3016 struct ib_send_wr **bad_wr) 3017 { 3018 struct qedr_dev *dev = get_qedr_dev(ibqp->device); 3019 struct qedr_qp *qp = get_qedr_qp(ibqp); 3020 struct rdma_sq_atomic_wqe_1st *awqe1; 3021 struct rdma_sq_atomic_wqe_2nd *awqe2; 3022 struct rdma_sq_atomic_wqe_3rd *awqe3; 3023 struct rdma_sq_send_wqe_2st *swqe2; 3024 struct rdma_sq_local_inv_wqe *iwqe; 3025 struct rdma_sq_rdma_wqe_2nd *rwqe2; 3026 struct rdma_sq_send_wqe_1st *swqe; 3027 struct rdma_sq_rdma_wqe_1st *rwqe; 3028 struct rdma_sq_fmr_wqe_1st *fwqe1; 3029 struct rdma_sq_common_wqe *wqe; 3030 u32 length; 3031 int rc = 0; 3032 bool comp; 3033 3034 if (!qedr_can_post_send(qp, wr)) { 3035 *bad_wr = wr; 3036 return -ENOMEM; 3037 } 3038 3039 wqe = qed_chain_produce(&qp->sq.pbl); 3040 qp->wqe_wr_id[qp->sq.prod].signaled = 3041 !!(wr->send_flags & IB_SEND_SIGNALED) || qp->signaled; 3042 3043 wqe->flags = 0; 3044 SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_SE_FLG, 3045 !!(wr->send_flags & IB_SEND_SOLICITED)); 3046 comp = (!!(wr->send_flags & IB_SEND_SIGNALED)) || qp->signaled; 3047 SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_COMP_FLG, comp); 3048 SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_RD_FENCE_FLG, 3049 !!(wr->send_flags & IB_SEND_FENCE)); 3050 wqe->prev_wqe_size = qp->prev_wqe_size; 3051 3052 qp->wqe_wr_id[qp->sq.prod].opcode = qedr_ib_to_wc_opcode(wr->opcode); 3053 3054 switch (wr->opcode) { 3055 case IB_WR_SEND_WITH_IMM: 3056 if (unlikely(rdma_protocol_iwarp(&dev->ibdev, 1))) { 3057 rc = -EINVAL; 3058 *bad_wr = wr; 3059 break; 3060 } 3061 wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_IMM; 3062 swqe = (struct rdma_sq_send_wqe_1st *)wqe; 3063 swqe->wqe_size = 2; 3064 swqe2 = qed_chain_produce(&qp->sq.pbl); 3065 3066 swqe->inv_key_or_imm_data = cpu_to_le32(be32_to_cpu(wr->ex.imm_data)); 3067 length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2, 3068 wr, bad_wr); 3069 swqe->length = cpu_to_le32(length); 3070 qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size; 3071 
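		/* Note (added commentary, not from the original source):
		 * wr->ex.imm_data is carried as __be32 (network order) by the
		 * verbs layer, while the WQE field is little-endian, which is
		 * why the be32_to_cpu()/cpu_to_le32() pair appears a few lines
		 * above when filling inv_key_or_imm_data.
		 */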
qp->prev_wqe_size = swqe->wqe_size; 3072 qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length; 3073 break; 3074 case IB_WR_SEND: 3075 wqe->req_type = RDMA_SQ_REQ_TYPE_SEND; 3076 swqe = (struct rdma_sq_send_wqe_1st *)wqe; 3077 3078 swqe->wqe_size = 2; 3079 swqe2 = qed_chain_produce(&qp->sq.pbl); 3080 length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2, 3081 wr, bad_wr); 3082 swqe->length = cpu_to_le32(length); 3083 qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size; 3084 qp->prev_wqe_size = swqe->wqe_size; 3085 qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length; 3086 break; 3087 case IB_WR_SEND_WITH_INV: 3088 wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_INVALIDATE; 3089 swqe = (struct rdma_sq_send_wqe_1st *)wqe; 3090 swqe2 = qed_chain_produce(&qp->sq.pbl); 3091 swqe->wqe_size = 2; 3092 swqe->inv_key_or_imm_data = cpu_to_le32(wr->ex.invalidate_rkey); 3093 length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2, 3094 wr, bad_wr); 3095 swqe->length = cpu_to_le32(length); 3096 qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size; 3097 qp->prev_wqe_size = swqe->wqe_size; 3098 qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length; 3099 break; 3100 3101 case IB_WR_RDMA_WRITE_WITH_IMM: 3102 if (unlikely(rdma_protocol_iwarp(&dev->ibdev, 1))) { 3103 rc = -EINVAL; 3104 *bad_wr = wr; 3105 break; 3106 } 3107 wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR_WITH_IMM; 3108 rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe; 3109 3110 rwqe->wqe_size = 2; 3111 rwqe->imm_data = htonl(cpu_to_le32(wr->ex.imm_data)); 3112 rwqe2 = qed_chain_produce(&qp->sq.pbl); 3113 length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2, 3114 wr, bad_wr); 3115 rwqe->length = cpu_to_le32(length); 3116 qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size; 3117 qp->prev_wqe_size = rwqe->wqe_size; 3118 qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length; 3119 break; 3120 case IB_WR_RDMA_WRITE: 3121 wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR; 3122 rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe; 3123 3124 rwqe->wqe_size = 2; 3125 rwqe2 = qed_chain_produce(&qp->sq.pbl); 3126 length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2, 3127 wr, bad_wr); 3128 rwqe->length = cpu_to_le32(length); 3129 qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size; 3130 qp->prev_wqe_size = rwqe->wqe_size; 3131 qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length; 3132 break; 3133 case IB_WR_RDMA_READ_WITH_INV: 3134 SET_FIELD2(wqe->flags, RDMA_SQ_RDMA_WQE_1ST_READ_INV_FLG, 1); 3135 /* fallthrough -- same is identical to RDMA READ */ 3136 3137 case IB_WR_RDMA_READ: 3138 wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_RD; 3139 rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe; 3140 3141 rwqe->wqe_size = 2; 3142 rwqe2 = qed_chain_produce(&qp->sq.pbl); 3143 length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2, 3144 wr, bad_wr); 3145 rwqe->length = cpu_to_le32(length); 3146 qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size; 3147 qp->prev_wqe_size = rwqe->wqe_size; 3148 qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length; 3149 break; 3150 3151 case IB_WR_ATOMIC_CMP_AND_SWP: 3152 case IB_WR_ATOMIC_FETCH_AND_ADD: 3153 awqe1 = (struct rdma_sq_atomic_wqe_1st *)wqe; 3154 awqe1->wqe_size = 4; 3155 3156 awqe2 = qed_chain_produce(&qp->sq.pbl); 3157 DMA_REGPAIR_LE(awqe2->remote_va, atomic_wr(wr)->remote_addr); 3158 awqe2->r_key = cpu_to_le32(atomic_wr(wr)->rkey); 3159 3160 awqe3 = qed_chain_produce(&qp->sq.pbl); 3161 3162 if (wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) { 3163 wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_ADD; 3164 DMA_REGPAIR_LE(awqe3->swap_data, 3165 atomic_wr(wr)->compare_add); 3166 } else { 
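			/* Added commentary (not from the original source): for
			 * CMP_AND_SWP the third WQE segment carries both
			 * operands - swap_data holds the swap value and
			 * cmp_data the compare value - whereas the
			 * FETCH_AND_ADD branch above reuses swap_data for the
			 * add operand.
			 */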
3167 wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_CMP_AND_SWAP; 3168 DMA_REGPAIR_LE(awqe3->swap_data, 3169 atomic_wr(wr)->swap); 3170 DMA_REGPAIR_LE(awqe3->cmp_data, 3171 atomic_wr(wr)->compare_add); 3172 } 3173 3174 qedr_prepare_sq_sges(qp, NULL, wr); 3175 3176 qp->wqe_wr_id[qp->sq.prod].wqe_size = awqe1->wqe_size; 3177 qp->prev_wqe_size = awqe1->wqe_size; 3178 break; 3179 3180 case IB_WR_LOCAL_INV: 3181 iwqe = (struct rdma_sq_local_inv_wqe *)wqe; 3182 iwqe->wqe_size = 1; 3183 3184 iwqe->req_type = RDMA_SQ_REQ_TYPE_LOCAL_INVALIDATE; 3185 iwqe->inv_l_key = wr->ex.invalidate_rkey; 3186 qp->wqe_wr_id[qp->sq.prod].wqe_size = iwqe->wqe_size; 3187 qp->prev_wqe_size = iwqe->wqe_size; 3188 break; 3189 case IB_WR_REG_MR: 3190 DP_DEBUG(dev, QEDR_MSG_CQ, "REG_MR\n"); 3191 wqe->req_type = RDMA_SQ_REQ_TYPE_FAST_MR; 3192 fwqe1 = (struct rdma_sq_fmr_wqe_1st *)wqe; 3193 fwqe1->wqe_size = 2; 3194 3195 rc = qedr_prepare_reg(qp, fwqe1, reg_wr(wr)); 3196 if (rc) { 3197 DP_ERR(dev, "IB_REG_MR failed rc=%d\n", rc); 3198 *bad_wr = wr; 3199 break; 3200 } 3201 3202 qp->wqe_wr_id[qp->sq.prod].wqe_size = fwqe1->wqe_size; 3203 qp->prev_wqe_size = fwqe1->wqe_size; 3204 break; 3205 default: 3206 DP_ERR(dev, "invalid opcode 0x%x!\n", wr->opcode); 3207 rc = -EINVAL; 3208 *bad_wr = wr; 3209 break; 3210 } 3211 3212 if (*bad_wr) { 3213 u16 value; 3214 3215 /* Restore prod to its position before 3216 * this WR was processed 3217 */ 3218 value = le16_to_cpu(qp->sq.db_data.data.value); 3219 qed_chain_set_prod(&qp->sq.pbl, value, wqe); 3220 3221 /* Restore prev_wqe_size */ 3222 qp->prev_wqe_size = wqe->prev_wqe_size; 3223 rc = -EINVAL; 3224 DP_ERR(dev, "POST SEND FAILED\n"); 3225 } 3226 3227 return rc; 3228 } 3229 3230 int qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, 3231 struct ib_send_wr **bad_wr) 3232 { 3233 struct qedr_dev *dev = get_qedr_dev(ibqp->device); 3234 struct qedr_qp *qp = get_qedr_qp(ibqp); 3235 unsigned long flags; 3236 int rc = 0; 3237 3238 *bad_wr = NULL; 3239 3240 if (qp->qp_type == IB_QPT_GSI) 3241 return qedr_gsi_post_send(ibqp, wr, bad_wr); 3242 3243 spin_lock_irqsave(&qp->q_lock, flags); 3244 3245 if (rdma_protocol_roce(&dev->ibdev, 1)) { 3246 if ((qp->state != QED_ROCE_QP_STATE_RTS) && 3247 (qp->state != QED_ROCE_QP_STATE_ERR) && 3248 (qp->state != QED_ROCE_QP_STATE_SQD)) { 3249 spin_unlock_irqrestore(&qp->q_lock, flags); 3250 *bad_wr = wr; 3251 DP_DEBUG(dev, QEDR_MSG_CQ, 3252 "QP in wrong state! QP icid=0x%x state %d\n", 3253 qp->icid, qp->state); 3254 return -EINVAL; 3255 } 3256 } 3257 3258 while (wr) { 3259 rc = __qedr_post_send(ibqp, wr, bad_wr); 3260 if (rc) 3261 break; 3262 3263 qp->wqe_wr_id[qp->sq.prod].wr_id = wr->wr_id; 3264 3265 qedr_inc_sw_prod(&qp->sq); 3266 3267 qp->sq.db_data.data.value++; 3268 3269 wr = wr->next; 3270 } 3271 3272 /* Trigger doorbell 3273 * If there was a failure in the first WR then it will be triggered in 3274 * vane. However this is not harmful (as long as the producer value is 3275 * unchanged). For performance reasons we avoid checking for this 3276 * redundant doorbell. 
3277 */ 3278 wmb(); 3279 writel(qp->sq.db_data.raw, qp->sq.db); 3280 3281 /* Make sure write sticks */ 3282 mmiowb(); 3283 3284 spin_unlock_irqrestore(&qp->q_lock, flags); 3285 3286 return rc; 3287 } 3288 3289 int qedr_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, 3290 struct ib_recv_wr **bad_wr) 3291 { 3292 struct qedr_qp *qp = get_qedr_qp(ibqp); 3293 struct qedr_dev *dev = qp->dev; 3294 unsigned long flags; 3295 int status = 0; 3296 3297 if (qp->qp_type == IB_QPT_GSI) 3298 return qedr_gsi_post_recv(ibqp, wr, bad_wr); 3299 3300 spin_lock_irqsave(&qp->q_lock, flags); 3301 3302 if (qp->state == QED_ROCE_QP_STATE_RESET) { 3303 spin_unlock_irqrestore(&qp->q_lock, flags); 3304 *bad_wr = wr; 3305 return -EINVAL; 3306 } 3307 3308 while (wr) { 3309 int i; 3310 3311 if (qed_chain_get_elem_left_u32(&qp->rq.pbl) < 3312 QEDR_MAX_RQE_ELEMENTS_PER_RQE || 3313 wr->num_sge > qp->rq.max_sges) { 3314 DP_ERR(dev, "Can't post WR (%d < %d) || (%d > %d)\n", 3315 qed_chain_get_elem_left_u32(&qp->rq.pbl), 3316 QEDR_MAX_RQE_ELEMENTS_PER_RQE, wr->num_sge, 3317 qp->rq.max_sges); 3318 status = -ENOMEM; 3319 *bad_wr = wr; 3320 break; 3321 } 3322 for (i = 0; i < wr->num_sge; i++) { 3323 u32 flags = 0; 3324 struct rdma_rq_sge *rqe = 3325 qed_chain_produce(&qp->rq.pbl); 3326 3327 /* First one must include the number 3328 * of SGE in the list 3329 */ 3330 if (!i) 3331 SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES, 3332 wr->num_sge); 3333 3334 SET_FIELD(flags, RDMA_RQ_SGE_L_KEY, 3335 wr->sg_list[i].lkey); 3336 3337 RQ_SGE_SET(rqe, wr->sg_list[i].addr, 3338 wr->sg_list[i].length, flags); 3339 } 3340 3341 /* Special case of no sges. FW requires between 1-4 sges... 3342 * in this case we need to post 1 sge with length zero. this is 3343 * because rdma write with immediate consumes an RQ. 
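		 * (continuation, summarising the code below): the dummy RQE is
		 * built with RDMA_RQ_SGE_NUM_SGES = 1, l_key 0 and length 0,
		 * and wqe_size is recorded as 1 so the completion path consumes
		 * exactly one chain element for it.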
3344 */ 3345 if (!wr->num_sge) { 3346 u32 flags = 0; 3347 struct rdma_rq_sge *rqe = 3348 qed_chain_produce(&qp->rq.pbl); 3349 3350 /* First one must include the number 3351 * of SGE in the list 3352 */ 3353 SET_FIELD(flags, RDMA_RQ_SGE_L_KEY, 0); 3354 SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES, 1); 3355 3356 RQ_SGE_SET(rqe, 0, 0, flags); 3357 i = 1; 3358 } 3359 3360 qp->rqe_wr_id[qp->rq.prod].wr_id = wr->wr_id; 3361 qp->rqe_wr_id[qp->rq.prod].wqe_size = i; 3362 3363 qedr_inc_sw_prod(&qp->rq); 3364 3365 /* Flush all the writes before signalling doorbell */ 3366 wmb(); 3367 3368 qp->rq.db_data.data.value++; 3369 3370 writel(qp->rq.db_data.raw, qp->rq.db); 3371 3372 /* Make sure write sticks */ 3373 mmiowb(); 3374 3375 if (rdma_protocol_iwarp(&dev->ibdev, 1)) { 3376 writel(qp->rq.iwarp_db2_data.raw, qp->rq.iwarp_db2); 3377 mmiowb(); /* for second doorbell */ 3378 } 3379 3380 wr = wr->next; 3381 } 3382 3383 spin_unlock_irqrestore(&qp->q_lock, flags); 3384 3385 return status; 3386 } 3387 3388 static int is_valid_cqe(struct qedr_cq *cq, union rdma_cqe *cqe) 3389 { 3390 struct rdma_cqe_requester *resp_cqe = &cqe->req; 3391 3392 return (resp_cqe->flags & RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK) == 3393 cq->pbl_toggle; 3394 } 3395 3396 static struct qedr_qp *cqe_get_qp(union rdma_cqe *cqe) 3397 { 3398 struct rdma_cqe_requester *resp_cqe = &cqe->req; 3399 struct qedr_qp *qp; 3400 3401 qp = (struct qedr_qp *)(uintptr_t)HILO_GEN(resp_cqe->qp_handle.hi, 3402 resp_cqe->qp_handle.lo, 3403 u64); 3404 return qp; 3405 } 3406 3407 static enum rdma_cqe_type cqe_get_type(union rdma_cqe *cqe) 3408 { 3409 struct rdma_cqe_requester *resp_cqe = &cqe->req; 3410 3411 return GET_FIELD(resp_cqe->flags, RDMA_CQE_REQUESTER_TYPE); 3412 } 3413 3414 /* Return latest CQE (needs processing) */ 3415 static union rdma_cqe *get_cqe(struct qedr_cq *cq) 3416 { 3417 return cq->latest_cqe; 3418 } 3419 3420 /* In fmr we need to increase the number of fmr completed counter for the fmr 3421 * algorithm determining whether we can free a pbl or not. 3422 * we need to perform this whether the work request was signaled or not. 
for 3423 * this purpose we call this function from the condition that checks if a wr 3424 * should be skipped, to make sure we don't miss it ( possibly this fmr 3425 * operation was not signalted) 3426 */ 3427 static inline void qedr_chk_if_fmr(struct qedr_qp *qp) 3428 { 3429 if (qp->wqe_wr_id[qp->sq.cons].opcode == IB_WC_REG_MR) 3430 qp->wqe_wr_id[qp->sq.cons].mr->info.completed++; 3431 } 3432 3433 static int process_req(struct qedr_dev *dev, struct qedr_qp *qp, 3434 struct qedr_cq *cq, int num_entries, 3435 struct ib_wc *wc, u16 hw_cons, enum ib_wc_status status, 3436 int force) 3437 { 3438 u16 cnt = 0; 3439 3440 while (num_entries && qp->sq.wqe_cons != hw_cons) { 3441 if (!qp->wqe_wr_id[qp->sq.cons].signaled && !force) { 3442 qedr_chk_if_fmr(qp); 3443 /* skip WC */ 3444 goto next_cqe; 3445 } 3446 3447 /* fill WC */ 3448 wc->status = status; 3449 wc->vendor_err = 0; 3450 wc->wc_flags = 0; 3451 wc->src_qp = qp->id; 3452 wc->qp = &qp->ibqp; 3453 3454 wc->wr_id = qp->wqe_wr_id[qp->sq.cons].wr_id; 3455 wc->opcode = qp->wqe_wr_id[qp->sq.cons].opcode; 3456 3457 switch (wc->opcode) { 3458 case IB_WC_RDMA_WRITE: 3459 wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len; 3460 break; 3461 case IB_WC_COMP_SWAP: 3462 case IB_WC_FETCH_ADD: 3463 wc->byte_len = 8; 3464 break; 3465 case IB_WC_REG_MR: 3466 qp->wqe_wr_id[qp->sq.cons].mr->info.completed++; 3467 break; 3468 case IB_WC_RDMA_READ: 3469 case IB_WC_SEND: 3470 wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len; 3471 break; 3472 default: 3473 break; 3474 } 3475 3476 num_entries--; 3477 wc++; 3478 cnt++; 3479 next_cqe: 3480 while (qp->wqe_wr_id[qp->sq.cons].wqe_size--) 3481 qed_chain_consume(&qp->sq.pbl); 3482 qedr_inc_sw_cons(&qp->sq); 3483 } 3484 3485 return cnt; 3486 } 3487 3488 static int qedr_poll_cq_req(struct qedr_dev *dev, 3489 struct qedr_qp *qp, struct qedr_cq *cq, 3490 int num_entries, struct ib_wc *wc, 3491 struct rdma_cqe_requester *req) 3492 { 3493 int cnt = 0; 3494 3495 switch (req->status) { 3496 case RDMA_CQE_REQ_STS_OK: 3497 cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons, 3498 IB_WC_SUCCESS, 0); 3499 break; 3500 case RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR: 3501 if (qp->state != QED_ROCE_QP_STATE_ERR) 3502 DP_DEBUG(dev, QEDR_MSG_CQ, 3503 "Error: POLL CQ with RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR. CQ icid=0x%x, QP icid=0x%x\n", 3504 cq->icid, qp->icid); 3505 cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons, 3506 IB_WC_WR_FLUSH_ERR, 1); 3507 break; 3508 default: 3509 /* process all WQE before the cosumer */ 3510 qp->state = QED_ROCE_QP_STATE_ERR; 3511 cnt = process_req(dev, qp, cq, num_entries, wc, 3512 req->sq_cons - 1, IB_WC_SUCCESS, 0); 3513 wc += cnt; 3514 /* if we have extra WC fill it with actual error info */ 3515 if (cnt < num_entries) { 3516 enum ib_wc_status wc_status; 3517 3518 switch (req->status) { 3519 case RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR: 3520 DP_ERR(dev, 3521 "Error: POLL CQ with RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR. CQ icid=0x%x, QP icid=0x%x\n", 3522 cq->icid, qp->icid); 3523 wc_status = IB_WC_BAD_RESP_ERR; 3524 break; 3525 case RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR: 3526 DP_ERR(dev, 3527 "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR. CQ icid=0x%x, QP icid=0x%x\n", 3528 cq->icid, qp->icid); 3529 wc_status = IB_WC_LOC_LEN_ERR; 3530 break; 3531 case RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR: 3532 DP_ERR(dev, 3533 "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR. 
CQ icid=0x%x, QP icid=0x%x\n", 3534 cq->icid, qp->icid); 3535 wc_status = IB_WC_LOC_QP_OP_ERR; 3536 break; 3537 case RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR: 3538 DP_ERR(dev, 3539 "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR. CQ icid=0x%x, QP icid=0x%x\n", 3540 cq->icid, qp->icid); 3541 wc_status = IB_WC_LOC_PROT_ERR; 3542 break; 3543 case RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR: 3544 DP_ERR(dev, 3545 "Error: POLL CQ with RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n", 3546 cq->icid, qp->icid); 3547 wc_status = IB_WC_MW_BIND_ERR; 3548 break; 3549 case RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR: 3550 DP_ERR(dev, 3551 "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR. CQ icid=0x%x, QP icid=0x%x\n", 3552 cq->icid, qp->icid); 3553 wc_status = IB_WC_REM_INV_REQ_ERR; 3554 break; 3555 case RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR: 3556 DP_ERR(dev, 3557 "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR. CQ icid=0x%x, QP icid=0x%x\n", 3558 cq->icid, qp->icid); 3559 wc_status = IB_WC_REM_ACCESS_ERR; 3560 break; 3561 case RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR: 3562 DP_ERR(dev, 3563 "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n", 3564 cq->icid, qp->icid); 3565 wc_status = IB_WC_REM_OP_ERR; 3566 break; 3567 case RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR: 3568 DP_ERR(dev, 3569 "Error: POLL CQ with RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR. CQ icid=0x%x, QP icid=0x%x\n", 3570 cq->icid, qp->icid); 3571 wc_status = IB_WC_RNR_RETRY_EXC_ERR; 3572 break; 3573 case RDMA_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR: 3574 DP_ERR(dev, 3575 "Error: POLL CQ with ROCE_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR. CQ icid=0x%x, QP icid=0x%x\n", 3576 cq->icid, qp->icid); 3577 wc_status = IB_WC_RETRY_EXC_ERR; 3578 break; 3579 default: 3580 DP_ERR(dev, 3581 "Error: POLL CQ with IB_WC_GENERAL_ERR. 
CQ icid=0x%x, QP icid=0x%x\n", 3582 cq->icid, qp->icid); 3583 wc_status = IB_WC_GENERAL_ERR; 3584 } 3585 cnt += process_req(dev, qp, cq, 1, wc, req->sq_cons, 3586 wc_status, 1); 3587 } 3588 } 3589 3590 return cnt; 3591 } 3592 3593 static inline int qedr_cqe_resp_status_to_ib(u8 status) 3594 { 3595 switch (status) { 3596 case RDMA_CQE_RESP_STS_LOCAL_ACCESS_ERR: 3597 return IB_WC_LOC_ACCESS_ERR; 3598 case RDMA_CQE_RESP_STS_LOCAL_LENGTH_ERR: 3599 return IB_WC_LOC_LEN_ERR; 3600 case RDMA_CQE_RESP_STS_LOCAL_QP_OPERATION_ERR: 3601 return IB_WC_LOC_QP_OP_ERR; 3602 case RDMA_CQE_RESP_STS_LOCAL_PROTECTION_ERR: 3603 return IB_WC_LOC_PROT_ERR; 3604 case RDMA_CQE_RESP_STS_MEMORY_MGT_OPERATION_ERR: 3605 return IB_WC_MW_BIND_ERR; 3606 case RDMA_CQE_RESP_STS_REMOTE_INVALID_REQUEST_ERR: 3607 return IB_WC_REM_INV_RD_REQ_ERR; 3608 case RDMA_CQE_RESP_STS_OK: 3609 return IB_WC_SUCCESS; 3610 default: 3611 return IB_WC_GENERAL_ERR; 3612 } 3613 } 3614 3615 static inline int qedr_set_ok_cqe_resp_wc(struct rdma_cqe_responder *resp, 3616 struct ib_wc *wc) 3617 { 3618 wc->status = IB_WC_SUCCESS; 3619 wc->byte_len = le32_to_cpu(resp->length); 3620 3621 if (resp->flags & QEDR_RESP_IMM) { 3622 wc->ex.imm_data = cpu_to_be32(le32_to_cpu(resp->imm_data_or_inv_r_Key)); 3623 wc->wc_flags |= IB_WC_WITH_IMM; 3624 3625 if (resp->flags & QEDR_RESP_RDMA) 3626 wc->opcode = IB_WC_RECV_RDMA_WITH_IMM; 3627 3628 if (resp->flags & QEDR_RESP_INV) 3629 return -EINVAL; 3630 3631 } else if (resp->flags & QEDR_RESP_INV) { 3632 wc->ex.imm_data = le32_to_cpu(resp->imm_data_or_inv_r_Key); 3633 wc->wc_flags |= IB_WC_WITH_INVALIDATE; 3634 3635 if (resp->flags & QEDR_RESP_RDMA) 3636 return -EINVAL; 3637 3638 } else if (resp->flags & QEDR_RESP_RDMA) { 3639 return -EINVAL; 3640 } 3641 3642 return 0; 3643 } 3644 3645 static void __process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp, 3646 struct qedr_cq *cq, struct ib_wc *wc, 3647 struct rdma_cqe_responder *resp, u64 wr_id) 3648 { 3649 /* Must fill fields before qedr_set_ok_cqe_resp_wc() */ 3650 wc->opcode = IB_WC_RECV; 3651 wc->wc_flags = 0; 3652 3653 if (likely(resp->status == RDMA_CQE_RESP_STS_OK)) { 3654 if (qedr_set_ok_cqe_resp_wc(resp, wc)) 3655 DP_ERR(dev, 3656 "CQ %p (icid=%d) has invalid CQE responder flags=0x%x\n", 3657 cq, cq->icid, resp->flags); 3658 3659 } else { 3660 wc->status = qedr_cqe_resp_status_to_ib(resp->status); 3661 if (wc->status == IB_WC_GENERAL_ERR) 3662 DP_ERR(dev, 3663 "CQ %p (icid=%d) contains an invalid CQE status %d\n", 3664 cq, cq->icid, resp->status); 3665 } 3666 3667 /* Fill the rest of the WC */ 3668 wc->vendor_err = 0; 3669 wc->src_qp = qp->id; 3670 wc->qp = &qp->ibqp; 3671 wc->wr_id = wr_id; 3672 } 3673 3674 static int process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp, 3675 struct qedr_cq *cq, struct ib_wc *wc, 3676 struct rdma_cqe_responder *resp) 3677 { 3678 u64 wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id; 3679 3680 __process_resp_one(dev, qp, cq, wc, resp, wr_id); 3681 3682 while (qp->rqe_wr_id[qp->rq.cons].wqe_size--) 3683 qed_chain_consume(&qp->rq.pbl); 3684 qedr_inc_sw_cons(&qp->rq); 3685 3686 return 1; 3687 } 3688 3689 static int process_resp_flush(struct qedr_qp *qp, struct qedr_cq *cq, 3690 int num_entries, struct ib_wc *wc, u16 hw_cons) 3691 { 3692 u16 cnt = 0; 3693 3694 while (num_entries && qp->rq.wqe_cons != hw_cons) { 3695 /* fill WC */ 3696 wc->status = IB_WC_WR_FLUSH_ERR; 3697 wc->vendor_err = 0; 3698 wc->wc_flags = 0; 3699 wc->src_qp = qp->id; 3700 wc->byte_len = 0; 3701 wc->wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id; 3702 wc->qp = 
&qp->ibqp; 3703 num_entries--; 3704 wc++; 3705 cnt++; 3706 while (qp->rqe_wr_id[qp->rq.cons].wqe_size--) 3707 qed_chain_consume(&qp->rq.pbl); 3708 qedr_inc_sw_cons(&qp->rq); 3709 } 3710 3711 return cnt; 3712 } 3713 3714 static void try_consume_resp_cqe(struct qedr_cq *cq, struct qedr_qp *qp, 3715 struct rdma_cqe_responder *resp, int *update) 3716 { 3717 if (le16_to_cpu(resp->rq_cons_or_srq_id) == qp->rq.wqe_cons) { 3718 consume_cqe(cq); 3719 *update |= 1; 3720 } 3721 } 3722 3723 static int qedr_poll_cq_resp(struct qedr_dev *dev, struct qedr_qp *qp, 3724 struct qedr_cq *cq, int num_entries, 3725 struct ib_wc *wc, struct rdma_cqe_responder *resp, 3726 int *update) 3727 { 3728 int cnt; 3729 3730 if (resp->status == RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR) { 3731 cnt = process_resp_flush(qp, cq, num_entries, wc, 3732 resp->rq_cons_or_srq_id); 3733 try_consume_resp_cqe(cq, qp, resp, update); 3734 } else { 3735 cnt = process_resp_one(dev, qp, cq, wc, resp); 3736 consume_cqe(cq); 3737 *update |= 1; 3738 } 3739 3740 return cnt; 3741 } 3742 3743 static void try_consume_req_cqe(struct qedr_cq *cq, struct qedr_qp *qp, 3744 struct rdma_cqe_requester *req, int *update) 3745 { 3746 if (le16_to_cpu(req->sq_cons) == qp->sq.wqe_cons) { 3747 consume_cqe(cq); 3748 *update |= 1; 3749 } 3750 } 3751 3752 int qedr_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) 3753 { 3754 struct qedr_dev *dev = get_qedr_dev(ibcq->device); 3755 struct qedr_cq *cq = get_qedr_cq(ibcq); 3756 union rdma_cqe *cqe; 3757 u32 old_cons, new_cons; 3758 unsigned long flags; 3759 int update = 0; 3760 int done = 0; 3761 3762 if (cq->destroyed) { 3763 DP_ERR(dev, 3764 "warning: poll was invoked after destroy for cq %p (icid=%d)\n", 3765 cq, cq->icid); 3766 return 0; 3767 } 3768 3769 if (cq->cq_type == QEDR_CQ_TYPE_GSI) 3770 return qedr_gsi_poll_cq(ibcq, num_entries, wc); 3771 3772 spin_lock_irqsave(&cq->cq_lock, flags); 3773 cqe = cq->latest_cqe; 3774 old_cons = qed_chain_get_cons_idx_u32(&cq->pbl); 3775 while (num_entries && is_valid_cqe(cq, cqe)) { 3776 struct qedr_qp *qp; 3777 int cnt = 0; 3778 3779 /* prevent speculative reads of any field of CQE */ 3780 rmb(); 3781 3782 qp = cqe_get_qp(cqe); 3783 if (!qp) { 3784 WARN(1, "Error: CQE QP pointer is NULL. 
CQE=%p\n", cqe); 3785 break; 3786 } 3787 3788 wc->qp = &qp->ibqp; 3789 3790 switch (cqe_get_type(cqe)) { 3791 case RDMA_CQE_TYPE_REQUESTER: 3792 cnt = qedr_poll_cq_req(dev, qp, cq, num_entries, wc, 3793 &cqe->req); 3794 try_consume_req_cqe(cq, qp, &cqe->req, &update); 3795 break; 3796 case RDMA_CQE_TYPE_RESPONDER_RQ: 3797 cnt = qedr_poll_cq_resp(dev, qp, cq, num_entries, wc, 3798 &cqe->resp, &update); 3799 break; 3800 case RDMA_CQE_TYPE_INVALID: 3801 default: 3802 DP_ERR(dev, "Error: invalid CQE type = %d\n", 3803 cqe_get_type(cqe)); 3804 } 3805 num_entries -= cnt; 3806 wc += cnt; 3807 done += cnt; 3808 3809 cqe = get_cqe(cq); 3810 } 3811 new_cons = qed_chain_get_cons_idx_u32(&cq->pbl); 3812 3813 cq->cq_cons += new_cons - old_cons; 3814 3815 if (update) 3816 /* doorbell notifies abount latest VALID entry, 3817 * but chain already point to the next INVALID one 3818 */ 3819 doorbell_cq(cq, cq->cq_cons - 1, cq->arm_flags); 3820 3821 spin_unlock_irqrestore(&cq->cq_lock, flags); 3822 return done; 3823 } 3824 3825 int qedr_process_mad(struct ib_device *ibdev, int process_mad_flags, 3826 u8 port_num, 3827 const struct ib_wc *in_wc, 3828 const struct ib_grh *in_grh, 3829 const struct ib_mad_hdr *mad_hdr, 3830 size_t in_mad_size, struct ib_mad_hdr *out_mad, 3831 size_t *out_mad_size, u16 *out_mad_pkey_index) 3832 { 3833 struct qedr_dev *dev = get_qedr_dev(ibdev); 3834 3835 DP_DEBUG(dev, QEDR_MSG_GSI, 3836 "QEDR_PROCESS_MAD in_mad %x %x %x %x %x %x %x %x\n", 3837 mad_hdr->attr_id, mad_hdr->base_version, mad_hdr->attr_mod, 3838 mad_hdr->class_specific, mad_hdr->class_version, 3839 mad_hdr->method, mad_hdr->mgmt_class, mad_hdr->status); 3840 return IB_MAD_RESULT_SUCCESS; 3841 } 3842