1 /* QLogic qedr NIC Driver 2 * Copyright (c) 2015-2016 QLogic Corporation 3 * 4 * This software is available to you under a choice of one of two 5 * licenses. You may choose to be licensed under the terms of the GNU 6 * General Public License (GPL) Version 2, available from the file 7 * COPYING in the main directory of this source tree, or the 8 * OpenIB.org BSD license below: 9 * 10 * Redistribution and use in source and binary forms, with or 11 * without modification, are permitted provided that the following 12 * conditions are met: 13 * 14 * - Redistributions of source code must retain the above 15 * copyright notice, this list of conditions and the following 16 * disclaimer. 17 * 18 * - Redistributions in binary form must reproduce the above 19 * copyright notice, this list of conditions and the following 20 * disclaimer in the documentation and /or other materials 21 * provided with the distribution. 22 * 23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * SOFTWARE. 31 */ 32 #include <linux/dma-mapping.h> 33 #include <linux/crc32.h> 34 #include <net/ip.h> 35 #include <net/ipv6.h> 36 #include <net/udp.h> 37 #include <linux/iommu.h> 38 39 #include <rdma/ib_verbs.h> 40 #include <rdma/ib_user_verbs.h> 41 #include <rdma/iw_cm.h> 42 #include <rdma/ib_umem.h> 43 #include <rdma/ib_addr.h> 44 #include <rdma/ib_cache.h> 45 46 #include <linux/qed/common_hsi.h> 47 #include "qedr_hsi_rdma.h" 48 #include <linux/qed/qed_if.h> 49 #include "qedr.h" 50 #include "verbs.h" 51 #include <rdma/qedr-abi.h> 52 #include "qedr_roce_cm.h" 53 54 #define DB_ADDR_SHIFT(addr) ((addr) << DB_PWM_ADDR_OFFSET_SHIFT) 55 56 static inline int qedr_ib_copy_to_udata(struct ib_udata *udata, void *src, 57 size_t len) 58 { 59 size_t min_len = min_t(size_t, len, udata->outlen); 60 61 return ib_copy_to_udata(udata, src, min_len); 62 } 63 64 int qedr_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) 65 { 66 if (index > QEDR_ROCE_PKEY_TABLE_LEN) 67 return -EINVAL; 68 69 *pkey = QEDR_ROCE_PKEY_DEFAULT; 70 return 0; 71 } 72 73 int qedr_iw_query_gid(struct ib_device *ibdev, u8 port, 74 int index, union ib_gid *sgid) 75 { 76 struct qedr_dev *dev = get_qedr_dev(ibdev); 77 78 memset(sgid->raw, 0, sizeof(sgid->raw)); 79 ether_addr_copy(sgid->raw, dev->ndev->dev_addr); 80 81 DP_DEBUG(dev, QEDR_MSG_INIT, "QUERY sgid[%d]=%llx:%llx\n", index, 82 sgid->global.interface_id, sgid->global.subnet_prefix); 83 84 return 0; 85 } 86 87 int qedr_query_device(struct ib_device *ibdev, 88 struct ib_device_attr *attr, struct ib_udata *udata) 89 { 90 struct qedr_dev *dev = get_qedr_dev(ibdev); 91 struct qedr_device_attr *qattr = &dev->attr; 92 93 if (!dev->rdma_ctx) { 94 DP_ERR(dev, 95 "qedr_query_device called with invalid params rdma_ctx=%p\n", 96 dev->rdma_ctx); 97 return -EINVAL; 98 } 99 100 memset(attr, 0, sizeof(*attr)); 101 102 attr->fw_ver = qattr->fw_ver; 103 attr->sys_image_guid = qattr->sys_image_guid; 104 attr->max_mr_size = qattr->max_mr_size; 105 attr->page_size_cap = qattr->page_size_caps; 106 attr->vendor_id = qattr->vendor_id; 107 attr->vendor_part_id = qattr->vendor_part_id; 108 attr->hw_ver = qattr->hw_ver; 109 attr->max_qp = qattr->max_qp; 110 attr->max_qp_wr = max_t(u32, qattr->max_sqe, qattr->max_rqe); 111 attr->device_cap_flags = IB_DEVICE_CURR_QP_STATE_MOD | 112 IB_DEVICE_RC_RNR_NAK_GEN | 113 IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS; 114 115 attr->max_sge = qattr->max_sge; 116 attr->max_sge_rd = qattr->max_sge; 117 attr->max_cq = qattr->max_cq; 118 attr->max_cqe = qattr->max_cqe; 119 attr->max_mr = qattr->max_mr; 120 attr->max_mw = qattr->max_mw; 121 attr->max_pd = qattr->max_pd; 122 attr->atomic_cap = dev->atomic_cap; 123 attr->max_fmr = qattr->max_fmr; 124 attr->max_map_per_fmr = 16; 125 attr->max_qp_init_rd_atom = 126 1 << (fls(qattr->max_qp_req_rd_atomic_resc) - 1); 127 attr->max_qp_rd_atom = 128 min(1 << (fls(qattr->max_qp_resp_rd_atomic_resc) - 1), 129 attr->max_qp_init_rd_atom); 130 131 attr->max_srq = qattr->max_srq; 132 attr->max_srq_sge = qattr->max_srq_sge; 133 attr->max_srq_wr = qattr->max_srq_wr; 134 135 attr->local_ca_ack_delay = qattr->dev_ack_delay; 136 attr->max_fast_reg_page_list_len = qattr->max_mr / 8; 137 attr->max_pkeys = QEDR_ROCE_PKEY_MAX; 138 attr->max_ah = qattr->max_ah; 139 140 return 0; 141 } 142 143 #define QEDR_SPEED_SDR (1) 144 #define QEDR_SPEED_DDR (2) 145 #define QEDR_SPEED_QDR (4) 146 #define QEDR_SPEED_FDR10 (8) 147 #define QEDR_SPEED_FDR (16) 148 #define QEDR_SPEED_EDR (32) 149 150 static inline void get_link_speed_and_width(int speed, u8 *ib_speed, 151 u8 *ib_width) 152 { 153 switch (speed) { 154 case 1000: 155 *ib_speed = QEDR_SPEED_SDR; 156 *ib_width = IB_WIDTH_1X; 157 break; 158 case 10000: 159 *ib_speed = QEDR_SPEED_QDR; 160 *ib_width = IB_WIDTH_1X; 161 break; 162 163 case 20000: 164 *ib_speed = QEDR_SPEED_DDR; 165 *ib_width = IB_WIDTH_4X; 166 break; 167 168 case 25000: 169 *ib_speed = QEDR_SPEED_EDR; 170 *ib_width = IB_WIDTH_1X; 171 break; 172 173 case 40000: 174 *ib_speed = QEDR_SPEED_QDR; 175 *ib_width = IB_WIDTH_4X; 176 break; 177 178 case 50000: 179 *ib_speed = QEDR_SPEED_QDR; 180 *ib_width = IB_WIDTH_4X; 181 break; 182 183 case 100000: 184 *ib_speed = QEDR_SPEED_EDR; 185 *ib_width = IB_WIDTH_4X; 186 break; 187 188 default: 189 /* Unsupported */ 190 *ib_speed = QEDR_SPEED_SDR; 191 *ib_width = IB_WIDTH_1X; 192 } 193 } 194 195 int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr) 196 { 197 struct qedr_dev *dev; 198 struct qed_rdma_port *rdma_port; 199 200 dev = get_qedr_dev(ibdev); 201 if (port > 1) { 202 DP_ERR(dev, "invalid_port=0x%x\n", port); 203 return -EINVAL; 204 } 205 206 if (!dev->rdma_ctx) { 207 DP_ERR(dev, "rdma_ctx is NULL\n"); 208 return -EINVAL; 209 } 210 211 rdma_port = dev->ops->rdma_query_port(dev->rdma_ctx); 212 213 /* *attr being zeroed by the caller, avoid zeroing it here */ 214 if (rdma_port->port_state == QED_RDMA_PORT_UP) { 215 attr->state = IB_PORT_ACTIVE; 216 attr->phys_state = 5; 217 } else { 218 attr->state = IB_PORT_DOWN; 219 attr->phys_state = 3; 220 } 221 attr->max_mtu = IB_MTU_4096; 222 attr->active_mtu = iboe_get_mtu(dev->ndev->mtu); 223 attr->lid = 0; 224 attr->lmc = 0; 225 attr->sm_lid = 0; 226 attr->sm_sl = 0; 227 attr->port_cap_flags = IB_PORT_IP_BASED_GIDS; 228 if (rdma_protocol_iwarp(&dev->ibdev, 1)) { 229 attr->gid_tbl_len = 1; 230 attr->pkey_tbl_len = 1; 231 } else { 232 attr->gid_tbl_len = QEDR_MAX_SGID; 233 attr->pkey_tbl_len = QEDR_ROCE_PKEY_TABLE_LEN; 234 } 235 attr->bad_pkey_cntr = rdma_port->pkey_bad_counter; 236 attr->qkey_viol_cntr = 0; 237 get_link_speed_and_width(rdma_port->link_speed, 238 &attr->active_speed, &attr->active_width); 239 attr->max_msg_sz = rdma_port->max_msg_size; 240 attr->max_vl_num = 4; 241 242 return 0; 243 } 244 245 int qedr_modify_port(struct ib_device *ibdev, u8 port, int mask, 246 struct ib_port_modify *props) 247 { 248 struct qedr_dev *dev; 249 250 dev = get_qedr_dev(ibdev); 251 if (port > 1) { 252 DP_ERR(dev, "invalid_port=0x%x\n", port); 253 return -EINVAL; 254 } 255 256 return 0; 257 } 258 259 static int qedr_add_mmap(struct qedr_ucontext *uctx, u64 phy_addr, 260 unsigned long len) 261 { 262 struct qedr_mm *mm; 263 264 mm = kzalloc(sizeof(*mm), GFP_KERNEL); 265 if (!mm) 266 return -ENOMEM; 267 268 mm->key.phy_addr = phy_addr; 269 /* This function might be called with a length which is not a multiple 270 * of PAGE_SIZE, while the mapping is PAGE_SIZE grained and the kernel 271 * forces this granularity by increasing the requested size if needed. 272 * When qedr_mmap is called, it will search the list with the updated 273 * length as a key. To prevent search failures, the length is rounded up 274 * in advance to PAGE_SIZE. 275 */ 276 mm->key.len = roundup(len, PAGE_SIZE); 277 INIT_LIST_HEAD(&mm->entry); 278 279 mutex_lock(&uctx->mm_list_lock); 280 list_add(&mm->entry, &uctx->mm_head); 281 mutex_unlock(&uctx->mm_list_lock); 282 283 DP_DEBUG(uctx->dev, QEDR_MSG_MISC, 284 "added (addr=0x%llx,len=0x%lx) for ctx=%p\n", 285 (unsigned long long)mm->key.phy_addr, 286 (unsigned long)mm->key.len, uctx); 287 288 return 0; 289 } 290 291 static bool qedr_search_mmap(struct qedr_ucontext *uctx, u64 phy_addr, 292 unsigned long len) 293 { 294 bool found = false; 295 struct qedr_mm *mm; 296 297 mutex_lock(&uctx->mm_list_lock); 298 list_for_each_entry(mm, &uctx->mm_head, entry) { 299 if (len != mm->key.len || phy_addr != mm->key.phy_addr) 300 continue; 301 302 found = true; 303 break; 304 } 305 mutex_unlock(&uctx->mm_list_lock); 306 DP_DEBUG(uctx->dev, QEDR_MSG_MISC, 307 "searched for (addr=0x%llx,len=0x%lx) for ctx=%p, result=%d\n", 308 mm->key.phy_addr, mm->key.len, uctx, found); 309 310 return found; 311 } 312 313 struct ib_ucontext *qedr_alloc_ucontext(struct ib_device *ibdev, 314 struct ib_udata *udata) 315 { 316 int rc; 317 struct qedr_ucontext *ctx; 318 struct qedr_alloc_ucontext_resp uresp; 319 struct qedr_dev *dev = get_qedr_dev(ibdev); 320 struct qed_rdma_add_user_out_params oparams; 321 322 if (!udata) 323 return ERR_PTR(-EFAULT); 324 325 ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); 326 if (!ctx) 327 return ERR_PTR(-ENOMEM); 328 329 rc = dev->ops->rdma_add_user(dev->rdma_ctx, &oparams); 330 if (rc) { 331 DP_ERR(dev, 332 "failed to allocate a DPI for a new RoCE application, rc=%d. To overcome this consider to increase the number of DPIs, increase the doorbell BAR size or just close unnecessary RoCE applications. In order to increase the number of DPIs consult the qedr readme\n", 333 rc); 334 goto err; 335 } 336 337 ctx->dpi = oparams.dpi; 338 ctx->dpi_addr = oparams.dpi_addr; 339 ctx->dpi_phys_addr = oparams.dpi_phys_addr; 340 ctx->dpi_size = oparams.dpi_size; 341 INIT_LIST_HEAD(&ctx->mm_head); 342 mutex_init(&ctx->mm_list_lock); 343 344 memset(&uresp, 0, sizeof(uresp)); 345 346 uresp.dpm_enabled = dev->user_dpm_enabled; 347 uresp.wids_enabled = 1; 348 uresp.wid_count = oparams.wid_count; 349 uresp.db_pa = ctx->dpi_phys_addr; 350 uresp.db_size = ctx->dpi_size; 351 uresp.max_send_wr = dev->attr.max_sqe; 352 uresp.max_recv_wr = dev->attr.max_rqe; 353 uresp.max_srq_wr = dev->attr.max_srq_wr; 354 uresp.sges_per_send_wr = QEDR_MAX_SQE_ELEMENTS_PER_SQE; 355 uresp.sges_per_recv_wr = QEDR_MAX_RQE_ELEMENTS_PER_RQE; 356 uresp.sges_per_srq_wr = dev->attr.max_srq_sge; 357 uresp.max_cqes = QEDR_MAX_CQES; 358 359 rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp)); 360 if (rc) 361 goto err; 362 363 ctx->dev = dev; 364 365 rc = qedr_add_mmap(ctx, ctx->dpi_phys_addr, ctx->dpi_size); 366 if (rc) 367 goto err; 368 369 DP_DEBUG(dev, QEDR_MSG_INIT, "Allocating user context %p\n", 370 &ctx->ibucontext); 371 return &ctx->ibucontext; 372 373 err: 374 kfree(ctx); 375 return ERR_PTR(rc); 376 } 377 378 int qedr_dealloc_ucontext(struct ib_ucontext *ibctx) 379 { 380 struct qedr_ucontext *uctx = get_qedr_ucontext(ibctx); 381 struct qedr_mm *mm, *tmp; 382 int status = 0; 383 384 DP_DEBUG(uctx->dev, QEDR_MSG_INIT, "Deallocating user context %p\n", 385 uctx); 386 uctx->dev->ops->rdma_remove_user(uctx->dev->rdma_ctx, uctx->dpi); 387 388 list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) { 389 DP_DEBUG(uctx->dev, QEDR_MSG_MISC, 390 "deleted (addr=0x%llx,len=0x%lx) for ctx=%p\n", 391 mm->key.phy_addr, mm->key.len, uctx); 392 list_del(&mm->entry); 393 kfree(mm); 394 } 395 396 kfree(uctx); 397 return status; 398 } 399 400 int qedr_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) 401 { 402 struct qedr_ucontext *ucontext = get_qedr_ucontext(context); 403 struct qedr_dev *dev = get_qedr_dev(context->device); 404 unsigned long vm_page = vma->vm_pgoff << PAGE_SHIFT; 405 u64 unmapped_db = dev->db_phys_addr; 406 unsigned long len = (vma->vm_end - vma->vm_start); 407 int rc = 0; 408 bool found; 409 410 DP_DEBUG(dev, QEDR_MSG_INIT, 411 "qedr_mmap called vm_page=0x%lx vm_pgoff=0x%lx unmapped_db=0x%llx db_size=%x, len=%lx\n", 412 vm_page, vma->vm_pgoff, unmapped_db, dev->db_size, len); 413 if (vma->vm_start & (PAGE_SIZE - 1)) { 414 DP_ERR(dev, "Vma_start not page aligned = %ld\n", 415 vma->vm_start); 416 return -EINVAL; 417 } 418 419 found = qedr_search_mmap(ucontext, vm_page, len); 420 if (!found) { 421 DP_ERR(dev, "Vma_pgoff not found in mapped array = %ld\n", 422 vma->vm_pgoff); 423 return -EINVAL; 424 } 425 426 DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping doorbell bar\n"); 427 428 if ((vm_page >= unmapped_db) && (vm_page <= (unmapped_db + 429 dev->db_size))) { 430 DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping doorbell bar\n"); 431 if (vma->vm_flags & VM_READ) { 432 DP_ERR(dev, "Trying to map doorbell bar for read\n"); 433 return -EPERM; 434 } 435 436 vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); 437 438 rc = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, 439 PAGE_SIZE, vma->vm_page_prot); 440 } else { 441 DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping chains\n"); 442 rc = remap_pfn_range(vma, vma->vm_start, 443 vma->vm_pgoff, len, vma->vm_page_prot); 444 } 445 DP_DEBUG(dev, QEDR_MSG_INIT, "qedr_mmap return code: %d\n", rc); 446 return rc; 447 } 448 449 struct ib_pd *qedr_alloc_pd(struct ib_device *ibdev, 450 struct ib_ucontext *context, struct ib_udata *udata) 451 { 452 struct qedr_dev *dev = get_qedr_dev(ibdev); 453 struct qedr_pd *pd; 454 u16 pd_id; 455 int rc; 456 457 DP_DEBUG(dev, QEDR_MSG_INIT, "Function called from: %s\n", 458 (udata && context) ? "User Lib" : "Kernel"); 459 460 if (!dev->rdma_ctx) { 461 DP_ERR(dev, "invalid RDMA context\n"); 462 return ERR_PTR(-EINVAL); 463 } 464 465 pd = kzalloc(sizeof(*pd), GFP_KERNEL); 466 if (!pd) 467 return ERR_PTR(-ENOMEM); 468 469 rc = dev->ops->rdma_alloc_pd(dev->rdma_ctx, &pd_id); 470 if (rc) 471 goto err; 472 473 pd->pd_id = pd_id; 474 475 if (udata && context) { 476 struct qedr_alloc_pd_uresp uresp = { 477 .pd_id = pd_id, 478 }; 479 480 rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp)); 481 if (rc) { 482 DP_ERR(dev, "copy error pd_id=0x%x.\n", pd_id); 483 dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd_id); 484 goto err; 485 } 486 487 pd->uctx = get_qedr_ucontext(context); 488 pd->uctx->pd = pd; 489 } 490 491 return &pd->ibpd; 492 493 err: 494 kfree(pd); 495 return ERR_PTR(rc); 496 } 497 498 int qedr_dealloc_pd(struct ib_pd *ibpd) 499 { 500 struct qedr_dev *dev = get_qedr_dev(ibpd->device); 501 struct qedr_pd *pd = get_qedr_pd(ibpd); 502 503 if (!pd) { 504 pr_err("Invalid PD received in dealloc_pd\n"); 505 return -EINVAL; 506 } 507 508 DP_DEBUG(dev, QEDR_MSG_INIT, "Deallocating PD %d\n", pd->pd_id); 509 dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd->pd_id); 510 511 kfree(pd); 512 513 return 0; 514 } 515 516 static void qedr_free_pbl(struct qedr_dev *dev, 517 struct qedr_pbl_info *pbl_info, struct qedr_pbl *pbl) 518 { 519 struct pci_dev *pdev = dev->pdev; 520 int i; 521 522 for (i = 0; i < pbl_info->num_pbls; i++) { 523 if (!pbl[i].va) 524 continue; 525 dma_free_coherent(&pdev->dev, pbl_info->pbl_size, 526 pbl[i].va, pbl[i].pa); 527 } 528 529 kfree(pbl); 530 } 531 532 #define MIN_FW_PBL_PAGE_SIZE (4 * 1024) 533 #define MAX_FW_PBL_PAGE_SIZE (64 * 1024) 534 535 #define NUM_PBES_ON_PAGE(_page_size) (_page_size / sizeof(u64)) 536 #define MAX_PBES_ON_PAGE NUM_PBES_ON_PAGE(MAX_FW_PBL_PAGE_SIZE) 537 #define MAX_PBES_TWO_LAYER (MAX_PBES_ON_PAGE * MAX_PBES_ON_PAGE) 538 539 static struct qedr_pbl *qedr_alloc_pbl_tbl(struct qedr_dev *dev, 540 struct qedr_pbl_info *pbl_info, 541 gfp_t flags) 542 { 543 struct pci_dev *pdev = dev->pdev; 544 struct qedr_pbl *pbl_table; 545 dma_addr_t *pbl_main_tbl; 546 dma_addr_t pa; 547 void *va; 548 int i; 549 550 pbl_table = kcalloc(pbl_info->num_pbls, sizeof(*pbl_table), flags); 551 if (!pbl_table) 552 return ERR_PTR(-ENOMEM); 553 554 for (i = 0; i < pbl_info->num_pbls; i++) { 555 va = dma_zalloc_coherent(&pdev->dev, pbl_info->pbl_size, 556 &pa, flags); 557 if (!va) 558 goto err; 559 560 pbl_table[i].va = va; 561 pbl_table[i].pa = pa; 562 } 563 564 /* Two-Layer PBLs, if we have more than one pbl we need to initialize 565 * the first one with physical pointers to all of the rest 566 */ 567 pbl_main_tbl = (dma_addr_t *)pbl_table[0].va; 568 for (i = 0; i < pbl_info->num_pbls - 1; i++) 569 pbl_main_tbl[i] = pbl_table[i + 1].pa; 570 571 return pbl_table; 572 573 err: 574 for (i--; i >= 0; i--) 575 dma_free_coherent(&pdev->dev, pbl_info->pbl_size, 576 pbl_table[i].va, pbl_table[i].pa); 577 578 qedr_free_pbl(dev, pbl_info, pbl_table); 579 580 return ERR_PTR(-ENOMEM); 581 } 582 583 static int qedr_prepare_pbl_tbl(struct qedr_dev *dev, 584 struct qedr_pbl_info *pbl_info, 585 u32 num_pbes, int two_layer_capable) 586 { 587 u32 pbl_capacity; 588 u32 pbl_size; 589 u32 num_pbls; 590 591 if ((num_pbes > MAX_PBES_ON_PAGE) && two_layer_capable) { 592 if (num_pbes > MAX_PBES_TWO_LAYER) { 593 DP_ERR(dev, "prepare pbl table: too many pages %d\n", 594 num_pbes); 595 return -EINVAL; 596 } 597 598 /* calculate required pbl page size */ 599 pbl_size = MIN_FW_PBL_PAGE_SIZE; 600 pbl_capacity = NUM_PBES_ON_PAGE(pbl_size) * 601 NUM_PBES_ON_PAGE(pbl_size); 602 603 while (pbl_capacity < num_pbes) { 604 pbl_size *= 2; 605 pbl_capacity = pbl_size / sizeof(u64); 606 pbl_capacity = pbl_capacity * pbl_capacity; 607 } 608 609 num_pbls = DIV_ROUND_UP(num_pbes, NUM_PBES_ON_PAGE(pbl_size)); 610 num_pbls++; /* One for the layer0 ( points to the pbls) */ 611 pbl_info->two_layered = true; 612 } else { 613 /* One layered PBL */ 614 num_pbls = 1; 615 pbl_size = max_t(u32, MIN_FW_PBL_PAGE_SIZE, 616 roundup_pow_of_two((num_pbes * sizeof(u64)))); 617 pbl_info->two_layered = false; 618 } 619 620 pbl_info->num_pbls = num_pbls; 621 pbl_info->pbl_size = pbl_size; 622 pbl_info->num_pbes = num_pbes; 623 624 DP_DEBUG(dev, QEDR_MSG_MR, 625 "prepare pbl table: num_pbes=%d, num_pbls=%d, pbl_size=%d\n", 626 pbl_info->num_pbes, pbl_info->num_pbls, pbl_info->pbl_size); 627 628 return 0; 629 } 630 631 static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem, 632 struct qedr_pbl *pbl, 633 struct qedr_pbl_info *pbl_info, u32 pg_shift) 634 { 635 int shift, pg_cnt, pages, pbe_cnt, total_num_pbes = 0; 636 u32 fw_pg_cnt, fw_pg_per_umem_pg; 637 struct qedr_pbl *pbl_tbl; 638 struct scatterlist *sg; 639 struct regpair *pbe; 640 u64 pg_addr; 641 int entry; 642 643 if (!pbl_info->num_pbes) 644 return; 645 646 /* If we have a two layered pbl, the first pbl points to the rest 647 * of the pbls and the first entry lays on the second pbl in the table 648 */ 649 if (pbl_info->two_layered) 650 pbl_tbl = &pbl[1]; 651 else 652 pbl_tbl = pbl; 653 654 pbe = (struct regpair *)pbl_tbl->va; 655 if (!pbe) { 656 DP_ERR(dev, "cannot populate PBL due to a NULL PBE\n"); 657 return; 658 } 659 660 pbe_cnt = 0; 661 662 shift = umem->page_shift; 663 664 fw_pg_per_umem_pg = BIT(umem->page_shift - pg_shift); 665 666 for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { 667 pages = sg_dma_len(sg) >> shift; 668 pg_addr = sg_dma_address(sg); 669 for (pg_cnt = 0; pg_cnt < pages; pg_cnt++) { 670 for (fw_pg_cnt = 0; fw_pg_cnt < fw_pg_per_umem_pg;) { 671 pbe->lo = cpu_to_le32(pg_addr); 672 pbe->hi = cpu_to_le32(upper_32_bits(pg_addr)); 673 674 pg_addr += BIT(pg_shift); 675 pbe_cnt++; 676 total_num_pbes++; 677 pbe++; 678 679 if (total_num_pbes == pbl_info->num_pbes) 680 return; 681 682 /* If the given pbl is full storing the pbes, 683 * move to next pbl. 684 */ 685 if (pbe_cnt == 686 (pbl_info->pbl_size / sizeof(u64))) { 687 pbl_tbl++; 688 pbe = (struct regpair *)pbl_tbl->va; 689 pbe_cnt = 0; 690 } 691 692 fw_pg_cnt++; 693 } 694 } 695 } 696 } 697 698 static int qedr_copy_cq_uresp(struct qedr_dev *dev, 699 struct qedr_cq *cq, struct ib_udata *udata) 700 { 701 struct qedr_create_cq_uresp uresp; 702 int rc; 703 704 memset(&uresp, 0, sizeof(uresp)); 705 706 uresp.db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT); 707 uresp.icid = cq->icid; 708 709 rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp)); 710 if (rc) 711 DP_ERR(dev, "copy error cqid=0x%x.\n", cq->icid); 712 713 return rc; 714 } 715 716 static void consume_cqe(struct qedr_cq *cq) 717 { 718 if (cq->latest_cqe == cq->toggle_cqe) 719 cq->pbl_toggle ^= RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK; 720 721 cq->latest_cqe = qed_chain_consume(&cq->pbl); 722 } 723 724 static inline int qedr_align_cq_entries(int entries) 725 { 726 u64 size, aligned_size; 727 728 /* We allocate an extra entry that we don't report to the FW. */ 729 size = (entries + 1) * QEDR_CQE_SIZE; 730 aligned_size = ALIGN(size, PAGE_SIZE); 731 732 return aligned_size / QEDR_CQE_SIZE; 733 } 734 735 static inline int qedr_init_user_queue(struct ib_ucontext *ib_ctx, 736 struct qedr_dev *dev, 737 struct qedr_userq *q, 738 u64 buf_addr, size_t buf_len, 739 int access, int dmasync, 740 int alloc_and_init) 741 { 742 u32 fw_pages; 743 int rc; 744 745 q->buf_addr = buf_addr; 746 q->buf_len = buf_len; 747 q->umem = ib_umem_get(ib_ctx, q->buf_addr, q->buf_len, access, dmasync); 748 if (IS_ERR(q->umem)) { 749 DP_ERR(dev, "create user queue: failed ib_umem_get, got %ld\n", 750 PTR_ERR(q->umem)); 751 return PTR_ERR(q->umem); 752 } 753 754 fw_pages = ib_umem_page_count(q->umem) << 755 (q->umem->page_shift - FW_PAGE_SHIFT); 756 757 rc = qedr_prepare_pbl_tbl(dev, &q->pbl_info, fw_pages, 0); 758 if (rc) 759 goto err0; 760 761 if (alloc_and_init) { 762 q->pbl_tbl = qedr_alloc_pbl_tbl(dev, &q->pbl_info, GFP_KERNEL); 763 if (IS_ERR(q->pbl_tbl)) { 764 rc = PTR_ERR(q->pbl_tbl); 765 goto err0; 766 } 767 qedr_populate_pbls(dev, q->umem, q->pbl_tbl, &q->pbl_info, 768 FW_PAGE_SHIFT); 769 } else { 770 q->pbl_tbl = kzalloc(sizeof(*q->pbl_tbl), GFP_KERNEL); 771 if (!q->pbl_tbl) { 772 rc = -ENOMEM; 773 goto err0; 774 } 775 } 776 777 return 0; 778 779 err0: 780 ib_umem_release(q->umem); 781 q->umem = NULL; 782 783 return rc; 784 } 785 786 static inline void qedr_init_cq_params(struct qedr_cq *cq, 787 struct qedr_ucontext *ctx, 788 struct qedr_dev *dev, int vector, 789 int chain_entries, int page_cnt, 790 u64 pbl_ptr, 791 struct qed_rdma_create_cq_in_params 792 *params) 793 { 794 memset(params, 0, sizeof(*params)); 795 params->cq_handle_hi = upper_32_bits((uintptr_t)cq); 796 params->cq_handle_lo = lower_32_bits((uintptr_t)cq); 797 params->cnq_id = vector; 798 params->cq_size = chain_entries - 1; 799 params->dpi = (ctx) ? ctx->dpi : dev->dpi; 800 params->pbl_num_pages = page_cnt; 801 params->pbl_ptr = pbl_ptr; 802 params->pbl_two_level = 0; 803 } 804 805 static void doorbell_cq(struct qedr_cq *cq, u32 cons, u8 flags) 806 { 807 cq->db.data.agg_flags = flags; 808 cq->db.data.value = cpu_to_le32(cons); 809 writeq(cq->db.raw, cq->db_addr); 810 811 /* Make sure write would stick */ 812 mmiowb(); 813 } 814 815 int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) 816 { 817 struct qedr_cq *cq = get_qedr_cq(ibcq); 818 unsigned long sflags; 819 struct qedr_dev *dev; 820 821 dev = get_qedr_dev(ibcq->device); 822 823 if (cq->destroyed) { 824 DP_ERR(dev, 825 "warning: arm was invoked after destroy for cq %p (icid=%d)\n", 826 cq, cq->icid); 827 return -EINVAL; 828 } 829 830 831 if (cq->cq_type == QEDR_CQ_TYPE_GSI) 832 return 0; 833 834 spin_lock_irqsave(&cq->cq_lock, sflags); 835 836 cq->arm_flags = 0; 837 838 if (flags & IB_CQ_SOLICITED) 839 cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_SE_CF_CMD; 840 841 if (flags & IB_CQ_NEXT_COMP) 842 cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_CF_CMD; 843 844 doorbell_cq(cq, cq->cq_cons - 1, cq->arm_flags); 845 846 spin_unlock_irqrestore(&cq->cq_lock, sflags); 847 848 return 0; 849 } 850 851 struct ib_cq *qedr_create_cq(struct ib_device *ibdev, 852 const struct ib_cq_init_attr *attr, 853 struct ib_ucontext *ib_ctx, struct ib_udata *udata) 854 { 855 struct qedr_ucontext *ctx = get_qedr_ucontext(ib_ctx); 856 struct qed_rdma_destroy_cq_out_params destroy_oparams; 857 struct qed_rdma_destroy_cq_in_params destroy_iparams; 858 struct qedr_dev *dev = get_qedr_dev(ibdev); 859 struct qed_rdma_create_cq_in_params params; 860 struct qedr_create_cq_ureq ureq; 861 int vector = attr->comp_vector; 862 int entries = attr->cqe; 863 struct qedr_cq *cq; 864 int chain_entries; 865 int page_cnt; 866 u64 pbl_ptr; 867 u16 icid; 868 int rc; 869 870 DP_DEBUG(dev, QEDR_MSG_INIT, 871 "create_cq: called from %s. entries=%d, vector=%d\n", 872 udata ? "User Lib" : "Kernel", entries, vector); 873 874 if (entries > QEDR_MAX_CQES) { 875 DP_ERR(dev, 876 "create cq: the number of entries %d is too high. Must be equal or below %d.\n", 877 entries, QEDR_MAX_CQES); 878 return ERR_PTR(-EINVAL); 879 } 880 881 chain_entries = qedr_align_cq_entries(entries); 882 chain_entries = min_t(int, chain_entries, QEDR_MAX_CQES); 883 884 cq = kzalloc(sizeof(*cq), GFP_KERNEL); 885 if (!cq) 886 return ERR_PTR(-ENOMEM); 887 888 if (udata) { 889 memset(&ureq, 0, sizeof(ureq)); 890 if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) { 891 DP_ERR(dev, 892 "create cq: problem copying data from user space\n"); 893 goto err0; 894 } 895 896 if (!ureq.len) { 897 DP_ERR(dev, 898 "create cq: cannot create a cq with 0 entries\n"); 899 goto err0; 900 } 901 902 cq->cq_type = QEDR_CQ_TYPE_USER; 903 904 rc = qedr_init_user_queue(ib_ctx, dev, &cq->q, ureq.addr, 905 ureq.len, IB_ACCESS_LOCAL_WRITE, 906 1, 1); 907 if (rc) 908 goto err0; 909 910 pbl_ptr = cq->q.pbl_tbl->pa; 911 page_cnt = cq->q.pbl_info.num_pbes; 912 913 cq->ibcq.cqe = chain_entries; 914 } else { 915 cq->cq_type = QEDR_CQ_TYPE_KERNEL; 916 917 rc = dev->ops->common->chain_alloc(dev->cdev, 918 QED_CHAIN_USE_TO_CONSUME, 919 QED_CHAIN_MODE_PBL, 920 QED_CHAIN_CNT_TYPE_U32, 921 chain_entries, 922 sizeof(union rdma_cqe), 923 &cq->pbl, NULL); 924 if (rc) 925 goto err1; 926 927 page_cnt = qed_chain_get_page_cnt(&cq->pbl); 928 pbl_ptr = qed_chain_get_pbl_phys(&cq->pbl); 929 cq->ibcq.cqe = cq->pbl.capacity; 930 } 931 932 qedr_init_cq_params(cq, ctx, dev, vector, chain_entries, page_cnt, 933 pbl_ptr, ¶ms); 934 935 rc = dev->ops->rdma_create_cq(dev->rdma_ctx, ¶ms, &icid); 936 if (rc) 937 goto err2; 938 939 cq->icid = icid; 940 cq->sig = QEDR_CQ_MAGIC_NUMBER; 941 spin_lock_init(&cq->cq_lock); 942 943 if (ib_ctx) { 944 rc = qedr_copy_cq_uresp(dev, cq, udata); 945 if (rc) 946 goto err3; 947 } else { 948 /* Generate doorbell address. */ 949 cq->db_addr = dev->db_addr + 950 DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT); 951 cq->db.data.icid = cq->icid; 952 cq->db.data.params = DB_AGG_CMD_SET << 953 RDMA_PWM_VAL32_DATA_AGG_CMD_SHIFT; 954 955 /* point to the very last element, passing it we will toggle */ 956 cq->toggle_cqe = qed_chain_get_last_elem(&cq->pbl); 957 cq->pbl_toggle = RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK; 958 cq->latest_cqe = NULL; 959 consume_cqe(cq); 960 cq->cq_cons = qed_chain_get_cons_idx_u32(&cq->pbl); 961 } 962 963 DP_DEBUG(dev, QEDR_MSG_CQ, 964 "create cq: icid=0x%0x, addr=%p, size(entries)=0x%0x\n", 965 cq->icid, cq, params.cq_size); 966 967 return &cq->ibcq; 968 969 err3: 970 destroy_iparams.icid = cq->icid; 971 dev->ops->rdma_destroy_cq(dev->rdma_ctx, &destroy_iparams, 972 &destroy_oparams); 973 err2: 974 if (udata) 975 qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl); 976 else 977 dev->ops->common->chain_free(dev->cdev, &cq->pbl); 978 err1: 979 if (udata) 980 ib_umem_release(cq->q.umem); 981 err0: 982 kfree(cq); 983 return ERR_PTR(-EINVAL); 984 } 985 986 int qedr_resize_cq(struct ib_cq *ibcq, int new_cnt, struct ib_udata *udata) 987 { 988 struct qedr_dev *dev = get_qedr_dev(ibcq->device); 989 struct qedr_cq *cq = get_qedr_cq(ibcq); 990 991 DP_ERR(dev, "cq %p RESIZE NOT SUPPORTED\n", cq); 992 993 return 0; 994 } 995 996 #define QEDR_DESTROY_CQ_MAX_ITERATIONS (10) 997 #define QEDR_DESTROY_CQ_ITER_DURATION (10) 998 999 int qedr_destroy_cq(struct ib_cq *ibcq) 1000 { 1001 struct qedr_dev *dev = get_qedr_dev(ibcq->device); 1002 struct qed_rdma_destroy_cq_out_params oparams; 1003 struct qed_rdma_destroy_cq_in_params iparams; 1004 struct qedr_cq *cq = get_qedr_cq(ibcq); 1005 int iter; 1006 int rc; 1007 1008 DP_DEBUG(dev, QEDR_MSG_CQ, "destroy cq %p (icid=%d)\n", cq, cq->icid); 1009 1010 cq->destroyed = 1; 1011 1012 /* GSIs CQs are handled by driver, so they don't exist in the FW */ 1013 if (cq->cq_type == QEDR_CQ_TYPE_GSI) 1014 goto done; 1015 1016 iparams.icid = cq->icid; 1017 rc = dev->ops->rdma_destroy_cq(dev->rdma_ctx, &iparams, &oparams); 1018 if (rc) 1019 return rc; 1020 1021 dev->ops->common->chain_free(dev->cdev, &cq->pbl); 1022 1023 if (ibcq->uobject && ibcq->uobject->context) { 1024 qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl); 1025 ib_umem_release(cq->q.umem); 1026 } 1027 1028 /* We don't want the IRQ handler to handle a non-existing CQ so we 1029 * wait until all CNQ interrupts, if any, are received. This will always 1030 * happen and will always happen very fast. If not, then a serious error 1031 * has occured. That is why we can use a long delay. 1032 * We spin for a short time so we don’t lose time on context switching 1033 * in case all the completions are handled in that span. Otherwise 1034 * we sleep for a while and check again. Since the CNQ may be 1035 * associated with (only) the current CPU we use msleep to allow the 1036 * current CPU to be freed. 1037 * The CNQ notification is increased in qedr_irq_handler(). 1038 */ 1039 iter = QEDR_DESTROY_CQ_MAX_ITERATIONS; 1040 while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) { 1041 udelay(QEDR_DESTROY_CQ_ITER_DURATION); 1042 iter--; 1043 } 1044 1045 iter = QEDR_DESTROY_CQ_MAX_ITERATIONS; 1046 while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) { 1047 msleep(QEDR_DESTROY_CQ_ITER_DURATION); 1048 iter--; 1049 } 1050 1051 if (oparams.num_cq_notif != cq->cnq_notif) 1052 goto err; 1053 1054 /* Note that we don't need to have explicit code to wait for the 1055 * completion of the event handler because it is invoked from the EQ. 1056 * Since the destroy CQ ramrod has also been received on the EQ we can 1057 * be certain that there's no event handler in process. 1058 */ 1059 done: 1060 cq->sig = ~cq->sig; 1061 1062 kfree(cq); 1063 1064 return 0; 1065 1066 err: 1067 DP_ERR(dev, 1068 "CQ %p (icid=%d) not freed, expecting %d ints but got %d ints\n", 1069 cq, cq->icid, oparams.num_cq_notif, cq->cnq_notif); 1070 1071 return -EINVAL; 1072 } 1073 1074 static inline int get_gid_info_from_table(struct ib_qp *ibqp, 1075 struct ib_qp_attr *attr, 1076 int attr_mask, 1077 struct qed_rdma_modify_qp_in_params 1078 *qp_params) 1079 { 1080 enum rdma_network_type nw_type; 1081 struct ib_gid_attr gid_attr; 1082 const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr); 1083 union ib_gid gid; 1084 u32 ipv4_addr; 1085 int rc = 0; 1086 int i; 1087 1088 rc = ib_get_cached_gid(ibqp->device, 1089 rdma_ah_get_port_num(&attr->ah_attr), 1090 grh->sgid_index, &gid, &gid_attr); 1091 if (rc) 1092 return rc; 1093 1094 qp_params->vlan_id = rdma_vlan_dev_vlan_id(gid_attr.ndev); 1095 1096 dev_put(gid_attr.ndev); 1097 nw_type = ib_gid_to_network_type(gid_attr.gid_type, &gid); 1098 switch (nw_type) { 1099 case RDMA_NETWORK_IPV6: 1100 memcpy(&qp_params->sgid.bytes[0], &gid.raw[0], 1101 sizeof(qp_params->sgid)); 1102 memcpy(&qp_params->dgid.bytes[0], 1103 &grh->dgid, 1104 sizeof(qp_params->dgid)); 1105 qp_params->roce_mode = ROCE_V2_IPV6; 1106 SET_FIELD(qp_params->modify_flags, 1107 QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1); 1108 break; 1109 case RDMA_NETWORK_IB: 1110 memcpy(&qp_params->sgid.bytes[0], &gid.raw[0], 1111 sizeof(qp_params->sgid)); 1112 memcpy(&qp_params->dgid.bytes[0], 1113 &grh->dgid, 1114 sizeof(qp_params->dgid)); 1115 qp_params->roce_mode = ROCE_V1; 1116 break; 1117 case RDMA_NETWORK_IPV4: 1118 memset(&qp_params->sgid, 0, sizeof(qp_params->sgid)); 1119 memset(&qp_params->dgid, 0, sizeof(qp_params->dgid)); 1120 ipv4_addr = qedr_get_ipv4_from_gid(gid.raw); 1121 qp_params->sgid.ipv4_addr = ipv4_addr; 1122 ipv4_addr = 1123 qedr_get_ipv4_from_gid(grh->dgid.raw); 1124 qp_params->dgid.ipv4_addr = ipv4_addr; 1125 SET_FIELD(qp_params->modify_flags, 1126 QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1); 1127 qp_params->roce_mode = ROCE_V2_IPV4; 1128 break; 1129 } 1130 1131 for (i = 0; i < 4; i++) { 1132 qp_params->sgid.dwords[i] = ntohl(qp_params->sgid.dwords[i]); 1133 qp_params->dgid.dwords[i] = ntohl(qp_params->dgid.dwords[i]); 1134 } 1135 1136 if (qp_params->vlan_id >= VLAN_CFI_MASK) 1137 qp_params->vlan_id = 0; 1138 1139 return 0; 1140 } 1141 1142 static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev, 1143 struct ib_qp_init_attr *attrs) 1144 { 1145 struct qedr_device_attr *qattr = &dev->attr; 1146 1147 /* QP0... attrs->qp_type == IB_QPT_GSI */ 1148 if (attrs->qp_type != IB_QPT_RC && attrs->qp_type != IB_QPT_GSI) { 1149 DP_DEBUG(dev, QEDR_MSG_QP, 1150 "create qp: unsupported qp type=0x%x requested\n", 1151 attrs->qp_type); 1152 return -EINVAL; 1153 } 1154 1155 if (attrs->cap.max_send_wr > qattr->max_sqe) { 1156 DP_ERR(dev, 1157 "create qp: cannot create a SQ with %d elements (max_send_wr=0x%x)\n", 1158 attrs->cap.max_send_wr, qattr->max_sqe); 1159 return -EINVAL; 1160 } 1161 1162 if (attrs->cap.max_inline_data > qattr->max_inline) { 1163 DP_ERR(dev, 1164 "create qp: unsupported inline data size=0x%x requested (max_inline=0x%x)\n", 1165 attrs->cap.max_inline_data, qattr->max_inline); 1166 return -EINVAL; 1167 } 1168 1169 if (attrs->cap.max_send_sge > qattr->max_sge) { 1170 DP_ERR(dev, 1171 "create qp: unsupported send_sge=0x%x requested (max_send_sge=0x%x)\n", 1172 attrs->cap.max_send_sge, qattr->max_sge); 1173 return -EINVAL; 1174 } 1175 1176 if (attrs->cap.max_recv_sge > qattr->max_sge) { 1177 DP_ERR(dev, 1178 "create qp: unsupported recv_sge=0x%x requested (max_recv_sge=0x%x)\n", 1179 attrs->cap.max_recv_sge, qattr->max_sge); 1180 return -EINVAL; 1181 } 1182 1183 /* Unprivileged user space cannot create special QP */ 1184 if (ibpd->uobject && attrs->qp_type == IB_QPT_GSI) { 1185 DP_ERR(dev, 1186 "create qp: userspace can't create special QPs of type=0x%x\n", 1187 attrs->qp_type); 1188 return -EINVAL; 1189 } 1190 1191 return 0; 1192 } 1193 1194 static void qedr_copy_rq_uresp(struct qedr_dev *dev, 1195 struct qedr_create_qp_uresp *uresp, 1196 struct qedr_qp *qp) 1197 { 1198 /* iWARP requires two doorbells per RQ. */ 1199 if (rdma_protocol_iwarp(&dev->ibdev, 1)) { 1200 uresp->rq_db_offset = 1201 DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD); 1202 uresp->rq_db2_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_FLAGS); 1203 } else { 1204 uresp->rq_db_offset = 1205 DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD); 1206 } 1207 1208 uresp->rq_icid = qp->icid; 1209 } 1210 1211 static void qedr_copy_sq_uresp(struct qedr_dev *dev, 1212 struct qedr_create_qp_uresp *uresp, 1213 struct qedr_qp *qp) 1214 { 1215 uresp->sq_db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD); 1216 1217 /* iWARP uses the same cid for rq and sq */ 1218 if (rdma_protocol_iwarp(&dev->ibdev, 1)) 1219 uresp->sq_icid = qp->icid; 1220 else 1221 uresp->sq_icid = qp->icid + 1; 1222 } 1223 1224 static int qedr_copy_qp_uresp(struct qedr_dev *dev, 1225 struct qedr_qp *qp, struct ib_udata *udata) 1226 { 1227 struct qedr_create_qp_uresp uresp; 1228 int rc; 1229 1230 memset(&uresp, 0, sizeof(uresp)); 1231 qedr_copy_sq_uresp(dev, &uresp, qp); 1232 qedr_copy_rq_uresp(dev, &uresp, qp); 1233 1234 uresp.atomic_supported = dev->atomic_cap != IB_ATOMIC_NONE; 1235 uresp.qp_id = qp->qp_id; 1236 1237 rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp)); 1238 if (rc) 1239 DP_ERR(dev, 1240 "create qp: failed a copy to user space with qp icid=0x%x.\n", 1241 qp->icid); 1242 1243 return rc; 1244 } 1245 1246 static void qedr_set_common_qp_params(struct qedr_dev *dev, 1247 struct qedr_qp *qp, 1248 struct qedr_pd *pd, 1249 struct ib_qp_init_attr *attrs) 1250 { 1251 spin_lock_init(&qp->q_lock); 1252 atomic_set(&qp->refcnt, 1); 1253 qp->pd = pd; 1254 qp->qp_type = attrs->qp_type; 1255 qp->max_inline_data = attrs->cap.max_inline_data; 1256 qp->sq.max_sges = attrs->cap.max_send_sge; 1257 qp->state = QED_ROCE_QP_STATE_RESET; 1258 qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? true : false; 1259 qp->sq_cq = get_qedr_cq(attrs->send_cq); 1260 qp->rq_cq = get_qedr_cq(attrs->recv_cq); 1261 qp->dev = dev; 1262 qp->rq.max_sges = attrs->cap.max_recv_sge; 1263 1264 DP_DEBUG(dev, QEDR_MSG_QP, 1265 "RQ params:\trq_max_sges = %d, rq_cq_id = %d\n", 1266 qp->rq.max_sges, qp->rq_cq->icid); 1267 DP_DEBUG(dev, QEDR_MSG_QP, 1268 "QP params:\tpd = %d, qp_type = %d, max_inline_data = %d, state = %d, signaled = %d, use_srq=%d\n", 1269 pd->pd_id, qp->qp_type, qp->max_inline_data, 1270 qp->state, qp->signaled, (attrs->srq) ? 1 : 0); 1271 DP_DEBUG(dev, QEDR_MSG_QP, 1272 "SQ params:\tsq_max_sges = %d, sq_cq_id = %d\n", 1273 qp->sq.max_sges, qp->sq_cq->icid); 1274 } 1275 1276 static void qedr_set_roce_db_info(struct qedr_dev *dev, struct qedr_qp *qp) 1277 { 1278 qp->sq.db = dev->db_addr + 1279 DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD); 1280 qp->sq.db_data.data.icid = qp->icid + 1; 1281 qp->rq.db = dev->db_addr + 1282 DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD); 1283 qp->rq.db_data.data.icid = qp->icid; 1284 } 1285 1286 static inline void 1287 qedr_init_common_qp_in_params(struct qedr_dev *dev, 1288 struct qedr_pd *pd, 1289 struct qedr_qp *qp, 1290 struct ib_qp_init_attr *attrs, 1291 bool fmr_and_reserved_lkey, 1292 struct qed_rdma_create_qp_in_params *params) 1293 { 1294 /* QP handle to be written in an async event */ 1295 params->qp_handle_async_lo = lower_32_bits((uintptr_t) qp); 1296 params->qp_handle_async_hi = upper_32_bits((uintptr_t) qp); 1297 1298 params->signal_all = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR); 1299 params->fmr_and_reserved_lkey = fmr_and_reserved_lkey; 1300 params->pd = pd->pd_id; 1301 params->dpi = pd->uctx ? pd->uctx->dpi : dev->dpi; 1302 params->sq_cq_id = get_qedr_cq(attrs->send_cq)->icid; 1303 params->stats_queue = 0; 1304 params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid; 1305 params->srq_id = 0; 1306 params->use_srq = false; 1307 } 1308 1309 static inline void qedr_qp_user_print(struct qedr_dev *dev, struct qedr_qp *qp) 1310 { 1311 DP_DEBUG(dev, QEDR_MSG_QP, "create qp: successfully created user QP. " 1312 "qp=%p. " 1313 "sq_addr=0x%llx, " 1314 "sq_len=%zd, " 1315 "rq_addr=0x%llx, " 1316 "rq_len=%zd" 1317 "\n", 1318 qp, 1319 qp->usq.buf_addr, 1320 qp->usq.buf_len, qp->urq.buf_addr, qp->urq.buf_len); 1321 } 1322 1323 static int qedr_idr_add(struct qedr_dev *dev, void *ptr, u32 id) 1324 { 1325 int rc; 1326 1327 if (!rdma_protocol_iwarp(&dev->ibdev, 1)) 1328 return 0; 1329 1330 idr_preload(GFP_KERNEL); 1331 spin_lock_irq(&dev->idr_lock); 1332 1333 rc = idr_alloc(&dev->qpidr, ptr, id, id + 1, GFP_ATOMIC); 1334 1335 spin_unlock_irq(&dev->idr_lock); 1336 idr_preload_end(); 1337 1338 return rc < 0 ? rc : 0; 1339 } 1340 1341 static void qedr_idr_remove(struct qedr_dev *dev, u32 id) 1342 { 1343 if (!rdma_protocol_iwarp(&dev->ibdev, 1)) 1344 return; 1345 1346 spin_lock_irq(&dev->idr_lock); 1347 idr_remove(&dev->qpidr, id); 1348 spin_unlock_irq(&dev->idr_lock); 1349 } 1350 1351 static inline void 1352 qedr_iwarp_populate_user_qp(struct qedr_dev *dev, 1353 struct qedr_qp *qp, 1354 struct qed_rdma_create_qp_out_params *out_params) 1355 { 1356 qp->usq.pbl_tbl->va = out_params->sq_pbl_virt; 1357 qp->usq.pbl_tbl->pa = out_params->sq_pbl_phys; 1358 1359 qedr_populate_pbls(dev, qp->usq.umem, qp->usq.pbl_tbl, 1360 &qp->usq.pbl_info, FW_PAGE_SHIFT); 1361 1362 qp->urq.pbl_tbl->va = out_params->rq_pbl_virt; 1363 qp->urq.pbl_tbl->pa = out_params->rq_pbl_phys; 1364 1365 qedr_populate_pbls(dev, qp->urq.umem, qp->urq.pbl_tbl, 1366 &qp->urq.pbl_info, FW_PAGE_SHIFT); 1367 } 1368 1369 static void qedr_cleanup_user(struct qedr_dev *dev, struct qedr_qp *qp) 1370 { 1371 if (qp->usq.umem) 1372 ib_umem_release(qp->usq.umem); 1373 qp->usq.umem = NULL; 1374 1375 if (qp->urq.umem) 1376 ib_umem_release(qp->urq.umem); 1377 qp->urq.umem = NULL; 1378 } 1379 1380 static int qedr_create_user_qp(struct qedr_dev *dev, 1381 struct qedr_qp *qp, 1382 struct ib_pd *ibpd, 1383 struct ib_udata *udata, 1384 struct ib_qp_init_attr *attrs) 1385 { 1386 struct qed_rdma_create_qp_in_params in_params; 1387 struct qed_rdma_create_qp_out_params out_params; 1388 struct qedr_pd *pd = get_qedr_pd(ibpd); 1389 struct ib_ucontext *ib_ctx = NULL; 1390 struct qedr_create_qp_ureq ureq; 1391 int alloc_and_init = rdma_protocol_roce(&dev->ibdev, 1); 1392 int rc = -EINVAL; 1393 1394 ib_ctx = ibpd->uobject->context; 1395 1396 memset(&ureq, 0, sizeof(ureq)); 1397 rc = ib_copy_from_udata(&ureq, udata, sizeof(ureq)); 1398 if (rc) { 1399 DP_ERR(dev, "Problem copying data from user space\n"); 1400 return rc; 1401 } 1402 1403 /* SQ - read access only (0), dma sync not required (0) */ 1404 rc = qedr_init_user_queue(ib_ctx, dev, &qp->usq, ureq.sq_addr, 1405 ureq.sq_len, 0, 0, alloc_and_init); 1406 if (rc) 1407 return rc; 1408 1409 /* RQ - read access only (0), dma sync not required (0) */ 1410 rc = qedr_init_user_queue(ib_ctx, dev, &qp->urq, ureq.rq_addr, 1411 ureq.rq_len, 0, 0, alloc_and_init); 1412 if (rc) 1413 return rc; 1414 1415 memset(&in_params, 0, sizeof(in_params)); 1416 qedr_init_common_qp_in_params(dev, pd, qp, attrs, false, &in_params); 1417 in_params.qp_handle_lo = ureq.qp_handle_lo; 1418 in_params.qp_handle_hi = ureq.qp_handle_hi; 1419 in_params.sq_num_pages = qp->usq.pbl_info.num_pbes; 1420 in_params.sq_pbl_ptr = qp->usq.pbl_tbl->pa; 1421 in_params.rq_num_pages = qp->urq.pbl_info.num_pbes; 1422 in_params.rq_pbl_ptr = qp->urq.pbl_tbl->pa; 1423 1424 qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx, 1425 &in_params, &out_params); 1426 1427 if (!qp->qed_qp) { 1428 rc = -ENOMEM; 1429 goto err1; 1430 } 1431 1432 if (rdma_protocol_iwarp(&dev->ibdev, 1)) 1433 qedr_iwarp_populate_user_qp(dev, qp, &out_params); 1434 1435 qp->qp_id = out_params.qp_id; 1436 qp->icid = out_params.icid; 1437 1438 rc = qedr_copy_qp_uresp(dev, qp, udata); 1439 if (rc) 1440 goto err; 1441 1442 qedr_qp_user_print(dev, qp); 1443 1444 return 0; 1445 err: 1446 rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp); 1447 if (rc) 1448 DP_ERR(dev, "create qp: fatal fault. rc=%d", rc); 1449 1450 err1: 1451 qedr_cleanup_user(dev, qp); 1452 return rc; 1453 } 1454 1455 static void qedr_set_iwarp_db_info(struct qedr_dev *dev, struct qedr_qp *qp) 1456 { 1457 qp->sq.db = dev->db_addr + 1458 DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD); 1459 qp->sq.db_data.data.icid = qp->icid; 1460 1461 qp->rq.db = dev->db_addr + 1462 DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD); 1463 qp->rq.db_data.data.icid = qp->icid; 1464 qp->rq.iwarp_db2 = dev->db_addr + 1465 DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_FLAGS); 1466 qp->rq.iwarp_db2_data.data.icid = qp->icid; 1467 qp->rq.iwarp_db2_data.data.value = DQ_TCM_IWARP_POST_RQ_CF_CMD; 1468 } 1469 1470 static int 1471 qedr_roce_create_kernel_qp(struct qedr_dev *dev, 1472 struct qedr_qp *qp, 1473 struct qed_rdma_create_qp_in_params *in_params, 1474 u32 n_sq_elems, u32 n_rq_elems) 1475 { 1476 struct qed_rdma_create_qp_out_params out_params; 1477 int rc; 1478 1479 rc = dev->ops->common->chain_alloc(dev->cdev, 1480 QED_CHAIN_USE_TO_PRODUCE, 1481 QED_CHAIN_MODE_PBL, 1482 QED_CHAIN_CNT_TYPE_U32, 1483 n_sq_elems, 1484 QEDR_SQE_ELEMENT_SIZE, 1485 &qp->sq.pbl, NULL); 1486 1487 if (rc) 1488 return rc; 1489 1490 in_params->sq_num_pages = qed_chain_get_page_cnt(&qp->sq.pbl); 1491 in_params->sq_pbl_ptr = qed_chain_get_pbl_phys(&qp->sq.pbl); 1492 1493 rc = dev->ops->common->chain_alloc(dev->cdev, 1494 QED_CHAIN_USE_TO_CONSUME_PRODUCE, 1495 QED_CHAIN_MODE_PBL, 1496 QED_CHAIN_CNT_TYPE_U32, 1497 n_rq_elems, 1498 QEDR_RQE_ELEMENT_SIZE, 1499 &qp->rq.pbl, NULL); 1500 if (rc) 1501 return rc; 1502 1503 in_params->rq_num_pages = qed_chain_get_page_cnt(&qp->rq.pbl); 1504 in_params->rq_pbl_ptr = qed_chain_get_pbl_phys(&qp->rq.pbl); 1505 1506 qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx, 1507 in_params, &out_params); 1508 1509 if (!qp->qed_qp) 1510 return -EINVAL; 1511 1512 qp->qp_id = out_params.qp_id; 1513 qp->icid = out_params.icid; 1514 1515 qedr_set_roce_db_info(dev, qp); 1516 return rc; 1517 } 1518 1519 static int 1520 qedr_iwarp_create_kernel_qp(struct qedr_dev *dev, 1521 struct qedr_qp *qp, 1522 struct qed_rdma_create_qp_in_params *in_params, 1523 u32 n_sq_elems, u32 n_rq_elems) 1524 { 1525 struct qed_rdma_create_qp_out_params out_params; 1526 struct qed_chain_ext_pbl ext_pbl; 1527 int rc; 1528 1529 in_params->sq_num_pages = QED_CHAIN_PAGE_CNT(n_sq_elems, 1530 QEDR_SQE_ELEMENT_SIZE, 1531 QED_CHAIN_MODE_PBL); 1532 in_params->rq_num_pages = QED_CHAIN_PAGE_CNT(n_rq_elems, 1533 QEDR_RQE_ELEMENT_SIZE, 1534 QED_CHAIN_MODE_PBL); 1535 1536 qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx, 1537 in_params, &out_params); 1538 1539 if (!qp->qed_qp) 1540 return -EINVAL; 1541 1542 /* Now we allocate the chain */ 1543 ext_pbl.p_pbl_virt = out_params.sq_pbl_virt; 1544 ext_pbl.p_pbl_phys = out_params.sq_pbl_phys; 1545 1546 rc = dev->ops->common->chain_alloc(dev->cdev, 1547 QED_CHAIN_USE_TO_PRODUCE, 1548 QED_CHAIN_MODE_PBL, 1549 QED_CHAIN_CNT_TYPE_U32, 1550 n_sq_elems, 1551 QEDR_SQE_ELEMENT_SIZE, 1552 &qp->sq.pbl, &ext_pbl); 1553 1554 if (rc) 1555 goto err; 1556 1557 ext_pbl.p_pbl_virt = out_params.rq_pbl_virt; 1558 ext_pbl.p_pbl_phys = out_params.rq_pbl_phys; 1559 1560 rc = dev->ops->common->chain_alloc(dev->cdev, 1561 QED_CHAIN_USE_TO_CONSUME_PRODUCE, 1562 QED_CHAIN_MODE_PBL, 1563 QED_CHAIN_CNT_TYPE_U32, 1564 n_rq_elems, 1565 QEDR_RQE_ELEMENT_SIZE, 1566 &qp->rq.pbl, &ext_pbl); 1567 1568 if (rc) 1569 goto err; 1570 1571 qp->qp_id = out_params.qp_id; 1572 qp->icid = out_params.icid; 1573 1574 qedr_set_iwarp_db_info(dev, qp); 1575 return rc; 1576 1577 err: 1578 dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp); 1579 1580 return rc; 1581 } 1582 1583 static void qedr_cleanup_kernel(struct qedr_dev *dev, struct qedr_qp *qp) 1584 { 1585 dev->ops->common->chain_free(dev->cdev, &qp->sq.pbl); 1586 kfree(qp->wqe_wr_id); 1587 1588 dev->ops->common->chain_free(dev->cdev, &qp->rq.pbl); 1589 kfree(qp->rqe_wr_id); 1590 } 1591 1592 static int qedr_create_kernel_qp(struct qedr_dev *dev, 1593 struct qedr_qp *qp, 1594 struct ib_pd *ibpd, 1595 struct ib_qp_init_attr *attrs) 1596 { 1597 struct qed_rdma_create_qp_in_params in_params; 1598 struct qedr_pd *pd = get_qedr_pd(ibpd); 1599 int rc = -EINVAL; 1600 u32 n_rq_elems; 1601 u32 n_sq_elems; 1602 u32 n_sq_entries; 1603 1604 memset(&in_params, 0, sizeof(in_params)); 1605 1606 /* A single work request may take up to QEDR_MAX_SQ_WQE_SIZE elements in 1607 * the ring. The ring should allow at least a single WR, even if the 1608 * user requested none, due to allocation issues. 1609 * We should add an extra WR since the prod and cons indices of 1610 * wqe_wr_id are managed in such a way that the WQ is considered full 1611 * when (prod+1)%max_wr==cons. We currently don't do that because we 1612 * double the number of entries due an iSER issue that pushes far more 1613 * WRs than indicated. If we decline its ib_post_send() then we get 1614 * error prints in the dmesg we'd like to avoid. 1615 */ 1616 qp->sq.max_wr = min_t(u32, attrs->cap.max_send_wr * dev->wq_multiplier, 1617 dev->attr.max_sqe); 1618 1619 qp->wqe_wr_id = kzalloc(qp->sq.max_wr * sizeof(*qp->wqe_wr_id), 1620 GFP_KERNEL); 1621 if (!qp->wqe_wr_id) { 1622 DP_ERR(dev, "create qp: failed SQ shadow memory allocation\n"); 1623 return -ENOMEM; 1624 } 1625 1626 /* QP handle to be written in CQE */ 1627 in_params.qp_handle_lo = lower_32_bits((uintptr_t) qp); 1628 in_params.qp_handle_hi = upper_32_bits((uintptr_t) qp); 1629 1630 /* A single work request may take up to QEDR_MAX_RQ_WQE_SIZE elements in 1631 * the ring. There ring should allow at least a single WR, even if the 1632 * user requested none, due to allocation issues. 1633 */ 1634 qp->rq.max_wr = (u16) max_t(u32, attrs->cap.max_recv_wr, 1); 1635 1636 /* Allocate driver internal RQ array */ 1637 qp->rqe_wr_id = kzalloc(qp->rq.max_wr * sizeof(*qp->rqe_wr_id), 1638 GFP_KERNEL); 1639 if (!qp->rqe_wr_id) { 1640 DP_ERR(dev, 1641 "create qp: failed RQ shadow memory allocation\n"); 1642 kfree(qp->wqe_wr_id); 1643 return -ENOMEM; 1644 } 1645 1646 qedr_init_common_qp_in_params(dev, pd, qp, attrs, true, &in_params); 1647 1648 n_sq_entries = attrs->cap.max_send_wr; 1649 n_sq_entries = min_t(u32, n_sq_entries, dev->attr.max_sqe); 1650 n_sq_entries = max_t(u32, n_sq_entries, 1); 1651 n_sq_elems = n_sq_entries * QEDR_MAX_SQE_ELEMENTS_PER_SQE; 1652 1653 n_rq_elems = qp->rq.max_wr * QEDR_MAX_RQE_ELEMENTS_PER_RQE; 1654 1655 if (rdma_protocol_iwarp(&dev->ibdev, 1)) 1656 rc = qedr_iwarp_create_kernel_qp(dev, qp, &in_params, 1657 n_sq_elems, n_rq_elems); 1658 else 1659 rc = qedr_roce_create_kernel_qp(dev, qp, &in_params, 1660 n_sq_elems, n_rq_elems); 1661 if (rc) 1662 qedr_cleanup_kernel(dev, qp); 1663 1664 return rc; 1665 } 1666 1667 struct ib_qp *qedr_create_qp(struct ib_pd *ibpd, 1668 struct ib_qp_init_attr *attrs, 1669 struct ib_udata *udata) 1670 { 1671 struct qedr_dev *dev = get_qedr_dev(ibpd->device); 1672 struct qedr_pd *pd = get_qedr_pd(ibpd); 1673 struct qedr_qp *qp; 1674 struct ib_qp *ibqp; 1675 int rc = 0; 1676 1677 DP_DEBUG(dev, QEDR_MSG_QP, "create qp: called from %s, pd=%p\n", 1678 udata ? "user library" : "kernel", pd); 1679 1680 rc = qedr_check_qp_attrs(ibpd, dev, attrs); 1681 if (rc) 1682 return ERR_PTR(rc); 1683 1684 if (attrs->srq) 1685 return ERR_PTR(-EINVAL); 1686 1687 DP_DEBUG(dev, QEDR_MSG_QP, 1688 "create qp: called from %s, event_handler=%p, eepd=%p sq_cq=%p, sq_icid=%d, rq_cq=%p, rq_icid=%d\n", 1689 udata ? "user library" : "kernel", attrs->event_handler, pd, 1690 get_qedr_cq(attrs->send_cq), 1691 get_qedr_cq(attrs->send_cq)->icid, 1692 get_qedr_cq(attrs->recv_cq), 1693 get_qedr_cq(attrs->recv_cq)->icid); 1694 1695 qp = kzalloc(sizeof(*qp), GFP_KERNEL); 1696 if (!qp) { 1697 DP_ERR(dev, "create qp: failed allocating memory\n"); 1698 return ERR_PTR(-ENOMEM); 1699 } 1700 1701 qedr_set_common_qp_params(dev, qp, pd, attrs); 1702 1703 if (attrs->qp_type == IB_QPT_GSI) { 1704 ibqp = qedr_create_gsi_qp(dev, attrs, qp); 1705 if (IS_ERR(ibqp)) 1706 kfree(qp); 1707 return ibqp; 1708 } 1709 1710 if (udata) 1711 rc = qedr_create_user_qp(dev, qp, ibpd, udata, attrs); 1712 else 1713 rc = qedr_create_kernel_qp(dev, qp, ibpd, attrs); 1714 1715 if (rc) 1716 goto err; 1717 1718 qp->ibqp.qp_num = qp->qp_id; 1719 1720 rc = qedr_idr_add(dev, qp, qp->qp_id); 1721 if (rc) 1722 goto err; 1723 1724 return &qp->ibqp; 1725 1726 err: 1727 kfree(qp); 1728 1729 return ERR_PTR(-EFAULT); 1730 } 1731 1732 static enum ib_qp_state qedr_get_ibqp_state(enum qed_roce_qp_state qp_state) 1733 { 1734 switch (qp_state) { 1735 case QED_ROCE_QP_STATE_RESET: 1736 return IB_QPS_RESET; 1737 case QED_ROCE_QP_STATE_INIT: 1738 return IB_QPS_INIT; 1739 case QED_ROCE_QP_STATE_RTR: 1740 return IB_QPS_RTR; 1741 case QED_ROCE_QP_STATE_RTS: 1742 return IB_QPS_RTS; 1743 case QED_ROCE_QP_STATE_SQD: 1744 return IB_QPS_SQD; 1745 case QED_ROCE_QP_STATE_ERR: 1746 return IB_QPS_ERR; 1747 case QED_ROCE_QP_STATE_SQE: 1748 return IB_QPS_SQE; 1749 } 1750 return IB_QPS_ERR; 1751 } 1752 1753 static enum qed_roce_qp_state qedr_get_state_from_ibqp( 1754 enum ib_qp_state qp_state) 1755 { 1756 switch (qp_state) { 1757 case IB_QPS_RESET: 1758 return QED_ROCE_QP_STATE_RESET; 1759 case IB_QPS_INIT: 1760 return QED_ROCE_QP_STATE_INIT; 1761 case IB_QPS_RTR: 1762 return QED_ROCE_QP_STATE_RTR; 1763 case IB_QPS_RTS: 1764 return QED_ROCE_QP_STATE_RTS; 1765 case IB_QPS_SQD: 1766 return QED_ROCE_QP_STATE_SQD; 1767 case IB_QPS_ERR: 1768 return QED_ROCE_QP_STATE_ERR; 1769 default: 1770 return QED_ROCE_QP_STATE_ERR; 1771 } 1772 } 1773 1774 static void qedr_reset_qp_hwq_info(struct qedr_qp_hwq_info *qph) 1775 { 1776 qed_chain_reset(&qph->pbl); 1777 qph->prod = 0; 1778 qph->cons = 0; 1779 qph->wqe_cons = 0; 1780 qph->db_data.data.value = cpu_to_le16(0); 1781 } 1782 1783 static int qedr_update_qp_state(struct qedr_dev *dev, 1784 struct qedr_qp *qp, 1785 enum qed_roce_qp_state cur_state, 1786 enum qed_roce_qp_state new_state) 1787 { 1788 int status = 0; 1789 1790 if (new_state == cur_state) 1791 return 0; 1792 1793 switch (cur_state) { 1794 case QED_ROCE_QP_STATE_RESET: 1795 switch (new_state) { 1796 case QED_ROCE_QP_STATE_INIT: 1797 qp->prev_wqe_size = 0; 1798 qedr_reset_qp_hwq_info(&qp->sq); 1799 qedr_reset_qp_hwq_info(&qp->rq); 1800 break; 1801 default: 1802 status = -EINVAL; 1803 break; 1804 }; 1805 break; 1806 case QED_ROCE_QP_STATE_INIT: 1807 switch (new_state) { 1808 case QED_ROCE_QP_STATE_RTR: 1809 /* Update doorbell (in case post_recv was 1810 * done before move to RTR) 1811 */ 1812 1813 if (rdma_protocol_roce(&dev->ibdev, 1)) { 1814 writel(qp->rq.db_data.raw, qp->rq.db); 1815 /* Make sure write takes effect */ 1816 mmiowb(); 1817 } 1818 break; 1819 case QED_ROCE_QP_STATE_ERR: 1820 break; 1821 default: 1822 /* Invalid state change. */ 1823 status = -EINVAL; 1824 break; 1825 }; 1826 break; 1827 case QED_ROCE_QP_STATE_RTR: 1828 /* RTR->XXX */ 1829 switch (new_state) { 1830 case QED_ROCE_QP_STATE_RTS: 1831 break; 1832 case QED_ROCE_QP_STATE_ERR: 1833 break; 1834 default: 1835 /* Invalid state change. */ 1836 status = -EINVAL; 1837 break; 1838 }; 1839 break; 1840 case QED_ROCE_QP_STATE_RTS: 1841 /* RTS->XXX */ 1842 switch (new_state) { 1843 case QED_ROCE_QP_STATE_SQD: 1844 break; 1845 case QED_ROCE_QP_STATE_ERR: 1846 break; 1847 default: 1848 /* Invalid state change. */ 1849 status = -EINVAL; 1850 break; 1851 }; 1852 break; 1853 case QED_ROCE_QP_STATE_SQD: 1854 /* SQD->XXX */ 1855 switch (new_state) { 1856 case QED_ROCE_QP_STATE_RTS: 1857 case QED_ROCE_QP_STATE_ERR: 1858 break; 1859 default: 1860 /* Invalid state change. */ 1861 status = -EINVAL; 1862 break; 1863 }; 1864 break; 1865 case QED_ROCE_QP_STATE_ERR: 1866 /* ERR->XXX */ 1867 switch (new_state) { 1868 case QED_ROCE_QP_STATE_RESET: 1869 if ((qp->rq.prod != qp->rq.cons) || 1870 (qp->sq.prod != qp->sq.cons)) { 1871 DP_NOTICE(dev, 1872 "Error->Reset with rq/sq not empty rq.prod=%x rq.cons=%x sq.prod=%x sq.cons=%x\n", 1873 qp->rq.prod, qp->rq.cons, qp->sq.prod, 1874 qp->sq.cons); 1875 status = -EINVAL; 1876 } 1877 break; 1878 default: 1879 status = -EINVAL; 1880 break; 1881 }; 1882 break; 1883 default: 1884 status = -EINVAL; 1885 break; 1886 }; 1887 1888 return status; 1889 } 1890 1891 int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, 1892 int attr_mask, struct ib_udata *udata) 1893 { 1894 struct qedr_qp *qp = get_qedr_qp(ibqp); 1895 struct qed_rdma_modify_qp_in_params qp_params = { 0 }; 1896 struct qedr_dev *dev = get_qedr_dev(&qp->dev->ibdev); 1897 const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr); 1898 enum ib_qp_state old_qp_state, new_qp_state; 1899 enum qed_roce_qp_state cur_state; 1900 int rc = 0; 1901 1902 DP_DEBUG(dev, QEDR_MSG_QP, 1903 "modify qp: qp %p attr_mask=0x%x, state=%d", qp, attr_mask, 1904 attr->qp_state); 1905 1906 old_qp_state = qedr_get_ibqp_state(qp->state); 1907 if (attr_mask & IB_QP_STATE) 1908 new_qp_state = attr->qp_state; 1909 else 1910 new_qp_state = old_qp_state; 1911 1912 if (rdma_protocol_roce(&dev->ibdev, 1)) { 1913 if (!ib_modify_qp_is_ok(old_qp_state, new_qp_state, 1914 ibqp->qp_type, attr_mask, 1915 IB_LINK_LAYER_ETHERNET)) { 1916 DP_ERR(dev, 1917 "modify qp: invalid attribute mask=0x%x specified for\n" 1918 "qpn=0x%x of type=0x%x old_qp_state=0x%x, new_qp_state=0x%x\n", 1919 attr_mask, qp->qp_id, ibqp->qp_type, 1920 old_qp_state, new_qp_state); 1921 rc = -EINVAL; 1922 goto err; 1923 } 1924 } 1925 1926 /* Translate the masks... */ 1927 if (attr_mask & IB_QP_STATE) { 1928 SET_FIELD(qp_params.modify_flags, 1929 QED_RDMA_MODIFY_QP_VALID_NEW_STATE, 1); 1930 qp_params.new_state = qedr_get_state_from_ibqp(attr->qp_state); 1931 } 1932 1933 if (attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY) 1934 qp_params.sqd_async = true; 1935 1936 if (attr_mask & IB_QP_PKEY_INDEX) { 1937 SET_FIELD(qp_params.modify_flags, 1938 QED_ROCE_MODIFY_QP_VALID_PKEY, 1); 1939 if (attr->pkey_index >= QEDR_ROCE_PKEY_TABLE_LEN) { 1940 rc = -EINVAL; 1941 goto err; 1942 } 1943 1944 qp_params.pkey = QEDR_ROCE_PKEY_DEFAULT; 1945 } 1946 1947 if (attr_mask & IB_QP_QKEY) 1948 qp->qkey = attr->qkey; 1949 1950 if (attr_mask & IB_QP_ACCESS_FLAGS) { 1951 SET_FIELD(qp_params.modify_flags, 1952 QED_RDMA_MODIFY_QP_VALID_RDMA_OPS_EN, 1); 1953 qp_params.incoming_rdma_read_en = attr->qp_access_flags & 1954 IB_ACCESS_REMOTE_READ; 1955 qp_params.incoming_rdma_write_en = attr->qp_access_flags & 1956 IB_ACCESS_REMOTE_WRITE; 1957 qp_params.incoming_atomic_en = attr->qp_access_flags & 1958 IB_ACCESS_REMOTE_ATOMIC; 1959 } 1960 1961 if (attr_mask & (IB_QP_AV | IB_QP_PATH_MTU)) { 1962 if (attr_mask & IB_QP_PATH_MTU) { 1963 if (attr->path_mtu < IB_MTU_256 || 1964 attr->path_mtu > IB_MTU_4096) { 1965 pr_err("error: Only MTU sizes of 256, 512, 1024, 2048 and 4096 are supported by RoCE\n"); 1966 rc = -EINVAL; 1967 goto err; 1968 } 1969 qp->mtu = min(ib_mtu_enum_to_int(attr->path_mtu), 1970 ib_mtu_enum_to_int(iboe_get_mtu 1971 (dev->ndev->mtu))); 1972 } 1973 1974 if (!qp->mtu) { 1975 qp->mtu = 1976 ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu)); 1977 pr_err("Fixing zeroed MTU to qp->mtu = %d\n", qp->mtu); 1978 } 1979 1980 SET_FIELD(qp_params.modify_flags, 1981 QED_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR, 1); 1982 1983 qp_params.traffic_class_tos = grh->traffic_class; 1984 qp_params.flow_label = grh->flow_label; 1985 qp_params.hop_limit_ttl = grh->hop_limit; 1986 1987 qp->sgid_idx = grh->sgid_index; 1988 1989 rc = get_gid_info_from_table(ibqp, attr, attr_mask, &qp_params); 1990 if (rc) { 1991 DP_ERR(dev, 1992 "modify qp: problems with GID index %d (rc=%d)\n", 1993 grh->sgid_index, rc); 1994 return rc; 1995 } 1996 1997 rc = qedr_get_dmac(dev, &attr->ah_attr, 1998 qp_params.remote_mac_addr); 1999 if (rc) 2000 return rc; 2001 2002 qp_params.use_local_mac = true; 2003 ether_addr_copy(qp_params.local_mac_addr, dev->ndev->dev_addr); 2004 2005 DP_DEBUG(dev, QEDR_MSG_QP, "dgid=%x:%x:%x:%x\n", 2006 qp_params.dgid.dwords[0], qp_params.dgid.dwords[1], 2007 qp_params.dgid.dwords[2], qp_params.dgid.dwords[3]); 2008 DP_DEBUG(dev, QEDR_MSG_QP, "sgid=%x:%x:%x:%x\n", 2009 qp_params.sgid.dwords[0], qp_params.sgid.dwords[1], 2010 qp_params.sgid.dwords[2], qp_params.sgid.dwords[3]); 2011 DP_DEBUG(dev, QEDR_MSG_QP, "remote_mac=[%pM]\n", 2012 qp_params.remote_mac_addr); 2013 2014 qp_params.mtu = qp->mtu; 2015 qp_params.lb_indication = false; 2016 } 2017 2018 if (!qp_params.mtu) { 2019 /* Stay with current MTU */ 2020 if (qp->mtu) 2021 qp_params.mtu = qp->mtu; 2022 else 2023 qp_params.mtu = 2024 ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu)); 2025 } 2026 2027 if (attr_mask & IB_QP_TIMEOUT) { 2028 SET_FIELD(qp_params.modify_flags, 2029 QED_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT, 1); 2030 2031 /* The received timeout value is an exponent used like this: 2032 * "12.7.34 LOCAL ACK TIMEOUT 2033 * Value representing the transport (ACK) timeout for use by 2034 * the remote, expressed as: 4.096 * 2^timeout [usec]" 2035 * The FW expects timeout in msec so we need to divide the usec 2036 * result by 1000. We'll approximate 1000~2^10, and 4.096 ~ 2^2, 2037 * so we get: 2^2 * 2^timeout / 2^10 = 2^(timeout - 8). 2038 * The value of zero means infinite so we use a 'max_t' to make 2039 * sure that sub 1 msec values will be configured as 1 msec. 2040 */ 2041 if (attr->timeout) 2042 qp_params.ack_timeout = 2043 1 << max_t(int, attr->timeout - 8, 0); 2044 else 2045 qp_params.ack_timeout = 0; 2046 } 2047 2048 if (attr_mask & IB_QP_RETRY_CNT) { 2049 SET_FIELD(qp_params.modify_flags, 2050 QED_ROCE_MODIFY_QP_VALID_RETRY_CNT, 1); 2051 qp_params.retry_cnt = attr->retry_cnt; 2052 } 2053 2054 if (attr_mask & IB_QP_RNR_RETRY) { 2055 SET_FIELD(qp_params.modify_flags, 2056 QED_ROCE_MODIFY_QP_VALID_RNR_RETRY_CNT, 1); 2057 qp_params.rnr_retry_cnt = attr->rnr_retry; 2058 } 2059 2060 if (attr_mask & IB_QP_RQ_PSN) { 2061 SET_FIELD(qp_params.modify_flags, 2062 QED_ROCE_MODIFY_QP_VALID_RQ_PSN, 1); 2063 qp_params.rq_psn = attr->rq_psn; 2064 qp->rq_psn = attr->rq_psn; 2065 } 2066 2067 if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) { 2068 if (attr->max_rd_atomic > dev->attr.max_qp_req_rd_atomic_resc) { 2069 rc = -EINVAL; 2070 DP_ERR(dev, 2071 "unsupported max_rd_atomic=%d, supported=%d\n", 2072 attr->max_rd_atomic, 2073 dev->attr.max_qp_req_rd_atomic_resc); 2074 goto err; 2075 } 2076 2077 SET_FIELD(qp_params.modify_flags, 2078 QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_REQ, 1); 2079 qp_params.max_rd_atomic_req = attr->max_rd_atomic; 2080 } 2081 2082 if (attr_mask & IB_QP_MIN_RNR_TIMER) { 2083 SET_FIELD(qp_params.modify_flags, 2084 QED_ROCE_MODIFY_QP_VALID_MIN_RNR_NAK_TIMER, 1); 2085 qp_params.min_rnr_nak_timer = attr->min_rnr_timer; 2086 } 2087 2088 if (attr_mask & IB_QP_SQ_PSN) { 2089 SET_FIELD(qp_params.modify_flags, 2090 QED_ROCE_MODIFY_QP_VALID_SQ_PSN, 1); 2091 qp_params.sq_psn = attr->sq_psn; 2092 qp->sq_psn = attr->sq_psn; 2093 } 2094 2095 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) { 2096 if (attr->max_dest_rd_atomic > 2097 dev->attr.max_qp_resp_rd_atomic_resc) { 2098 DP_ERR(dev, 2099 "unsupported max_dest_rd_atomic=%d, supported=%d\n", 2100 attr->max_dest_rd_atomic, 2101 dev->attr.max_qp_resp_rd_atomic_resc); 2102 2103 rc = -EINVAL; 2104 goto err; 2105 } 2106 2107 SET_FIELD(qp_params.modify_flags, 2108 QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_RESP, 1); 2109 qp_params.max_rd_atomic_resp = attr->max_dest_rd_atomic; 2110 } 2111 2112 if (attr_mask & IB_QP_DEST_QPN) { 2113 SET_FIELD(qp_params.modify_flags, 2114 QED_ROCE_MODIFY_QP_VALID_DEST_QP, 1); 2115 2116 qp_params.dest_qp = attr->dest_qp_num; 2117 qp->dest_qp_num = attr->dest_qp_num; 2118 } 2119 2120 cur_state = qp->state; 2121 2122 /* Update the QP state before the actual ramrod to prevent a race with 2123 * fast path. Modifying the QP state to error will cause the device to 2124 * flush the CQEs and while polling the flushed CQEs will considered as 2125 * a potential issue if the QP isn't in error state. 2126 */ 2127 if ((attr_mask & IB_QP_STATE) && qp->qp_type != IB_QPT_GSI && 2128 !udata && qp_params.new_state == QED_ROCE_QP_STATE_ERR) 2129 qp->state = QED_ROCE_QP_STATE_ERR; 2130 2131 if (qp->qp_type != IB_QPT_GSI) 2132 rc = dev->ops->rdma_modify_qp(dev->rdma_ctx, 2133 qp->qed_qp, &qp_params); 2134 2135 if (attr_mask & IB_QP_STATE) { 2136 if ((qp->qp_type != IB_QPT_GSI) && (!udata)) 2137 rc = qedr_update_qp_state(dev, qp, cur_state, 2138 qp_params.new_state); 2139 qp->state = qp_params.new_state; 2140 } 2141 2142 err: 2143 return rc; 2144 } 2145 2146 static int qedr_to_ib_qp_acc_flags(struct qed_rdma_query_qp_out_params *params) 2147 { 2148 int ib_qp_acc_flags = 0; 2149 2150 if (params->incoming_rdma_write_en) 2151 ib_qp_acc_flags |= IB_ACCESS_REMOTE_WRITE; 2152 if (params->incoming_rdma_read_en) 2153 ib_qp_acc_flags |= IB_ACCESS_REMOTE_READ; 2154 if (params->incoming_atomic_en) 2155 ib_qp_acc_flags |= IB_ACCESS_REMOTE_ATOMIC; 2156 ib_qp_acc_flags |= IB_ACCESS_LOCAL_WRITE; 2157 return ib_qp_acc_flags; 2158 } 2159 2160 int qedr_query_qp(struct ib_qp *ibqp, 2161 struct ib_qp_attr *qp_attr, 2162 int attr_mask, struct ib_qp_init_attr *qp_init_attr) 2163 { 2164 struct qed_rdma_query_qp_out_params params; 2165 struct qedr_qp *qp = get_qedr_qp(ibqp); 2166 struct qedr_dev *dev = qp->dev; 2167 int rc = 0; 2168 2169 memset(¶ms, 0, sizeof(params)); 2170 2171 rc = dev->ops->rdma_query_qp(dev->rdma_ctx, qp->qed_qp, ¶ms); 2172 if (rc) 2173 goto err; 2174 2175 memset(qp_attr, 0, sizeof(*qp_attr)); 2176 memset(qp_init_attr, 0, sizeof(*qp_init_attr)); 2177 2178 qp_attr->qp_state = qedr_get_ibqp_state(params.state); 2179 qp_attr->cur_qp_state = qedr_get_ibqp_state(params.state); 2180 qp_attr->path_mtu = ib_mtu_int_to_enum(params.mtu); 2181 qp_attr->path_mig_state = IB_MIG_MIGRATED; 2182 qp_attr->rq_psn = params.rq_psn; 2183 qp_attr->sq_psn = params.sq_psn; 2184 qp_attr->dest_qp_num = params.dest_qp; 2185 2186 qp_attr->qp_access_flags = qedr_to_ib_qp_acc_flags(¶ms); 2187 2188 qp_attr->cap.max_send_wr = qp->sq.max_wr; 2189 qp_attr->cap.max_recv_wr = qp->rq.max_wr; 2190 qp_attr->cap.max_send_sge = qp->sq.max_sges; 2191 qp_attr->cap.max_recv_sge = qp->rq.max_sges; 2192 qp_attr->cap.max_inline_data = ROCE_REQ_MAX_INLINE_DATA_SIZE; 2193 qp_init_attr->cap = qp_attr->cap; 2194 2195 qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE; 2196 rdma_ah_set_grh(&qp_attr->ah_attr, NULL, 2197 params.flow_label, qp->sgid_idx, 2198 params.hop_limit_ttl, params.traffic_class_tos); 2199 rdma_ah_set_dgid_raw(&qp_attr->ah_attr, ¶ms.dgid.bytes[0]); 2200 rdma_ah_set_port_num(&qp_attr->ah_attr, 1); 2201 rdma_ah_set_sl(&qp_attr->ah_attr, 0); 2202 qp_attr->timeout = params.timeout; 2203 qp_attr->rnr_retry = params.rnr_retry; 2204 qp_attr->retry_cnt = params.retry_cnt; 2205 qp_attr->min_rnr_timer = params.min_rnr_nak_timer; 2206 qp_attr->pkey_index = params.pkey_index; 2207 qp_attr->port_num = 1; 2208 rdma_ah_set_path_bits(&qp_attr->ah_attr, 0); 2209 rdma_ah_set_static_rate(&qp_attr->ah_attr, 0); 2210 qp_attr->alt_pkey_index = 0; 2211 qp_attr->alt_port_num = 0; 2212 qp_attr->alt_timeout = 0; 2213 memset(&qp_attr->alt_ah_attr, 0, sizeof(qp_attr->alt_ah_attr)); 2214 2215 qp_attr->sq_draining = (params.state == QED_ROCE_QP_STATE_SQD) ? 1 : 0; 2216 qp_attr->max_dest_rd_atomic = params.max_dest_rd_atomic; 2217 qp_attr->max_rd_atomic = params.max_rd_atomic; 2218 qp_attr->en_sqd_async_notify = (params.sqd_async) ? 1 : 0; 2219 2220 DP_DEBUG(dev, QEDR_MSG_QP, "QEDR_QUERY_QP: max_inline_data=%d\n", 2221 qp_attr->cap.max_inline_data); 2222 2223 err: 2224 return rc; 2225 } 2226 2227 static int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp) 2228 { 2229 int rc = 0; 2230 2231 if (qp->qp_type != IB_QPT_GSI) { 2232 rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp); 2233 if (rc) 2234 return rc; 2235 } 2236 2237 if (qp->ibqp.uobject && qp->ibqp.uobject->context) 2238 qedr_cleanup_user(dev, qp); 2239 else 2240 qedr_cleanup_kernel(dev, qp); 2241 2242 return 0; 2243 } 2244 2245 int qedr_destroy_qp(struct ib_qp *ibqp) 2246 { 2247 struct qedr_qp *qp = get_qedr_qp(ibqp); 2248 struct qedr_dev *dev = qp->dev; 2249 struct ib_qp_attr attr; 2250 int attr_mask = 0; 2251 int rc = 0; 2252 2253 DP_DEBUG(dev, QEDR_MSG_QP, "destroy qp: destroying %p, qp type=%d\n", 2254 qp, qp->qp_type); 2255 2256 if (rdma_protocol_roce(&dev->ibdev, 1)) { 2257 if ((qp->state != QED_ROCE_QP_STATE_RESET) && 2258 (qp->state != QED_ROCE_QP_STATE_ERR) && 2259 (qp->state != QED_ROCE_QP_STATE_INIT)) { 2260 2261 attr.qp_state = IB_QPS_ERR; 2262 attr_mask |= IB_QP_STATE; 2263 2264 /* Change the QP state to ERROR */ 2265 qedr_modify_qp(ibqp, &attr, attr_mask, NULL); 2266 } 2267 } else { 2268 /* Wait for the connect/accept to complete */ 2269 if (qp->ep) { 2270 int wait_count = 1; 2271 2272 while (qp->ep->during_connect) { 2273 DP_DEBUG(dev, QEDR_MSG_QP, 2274 "Still in during connect/accept\n"); 2275 2276 msleep(100); 2277 if (wait_count++ > 200) { 2278 DP_NOTICE(dev, 2279 "during connect timeout\n"); 2280 break; 2281 } 2282 } 2283 } 2284 } 2285 2286 if (qp->qp_type == IB_QPT_GSI) 2287 qedr_destroy_gsi_qp(dev); 2288 2289 qedr_free_qp_resources(dev, qp); 2290 2291 if (atomic_dec_and_test(&qp->refcnt)) { 2292 qedr_idr_remove(dev, qp->qp_id); 2293 kfree(qp); 2294 } 2295 return rc; 2296 } 2297 2298 struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr, 2299 struct ib_udata *udata) 2300 { 2301 struct qedr_ah *ah; 2302 2303 ah = kzalloc(sizeof(*ah), GFP_ATOMIC); 2304 if (!ah) 2305 return ERR_PTR(-ENOMEM); 2306 2307 ah->attr = *attr; 2308 2309 return &ah->ibah; 2310 } 2311 2312 int qedr_destroy_ah(struct ib_ah *ibah) 2313 { 2314 struct qedr_ah *ah = get_qedr_ah(ibah); 2315 2316 kfree(ah); 2317 return 0; 2318 } 2319 2320 static void free_mr_info(struct qedr_dev *dev, struct mr_info *info) 2321 { 2322 struct qedr_pbl *pbl, *tmp; 2323 2324 if (info->pbl_table) 2325 list_add_tail(&info->pbl_table->list_entry, 2326 &info->free_pbl_list); 2327 2328 if (!list_empty(&info->inuse_pbl_list)) 2329 list_splice(&info->inuse_pbl_list, &info->free_pbl_list); 2330 2331 list_for_each_entry_safe(pbl, tmp, &info->free_pbl_list, list_entry) { 2332 list_del(&pbl->list_entry); 2333 qedr_free_pbl(dev, &info->pbl_info, pbl); 2334 } 2335 } 2336 2337 static int init_mr_info(struct qedr_dev *dev, struct mr_info *info, 2338 size_t page_list_len, bool two_layered) 2339 { 2340 struct qedr_pbl *tmp; 2341 int rc; 2342 2343 INIT_LIST_HEAD(&info->free_pbl_list); 2344 INIT_LIST_HEAD(&info->inuse_pbl_list); 2345 2346 rc = qedr_prepare_pbl_tbl(dev, &info->pbl_info, 2347 page_list_len, two_layered); 2348 if (rc) 2349 goto done; 2350 2351 info->pbl_table = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL); 2352 if (IS_ERR(info->pbl_table)) { 2353 rc = PTR_ERR(info->pbl_table); 2354 goto done; 2355 } 2356 2357 DP_DEBUG(dev, QEDR_MSG_MR, "pbl_table_pa = %pa\n", 2358 &info->pbl_table->pa); 2359 2360 /* in usual case we use 2 PBLs, so we add one to free 2361 * list and allocating another one 2362 */ 2363 tmp = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL); 2364 if (IS_ERR(tmp)) { 2365 DP_DEBUG(dev, QEDR_MSG_MR, "Extra PBL is not allocated\n"); 2366 goto done; 2367 } 2368 2369 list_add_tail(&tmp->list_entry, &info->free_pbl_list); 2370 2371 DP_DEBUG(dev, QEDR_MSG_MR, "extra pbl_table_pa = %pa\n", &tmp->pa); 2372 2373 done: 2374 if (rc) 2375 free_mr_info(dev, info); 2376 2377 return rc; 2378 } 2379 2380 struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len, 2381 u64 usr_addr, int acc, struct ib_udata *udata) 2382 { 2383 struct qedr_dev *dev = get_qedr_dev(ibpd->device); 2384 struct qedr_mr *mr; 2385 struct qedr_pd *pd; 2386 int rc = -ENOMEM; 2387 2388 pd = get_qedr_pd(ibpd); 2389 DP_DEBUG(dev, QEDR_MSG_MR, 2390 "qedr_register user mr pd = %d start = %lld, len = %lld, usr_addr = %lld, acc = %d\n", 2391 pd->pd_id, start, len, usr_addr, acc); 2392 2393 if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE)) 2394 return ERR_PTR(-EINVAL); 2395 2396 mr = kzalloc(sizeof(*mr), GFP_KERNEL); 2397 if (!mr) 2398 return ERR_PTR(rc); 2399 2400 mr->type = QEDR_MR_USER; 2401 2402 mr->umem = ib_umem_get(ibpd->uobject->context, start, len, acc, 0); 2403 if (IS_ERR(mr->umem)) { 2404 rc = -EFAULT; 2405 goto err0; 2406 } 2407 2408 rc = init_mr_info(dev, &mr->info, ib_umem_page_count(mr->umem), 1); 2409 if (rc) 2410 goto err1; 2411 2412 qedr_populate_pbls(dev, mr->umem, mr->info.pbl_table, 2413 &mr->info.pbl_info, mr->umem->page_shift); 2414 2415 rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid); 2416 if (rc) { 2417 DP_ERR(dev, "roce alloc tid returned an error %d\n", rc); 2418 goto err1; 2419 } 2420 2421 /* Index only, 18 bit long, lkey = itid << 8 | key */ 2422 mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR; 2423 mr->hw_mr.key = 0; 2424 mr->hw_mr.pd = pd->pd_id; 2425 mr->hw_mr.local_read = 1; 2426 mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0; 2427 mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0; 2428 mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0; 2429 mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0; 2430 mr->hw_mr.mw_bind = false; 2431 mr->hw_mr.pbl_ptr = mr->info.pbl_table[0].pa; 2432 mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered; 2433 mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size); 2434 mr->hw_mr.page_size_log = mr->umem->page_shift; 2435 mr->hw_mr.fbo = ib_umem_offset(mr->umem); 2436 mr->hw_mr.length = len; 2437 mr->hw_mr.vaddr = usr_addr; 2438 mr->hw_mr.zbva = false; 2439 mr->hw_mr.phy_mr = false; 2440 mr->hw_mr.dma_mr = false; 2441 2442 rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr); 2443 if (rc) { 2444 DP_ERR(dev, "roce register tid returned an error %d\n", rc); 2445 goto err2; 2446 } 2447 2448 mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key; 2449 if (mr->hw_mr.remote_write || mr->hw_mr.remote_read || 2450 mr->hw_mr.remote_atomic) 2451 mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key; 2452 2453 DP_DEBUG(dev, QEDR_MSG_MR, "register user mr lkey: %x\n", 2454 mr->ibmr.lkey); 2455 return &mr->ibmr; 2456 2457 err2: 2458 dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid); 2459 err1: 2460 qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table); 2461 err0: 2462 kfree(mr); 2463 return ERR_PTR(rc); 2464 } 2465 2466 int qedr_dereg_mr(struct ib_mr *ib_mr) 2467 { 2468 struct qedr_mr *mr = get_qedr_mr(ib_mr); 2469 struct qedr_dev *dev = get_qedr_dev(ib_mr->device); 2470 int rc = 0; 2471 2472 rc = dev->ops->rdma_deregister_tid(dev->rdma_ctx, mr->hw_mr.itid); 2473 if (rc) 2474 return rc; 2475 2476 dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid); 2477 2478 if ((mr->type != QEDR_MR_DMA) && (mr->type != QEDR_MR_FRMR)) 2479 qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table); 2480 2481 /* it could be user registered memory. */ 2482 if (mr->umem) 2483 ib_umem_release(mr->umem); 2484 2485 kfree(mr); 2486 2487 return rc; 2488 } 2489 2490 static struct qedr_mr *__qedr_alloc_mr(struct ib_pd *ibpd, 2491 int max_page_list_len) 2492 { 2493 struct qedr_pd *pd = get_qedr_pd(ibpd); 2494 struct qedr_dev *dev = get_qedr_dev(ibpd->device); 2495 struct qedr_mr *mr; 2496 int rc = -ENOMEM; 2497 2498 DP_DEBUG(dev, QEDR_MSG_MR, 2499 "qedr_alloc_frmr pd = %d max_page_list_len= %d\n", pd->pd_id, 2500 max_page_list_len); 2501 2502 mr = kzalloc(sizeof(*mr), GFP_KERNEL); 2503 if (!mr) 2504 return ERR_PTR(rc); 2505 2506 mr->dev = dev; 2507 mr->type = QEDR_MR_FRMR; 2508 2509 rc = init_mr_info(dev, &mr->info, max_page_list_len, 1); 2510 if (rc) 2511 goto err0; 2512 2513 rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid); 2514 if (rc) { 2515 DP_ERR(dev, "roce alloc tid returned an error %d\n", rc); 2516 goto err0; 2517 } 2518 2519 /* Index only, 18 bit long, lkey = itid << 8 | key */ 2520 mr->hw_mr.tid_type = QED_RDMA_TID_FMR; 2521 mr->hw_mr.key = 0; 2522 mr->hw_mr.pd = pd->pd_id; 2523 mr->hw_mr.local_read = 1; 2524 mr->hw_mr.local_write = 0; 2525 mr->hw_mr.remote_read = 0; 2526 mr->hw_mr.remote_write = 0; 2527 mr->hw_mr.remote_atomic = 0; 2528 mr->hw_mr.mw_bind = false; 2529 mr->hw_mr.pbl_ptr = 0; 2530 mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered; 2531 mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size); 2532 mr->hw_mr.fbo = 0; 2533 mr->hw_mr.length = 0; 2534 mr->hw_mr.vaddr = 0; 2535 mr->hw_mr.zbva = false; 2536 mr->hw_mr.phy_mr = true; 2537 mr->hw_mr.dma_mr = false; 2538 2539 rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr); 2540 if (rc) { 2541 DP_ERR(dev, "roce register tid returned an error %d\n", rc); 2542 goto err1; 2543 } 2544 2545 mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key; 2546 mr->ibmr.rkey = mr->ibmr.lkey; 2547 2548 DP_DEBUG(dev, QEDR_MSG_MR, "alloc frmr: %x\n", mr->ibmr.lkey); 2549 return mr; 2550 2551 err1: 2552 dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid); 2553 err0: 2554 kfree(mr); 2555 return ERR_PTR(rc); 2556 } 2557 2558 struct ib_mr *qedr_alloc_mr(struct ib_pd *ibpd, 2559 enum ib_mr_type mr_type, u32 max_num_sg) 2560 { 2561 struct qedr_mr *mr; 2562 2563 if (mr_type != IB_MR_TYPE_MEM_REG) 2564 return ERR_PTR(-EINVAL); 2565 2566 mr = __qedr_alloc_mr(ibpd, max_num_sg); 2567 2568 if (IS_ERR(mr)) 2569 return ERR_PTR(-EINVAL); 2570 2571 return &mr->ibmr; 2572 } 2573 2574 static int qedr_set_page(struct ib_mr *ibmr, u64 addr) 2575 { 2576 struct qedr_mr *mr = get_qedr_mr(ibmr); 2577 struct qedr_pbl *pbl_table; 2578 struct regpair *pbe; 2579 u32 pbes_in_page; 2580 2581 if (unlikely(mr->npages == mr->info.pbl_info.num_pbes)) { 2582 DP_ERR(mr->dev, "qedr_set_page failes when %d\n", mr->npages); 2583 return -ENOMEM; 2584 } 2585 2586 DP_DEBUG(mr->dev, QEDR_MSG_MR, "qedr_set_page pages[%d] = 0x%llx\n", 2587 mr->npages, addr); 2588 2589 pbes_in_page = mr->info.pbl_info.pbl_size / sizeof(u64); 2590 pbl_table = mr->info.pbl_table + (mr->npages / pbes_in_page); 2591 pbe = (struct regpair *)pbl_table->va; 2592 pbe += mr->npages % pbes_in_page; 2593 pbe->lo = cpu_to_le32((u32)addr); 2594 pbe->hi = cpu_to_le32((u32)upper_32_bits(addr)); 2595 2596 mr->npages++; 2597 2598 return 0; 2599 } 2600 2601 static void handle_completed_mrs(struct qedr_dev *dev, struct mr_info *info) 2602 { 2603 int work = info->completed - info->completed_handled - 1; 2604 2605 DP_DEBUG(dev, QEDR_MSG_MR, "Special FMR work = %d\n", work); 2606 while (work-- > 0 && !list_empty(&info->inuse_pbl_list)) { 2607 struct qedr_pbl *pbl; 2608 2609 /* Free all the page list that are possible to be freed 2610 * (all the ones that were invalidated), under the assumption 2611 * that if an FMR was completed successfully that means that 2612 * if there was an invalidate operation before it also ended 2613 */ 2614 pbl = list_first_entry(&info->inuse_pbl_list, 2615 struct qedr_pbl, list_entry); 2616 list_move_tail(&pbl->list_entry, &info->free_pbl_list); 2617 info->completed_handled++; 2618 } 2619 } 2620 2621 int qedr_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, 2622 int sg_nents, unsigned int *sg_offset) 2623 { 2624 struct qedr_mr *mr = get_qedr_mr(ibmr); 2625 2626 mr->npages = 0; 2627 2628 handle_completed_mrs(mr->dev, &mr->info); 2629 return ib_sg_to_pages(ibmr, sg, sg_nents, NULL, qedr_set_page); 2630 } 2631 2632 struct ib_mr *qedr_get_dma_mr(struct ib_pd *ibpd, int acc) 2633 { 2634 struct qedr_dev *dev = get_qedr_dev(ibpd->device); 2635 struct qedr_pd *pd = get_qedr_pd(ibpd); 2636 struct qedr_mr *mr; 2637 int rc; 2638 2639 mr = kzalloc(sizeof(*mr), GFP_KERNEL); 2640 if (!mr) 2641 return ERR_PTR(-ENOMEM); 2642 2643 mr->type = QEDR_MR_DMA; 2644 2645 rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid); 2646 if (rc) { 2647 DP_ERR(dev, "roce alloc tid returned an error %d\n", rc); 2648 goto err1; 2649 } 2650 2651 /* index only, 18 bit long, lkey = itid << 8 | key */ 2652 mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR; 2653 mr->hw_mr.pd = pd->pd_id; 2654 mr->hw_mr.local_read = 1; 2655 mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0; 2656 mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0; 2657 mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0; 2658 mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0; 2659 mr->hw_mr.dma_mr = true; 2660 2661 rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr); 2662 if (rc) { 2663 DP_ERR(dev, "roce register tid returned an error %d\n", rc); 2664 goto err2; 2665 } 2666 2667 mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key; 2668 if (mr->hw_mr.remote_write || mr->hw_mr.remote_read || 2669 mr->hw_mr.remote_atomic) 2670 mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key; 2671 2672 DP_DEBUG(dev, QEDR_MSG_MR, "get dma mr: lkey = %x\n", mr->ibmr.lkey); 2673 return &mr->ibmr; 2674 2675 err2: 2676 dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid); 2677 err1: 2678 kfree(mr); 2679 return ERR_PTR(rc); 2680 } 2681 2682 static inline int qedr_wq_is_full(struct qedr_qp_hwq_info *wq) 2683 { 2684 return (((wq->prod + 1) % wq->max_wr) == wq->cons); 2685 } 2686 2687 static int sge_data_len(struct ib_sge *sg_list, int num_sge) 2688 { 2689 int i, len = 0; 2690 2691 for (i = 0; i < num_sge; i++) 2692 len += sg_list[i].length; 2693 2694 return len; 2695 } 2696 2697 static void swap_wqe_data64(u64 *p) 2698 { 2699 int i; 2700 2701 for (i = 0; i < QEDR_SQE_ELEMENT_SIZE / sizeof(u64); i++, p++) 2702 *p = cpu_to_be64(cpu_to_le64(*p)); 2703 } 2704 2705 static u32 qedr_prepare_sq_inline_data(struct qedr_dev *dev, 2706 struct qedr_qp *qp, u8 *wqe_size, 2707 struct ib_send_wr *wr, 2708 struct ib_send_wr **bad_wr, u8 *bits, 2709 u8 bit) 2710 { 2711 u32 data_size = sge_data_len(wr->sg_list, wr->num_sge); 2712 char *seg_prt, *wqe; 2713 int i, seg_siz; 2714 2715 if (data_size > ROCE_REQ_MAX_INLINE_DATA_SIZE) { 2716 DP_ERR(dev, "Too much inline data in WR: %d\n", data_size); 2717 *bad_wr = wr; 2718 return 0; 2719 } 2720 2721 if (!data_size) 2722 return data_size; 2723 2724 *bits |= bit; 2725 2726 seg_prt = NULL; 2727 wqe = NULL; 2728 seg_siz = 0; 2729 2730 /* Copy data inline */ 2731 for (i = 0; i < wr->num_sge; i++) { 2732 u32 len = wr->sg_list[i].length; 2733 void *src = (void *)(uintptr_t)wr->sg_list[i].addr; 2734 2735 while (len > 0) { 2736 u32 cur; 2737 2738 /* New segment required */ 2739 if (!seg_siz) { 2740 wqe = (char *)qed_chain_produce(&qp->sq.pbl); 2741 seg_prt = wqe; 2742 seg_siz = sizeof(struct rdma_sq_common_wqe); 2743 (*wqe_size)++; 2744 } 2745 2746 /* Calculate currently allowed length */ 2747 cur = min_t(u32, len, seg_siz); 2748 memcpy(seg_prt, src, cur); 2749 2750 /* Update segment variables */ 2751 seg_prt += cur; 2752 seg_siz -= cur; 2753 2754 /* Update sge variables */ 2755 src += cur; 2756 len -= cur; 2757 2758 /* Swap fully-completed segments */ 2759 if (!seg_siz) 2760 swap_wqe_data64((u64 *)wqe); 2761 } 2762 } 2763 2764 /* swap last not completed segment */ 2765 if (seg_siz) 2766 swap_wqe_data64((u64 *)wqe); 2767 2768 return data_size; 2769 } 2770 2771 #define RQ_SGE_SET(sge, vaddr, vlength, vflags) \ 2772 do { \ 2773 DMA_REGPAIR_LE(sge->addr, vaddr); \ 2774 (sge)->length = cpu_to_le32(vlength); \ 2775 (sge)->flags = cpu_to_le32(vflags); \ 2776 } while (0) 2777 2778 #define SRQ_HDR_SET(hdr, vwr_id, num_sge) \ 2779 do { \ 2780 DMA_REGPAIR_LE(hdr->wr_id, vwr_id); \ 2781 (hdr)->num_sges = num_sge; \ 2782 } while (0) 2783 2784 #define SRQ_SGE_SET(sge, vaddr, vlength, vlkey) \ 2785 do { \ 2786 DMA_REGPAIR_LE(sge->addr, vaddr); \ 2787 (sge)->length = cpu_to_le32(vlength); \ 2788 (sge)->l_key = cpu_to_le32(vlkey); \ 2789 } while (0) 2790 2791 static u32 qedr_prepare_sq_sges(struct qedr_qp *qp, u8 *wqe_size, 2792 struct ib_send_wr *wr) 2793 { 2794 u32 data_size = 0; 2795 int i; 2796 2797 for (i = 0; i < wr->num_sge; i++) { 2798 struct rdma_sq_sge *sge = qed_chain_produce(&qp->sq.pbl); 2799 2800 DMA_REGPAIR_LE(sge->addr, wr->sg_list[i].addr); 2801 sge->l_key = cpu_to_le32(wr->sg_list[i].lkey); 2802 sge->length = cpu_to_le32(wr->sg_list[i].length); 2803 data_size += wr->sg_list[i].length; 2804 } 2805 2806 if (wqe_size) 2807 *wqe_size += wr->num_sge; 2808 2809 return data_size; 2810 } 2811 2812 static u32 qedr_prepare_sq_rdma_data(struct qedr_dev *dev, 2813 struct qedr_qp *qp, 2814 struct rdma_sq_rdma_wqe_1st *rwqe, 2815 struct rdma_sq_rdma_wqe_2nd *rwqe2, 2816 struct ib_send_wr *wr, 2817 struct ib_send_wr **bad_wr) 2818 { 2819 rwqe2->r_key = cpu_to_le32(rdma_wr(wr)->rkey); 2820 DMA_REGPAIR_LE(rwqe2->remote_va, rdma_wr(wr)->remote_addr); 2821 2822 if (wr->send_flags & IB_SEND_INLINE && 2823 (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM || 2824 wr->opcode == IB_WR_RDMA_WRITE)) { 2825 u8 flags = 0; 2826 2827 SET_FIELD2(flags, RDMA_SQ_RDMA_WQE_1ST_INLINE_FLG, 1); 2828 return qedr_prepare_sq_inline_data(dev, qp, &rwqe->wqe_size, wr, 2829 bad_wr, &rwqe->flags, flags); 2830 } 2831 2832 return qedr_prepare_sq_sges(qp, &rwqe->wqe_size, wr); 2833 } 2834 2835 static u32 qedr_prepare_sq_send_data(struct qedr_dev *dev, 2836 struct qedr_qp *qp, 2837 struct rdma_sq_send_wqe_1st *swqe, 2838 struct rdma_sq_send_wqe_2st *swqe2, 2839 struct ib_send_wr *wr, 2840 struct ib_send_wr **bad_wr) 2841 { 2842 memset(swqe2, 0, sizeof(*swqe2)); 2843 if (wr->send_flags & IB_SEND_INLINE) { 2844 u8 flags = 0; 2845 2846 SET_FIELD2(flags, RDMA_SQ_SEND_WQE_INLINE_FLG, 1); 2847 return qedr_prepare_sq_inline_data(dev, qp, &swqe->wqe_size, wr, 2848 bad_wr, &swqe->flags, flags); 2849 } 2850 2851 return qedr_prepare_sq_sges(qp, &swqe->wqe_size, wr); 2852 } 2853 2854 static int qedr_prepare_reg(struct qedr_qp *qp, 2855 struct rdma_sq_fmr_wqe_1st *fwqe1, 2856 struct ib_reg_wr *wr) 2857 { 2858 struct qedr_mr *mr = get_qedr_mr(wr->mr); 2859 struct rdma_sq_fmr_wqe_2nd *fwqe2; 2860 2861 fwqe2 = (struct rdma_sq_fmr_wqe_2nd *)qed_chain_produce(&qp->sq.pbl); 2862 fwqe1->addr.hi = upper_32_bits(mr->ibmr.iova); 2863 fwqe1->addr.lo = lower_32_bits(mr->ibmr.iova); 2864 fwqe1->l_key = wr->key; 2865 2866 fwqe2->access_ctrl = 0; 2867 2868 SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_READ, 2869 !!(wr->access & IB_ACCESS_REMOTE_READ)); 2870 SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_WRITE, 2871 !!(wr->access & IB_ACCESS_REMOTE_WRITE)); 2872 SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_ENABLE_ATOMIC, 2873 !!(wr->access & IB_ACCESS_REMOTE_ATOMIC)); 2874 SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_READ, 1); 2875 SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_WRITE, 2876 !!(wr->access & IB_ACCESS_LOCAL_WRITE)); 2877 fwqe2->fmr_ctrl = 0; 2878 2879 SET_FIELD2(fwqe2->fmr_ctrl, RDMA_SQ_FMR_WQE_2ND_PAGE_SIZE_LOG, 2880 ilog2(mr->ibmr.page_size) - 12); 2881 2882 fwqe2->length_hi = 0; 2883 fwqe2->length_lo = mr->ibmr.length; 2884 fwqe2->pbl_addr.hi = upper_32_bits(mr->info.pbl_table->pa); 2885 fwqe2->pbl_addr.lo = lower_32_bits(mr->info.pbl_table->pa); 2886 2887 qp->wqe_wr_id[qp->sq.prod].mr = mr; 2888 2889 return 0; 2890 } 2891 2892 static enum ib_wc_opcode qedr_ib_to_wc_opcode(enum ib_wr_opcode opcode) 2893 { 2894 switch (opcode) { 2895 case IB_WR_RDMA_WRITE: 2896 case IB_WR_RDMA_WRITE_WITH_IMM: 2897 return IB_WC_RDMA_WRITE; 2898 case IB_WR_SEND_WITH_IMM: 2899 case IB_WR_SEND: 2900 case IB_WR_SEND_WITH_INV: 2901 return IB_WC_SEND; 2902 case IB_WR_RDMA_READ: 2903 case IB_WR_RDMA_READ_WITH_INV: 2904 return IB_WC_RDMA_READ; 2905 case IB_WR_ATOMIC_CMP_AND_SWP: 2906 return IB_WC_COMP_SWAP; 2907 case IB_WR_ATOMIC_FETCH_AND_ADD: 2908 return IB_WC_FETCH_ADD; 2909 case IB_WR_REG_MR: 2910 return IB_WC_REG_MR; 2911 case IB_WR_LOCAL_INV: 2912 return IB_WC_LOCAL_INV; 2913 default: 2914 return IB_WC_SEND; 2915 } 2916 } 2917 2918 static inline bool qedr_can_post_send(struct qedr_qp *qp, struct ib_send_wr *wr) 2919 { 2920 int wq_is_full, err_wr, pbl_is_full; 2921 struct qedr_dev *dev = qp->dev; 2922 2923 /* prevent SQ overflow and/or processing of a bad WR */ 2924 err_wr = wr->num_sge > qp->sq.max_sges; 2925 wq_is_full = qedr_wq_is_full(&qp->sq); 2926 pbl_is_full = qed_chain_get_elem_left_u32(&qp->sq.pbl) < 2927 QEDR_MAX_SQE_ELEMENTS_PER_SQE; 2928 if (wq_is_full || err_wr || pbl_is_full) { 2929 if (wq_is_full && !(qp->err_bitmap & QEDR_QP_ERR_SQ_FULL)) { 2930 DP_ERR(dev, 2931 "error: WQ is full. Post send on QP %p failed (this error appears only once)\n", 2932 qp); 2933 qp->err_bitmap |= QEDR_QP_ERR_SQ_FULL; 2934 } 2935 2936 if (err_wr && !(qp->err_bitmap & QEDR_QP_ERR_BAD_SR)) { 2937 DP_ERR(dev, 2938 "error: WR is bad. Post send on QP %p failed (this error appears only once)\n", 2939 qp); 2940 qp->err_bitmap |= QEDR_QP_ERR_BAD_SR; 2941 } 2942 2943 if (pbl_is_full && 2944 !(qp->err_bitmap & QEDR_QP_ERR_SQ_PBL_FULL)) { 2945 DP_ERR(dev, 2946 "error: WQ PBL is full. Post send on QP %p failed (this error appears only once)\n", 2947 qp); 2948 qp->err_bitmap |= QEDR_QP_ERR_SQ_PBL_FULL; 2949 } 2950 return false; 2951 } 2952 return true; 2953 } 2954 2955 static int __qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, 2956 struct ib_send_wr **bad_wr) 2957 { 2958 struct qedr_dev *dev = get_qedr_dev(ibqp->device); 2959 struct qedr_qp *qp = get_qedr_qp(ibqp); 2960 struct rdma_sq_atomic_wqe_1st *awqe1; 2961 struct rdma_sq_atomic_wqe_2nd *awqe2; 2962 struct rdma_sq_atomic_wqe_3rd *awqe3; 2963 struct rdma_sq_send_wqe_2st *swqe2; 2964 struct rdma_sq_local_inv_wqe *iwqe; 2965 struct rdma_sq_rdma_wqe_2nd *rwqe2; 2966 struct rdma_sq_send_wqe_1st *swqe; 2967 struct rdma_sq_rdma_wqe_1st *rwqe; 2968 struct rdma_sq_fmr_wqe_1st *fwqe1; 2969 struct rdma_sq_common_wqe *wqe; 2970 u32 length; 2971 int rc = 0; 2972 bool comp; 2973 2974 if (!qedr_can_post_send(qp, wr)) { 2975 *bad_wr = wr; 2976 return -ENOMEM; 2977 } 2978 2979 wqe = qed_chain_produce(&qp->sq.pbl); 2980 qp->wqe_wr_id[qp->sq.prod].signaled = 2981 !!(wr->send_flags & IB_SEND_SIGNALED) || qp->signaled; 2982 2983 wqe->flags = 0; 2984 SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_SE_FLG, 2985 !!(wr->send_flags & IB_SEND_SOLICITED)); 2986 comp = (!!(wr->send_flags & IB_SEND_SIGNALED)) || qp->signaled; 2987 SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_COMP_FLG, comp); 2988 SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_RD_FENCE_FLG, 2989 !!(wr->send_flags & IB_SEND_FENCE)); 2990 wqe->prev_wqe_size = qp->prev_wqe_size; 2991 2992 qp->wqe_wr_id[qp->sq.prod].opcode = qedr_ib_to_wc_opcode(wr->opcode); 2993 2994 switch (wr->opcode) { 2995 case IB_WR_SEND_WITH_IMM: 2996 if (unlikely(rdma_protocol_iwarp(&dev->ibdev, 1))) { 2997 rc = -EINVAL; 2998 *bad_wr = wr; 2999 break; 3000 } 3001 wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_IMM; 3002 swqe = (struct rdma_sq_send_wqe_1st *)wqe; 3003 swqe->wqe_size = 2; 3004 swqe2 = qed_chain_produce(&qp->sq.pbl); 3005 3006 swqe->inv_key_or_imm_data = cpu_to_le32(be32_to_cpu(wr->ex.imm_data)); 3007 length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2, 3008 wr, bad_wr); 3009 swqe->length = cpu_to_le32(length); 3010 qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size; 3011 qp->prev_wqe_size = swqe->wqe_size; 3012 qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length; 3013 break; 3014 case IB_WR_SEND: 3015 wqe->req_type = RDMA_SQ_REQ_TYPE_SEND; 3016 swqe = (struct rdma_sq_send_wqe_1st *)wqe; 3017 3018 swqe->wqe_size = 2; 3019 swqe2 = qed_chain_produce(&qp->sq.pbl); 3020 length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2, 3021 wr, bad_wr); 3022 swqe->length = cpu_to_le32(length); 3023 qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size; 3024 qp->prev_wqe_size = swqe->wqe_size; 3025 qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length; 3026 break; 3027 case IB_WR_SEND_WITH_INV: 3028 wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_INVALIDATE; 3029 swqe = (struct rdma_sq_send_wqe_1st *)wqe; 3030 swqe2 = qed_chain_produce(&qp->sq.pbl); 3031 swqe->wqe_size = 2; 3032 swqe->inv_key_or_imm_data = cpu_to_le32(wr->ex.invalidate_rkey); 3033 length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2, 3034 wr, bad_wr); 3035 swqe->length = cpu_to_le32(length); 3036 qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size; 3037 qp->prev_wqe_size = swqe->wqe_size; 3038 qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length; 3039 break; 3040 3041 case IB_WR_RDMA_WRITE_WITH_IMM: 3042 if (unlikely(rdma_protocol_iwarp(&dev->ibdev, 1))) { 3043 rc = -EINVAL; 3044 *bad_wr = wr; 3045 break; 3046 } 3047 wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR_WITH_IMM; 3048 rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe; 3049 3050 rwqe->wqe_size = 2; 3051 rwqe->imm_data = htonl(cpu_to_le32(wr->ex.imm_data)); 3052 rwqe2 = qed_chain_produce(&qp->sq.pbl); 3053 length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2, 3054 wr, bad_wr); 3055 rwqe->length = cpu_to_le32(length); 3056 qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size; 3057 qp->prev_wqe_size = rwqe->wqe_size; 3058 qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length; 3059 break; 3060 case IB_WR_RDMA_WRITE: 3061 wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR; 3062 rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe; 3063 3064 rwqe->wqe_size = 2; 3065 rwqe2 = qed_chain_produce(&qp->sq.pbl); 3066 length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2, 3067 wr, bad_wr); 3068 rwqe->length = cpu_to_le32(length); 3069 qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size; 3070 qp->prev_wqe_size = rwqe->wqe_size; 3071 qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length; 3072 break; 3073 case IB_WR_RDMA_READ_WITH_INV: 3074 SET_FIELD2(wqe->flags, RDMA_SQ_RDMA_WQE_1ST_READ_INV_FLG, 1); 3075 /* fallthrough -- same is identical to RDMA READ */ 3076 3077 case IB_WR_RDMA_READ: 3078 wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_RD; 3079 rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe; 3080 3081 rwqe->wqe_size = 2; 3082 rwqe2 = qed_chain_produce(&qp->sq.pbl); 3083 length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2, 3084 wr, bad_wr); 3085 rwqe->length = cpu_to_le32(length); 3086 qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size; 3087 qp->prev_wqe_size = rwqe->wqe_size; 3088 qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length; 3089 break; 3090 3091 case IB_WR_ATOMIC_CMP_AND_SWP: 3092 case IB_WR_ATOMIC_FETCH_AND_ADD: 3093 awqe1 = (struct rdma_sq_atomic_wqe_1st *)wqe; 3094 awqe1->wqe_size = 4; 3095 3096 awqe2 = qed_chain_produce(&qp->sq.pbl); 3097 DMA_REGPAIR_LE(awqe2->remote_va, atomic_wr(wr)->remote_addr); 3098 awqe2->r_key = cpu_to_le32(atomic_wr(wr)->rkey); 3099 3100 awqe3 = qed_chain_produce(&qp->sq.pbl); 3101 3102 if (wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) { 3103 wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_ADD; 3104 DMA_REGPAIR_LE(awqe3->swap_data, 3105 atomic_wr(wr)->compare_add); 3106 } else { 3107 wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_CMP_AND_SWAP; 3108 DMA_REGPAIR_LE(awqe3->swap_data, 3109 atomic_wr(wr)->swap); 3110 DMA_REGPAIR_LE(awqe3->cmp_data, 3111 atomic_wr(wr)->compare_add); 3112 } 3113 3114 qedr_prepare_sq_sges(qp, NULL, wr); 3115 3116 qp->wqe_wr_id[qp->sq.prod].wqe_size = awqe1->wqe_size; 3117 qp->prev_wqe_size = awqe1->wqe_size; 3118 break; 3119 3120 case IB_WR_LOCAL_INV: 3121 iwqe = (struct rdma_sq_local_inv_wqe *)wqe; 3122 iwqe->wqe_size = 1; 3123 3124 iwqe->req_type = RDMA_SQ_REQ_TYPE_LOCAL_INVALIDATE; 3125 iwqe->inv_l_key = wr->ex.invalidate_rkey; 3126 qp->wqe_wr_id[qp->sq.prod].wqe_size = iwqe->wqe_size; 3127 qp->prev_wqe_size = iwqe->wqe_size; 3128 break; 3129 case IB_WR_REG_MR: 3130 DP_DEBUG(dev, QEDR_MSG_CQ, "REG_MR\n"); 3131 wqe->req_type = RDMA_SQ_REQ_TYPE_FAST_MR; 3132 fwqe1 = (struct rdma_sq_fmr_wqe_1st *)wqe; 3133 fwqe1->wqe_size = 2; 3134 3135 rc = qedr_prepare_reg(qp, fwqe1, reg_wr(wr)); 3136 if (rc) { 3137 DP_ERR(dev, "IB_REG_MR failed rc=%d\n", rc); 3138 *bad_wr = wr; 3139 break; 3140 } 3141 3142 qp->wqe_wr_id[qp->sq.prod].wqe_size = fwqe1->wqe_size; 3143 qp->prev_wqe_size = fwqe1->wqe_size; 3144 break; 3145 default: 3146 DP_ERR(dev, "invalid opcode 0x%x!\n", wr->opcode); 3147 rc = -EINVAL; 3148 *bad_wr = wr; 3149 break; 3150 } 3151 3152 if (*bad_wr) { 3153 u16 value; 3154 3155 /* Restore prod to its position before 3156 * this WR was processed 3157 */ 3158 value = le16_to_cpu(qp->sq.db_data.data.value); 3159 qed_chain_set_prod(&qp->sq.pbl, value, wqe); 3160 3161 /* Restore prev_wqe_size */ 3162 qp->prev_wqe_size = wqe->prev_wqe_size; 3163 rc = -EINVAL; 3164 DP_ERR(dev, "POST SEND FAILED\n"); 3165 } 3166 3167 return rc; 3168 } 3169 3170 int qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, 3171 struct ib_send_wr **bad_wr) 3172 { 3173 struct qedr_dev *dev = get_qedr_dev(ibqp->device); 3174 struct qedr_qp *qp = get_qedr_qp(ibqp); 3175 unsigned long flags; 3176 int rc = 0; 3177 3178 *bad_wr = NULL; 3179 3180 if (qp->qp_type == IB_QPT_GSI) 3181 return qedr_gsi_post_send(ibqp, wr, bad_wr); 3182 3183 spin_lock_irqsave(&qp->q_lock, flags); 3184 3185 if (rdma_protocol_roce(&dev->ibdev, 1)) { 3186 if ((qp->state != QED_ROCE_QP_STATE_RTS) && 3187 (qp->state != QED_ROCE_QP_STATE_ERR) && 3188 (qp->state != QED_ROCE_QP_STATE_SQD)) { 3189 spin_unlock_irqrestore(&qp->q_lock, flags); 3190 *bad_wr = wr; 3191 DP_DEBUG(dev, QEDR_MSG_CQ, 3192 "QP in wrong state! QP icid=0x%x state %d\n", 3193 qp->icid, qp->state); 3194 return -EINVAL; 3195 } 3196 } 3197 3198 while (wr) { 3199 rc = __qedr_post_send(ibqp, wr, bad_wr); 3200 if (rc) 3201 break; 3202 3203 qp->wqe_wr_id[qp->sq.prod].wr_id = wr->wr_id; 3204 3205 qedr_inc_sw_prod(&qp->sq); 3206 3207 qp->sq.db_data.data.value++; 3208 3209 wr = wr->next; 3210 } 3211 3212 /* Trigger doorbell 3213 * If there was a failure in the first WR then it will be triggered in 3214 * vane. However this is not harmful (as long as the producer value is 3215 * unchanged). For performance reasons we avoid checking for this 3216 * redundant doorbell. 3217 * 3218 * qp->wqe_wr_id is accessed during qedr_poll_cq, as 3219 * soon as we give the doorbell, we could get a completion 3220 * for this wr, therefore we need to make sure that the 3221 * memory is updated before giving the doorbell. 3222 * During qedr_poll_cq, rmb is called before accessing the 3223 * cqe. This covers for the smp_rmb as well. 3224 */ 3225 smp_wmb(); 3226 writel(qp->sq.db_data.raw, qp->sq.db); 3227 3228 /* Make sure write sticks */ 3229 mmiowb(); 3230 3231 spin_unlock_irqrestore(&qp->q_lock, flags); 3232 3233 return rc; 3234 } 3235 3236 int qedr_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, 3237 struct ib_recv_wr **bad_wr) 3238 { 3239 struct qedr_qp *qp = get_qedr_qp(ibqp); 3240 struct qedr_dev *dev = qp->dev; 3241 unsigned long flags; 3242 int status = 0; 3243 3244 if (qp->qp_type == IB_QPT_GSI) 3245 return qedr_gsi_post_recv(ibqp, wr, bad_wr); 3246 3247 spin_lock_irqsave(&qp->q_lock, flags); 3248 3249 if (qp->state == QED_ROCE_QP_STATE_RESET) { 3250 spin_unlock_irqrestore(&qp->q_lock, flags); 3251 *bad_wr = wr; 3252 return -EINVAL; 3253 } 3254 3255 while (wr) { 3256 int i; 3257 3258 if (qed_chain_get_elem_left_u32(&qp->rq.pbl) < 3259 QEDR_MAX_RQE_ELEMENTS_PER_RQE || 3260 wr->num_sge > qp->rq.max_sges) { 3261 DP_ERR(dev, "Can't post WR (%d < %d) || (%d > %d)\n", 3262 qed_chain_get_elem_left_u32(&qp->rq.pbl), 3263 QEDR_MAX_RQE_ELEMENTS_PER_RQE, wr->num_sge, 3264 qp->rq.max_sges); 3265 status = -ENOMEM; 3266 *bad_wr = wr; 3267 break; 3268 } 3269 for (i = 0; i < wr->num_sge; i++) { 3270 u32 flags = 0; 3271 struct rdma_rq_sge *rqe = 3272 qed_chain_produce(&qp->rq.pbl); 3273 3274 /* First one must include the number 3275 * of SGE in the list 3276 */ 3277 if (!i) 3278 SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES, 3279 wr->num_sge); 3280 3281 SET_FIELD(flags, RDMA_RQ_SGE_L_KEY, 3282 wr->sg_list[i].lkey); 3283 3284 RQ_SGE_SET(rqe, wr->sg_list[i].addr, 3285 wr->sg_list[i].length, flags); 3286 } 3287 3288 /* Special case of no sges. FW requires between 1-4 sges... 3289 * in this case we need to post 1 sge with length zero. this is 3290 * because rdma write with immediate consumes an RQ. 3291 */ 3292 if (!wr->num_sge) { 3293 u32 flags = 0; 3294 struct rdma_rq_sge *rqe = 3295 qed_chain_produce(&qp->rq.pbl); 3296 3297 /* First one must include the number 3298 * of SGE in the list 3299 */ 3300 SET_FIELD(flags, RDMA_RQ_SGE_L_KEY, 0); 3301 SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES, 1); 3302 3303 RQ_SGE_SET(rqe, 0, 0, flags); 3304 i = 1; 3305 } 3306 3307 qp->rqe_wr_id[qp->rq.prod].wr_id = wr->wr_id; 3308 qp->rqe_wr_id[qp->rq.prod].wqe_size = i; 3309 3310 qedr_inc_sw_prod(&qp->rq); 3311 3312 /* qp->rqe_wr_id is accessed during qedr_poll_cq, as 3313 * soon as we give the doorbell, we could get a completion 3314 * for this wr, therefore we need to make sure that the 3315 * memory is update before giving the doorbell. 3316 * During qedr_poll_cq, rmb is called before accessing the 3317 * cqe. This covers for the smp_rmb as well. 3318 */ 3319 smp_wmb(); 3320 3321 qp->rq.db_data.data.value++; 3322 3323 writel(qp->rq.db_data.raw, qp->rq.db); 3324 3325 /* Make sure write sticks */ 3326 mmiowb(); 3327 3328 if (rdma_protocol_iwarp(&dev->ibdev, 1)) { 3329 writel(qp->rq.iwarp_db2_data.raw, qp->rq.iwarp_db2); 3330 mmiowb(); /* for second doorbell */ 3331 } 3332 3333 wr = wr->next; 3334 } 3335 3336 spin_unlock_irqrestore(&qp->q_lock, flags); 3337 3338 return status; 3339 } 3340 3341 static int is_valid_cqe(struct qedr_cq *cq, union rdma_cqe *cqe) 3342 { 3343 struct rdma_cqe_requester *resp_cqe = &cqe->req; 3344 3345 return (resp_cqe->flags & RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK) == 3346 cq->pbl_toggle; 3347 } 3348 3349 static struct qedr_qp *cqe_get_qp(union rdma_cqe *cqe) 3350 { 3351 struct rdma_cqe_requester *resp_cqe = &cqe->req; 3352 struct qedr_qp *qp; 3353 3354 qp = (struct qedr_qp *)(uintptr_t)HILO_GEN(resp_cqe->qp_handle.hi, 3355 resp_cqe->qp_handle.lo, 3356 u64); 3357 return qp; 3358 } 3359 3360 static enum rdma_cqe_type cqe_get_type(union rdma_cqe *cqe) 3361 { 3362 struct rdma_cqe_requester *resp_cqe = &cqe->req; 3363 3364 return GET_FIELD(resp_cqe->flags, RDMA_CQE_REQUESTER_TYPE); 3365 } 3366 3367 /* Return latest CQE (needs processing) */ 3368 static union rdma_cqe *get_cqe(struct qedr_cq *cq) 3369 { 3370 return cq->latest_cqe; 3371 } 3372 3373 /* In fmr we need to increase the number of fmr completed counter for the fmr 3374 * algorithm determining whether we can free a pbl or not. 3375 * we need to perform this whether the work request was signaled or not. for 3376 * this purpose we call this function from the condition that checks if a wr 3377 * should be skipped, to make sure we don't miss it ( possibly this fmr 3378 * operation was not signalted) 3379 */ 3380 static inline void qedr_chk_if_fmr(struct qedr_qp *qp) 3381 { 3382 if (qp->wqe_wr_id[qp->sq.cons].opcode == IB_WC_REG_MR) 3383 qp->wqe_wr_id[qp->sq.cons].mr->info.completed++; 3384 } 3385 3386 static int process_req(struct qedr_dev *dev, struct qedr_qp *qp, 3387 struct qedr_cq *cq, int num_entries, 3388 struct ib_wc *wc, u16 hw_cons, enum ib_wc_status status, 3389 int force) 3390 { 3391 u16 cnt = 0; 3392 3393 while (num_entries && qp->sq.wqe_cons != hw_cons) { 3394 if (!qp->wqe_wr_id[qp->sq.cons].signaled && !force) { 3395 qedr_chk_if_fmr(qp); 3396 /* skip WC */ 3397 goto next_cqe; 3398 } 3399 3400 /* fill WC */ 3401 wc->status = status; 3402 wc->vendor_err = 0; 3403 wc->wc_flags = 0; 3404 wc->src_qp = qp->id; 3405 wc->qp = &qp->ibqp; 3406 3407 wc->wr_id = qp->wqe_wr_id[qp->sq.cons].wr_id; 3408 wc->opcode = qp->wqe_wr_id[qp->sq.cons].opcode; 3409 3410 switch (wc->opcode) { 3411 case IB_WC_RDMA_WRITE: 3412 wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len; 3413 break; 3414 case IB_WC_COMP_SWAP: 3415 case IB_WC_FETCH_ADD: 3416 wc->byte_len = 8; 3417 break; 3418 case IB_WC_REG_MR: 3419 qp->wqe_wr_id[qp->sq.cons].mr->info.completed++; 3420 break; 3421 case IB_WC_RDMA_READ: 3422 case IB_WC_SEND: 3423 wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len; 3424 break; 3425 default: 3426 break; 3427 } 3428 3429 num_entries--; 3430 wc++; 3431 cnt++; 3432 next_cqe: 3433 while (qp->wqe_wr_id[qp->sq.cons].wqe_size--) 3434 qed_chain_consume(&qp->sq.pbl); 3435 qedr_inc_sw_cons(&qp->sq); 3436 } 3437 3438 return cnt; 3439 } 3440 3441 static int qedr_poll_cq_req(struct qedr_dev *dev, 3442 struct qedr_qp *qp, struct qedr_cq *cq, 3443 int num_entries, struct ib_wc *wc, 3444 struct rdma_cqe_requester *req) 3445 { 3446 int cnt = 0; 3447 3448 switch (req->status) { 3449 case RDMA_CQE_REQ_STS_OK: 3450 cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons, 3451 IB_WC_SUCCESS, 0); 3452 break; 3453 case RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR: 3454 if (qp->state != QED_ROCE_QP_STATE_ERR) 3455 DP_DEBUG(dev, QEDR_MSG_CQ, 3456 "Error: POLL CQ with RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR. CQ icid=0x%x, QP icid=0x%x\n", 3457 cq->icid, qp->icid); 3458 cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons, 3459 IB_WC_WR_FLUSH_ERR, 1); 3460 break; 3461 default: 3462 /* process all WQE before the cosumer */ 3463 qp->state = QED_ROCE_QP_STATE_ERR; 3464 cnt = process_req(dev, qp, cq, num_entries, wc, 3465 req->sq_cons - 1, IB_WC_SUCCESS, 0); 3466 wc += cnt; 3467 /* if we have extra WC fill it with actual error info */ 3468 if (cnt < num_entries) { 3469 enum ib_wc_status wc_status; 3470 3471 switch (req->status) { 3472 case RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR: 3473 DP_ERR(dev, 3474 "Error: POLL CQ with RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR. CQ icid=0x%x, QP icid=0x%x\n", 3475 cq->icid, qp->icid); 3476 wc_status = IB_WC_BAD_RESP_ERR; 3477 break; 3478 case RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR: 3479 DP_ERR(dev, 3480 "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR. CQ icid=0x%x, QP icid=0x%x\n", 3481 cq->icid, qp->icid); 3482 wc_status = IB_WC_LOC_LEN_ERR; 3483 break; 3484 case RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR: 3485 DP_ERR(dev, 3486 "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n", 3487 cq->icid, qp->icid); 3488 wc_status = IB_WC_LOC_QP_OP_ERR; 3489 break; 3490 case RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR: 3491 DP_ERR(dev, 3492 "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR. CQ icid=0x%x, QP icid=0x%x\n", 3493 cq->icid, qp->icid); 3494 wc_status = IB_WC_LOC_PROT_ERR; 3495 break; 3496 case RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR: 3497 DP_ERR(dev, 3498 "Error: POLL CQ with RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n", 3499 cq->icid, qp->icid); 3500 wc_status = IB_WC_MW_BIND_ERR; 3501 break; 3502 case RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR: 3503 DP_ERR(dev, 3504 "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR. CQ icid=0x%x, QP icid=0x%x\n", 3505 cq->icid, qp->icid); 3506 wc_status = IB_WC_REM_INV_REQ_ERR; 3507 break; 3508 case RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR: 3509 DP_ERR(dev, 3510 "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR. CQ icid=0x%x, QP icid=0x%x\n", 3511 cq->icid, qp->icid); 3512 wc_status = IB_WC_REM_ACCESS_ERR; 3513 break; 3514 case RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR: 3515 DP_ERR(dev, 3516 "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n", 3517 cq->icid, qp->icid); 3518 wc_status = IB_WC_REM_OP_ERR; 3519 break; 3520 case RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR: 3521 DP_ERR(dev, 3522 "Error: POLL CQ with RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR. CQ icid=0x%x, QP icid=0x%x\n", 3523 cq->icid, qp->icid); 3524 wc_status = IB_WC_RNR_RETRY_EXC_ERR; 3525 break; 3526 case RDMA_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR: 3527 DP_ERR(dev, 3528 "Error: POLL CQ with ROCE_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR. CQ icid=0x%x, QP icid=0x%x\n", 3529 cq->icid, qp->icid); 3530 wc_status = IB_WC_RETRY_EXC_ERR; 3531 break; 3532 default: 3533 DP_ERR(dev, 3534 "Error: POLL CQ with IB_WC_GENERAL_ERR. CQ icid=0x%x, QP icid=0x%x\n", 3535 cq->icid, qp->icid); 3536 wc_status = IB_WC_GENERAL_ERR; 3537 } 3538 cnt += process_req(dev, qp, cq, 1, wc, req->sq_cons, 3539 wc_status, 1); 3540 } 3541 } 3542 3543 return cnt; 3544 } 3545 3546 static inline int qedr_cqe_resp_status_to_ib(u8 status) 3547 { 3548 switch (status) { 3549 case RDMA_CQE_RESP_STS_LOCAL_ACCESS_ERR: 3550 return IB_WC_LOC_ACCESS_ERR; 3551 case RDMA_CQE_RESP_STS_LOCAL_LENGTH_ERR: 3552 return IB_WC_LOC_LEN_ERR; 3553 case RDMA_CQE_RESP_STS_LOCAL_QP_OPERATION_ERR: 3554 return IB_WC_LOC_QP_OP_ERR; 3555 case RDMA_CQE_RESP_STS_LOCAL_PROTECTION_ERR: 3556 return IB_WC_LOC_PROT_ERR; 3557 case RDMA_CQE_RESP_STS_MEMORY_MGT_OPERATION_ERR: 3558 return IB_WC_MW_BIND_ERR; 3559 case RDMA_CQE_RESP_STS_REMOTE_INVALID_REQUEST_ERR: 3560 return IB_WC_REM_INV_RD_REQ_ERR; 3561 case RDMA_CQE_RESP_STS_OK: 3562 return IB_WC_SUCCESS; 3563 default: 3564 return IB_WC_GENERAL_ERR; 3565 } 3566 } 3567 3568 static inline int qedr_set_ok_cqe_resp_wc(struct rdma_cqe_responder *resp, 3569 struct ib_wc *wc) 3570 { 3571 wc->status = IB_WC_SUCCESS; 3572 wc->byte_len = le32_to_cpu(resp->length); 3573 3574 if (resp->flags & QEDR_RESP_IMM) { 3575 wc->ex.imm_data = cpu_to_be32(le32_to_cpu(resp->imm_data_or_inv_r_Key)); 3576 wc->wc_flags |= IB_WC_WITH_IMM; 3577 3578 if (resp->flags & QEDR_RESP_RDMA) 3579 wc->opcode = IB_WC_RECV_RDMA_WITH_IMM; 3580 3581 if (resp->flags & QEDR_RESP_INV) 3582 return -EINVAL; 3583 3584 } else if (resp->flags & QEDR_RESP_INV) { 3585 wc->ex.imm_data = le32_to_cpu(resp->imm_data_or_inv_r_Key); 3586 wc->wc_flags |= IB_WC_WITH_INVALIDATE; 3587 3588 if (resp->flags & QEDR_RESP_RDMA) 3589 return -EINVAL; 3590 3591 } else if (resp->flags & QEDR_RESP_RDMA) { 3592 return -EINVAL; 3593 } 3594 3595 return 0; 3596 } 3597 3598 static void __process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp, 3599 struct qedr_cq *cq, struct ib_wc *wc, 3600 struct rdma_cqe_responder *resp, u64 wr_id) 3601 { 3602 /* Must fill fields before qedr_set_ok_cqe_resp_wc() */ 3603 wc->opcode = IB_WC_RECV; 3604 wc->wc_flags = 0; 3605 3606 if (likely(resp->status == RDMA_CQE_RESP_STS_OK)) { 3607 if (qedr_set_ok_cqe_resp_wc(resp, wc)) 3608 DP_ERR(dev, 3609 "CQ %p (icid=%d) has invalid CQE responder flags=0x%x\n", 3610 cq, cq->icid, resp->flags); 3611 3612 } else { 3613 wc->status = qedr_cqe_resp_status_to_ib(resp->status); 3614 if (wc->status == IB_WC_GENERAL_ERR) 3615 DP_ERR(dev, 3616 "CQ %p (icid=%d) contains an invalid CQE status %d\n", 3617 cq, cq->icid, resp->status); 3618 } 3619 3620 /* Fill the rest of the WC */ 3621 wc->vendor_err = 0; 3622 wc->src_qp = qp->id; 3623 wc->qp = &qp->ibqp; 3624 wc->wr_id = wr_id; 3625 } 3626 3627 static int process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp, 3628 struct qedr_cq *cq, struct ib_wc *wc, 3629 struct rdma_cqe_responder *resp) 3630 { 3631 u64 wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id; 3632 3633 __process_resp_one(dev, qp, cq, wc, resp, wr_id); 3634 3635 while (qp->rqe_wr_id[qp->rq.cons].wqe_size--) 3636 qed_chain_consume(&qp->rq.pbl); 3637 qedr_inc_sw_cons(&qp->rq); 3638 3639 return 1; 3640 } 3641 3642 static int process_resp_flush(struct qedr_qp *qp, struct qedr_cq *cq, 3643 int num_entries, struct ib_wc *wc, u16 hw_cons) 3644 { 3645 u16 cnt = 0; 3646 3647 while (num_entries && qp->rq.wqe_cons != hw_cons) { 3648 /* fill WC */ 3649 wc->status = IB_WC_WR_FLUSH_ERR; 3650 wc->vendor_err = 0; 3651 wc->wc_flags = 0; 3652 wc->src_qp = qp->id; 3653 wc->byte_len = 0; 3654 wc->wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id; 3655 wc->qp = &qp->ibqp; 3656 num_entries--; 3657 wc++; 3658 cnt++; 3659 while (qp->rqe_wr_id[qp->rq.cons].wqe_size--) 3660 qed_chain_consume(&qp->rq.pbl); 3661 qedr_inc_sw_cons(&qp->rq); 3662 } 3663 3664 return cnt; 3665 } 3666 3667 static void try_consume_resp_cqe(struct qedr_cq *cq, struct qedr_qp *qp, 3668 struct rdma_cqe_responder *resp, int *update) 3669 { 3670 if (le16_to_cpu(resp->rq_cons_or_srq_id) == qp->rq.wqe_cons) { 3671 consume_cqe(cq); 3672 *update |= 1; 3673 } 3674 } 3675 3676 static int qedr_poll_cq_resp(struct qedr_dev *dev, struct qedr_qp *qp, 3677 struct qedr_cq *cq, int num_entries, 3678 struct ib_wc *wc, struct rdma_cqe_responder *resp, 3679 int *update) 3680 { 3681 int cnt; 3682 3683 if (resp->status == RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR) { 3684 cnt = process_resp_flush(qp, cq, num_entries, wc, 3685 resp->rq_cons_or_srq_id); 3686 try_consume_resp_cqe(cq, qp, resp, update); 3687 } else { 3688 cnt = process_resp_one(dev, qp, cq, wc, resp); 3689 consume_cqe(cq); 3690 *update |= 1; 3691 } 3692 3693 return cnt; 3694 } 3695 3696 static void try_consume_req_cqe(struct qedr_cq *cq, struct qedr_qp *qp, 3697 struct rdma_cqe_requester *req, int *update) 3698 { 3699 if (le16_to_cpu(req->sq_cons) == qp->sq.wqe_cons) { 3700 consume_cqe(cq); 3701 *update |= 1; 3702 } 3703 } 3704 3705 int qedr_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) 3706 { 3707 struct qedr_dev *dev = get_qedr_dev(ibcq->device); 3708 struct qedr_cq *cq = get_qedr_cq(ibcq); 3709 union rdma_cqe *cqe; 3710 u32 old_cons, new_cons; 3711 unsigned long flags; 3712 int update = 0; 3713 int done = 0; 3714 3715 if (cq->destroyed) { 3716 DP_ERR(dev, 3717 "warning: poll was invoked after destroy for cq %p (icid=%d)\n", 3718 cq, cq->icid); 3719 return 0; 3720 } 3721 3722 if (cq->cq_type == QEDR_CQ_TYPE_GSI) 3723 return qedr_gsi_poll_cq(ibcq, num_entries, wc); 3724 3725 spin_lock_irqsave(&cq->cq_lock, flags); 3726 cqe = cq->latest_cqe; 3727 old_cons = qed_chain_get_cons_idx_u32(&cq->pbl); 3728 while (num_entries && is_valid_cqe(cq, cqe)) { 3729 struct qedr_qp *qp; 3730 int cnt = 0; 3731 3732 /* prevent speculative reads of any field of CQE */ 3733 rmb(); 3734 3735 qp = cqe_get_qp(cqe); 3736 if (!qp) { 3737 WARN(1, "Error: CQE QP pointer is NULL. CQE=%p\n", cqe); 3738 break; 3739 } 3740 3741 wc->qp = &qp->ibqp; 3742 3743 switch (cqe_get_type(cqe)) { 3744 case RDMA_CQE_TYPE_REQUESTER: 3745 cnt = qedr_poll_cq_req(dev, qp, cq, num_entries, wc, 3746 &cqe->req); 3747 try_consume_req_cqe(cq, qp, &cqe->req, &update); 3748 break; 3749 case RDMA_CQE_TYPE_RESPONDER_RQ: 3750 cnt = qedr_poll_cq_resp(dev, qp, cq, num_entries, wc, 3751 &cqe->resp, &update); 3752 break; 3753 case RDMA_CQE_TYPE_INVALID: 3754 default: 3755 DP_ERR(dev, "Error: invalid CQE type = %d\n", 3756 cqe_get_type(cqe)); 3757 } 3758 num_entries -= cnt; 3759 wc += cnt; 3760 done += cnt; 3761 3762 cqe = get_cqe(cq); 3763 } 3764 new_cons = qed_chain_get_cons_idx_u32(&cq->pbl); 3765 3766 cq->cq_cons += new_cons - old_cons; 3767 3768 if (update) 3769 /* doorbell notifies abount latest VALID entry, 3770 * but chain already point to the next INVALID one 3771 */ 3772 doorbell_cq(cq, cq->cq_cons - 1, cq->arm_flags); 3773 3774 spin_unlock_irqrestore(&cq->cq_lock, flags); 3775 return done; 3776 } 3777 3778 int qedr_process_mad(struct ib_device *ibdev, int process_mad_flags, 3779 u8 port_num, 3780 const struct ib_wc *in_wc, 3781 const struct ib_grh *in_grh, 3782 const struct ib_mad_hdr *mad_hdr, 3783 size_t in_mad_size, struct ib_mad_hdr *out_mad, 3784 size_t *out_mad_size, u16 *out_mad_pkey_index) 3785 { 3786 struct qedr_dev *dev = get_qedr_dev(ibdev); 3787 3788 DP_DEBUG(dev, QEDR_MSG_GSI, 3789 "QEDR_PROCESS_MAD in_mad %x %x %x %x %x %x %x %x\n", 3790 mad_hdr->attr_id, mad_hdr->base_version, mad_hdr->attr_mod, 3791 mad_hdr->class_specific, mad_hdr->class_version, 3792 mad_hdr->method, mad_hdr->mgmt_class, mad_hdr->status); 3793 return IB_MAD_RESULT_SUCCESS; 3794 } 3795