1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB 2 /* 3 * Copyright 2018-2023 Amazon.com, Inc. or its affiliates. All rights reserved. 4 */ 5 6 #include <linux/dma-buf.h> 7 #include <linux/dma-resv.h> 8 #include <linux/vmalloc.h> 9 #include <linux/log2.h> 10 11 #include <rdma/ib_addr.h> 12 #include <rdma/ib_umem.h> 13 #include <rdma/ib_user_verbs.h> 14 #include <rdma/ib_verbs.h> 15 #include <rdma/uverbs_ioctl.h> 16 17 #include "efa.h" 18 #include "efa_io_defs.h" 19 20 enum { 21 EFA_MMAP_DMA_PAGE = 0, 22 EFA_MMAP_IO_WC, 23 EFA_MMAP_IO_NC, 24 }; 25 26 #define EFA_AENQ_ENABLED_GROUPS \ 27 (BIT(EFA_ADMIN_FATAL_ERROR) | BIT(EFA_ADMIN_WARNING) | \ 28 BIT(EFA_ADMIN_NOTIFICATION) | BIT(EFA_ADMIN_KEEP_ALIVE)) 29 30 struct efa_user_mmap_entry { 31 struct rdma_user_mmap_entry rdma_entry; 32 u64 address; 33 u8 mmap_flag; 34 }; 35 36 #define EFA_DEFINE_DEVICE_STATS(op) \ 37 op(EFA_SUBMITTED_CMDS, "submitted_cmds") \ 38 op(EFA_COMPLETED_CMDS, "completed_cmds") \ 39 op(EFA_CMDS_ERR, "cmds_err") \ 40 op(EFA_NO_COMPLETION_CMDS, "no_completion_cmds") \ 41 op(EFA_KEEP_ALIVE_RCVD, "keep_alive_rcvd") \ 42 op(EFA_ALLOC_PD_ERR, "alloc_pd_err") \ 43 op(EFA_CREATE_QP_ERR, "create_qp_err") \ 44 op(EFA_CREATE_CQ_ERR, "create_cq_err") \ 45 op(EFA_REG_MR_ERR, "reg_mr_err") \ 46 op(EFA_ALLOC_UCONTEXT_ERR, "alloc_ucontext_err") \ 47 op(EFA_CREATE_AH_ERR, "create_ah_err") \ 48 op(EFA_MMAP_ERR, "mmap_err") 49 50 #define EFA_DEFINE_PORT_STATS(op) \ 51 op(EFA_TX_BYTES, "tx_bytes") \ 52 op(EFA_TX_PKTS, "tx_pkts") \ 53 op(EFA_RX_BYTES, "rx_bytes") \ 54 op(EFA_RX_PKTS, "rx_pkts") \ 55 op(EFA_RX_DROPS, "rx_drops") \ 56 op(EFA_SEND_BYTES, "send_bytes") \ 57 op(EFA_SEND_WRS, "send_wrs") \ 58 op(EFA_RECV_BYTES, "recv_bytes") \ 59 op(EFA_RECV_WRS, "recv_wrs") \ 60 op(EFA_RDMA_READ_WRS, "rdma_read_wrs") \ 61 op(EFA_RDMA_READ_BYTES, "rdma_read_bytes") \ 62 op(EFA_RDMA_READ_WR_ERR, "rdma_read_wr_err") \ 63 op(EFA_RDMA_READ_RESP_BYTES, "rdma_read_resp_bytes") \ 64 op(EFA_RDMA_WRITE_WRS, "rdma_write_wrs") \ 65 op(EFA_RDMA_WRITE_BYTES, "rdma_write_bytes") \ 66 op(EFA_RDMA_WRITE_WR_ERR, "rdma_write_wr_err") \ 67 op(EFA_RDMA_WRITE_RECV_BYTES, "rdma_write_recv_bytes") \ 68 69 #define EFA_STATS_ENUM(ename, name) ename, 70 #define EFA_STATS_STR(ename, nam) \ 71 [ename].name = nam, 72 73 enum efa_hw_device_stats { 74 EFA_DEFINE_DEVICE_STATS(EFA_STATS_ENUM) 75 }; 76 77 static const struct rdma_stat_desc efa_device_stats_descs[] = { 78 EFA_DEFINE_DEVICE_STATS(EFA_STATS_STR) 79 }; 80 81 enum efa_hw_port_stats { 82 EFA_DEFINE_PORT_STATS(EFA_STATS_ENUM) 83 }; 84 85 static const struct rdma_stat_desc efa_port_stats_descs[] = { 86 EFA_DEFINE_PORT_STATS(EFA_STATS_STR) 87 }; 88 89 #define EFA_CHUNK_PAYLOAD_SHIFT 12 90 #define EFA_CHUNK_PAYLOAD_SIZE BIT(EFA_CHUNK_PAYLOAD_SHIFT) 91 #define EFA_CHUNK_PAYLOAD_PTR_SIZE 8 92 93 #define EFA_CHUNK_SHIFT 12 94 #define EFA_CHUNK_SIZE BIT(EFA_CHUNK_SHIFT) 95 #define EFA_CHUNK_PTR_SIZE sizeof(struct efa_com_ctrl_buff_info) 96 97 #define EFA_PTRS_PER_CHUNK \ 98 ((EFA_CHUNK_SIZE - EFA_CHUNK_PTR_SIZE) / EFA_CHUNK_PAYLOAD_PTR_SIZE) 99 100 #define EFA_CHUNK_USED_SIZE \ 101 ((EFA_PTRS_PER_CHUNK * EFA_CHUNK_PAYLOAD_PTR_SIZE) + EFA_CHUNK_PTR_SIZE) 102 103 struct pbl_chunk { 104 dma_addr_t dma_addr; 105 u64 *buf; 106 u32 length; 107 }; 108 109 struct pbl_chunk_list { 110 struct pbl_chunk *chunks; 111 unsigned int size; 112 }; 113 114 struct pbl_context { 115 union { 116 struct { 117 dma_addr_t dma_addr; 118 } continuous; 119 struct { 120 u32 pbl_buf_size_in_pages; 121 struct scatterlist *sgl; 122 int sg_dma_cnt; 123 struct pbl_chunk_list chunk_list; 124 } indirect; 125 } phys; 126 u64 *pbl_buf; 127 u32 pbl_buf_size_in_bytes; 128 u8 physically_continuous; 129 }; 130 131 static inline struct efa_dev *to_edev(struct ib_device *ibdev) 132 { 133 return container_of(ibdev, struct efa_dev, ibdev); 134 } 135 136 static inline struct efa_ucontext *to_eucontext(struct ib_ucontext *ibucontext) 137 { 138 return container_of(ibucontext, struct efa_ucontext, ibucontext); 139 } 140 141 static inline struct efa_pd *to_epd(struct ib_pd *ibpd) 142 { 143 return container_of(ibpd, struct efa_pd, ibpd); 144 } 145 146 static inline struct efa_mr *to_emr(struct ib_mr *ibmr) 147 { 148 return container_of(ibmr, struct efa_mr, ibmr); 149 } 150 151 static inline struct efa_qp *to_eqp(struct ib_qp *ibqp) 152 { 153 return container_of(ibqp, struct efa_qp, ibqp); 154 } 155 156 static inline struct efa_cq *to_ecq(struct ib_cq *ibcq) 157 { 158 return container_of(ibcq, struct efa_cq, ibcq); 159 } 160 161 static inline struct efa_ah *to_eah(struct ib_ah *ibah) 162 { 163 return container_of(ibah, struct efa_ah, ibah); 164 } 165 166 static inline struct efa_user_mmap_entry * 167 to_emmap(struct rdma_user_mmap_entry *rdma_entry) 168 { 169 return container_of(rdma_entry, struct efa_user_mmap_entry, rdma_entry); 170 } 171 172 #define EFA_DEV_CAP(dev, cap) \ 173 ((dev)->dev_attr.device_caps & \ 174 EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_##cap##_MASK) 175 176 #define is_reserved_cleared(reserved) \ 177 !memchr_inv(reserved, 0, sizeof(reserved)) 178 179 static void *efa_zalloc_mapped(struct efa_dev *dev, dma_addr_t *dma_addr, 180 size_t size, enum dma_data_direction dir) 181 { 182 void *addr; 183 184 addr = alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO); 185 if (!addr) 186 return NULL; 187 188 *dma_addr = dma_map_single(&dev->pdev->dev, addr, size, dir); 189 if (dma_mapping_error(&dev->pdev->dev, *dma_addr)) { 190 ibdev_err(&dev->ibdev, "Failed to map DMA address\n"); 191 free_pages_exact(addr, size); 192 return NULL; 193 } 194 195 return addr; 196 } 197 198 static void efa_free_mapped(struct efa_dev *dev, void *cpu_addr, 199 dma_addr_t dma_addr, 200 size_t size, enum dma_data_direction dir) 201 { 202 dma_unmap_single(&dev->pdev->dev, dma_addr, size, dir); 203 free_pages_exact(cpu_addr, size); 204 } 205 206 int efa_query_device(struct ib_device *ibdev, 207 struct ib_device_attr *props, 208 struct ib_udata *udata) 209 { 210 struct efa_com_get_device_attr_result *dev_attr; 211 struct efa_ibv_ex_query_device_resp resp = {}; 212 struct efa_dev *dev = to_edev(ibdev); 213 int err; 214 215 if (udata && udata->inlen && 216 !ib_is_udata_cleared(udata, 0, udata->inlen)) { 217 ibdev_dbg(ibdev, 218 "Incompatible ABI params, udata not cleared\n"); 219 return -EINVAL; 220 } 221 222 dev_attr = &dev->dev_attr; 223 224 memset(props, 0, sizeof(*props)); 225 props->max_mr_size = dev_attr->max_mr_pages * PAGE_SIZE; 226 props->page_size_cap = dev_attr->page_size_cap; 227 props->vendor_id = dev->pdev->vendor; 228 props->vendor_part_id = dev->pdev->device; 229 props->hw_ver = dev->pdev->subsystem_device; 230 props->max_qp = dev_attr->max_qp; 231 props->max_cq = dev_attr->max_cq; 232 props->max_pd = dev_attr->max_pd; 233 props->max_mr = dev_attr->max_mr; 234 props->max_ah = dev_attr->max_ah; 235 props->max_cqe = dev_attr->max_cq_depth; 236 props->max_qp_wr = min_t(u32, dev_attr->max_sq_depth, 237 dev_attr->max_rq_depth); 238 props->max_send_sge = dev_attr->max_sq_sge; 239 props->max_recv_sge = dev_attr->max_rq_sge; 240 props->max_sge_rd = dev_attr->max_wr_rdma_sge; 241 props->max_pkeys = 1; 242 243 if (udata && udata->outlen) { 244 resp.max_sq_sge = dev_attr->max_sq_sge; 245 resp.max_rq_sge = dev_attr->max_rq_sge; 246 resp.max_sq_wr = dev_attr->max_sq_depth; 247 resp.max_rq_wr = dev_attr->max_rq_depth; 248 resp.max_rdma_size = dev_attr->max_rdma_size; 249 250 resp.device_caps |= EFA_QUERY_DEVICE_CAPS_CQ_WITH_SGID; 251 if (EFA_DEV_CAP(dev, RDMA_READ)) 252 resp.device_caps |= EFA_QUERY_DEVICE_CAPS_RDMA_READ; 253 254 if (EFA_DEV_CAP(dev, RNR_RETRY)) 255 resp.device_caps |= EFA_QUERY_DEVICE_CAPS_RNR_RETRY; 256 257 if (EFA_DEV_CAP(dev, DATA_POLLING_128)) 258 resp.device_caps |= EFA_QUERY_DEVICE_CAPS_DATA_POLLING_128; 259 260 if (EFA_DEV_CAP(dev, RDMA_WRITE)) 261 resp.device_caps |= EFA_QUERY_DEVICE_CAPS_RDMA_WRITE; 262 263 if (dev->neqs) 264 resp.device_caps |= EFA_QUERY_DEVICE_CAPS_CQ_NOTIFICATIONS; 265 266 err = ib_copy_to_udata(udata, &resp, 267 min(sizeof(resp), udata->outlen)); 268 if (err) { 269 ibdev_dbg(ibdev, 270 "Failed to copy udata for query_device\n"); 271 return err; 272 } 273 } 274 275 return 0; 276 } 277 278 int efa_query_port(struct ib_device *ibdev, u32 port, 279 struct ib_port_attr *props) 280 { 281 struct efa_dev *dev = to_edev(ibdev); 282 283 props->lmc = 1; 284 285 props->state = IB_PORT_ACTIVE; 286 props->phys_state = IB_PORT_PHYS_STATE_LINK_UP; 287 props->gid_tbl_len = 1; 288 props->pkey_tbl_len = 1; 289 props->active_speed = IB_SPEED_EDR; 290 props->active_width = IB_WIDTH_4X; 291 props->max_mtu = ib_mtu_int_to_enum(dev->dev_attr.mtu); 292 props->active_mtu = ib_mtu_int_to_enum(dev->dev_attr.mtu); 293 props->max_msg_sz = dev->dev_attr.mtu; 294 props->max_vl_num = 1; 295 296 return 0; 297 } 298 299 int efa_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, 300 int qp_attr_mask, 301 struct ib_qp_init_attr *qp_init_attr) 302 { 303 struct efa_dev *dev = to_edev(ibqp->device); 304 struct efa_com_query_qp_params params = {}; 305 struct efa_com_query_qp_result result; 306 struct efa_qp *qp = to_eqp(ibqp); 307 int err; 308 309 #define EFA_QUERY_QP_SUPP_MASK \ 310 (IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT | \ 311 IB_QP_QKEY | IB_QP_SQ_PSN | IB_QP_CAP | IB_QP_RNR_RETRY) 312 313 if (qp_attr_mask & ~EFA_QUERY_QP_SUPP_MASK) { 314 ibdev_dbg(&dev->ibdev, 315 "Unsupported qp_attr_mask[%#x] supported[%#x]\n", 316 qp_attr_mask, EFA_QUERY_QP_SUPP_MASK); 317 return -EOPNOTSUPP; 318 } 319 320 memset(qp_attr, 0, sizeof(*qp_attr)); 321 memset(qp_init_attr, 0, sizeof(*qp_init_attr)); 322 323 params.qp_handle = qp->qp_handle; 324 err = efa_com_query_qp(&dev->edev, ¶ms, &result); 325 if (err) 326 return err; 327 328 qp_attr->qp_state = result.qp_state; 329 qp_attr->qkey = result.qkey; 330 qp_attr->sq_psn = result.sq_psn; 331 qp_attr->sq_draining = result.sq_draining; 332 qp_attr->port_num = 1; 333 qp_attr->rnr_retry = result.rnr_retry; 334 335 qp_attr->cap.max_send_wr = qp->max_send_wr; 336 qp_attr->cap.max_recv_wr = qp->max_recv_wr; 337 qp_attr->cap.max_send_sge = qp->max_send_sge; 338 qp_attr->cap.max_recv_sge = qp->max_recv_sge; 339 qp_attr->cap.max_inline_data = qp->max_inline_data; 340 341 qp_init_attr->qp_type = ibqp->qp_type; 342 qp_init_attr->recv_cq = ibqp->recv_cq; 343 qp_init_attr->send_cq = ibqp->send_cq; 344 qp_init_attr->qp_context = ibqp->qp_context; 345 qp_init_attr->cap = qp_attr->cap; 346 347 return 0; 348 } 349 350 int efa_query_gid(struct ib_device *ibdev, u32 port, int index, 351 union ib_gid *gid) 352 { 353 struct efa_dev *dev = to_edev(ibdev); 354 355 memcpy(gid->raw, dev->dev_attr.addr, sizeof(dev->dev_attr.addr)); 356 357 return 0; 358 } 359 360 int efa_query_pkey(struct ib_device *ibdev, u32 port, u16 index, 361 u16 *pkey) 362 { 363 if (index > 0) 364 return -EINVAL; 365 366 *pkey = 0xffff; 367 return 0; 368 } 369 370 static int efa_pd_dealloc(struct efa_dev *dev, u16 pdn) 371 { 372 struct efa_com_dealloc_pd_params params = { 373 .pdn = pdn, 374 }; 375 376 return efa_com_dealloc_pd(&dev->edev, ¶ms); 377 } 378 379 int efa_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) 380 { 381 struct efa_dev *dev = to_edev(ibpd->device); 382 struct efa_ibv_alloc_pd_resp resp = {}; 383 struct efa_com_alloc_pd_result result; 384 struct efa_pd *pd = to_epd(ibpd); 385 int err; 386 387 if (udata->inlen && 388 !ib_is_udata_cleared(udata, 0, udata->inlen)) { 389 ibdev_dbg(&dev->ibdev, 390 "Incompatible ABI params, udata not cleared\n"); 391 err = -EINVAL; 392 goto err_out; 393 } 394 395 err = efa_com_alloc_pd(&dev->edev, &result); 396 if (err) 397 goto err_out; 398 399 pd->pdn = result.pdn; 400 resp.pdn = result.pdn; 401 402 if (udata->outlen) { 403 err = ib_copy_to_udata(udata, &resp, 404 min(sizeof(resp), udata->outlen)); 405 if (err) { 406 ibdev_dbg(&dev->ibdev, 407 "Failed to copy udata for alloc_pd\n"); 408 goto err_dealloc_pd; 409 } 410 } 411 412 ibdev_dbg(&dev->ibdev, "Allocated pd[%d]\n", pd->pdn); 413 414 return 0; 415 416 err_dealloc_pd: 417 efa_pd_dealloc(dev, result.pdn); 418 err_out: 419 atomic64_inc(&dev->stats.alloc_pd_err); 420 return err; 421 } 422 423 int efa_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) 424 { 425 struct efa_dev *dev = to_edev(ibpd->device); 426 struct efa_pd *pd = to_epd(ibpd); 427 428 ibdev_dbg(&dev->ibdev, "Dealloc pd[%d]\n", pd->pdn); 429 efa_pd_dealloc(dev, pd->pdn); 430 return 0; 431 } 432 433 static int efa_destroy_qp_handle(struct efa_dev *dev, u32 qp_handle) 434 { 435 struct efa_com_destroy_qp_params params = { .qp_handle = qp_handle }; 436 437 return efa_com_destroy_qp(&dev->edev, ¶ms); 438 } 439 440 static void efa_qp_user_mmap_entries_remove(struct efa_qp *qp) 441 { 442 rdma_user_mmap_entry_remove(qp->rq_mmap_entry); 443 rdma_user_mmap_entry_remove(qp->rq_db_mmap_entry); 444 rdma_user_mmap_entry_remove(qp->llq_desc_mmap_entry); 445 rdma_user_mmap_entry_remove(qp->sq_db_mmap_entry); 446 } 447 448 int efa_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) 449 { 450 struct efa_dev *dev = to_edev(ibqp->pd->device); 451 struct efa_qp *qp = to_eqp(ibqp); 452 int err; 453 454 ibdev_dbg(&dev->ibdev, "Destroy qp[%u]\n", ibqp->qp_num); 455 456 err = efa_destroy_qp_handle(dev, qp->qp_handle); 457 if (err) 458 return err; 459 460 efa_qp_user_mmap_entries_remove(qp); 461 462 if (qp->rq_cpu_addr) { 463 ibdev_dbg(&dev->ibdev, 464 "qp->cpu_addr[0x%p] freed: size[%lu], dma[%pad]\n", 465 qp->rq_cpu_addr, qp->rq_size, 466 &qp->rq_dma_addr); 467 efa_free_mapped(dev, qp->rq_cpu_addr, qp->rq_dma_addr, 468 qp->rq_size, DMA_TO_DEVICE); 469 } 470 471 return 0; 472 } 473 474 static struct rdma_user_mmap_entry* 475 efa_user_mmap_entry_insert(struct ib_ucontext *ucontext, 476 u64 address, size_t length, 477 u8 mmap_flag, u64 *offset) 478 { 479 struct efa_user_mmap_entry *entry = kzalloc(sizeof(*entry), GFP_KERNEL); 480 int err; 481 482 if (!entry) 483 return NULL; 484 485 entry->address = address; 486 entry->mmap_flag = mmap_flag; 487 488 err = rdma_user_mmap_entry_insert(ucontext, &entry->rdma_entry, 489 length); 490 if (err) { 491 kfree(entry); 492 return NULL; 493 } 494 *offset = rdma_user_mmap_get_offset(&entry->rdma_entry); 495 496 return &entry->rdma_entry; 497 } 498 499 static int qp_mmap_entries_setup(struct efa_qp *qp, 500 struct efa_dev *dev, 501 struct efa_ucontext *ucontext, 502 struct efa_com_create_qp_params *params, 503 struct efa_ibv_create_qp_resp *resp) 504 { 505 size_t length; 506 u64 address; 507 508 address = dev->db_bar_addr + resp->sq_db_offset; 509 qp->sq_db_mmap_entry = 510 efa_user_mmap_entry_insert(&ucontext->ibucontext, 511 address, 512 PAGE_SIZE, EFA_MMAP_IO_NC, 513 &resp->sq_db_mmap_key); 514 if (!qp->sq_db_mmap_entry) 515 return -ENOMEM; 516 517 resp->sq_db_offset &= ~PAGE_MASK; 518 519 address = dev->mem_bar_addr + resp->llq_desc_offset; 520 length = PAGE_ALIGN(params->sq_ring_size_in_bytes + 521 (resp->llq_desc_offset & ~PAGE_MASK)); 522 523 qp->llq_desc_mmap_entry = 524 efa_user_mmap_entry_insert(&ucontext->ibucontext, 525 address, length, 526 EFA_MMAP_IO_WC, 527 &resp->llq_desc_mmap_key); 528 if (!qp->llq_desc_mmap_entry) 529 goto err_remove_mmap; 530 531 resp->llq_desc_offset &= ~PAGE_MASK; 532 533 if (qp->rq_size) { 534 address = dev->db_bar_addr + resp->rq_db_offset; 535 536 qp->rq_db_mmap_entry = 537 efa_user_mmap_entry_insert(&ucontext->ibucontext, 538 address, PAGE_SIZE, 539 EFA_MMAP_IO_NC, 540 &resp->rq_db_mmap_key); 541 if (!qp->rq_db_mmap_entry) 542 goto err_remove_mmap; 543 544 resp->rq_db_offset &= ~PAGE_MASK; 545 546 address = virt_to_phys(qp->rq_cpu_addr); 547 qp->rq_mmap_entry = 548 efa_user_mmap_entry_insert(&ucontext->ibucontext, 549 address, qp->rq_size, 550 EFA_MMAP_DMA_PAGE, 551 &resp->rq_mmap_key); 552 if (!qp->rq_mmap_entry) 553 goto err_remove_mmap; 554 555 resp->rq_mmap_size = qp->rq_size; 556 } 557 558 return 0; 559 560 err_remove_mmap: 561 efa_qp_user_mmap_entries_remove(qp); 562 563 return -ENOMEM; 564 } 565 566 static int efa_qp_validate_cap(struct efa_dev *dev, 567 struct ib_qp_init_attr *init_attr) 568 { 569 if (init_attr->cap.max_send_wr > dev->dev_attr.max_sq_depth) { 570 ibdev_dbg(&dev->ibdev, 571 "qp: requested send wr[%u] exceeds the max[%u]\n", 572 init_attr->cap.max_send_wr, 573 dev->dev_attr.max_sq_depth); 574 return -EINVAL; 575 } 576 if (init_attr->cap.max_recv_wr > dev->dev_attr.max_rq_depth) { 577 ibdev_dbg(&dev->ibdev, 578 "qp: requested receive wr[%u] exceeds the max[%u]\n", 579 init_attr->cap.max_recv_wr, 580 dev->dev_attr.max_rq_depth); 581 return -EINVAL; 582 } 583 if (init_attr->cap.max_send_sge > dev->dev_attr.max_sq_sge) { 584 ibdev_dbg(&dev->ibdev, 585 "qp: requested sge send[%u] exceeds the max[%u]\n", 586 init_attr->cap.max_send_sge, dev->dev_attr.max_sq_sge); 587 return -EINVAL; 588 } 589 if (init_attr->cap.max_recv_sge > dev->dev_attr.max_rq_sge) { 590 ibdev_dbg(&dev->ibdev, 591 "qp: requested sge recv[%u] exceeds the max[%u]\n", 592 init_attr->cap.max_recv_sge, dev->dev_attr.max_rq_sge); 593 return -EINVAL; 594 } 595 if (init_attr->cap.max_inline_data > dev->dev_attr.inline_buf_size) { 596 ibdev_dbg(&dev->ibdev, 597 "qp: requested inline data[%u] exceeds the max[%u]\n", 598 init_attr->cap.max_inline_data, 599 dev->dev_attr.inline_buf_size); 600 return -EINVAL; 601 } 602 603 return 0; 604 } 605 606 static int efa_qp_validate_attr(struct efa_dev *dev, 607 struct ib_qp_init_attr *init_attr) 608 { 609 if (init_attr->qp_type != IB_QPT_DRIVER && 610 init_attr->qp_type != IB_QPT_UD) { 611 ibdev_dbg(&dev->ibdev, 612 "Unsupported qp type %d\n", init_attr->qp_type); 613 return -EOPNOTSUPP; 614 } 615 616 if (init_attr->srq) { 617 ibdev_dbg(&dev->ibdev, "SRQ is not supported\n"); 618 return -EOPNOTSUPP; 619 } 620 621 if (init_attr->create_flags) { 622 ibdev_dbg(&dev->ibdev, "Unsupported create flags\n"); 623 return -EOPNOTSUPP; 624 } 625 626 return 0; 627 } 628 629 int efa_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init_attr, 630 struct ib_udata *udata) 631 { 632 struct efa_com_create_qp_params create_qp_params = {}; 633 struct efa_com_create_qp_result create_qp_resp; 634 struct efa_dev *dev = to_edev(ibqp->device); 635 struct efa_ibv_create_qp_resp resp = {}; 636 struct efa_ibv_create_qp cmd = {}; 637 struct efa_qp *qp = to_eqp(ibqp); 638 struct efa_ucontext *ucontext; 639 int err; 640 641 ucontext = rdma_udata_to_drv_context(udata, struct efa_ucontext, 642 ibucontext); 643 644 err = efa_qp_validate_cap(dev, init_attr); 645 if (err) 646 goto err_out; 647 648 err = efa_qp_validate_attr(dev, init_attr); 649 if (err) 650 goto err_out; 651 652 if (offsetofend(typeof(cmd), driver_qp_type) > udata->inlen) { 653 ibdev_dbg(&dev->ibdev, 654 "Incompatible ABI params, no input udata\n"); 655 err = -EINVAL; 656 goto err_out; 657 } 658 659 if (udata->inlen > sizeof(cmd) && 660 !ib_is_udata_cleared(udata, sizeof(cmd), 661 udata->inlen - sizeof(cmd))) { 662 ibdev_dbg(&dev->ibdev, 663 "Incompatible ABI params, unknown fields in udata\n"); 664 err = -EINVAL; 665 goto err_out; 666 } 667 668 err = ib_copy_from_udata(&cmd, udata, 669 min(sizeof(cmd), udata->inlen)); 670 if (err) { 671 ibdev_dbg(&dev->ibdev, 672 "Cannot copy udata for create_qp\n"); 673 goto err_out; 674 } 675 676 if (cmd.comp_mask) { 677 ibdev_dbg(&dev->ibdev, 678 "Incompatible ABI params, unknown fields in udata\n"); 679 err = -EINVAL; 680 goto err_out; 681 } 682 683 create_qp_params.uarn = ucontext->uarn; 684 create_qp_params.pd = to_epd(ibqp->pd)->pdn; 685 686 if (init_attr->qp_type == IB_QPT_UD) { 687 create_qp_params.qp_type = EFA_ADMIN_QP_TYPE_UD; 688 } else if (cmd.driver_qp_type == EFA_QP_DRIVER_TYPE_SRD) { 689 create_qp_params.qp_type = EFA_ADMIN_QP_TYPE_SRD; 690 } else { 691 ibdev_dbg(&dev->ibdev, 692 "Unsupported qp type %d driver qp type %d\n", 693 init_attr->qp_type, cmd.driver_qp_type); 694 err = -EOPNOTSUPP; 695 goto err_out; 696 } 697 698 ibdev_dbg(&dev->ibdev, "Create QP: qp type %d driver qp type %#x\n", 699 init_attr->qp_type, cmd.driver_qp_type); 700 create_qp_params.send_cq_idx = to_ecq(init_attr->send_cq)->cq_idx; 701 create_qp_params.recv_cq_idx = to_ecq(init_attr->recv_cq)->cq_idx; 702 create_qp_params.sq_depth = init_attr->cap.max_send_wr; 703 create_qp_params.sq_ring_size_in_bytes = cmd.sq_ring_size; 704 705 create_qp_params.rq_depth = init_attr->cap.max_recv_wr; 706 create_qp_params.rq_ring_size_in_bytes = cmd.rq_ring_size; 707 qp->rq_size = PAGE_ALIGN(create_qp_params.rq_ring_size_in_bytes); 708 if (qp->rq_size) { 709 qp->rq_cpu_addr = efa_zalloc_mapped(dev, &qp->rq_dma_addr, 710 qp->rq_size, DMA_TO_DEVICE); 711 if (!qp->rq_cpu_addr) { 712 err = -ENOMEM; 713 goto err_out; 714 } 715 716 ibdev_dbg(&dev->ibdev, 717 "qp->cpu_addr[0x%p] allocated: size[%lu], dma[%pad]\n", 718 qp->rq_cpu_addr, qp->rq_size, &qp->rq_dma_addr); 719 create_qp_params.rq_base_addr = qp->rq_dma_addr; 720 } 721 722 err = efa_com_create_qp(&dev->edev, &create_qp_params, 723 &create_qp_resp); 724 if (err) 725 goto err_free_mapped; 726 727 resp.sq_db_offset = create_qp_resp.sq_db_offset; 728 resp.rq_db_offset = create_qp_resp.rq_db_offset; 729 resp.llq_desc_offset = create_qp_resp.llq_descriptors_offset; 730 resp.send_sub_cq_idx = create_qp_resp.send_sub_cq_idx; 731 resp.recv_sub_cq_idx = create_qp_resp.recv_sub_cq_idx; 732 733 err = qp_mmap_entries_setup(qp, dev, ucontext, &create_qp_params, 734 &resp); 735 if (err) 736 goto err_destroy_qp; 737 738 qp->qp_handle = create_qp_resp.qp_handle; 739 qp->ibqp.qp_num = create_qp_resp.qp_num; 740 qp->max_send_wr = init_attr->cap.max_send_wr; 741 qp->max_recv_wr = init_attr->cap.max_recv_wr; 742 qp->max_send_sge = init_attr->cap.max_send_sge; 743 qp->max_recv_sge = init_attr->cap.max_recv_sge; 744 qp->max_inline_data = init_attr->cap.max_inline_data; 745 746 if (udata->outlen) { 747 err = ib_copy_to_udata(udata, &resp, 748 min(sizeof(resp), udata->outlen)); 749 if (err) { 750 ibdev_dbg(&dev->ibdev, 751 "Failed to copy udata for qp[%u]\n", 752 create_qp_resp.qp_num); 753 goto err_remove_mmap_entries; 754 } 755 } 756 757 ibdev_dbg(&dev->ibdev, "Created qp[%d]\n", qp->ibqp.qp_num); 758 759 return 0; 760 761 err_remove_mmap_entries: 762 efa_qp_user_mmap_entries_remove(qp); 763 err_destroy_qp: 764 efa_destroy_qp_handle(dev, create_qp_resp.qp_handle); 765 err_free_mapped: 766 if (qp->rq_size) 767 efa_free_mapped(dev, qp->rq_cpu_addr, qp->rq_dma_addr, 768 qp->rq_size, DMA_TO_DEVICE); 769 err_out: 770 atomic64_inc(&dev->stats.create_qp_err); 771 return err; 772 } 773 774 static const struct { 775 int valid; 776 enum ib_qp_attr_mask req_param; 777 enum ib_qp_attr_mask opt_param; 778 } srd_qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = { 779 [IB_QPS_RESET] = { 780 [IB_QPS_RESET] = { .valid = 1 }, 781 [IB_QPS_INIT] = { 782 .valid = 1, 783 .req_param = IB_QP_PKEY_INDEX | 784 IB_QP_PORT | 785 IB_QP_QKEY, 786 }, 787 }, 788 [IB_QPS_INIT] = { 789 [IB_QPS_RESET] = { .valid = 1 }, 790 [IB_QPS_ERR] = { .valid = 1 }, 791 [IB_QPS_INIT] = { 792 .valid = 1, 793 .opt_param = IB_QP_PKEY_INDEX | 794 IB_QP_PORT | 795 IB_QP_QKEY, 796 }, 797 [IB_QPS_RTR] = { 798 .valid = 1, 799 .opt_param = IB_QP_PKEY_INDEX | 800 IB_QP_QKEY, 801 }, 802 }, 803 [IB_QPS_RTR] = { 804 [IB_QPS_RESET] = { .valid = 1 }, 805 [IB_QPS_ERR] = { .valid = 1 }, 806 [IB_QPS_RTS] = { 807 .valid = 1, 808 .req_param = IB_QP_SQ_PSN, 809 .opt_param = IB_QP_CUR_STATE | 810 IB_QP_QKEY | 811 IB_QP_RNR_RETRY, 812 813 } 814 }, 815 [IB_QPS_RTS] = { 816 [IB_QPS_RESET] = { .valid = 1 }, 817 [IB_QPS_ERR] = { .valid = 1 }, 818 [IB_QPS_RTS] = { 819 .valid = 1, 820 .opt_param = IB_QP_CUR_STATE | 821 IB_QP_QKEY, 822 }, 823 [IB_QPS_SQD] = { 824 .valid = 1, 825 .opt_param = IB_QP_EN_SQD_ASYNC_NOTIFY, 826 }, 827 }, 828 [IB_QPS_SQD] = { 829 [IB_QPS_RESET] = { .valid = 1 }, 830 [IB_QPS_ERR] = { .valid = 1 }, 831 [IB_QPS_RTS] = { 832 .valid = 1, 833 .opt_param = IB_QP_CUR_STATE | 834 IB_QP_QKEY, 835 }, 836 [IB_QPS_SQD] = { 837 .valid = 1, 838 .opt_param = IB_QP_PKEY_INDEX | 839 IB_QP_QKEY, 840 } 841 }, 842 [IB_QPS_SQE] = { 843 [IB_QPS_RESET] = { .valid = 1 }, 844 [IB_QPS_ERR] = { .valid = 1 }, 845 [IB_QPS_RTS] = { 846 .valid = 1, 847 .opt_param = IB_QP_CUR_STATE | 848 IB_QP_QKEY, 849 } 850 }, 851 [IB_QPS_ERR] = { 852 [IB_QPS_RESET] = { .valid = 1 }, 853 [IB_QPS_ERR] = { .valid = 1 }, 854 } 855 }; 856 857 static bool efa_modify_srd_qp_is_ok(enum ib_qp_state cur_state, 858 enum ib_qp_state next_state, 859 enum ib_qp_attr_mask mask) 860 { 861 enum ib_qp_attr_mask req_param, opt_param; 862 863 if (mask & IB_QP_CUR_STATE && 864 cur_state != IB_QPS_RTR && cur_state != IB_QPS_RTS && 865 cur_state != IB_QPS_SQD && cur_state != IB_QPS_SQE) 866 return false; 867 868 if (!srd_qp_state_table[cur_state][next_state].valid) 869 return false; 870 871 req_param = srd_qp_state_table[cur_state][next_state].req_param; 872 opt_param = srd_qp_state_table[cur_state][next_state].opt_param; 873 874 if ((mask & req_param) != req_param) 875 return false; 876 877 if (mask & ~(req_param | opt_param | IB_QP_STATE)) 878 return false; 879 880 return true; 881 } 882 883 static int efa_modify_qp_validate(struct efa_dev *dev, struct efa_qp *qp, 884 struct ib_qp_attr *qp_attr, int qp_attr_mask, 885 enum ib_qp_state cur_state, 886 enum ib_qp_state new_state) 887 { 888 int err; 889 890 #define EFA_MODIFY_QP_SUPP_MASK \ 891 (IB_QP_STATE | IB_QP_CUR_STATE | IB_QP_EN_SQD_ASYNC_NOTIFY | \ 892 IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_QKEY | IB_QP_SQ_PSN | \ 893 IB_QP_RNR_RETRY) 894 895 if (qp_attr_mask & ~EFA_MODIFY_QP_SUPP_MASK) { 896 ibdev_dbg(&dev->ibdev, 897 "Unsupported qp_attr_mask[%#x] supported[%#x]\n", 898 qp_attr_mask, EFA_MODIFY_QP_SUPP_MASK); 899 return -EOPNOTSUPP; 900 } 901 902 if (qp->ibqp.qp_type == IB_QPT_DRIVER) 903 err = !efa_modify_srd_qp_is_ok(cur_state, new_state, 904 qp_attr_mask); 905 else 906 err = !ib_modify_qp_is_ok(cur_state, new_state, IB_QPT_UD, 907 qp_attr_mask); 908 909 if (err) { 910 ibdev_dbg(&dev->ibdev, "Invalid modify QP parameters\n"); 911 return -EINVAL; 912 } 913 914 if ((qp_attr_mask & IB_QP_PORT) && qp_attr->port_num != 1) { 915 ibdev_dbg(&dev->ibdev, "Can't change port num\n"); 916 return -EOPNOTSUPP; 917 } 918 919 if ((qp_attr_mask & IB_QP_PKEY_INDEX) && qp_attr->pkey_index) { 920 ibdev_dbg(&dev->ibdev, "Can't change pkey index\n"); 921 return -EOPNOTSUPP; 922 } 923 924 return 0; 925 } 926 927 int efa_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, 928 int qp_attr_mask, struct ib_udata *udata) 929 { 930 struct efa_dev *dev = to_edev(ibqp->device); 931 struct efa_com_modify_qp_params params = {}; 932 struct efa_qp *qp = to_eqp(ibqp); 933 enum ib_qp_state cur_state; 934 enum ib_qp_state new_state; 935 int err; 936 937 if (qp_attr_mask & ~IB_QP_ATTR_STANDARD_BITS) 938 return -EOPNOTSUPP; 939 940 if (udata->inlen && 941 !ib_is_udata_cleared(udata, 0, udata->inlen)) { 942 ibdev_dbg(&dev->ibdev, 943 "Incompatible ABI params, udata not cleared\n"); 944 return -EINVAL; 945 } 946 947 cur_state = qp_attr_mask & IB_QP_CUR_STATE ? qp_attr->cur_qp_state : 948 qp->state; 949 new_state = qp_attr_mask & IB_QP_STATE ? qp_attr->qp_state : cur_state; 950 951 err = efa_modify_qp_validate(dev, qp, qp_attr, qp_attr_mask, cur_state, 952 new_state); 953 if (err) 954 return err; 955 956 params.qp_handle = qp->qp_handle; 957 958 if (qp_attr_mask & IB_QP_STATE) { 959 EFA_SET(¶ms.modify_mask, EFA_ADMIN_MODIFY_QP_CMD_QP_STATE, 960 1); 961 EFA_SET(¶ms.modify_mask, 962 EFA_ADMIN_MODIFY_QP_CMD_CUR_QP_STATE, 1); 963 params.cur_qp_state = cur_state; 964 params.qp_state = new_state; 965 } 966 967 if (qp_attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY) { 968 EFA_SET(¶ms.modify_mask, 969 EFA_ADMIN_MODIFY_QP_CMD_SQ_DRAINED_ASYNC_NOTIFY, 1); 970 params.sq_drained_async_notify = qp_attr->en_sqd_async_notify; 971 } 972 973 if (qp_attr_mask & IB_QP_QKEY) { 974 EFA_SET(¶ms.modify_mask, EFA_ADMIN_MODIFY_QP_CMD_QKEY, 1); 975 params.qkey = qp_attr->qkey; 976 } 977 978 if (qp_attr_mask & IB_QP_SQ_PSN) { 979 EFA_SET(¶ms.modify_mask, EFA_ADMIN_MODIFY_QP_CMD_SQ_PSN, 1); 980 params.sq_psn = qp_attr->sq_psn; 981 } 982 983 if (qp_attr_mask & IB_QP_RNR_RETRY) { 984 EFA_SET(¶ms.modify_mask, EFA_ADMIN_MODIFY_QP_CMD_RNR_RETRY, 985 1); 986 params.rnr_retry = qp_attr->rnr_retry; 987 } 988 989 err = efa_com_modify_qp(&dev->edev, ¶ms); 990 if (err) 991 return err; 992 993 qp->state = new_state; 994 995 return 0; 996 } 997 998 static int efa_destroy_cq_idx(struct efa_dev *dev, int cq_idx) 999 { 1000 struct efa_com_destroy_cq_params params = { .cq_idx = cq_idx }; 1001 1002 return efa_com_destroy_cq(&dev->edev, ¶ms); 1003 } 1004 1005 static void efa_cq_user_mmap_entries_remove(struct efa_cq *cq) 1006 { 1007 rdma_user_mmap_entry_remove(cq->db_mmap_entry); 1008 rdma_user_mmap_entry_remove(cq->mmap_entry); 1009 } 1010 1011 int efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) 1012 { 1013 struct efa_dev *dev = to_edev(ibcq->device); 1014 struct efa_cq *cq = to_ecq(ibcq); 1015 1016 ibdev_dbg(&dev->ibdev, 1017 "Destroy cq[%d] virt[0x%p] freed: size[%lu], dma[%pad]\n", 1018 cq->cq_idx, cq->cpu_addr, cq->size, &cq->dma_addr); 1019 1020 efa_destroy_cq_idx(dev, cq->cq_idx); 1021 efa_cq_user_mmap_entries_remove(cq); 1022 if (cq->eq) { 1023 xa_erase(&dev->cqs_xa, cq->cq_idx); 1024 synchronize_irq(cq->eq->irq.irqn); 1025 } 1026 efa_free_mapped(dev, cq->cpu_addr, cq->dma_addr, cq->size, 1027 DMA_FROM_DEVICE); 1028 return 0; 1029 } 1030 1031 static struct efa_eq *efa_vec2eq(struct efa_dev *dev, int vec) 1032 { 1033 return &dev->eqs[vec]; 1034 } 1035 1036 static int cq_mmap_entries_setup(struct efa_dev *dev, struct efa_cq *cq, 1037 struct efa_ibv_create_cq_resp *resp, 1038 bool db_valid) 1039 { 1040 resp->q_mmap_size = cq->size; 1041 cq->mmap_entry = efa_user_mmap_entry_insert(&cq->ucontext->ibucontext, 1042 virt_to_phys(cq->cpu_addr), 1043 cq->size, EFA_MMAP_DMA_PAGE, 1044 &resp->q_mmap_key); 1045 if (!cq->mmap_entry) 1046 return -ENOMEM; 1047 1048 if (db_valid) { 1049 cq->db_mmap_entry = 1050 efa_user_mmap_entry_insert(&cq->ucontext->ibucontext, 1051 dev->db_bar_addr + resp->db_off, 1052 PAGE_SIZE, EFA_MMAP_IO_NC, 1053 &resp->db_mmap_key); 1054 if (!cq->db_mmap_entry) { 1055 rdma_user_mmap_entry_remove(cq->mmap_entry); 1056 return -ENOMEM; 1057 } 1058 1059 resp->db_off &= ~PAGE_MASK; 1060 resp->comp_mask |= EFA_CREATE_CQ_RESP_DB_OFF; 1061 } 1062 1063 return 0; 1064 } 1065 1066 int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, 1067 struct ib_udata *udata) 1068 { 1069 struct efa_ucontext *ucontext = rdma_udata_to_drv_context( 1070 udata, struct efa_ucontext, ibucontext); 1071 struct efa_com_create_cq_params params = {}; 1072 struct efa_ibv_create_cq_resp resp = {}; 1073 struct efa_com_create_cq_result result; 1074 struct ib_device *ibdev = ibcq->device; 1075 struct efa_dev *dev = to_edev(ibdev); 1076 struct efa_ibv_create_cq cmd = {}; 1077 struct efa_cq *cq = to_ecq(ibcq); 1078 int entries = attr->cqe; 1079 bool set_src_addr; 1080 int err; 1081 1082 ibdev_dbg(ibdev, "create_cq entries %d\n", entries); 1083 1084 if (attr->flags) 1085 return -EOPNOTSUPP; 1086 1087 if (entries < 1 || entries > dev->dev_attr.max_cq_depth) { 1088 ibdev_dbg(ibdev, 1089 "cq: requested entries[%u] non-positive or greater than max[%u]\n", 1090 entries, dev->dev_attr.max_cq_depth); 1091 err = -EINVAL; 1092 goto err_out; 1093 } 1094 1095 if (offsetofend(typeof(cmd), num_sub_cqs) > udata->inlen) { 1096 ibdev_dbg(ibdev, 1097 "Incompatible ABI params, no input udata\n"); 1098 err = -EINVAL; 1099 goto err_out; 1100 } 1101 1102 if (udata->inlen > sizeof(cmd) && 1103 !ib_is_udata_cleared(udata, sizeof(cmd), 1104 udata->inlen - sizeof(cmd))) { 1105 ibdev_dbg(ibdev, 1106 "Incompatible ABI params, unknown fields in udata\n"); 1107 err = -EINVAL; 1108 goto err_out; 1109 } 1110 1111 err = ib_copy_from_udata(&cmd, udata, 1112 min(sizeof(cmd), udata->inlen)); 1113 if (err) { 1114 ibdev_dbg(ibdev, "Cannot copy udata for create_cq\n"); 1115 goto err_out; 1116 } 1117 1118 if (cmd.comp_mask || !is_reserved_cleared(cmd.reserved_58)) { 1119 ibdev_dbg(ibdev, 1120 "Incompatible ABI params, unknown fields in udata\n"); 1121 err = -EINVAL; 1122 goto err_out; 1123 } 1124 1125 set_src_addr = !!(cmd.flags & EFA_CREATE_CQ_WITH_SGID); 1126 if ((cmd.cq_entry_size != sizeof(struct efa_io_rx_cdesc_ex)) && 1127 (set_src_addr || 1128 cmd.cq_entry_size != sizeof(struct efa_io_rx_cdesc))) { 1129 ibdev_dbg(ibdev, 1130 "Invalid entry size [%u]\n", cmd.cq_entry_size); 1131 err = -EINVAL; 1132 goto err_out; 1133 } 1134 1135 if (cmd.num_sub_cqs != dev->dev_attr.sub_cqs_per_cq) { 1136 ibdev_dbg(ibdev, 1137 "Invalid number of sub cqs[%u] expected[%u]\n", 1138 cmd.num_sub_cqs, dev->dev_attr.sub_cqs_per_cq); 1139 err = -EINVAL; 1140 goto err_out; 1141 } 1142 1143 cq->ucontext = ucontext; 1144 cq->size = PAGE_ALIGN(cmd.cq_entry_size * entries * cmd.num_sub_cqs); 1145 cq->cpu_addr = efa_zalloc_mapped(dev, &cq->dma_addr, cq->size, 1146 DMA_FROM_DEVICE); 1147 if (!cq->cpu_addr) { 1148 err = -ENOMEM; 1149 goto err_out; 1150 } 1151 1152 params.uarn = cq->ucontext->uarn; 1153 params.cq_depth = entries; 1154 params.dma_addr = cq->dma_addr; 1155 params.entry_size_in_bytes = cmd.cq_entry_size; 1156 params.num_sub_cqs = cmd.num_sub_cqs; 1157 params.set_src_addr = set_src_addr; 1158 if (cmd.flags & EFA_CREATE_CQ_WITH_COMPLETION_CHANNEL) { 1159 cq->eq = efa_vec2eq(dev, attr->comp_vector); 1160 params.eqn = cq->eq->eeq.eqn; 1161 params.interrupt_mode_enabled = true; 1162 } 1163 1164 err = efa_com_create_cq(&dev->edev, ¶ms, &result); 1165 if (err) 1166 goto err_free_mapped; 1167 1168 resp.db_off = result.db_off; 1169 resp.cq_idx = result.cq_idx; 1170 cq->cq_idx = result.cq_idx; 1171 cq->ibcq.cqe = result.actual_depth; 1172 WARN_ON_ONCE(entries != result.actual_depth); 1173 1174 err = cq_mmap_entries_setup(dev, cq, &resp, result.db_valid); 1175 if (err) { 1176 ibdev_dbg(ibdev, "Could not setup cq[%u] mmap entries\n", 1177 cq->cq_idx); 1178 goto err_destroy_cq; 1179 } 1180 1181 if (cq->eq) { 1182 err = xa_err(xa_store(&dev->cqs_xa, cq->cq_idx, cq, GFP_KERNEL)); 1183 if (err) { 1184 ibdev_dbg(ibdev, "Failed to store cq[%u] in xarray\n", 1185 cq->cq_idx); 1186 goto err_remove_mmap; 1187 } 1188 } 1189 1190 if (udata->outlen) { 1191 err = ib_copy_to_udata(udata, &resp, 1192 min(sizeof(resp), udata->outlen)); 1193 if (err) { 1194 ibdev_dbg(ibdev, 1195 "Failed to copy udata for create_cq\n"); 1196 goto err_xa_erase; 1197 } 1198 } 1199 1200 ibdev_dbg(ibdev, "Created cq[%d], cq depth[%u]. dma[%pad] virt[0x%p]\n", 1201 cq->cq_idx, result.actual_depth, &cq->dma_addr, cq->cpu_addr); 1202 1203 return 0; 1204 1205 err_xa_erase: 1206 if (cq->eq) 1207 xa_erase(&dev->cqs_xa, cq->cq_idx); 1208 err_remove_mmap: 1209 efa_cq_user_mmap_entries_remove(cq); 1210 err_destroy_cq: 1211 efa_destroy_cq_idx(dev, cq->cq_idx); 1212 err_free_mapped: 1213 efa_free_mapped(dev, cq->cpu_addr, cq->dma_addr, cq->size, 1214 DMA_FROM_DEVICE); 1215 1216 err_out: 1217 atomic64_inc(&dev->stats.create_cq_err); 1218 return err; 1219 } 1220 1221 static int umem_to_page_list(struct efa_dev *dev, 1222 struct ib_umem *umem, 1223 u64 *page_list, 1224 u32 hp_cnt, 1225 u8 hp_shift) 1226 { 1227 u32 pages_in_hp = BIT(hp_shift - PAGE_SHIFT); 1228 struct ib_block_iter biter; 1229 unsigned int hp_idx = 0; 1230 1231 ibdev_dbg(&dev->ibdev, "hp_cnt[%u], pages_in_hp[%u]\n", 1232 hp_cnt, pages_in_hp); 1233 1234 rdma_umem_for_each_dma_block(umem, &biter, BIT(hp_shift)) 1235 page_list[hp_idx++] = rdma_block_iter_dma_address(&biter); 1236 1237 return 0; 1238 } 1239 1240 static struct scatterlist *efa_vmalloc_buf_to_sg(u64 *buf, int page_cnt) 1241 { 1242 struct scatterlist *sglist; 1243 struct page *pg; 1244 int i; 1245 1246 sglist = kmalloc_array(page_cnt, sizeof(*sglist), GFP_KERNEL); 1247 if (!sglist) 1248 return NULL; 1249 sg_init_table(sglist, page_cnt); 1250 for (i = 0; i < page_cnt; i++) { 1251 pg = vmalloc_to_page(buf); 1252 if (!pg) 1253 goto err; 1254 sg_set_page(&sglist[i], pg, PAGE_SIZE, 0); 1255 buf += PAGE_SIZE / sizeof(*buf); 1256 } 1257 return sglist; 1258 1259 err: 1260 kfree(sglist); 1261 return NULL; 1262 } 1263 1264 /* 1265 * create a chunk list of physical pages dma addresses from the supplied 1266 * scatter gather list 1267 */ 1268 static int pbl_chunk_list_create(struct efa_dev *dev, struct pbl_context *pbl) 1269 { 1270 struct pbl_chunk_list *chunk_list = &pbl->phys.indirect.chunk_list; 1271 int page_cnt = pbl->phys.indirect.pbl_buf_size_in_pages; 1272 struct scatterlist *pages_sgl = pbl->phys.indirect.sgl; 1273 unsigned int chunk_list_size, chunk_idx, payload_idx; 1274 int sg_dma_cnt = pbl->phys.indirect.sg_dma_cnt; 1275 struct efa_com_ctrl_buff_info *ctrl_buf; 1276 u64 *cur_chunk_buf, *prev_chunk_buf; 1277 struct ib_block_iter biter; 1278 dma_addr_t dma_addr; 1279 int i; 1280 1281 /* allocate a chunk list that consists of 4KB chunks */ 1282 chunk_list_size = DIV_ROUND_UP(page_cnt, EFA_PTRS_PER_CHUNK); 1283 1284 chunk_list->size = chunk_list_size; 1285 chunk_list->chunks = kcalloc(chunk_list_size, 1286 sizeof(*chunk_list->chunks), 1287 GFP_KERNEL); 1288 if (!chunk_list->chunks) 1289 return -ENOMEM; 1290 1291 ibdev_dbg(&dev->ibdev, 1292 "chunk_list_size[%u] - pages[%u]\n", chunk_list_size, 1293 page_cnt); 1294 1295 /* allocate chunk buffers: */ 1296 for (i = 0; i < chunk_list_size; i++) { 1297 chunk_list->chunks[i].buf = kzalloc(EFA_CHUNK_SIZE, GFP_KERNEL); 1298 if (!chunk_list->chunks[i].buf) 1299 goto chunk_list_dealloc; 1300 1301 chunk_list->chunks[i].length = EFA_CHUNK_USED_SIZE; 1302 } 1303 chunk_list->chunks[chunk_list_size - 1].length = 1304 ((page_cnt % EFA_PTRS_PER_CHUNK) * EFA_CHUNK_PAYLOAD_PTR_SIZE) + 1305 EFA_CHUNK_PTR_SIZE; 1306 1307 /* fill the dma addresses of sg list pages to chunks: */ 1308 chunk_idx = 0; 1309 payload_idx = 0; 1310 cur_chunk_buf = chunk_list->chunks[0].buf; 1311 rdma_for_each_block(pages_sgl, &biter, sg_dma_cnt, 1312 EFA_CHUNK_PAYLOAD_SIZE) { 1313 cur_chunk_buf[payload_idx++] = 1314 rdma_block_iter_dma_address(&biter); 1315 1316 if (payload_idx == EFA_PTRS_PER_CHUNK) { 1317 chunk_idx++; 1318 cur_chunk_buf = chunk_list->chunks[chunk_idx].buf; 1319 payload_idx = 0; 1320 } 1321 } 1322 1323 /* map chunks to dma and fill chunks next ptrs */ 1324 for (i = chunk_list_size - 1; i >= 0; i--) { 1325 dma_addr = dma_map_single(&dev->pdev->dev, 1326 chunk_list->chunks[i].buf, 1327 chunk_list->chunks[i].length, 1328 DMA_TO_DEVICE); 1329 if (dma_mapping_error(&dev->pdev->dev, dma_addr)) { 1330 ibdev_err(&dev->ibdev, 1331 "chunk[%u] dma_map_failed\n", i); 1332 goto chunk_list_unmap; 1333 } 1334 1335 chunk_list->chunks[i].dma_addr = dma_addr; 1336 ibdev_dbg(&dev->ibdev, 1337 "chunk[%u] mapped at [%pad]\n", i, &dma_addr); 1338 1339 if (!i) 1340 break; 1341 1342 prev_chunk_buf = chunk_list->chunks[i - 1].buf; 1343 1344 ctrl_buf = (struct efa_com_ctrl_buff_info *) 1345 &prev_chunk_buf[EFA_PTRS_PER_CHUNK]; 1346 ctrl_buf->length = chunk_list->chunks[i].length; 1347 1348 efa_com_set_dma_addr(dma_addr, 1349 &ctrl_buf->address.mem_addr_high, 1350 &ctrl_buf->address.mem_addr_low); 1351 } 1352 1353 return 0; 1354 1355 chunk_list_unmap: 1356 for (; i < chunk_list_size; i++) { 1357 dma_unmap_single(&dev->pdev->dev, chunk_list->chunks[i].dma_addr, 1358 chunk_list->chunks[i].length, DMA_TO_DEVICE); 1359 } 1360 chunk_list_dealloc: 1361 for (i = 0; i < chunk_list_size; i++) 1362 kfree(chunk_list->chunks[i].buf); 1363 1364 kfree(chunk_list->chunks); 1365 return -ENOMEM; 1366 } 1367 1368 static void pbl_chunk_list_destroy(struct efa_dev *dev, struct pbl_context *pbl) 1369 { 1370 struct pbl_chunk_list *chunk_list = &pbl->phys.indirect.chunk_list; 1371 int i; 1372 1373 for (i = 0; i < chunk_list->size; i++) { 1374 dma_unmap_single(&dev->pdev->dev, chunk_list->chunks[i].dma_addr, 1375 chunk_list->chunks[i].length, DMA_TO_DEVICE); 1376 kfree(chunk_list->chunks[i].buf); 1377 } 1378 1379 kfree(chunk_list->chunks); 1380 } 1381 1382 /* initialize pbl continuous mode: map pbl buffer to a dma address. */ 1383 static int pbl_continuous_initialize(struct efa_dev *dev, 1384 struct pbl_context *pbl) 1385 { 1386 dma_addr_t dma_addr; 1387 1388 dma_addr = dma_map_single(&dev->pdev->dev, pbl->pbl_buf, 1389 pbl->pbl_buf_size_in_bytes, DMA_TO_DEVICE); 1390 if (dma_mapping_error(&dev->pdev->dev, dma_addr)) { 1391 ibdev_err(&dev->ibdev, "Unable to map pbl to DMA address\n"); 1392 return -ENOMEM; 1393 } 1394 1395 pbl->phys.continuous.dma_addr = dma_addr; 1396 ibdev_dbg(&dev->ibdev, 1397 "pbl continuous - dma_addr = %pad, size[%u]\n", 1398 &dma_addr, pbl->pbl_buf_size_in_bytes); 1399 1400 return 0; 1401 } 1402 1403 /* 1404 * initialize pbl indirect mode: 1405 * create a chunk list out of the dma addresses of the physical pages of 1406 * pbl buffer. 1407 */ 1408 static int pbl_indirect_initialize(struct efa_dev *dev, struct pbl_context *pbl) 1409 { 1410 u32 size_in_pages = DIV_ROUND_UP(pbl->pbl_buf_size_in_bytes, EFA_CHUNK_PAYLOAD_SIZE); 1411 struct scatterlist *sgl; 1412 int sg_dma_cnt, err; 1413 1414 BUILD_BUG_ON(EFA_CHUNK_PAYLOAD_SIZE > PAGE_SIZE); 1415 sgl = efa_vmalloc_buf_to_sg(pbl->pbl_buf, size_in_pages); 1416 if (!sgl) 1417 return -ENOMEM; 1418 1419 sg_dma_cnt = dma_map_sg(&dev->pdev->dev, sgl, size_in_pages, DMA_TO_DEVICE); 1420 if (!sg_dma_cnt) { 1421 err = -EINVAL; 1422 goto err_map; 1423 } 1424 1425 pbl->phys.indirect.pbl_buf_size_in_pages = size_in_pages; 1426 pbl->phys.indirect.sgl = sgl; 1427 pbl->phys.indirect.sg_dma_cnt = sg_dma_cnt; 1428 err = pbl_chunk_list_create(dev, pbl); 1429 if (err) { 1430 ibdev_dbg(&dev->ibdev, 1431 "chunk_list creation failed[%d]\n", err); 1432 goto err_chunk; 1433 } 1434 1435 ibdev_dbg(&dev->ibdev, 1436 "pbl indirect - size[%u], chunks[%u]\n", 1437 pbl->pbl_buf_size_in_bytes, 1438 pbl->phys.indirect.chunk_list.size); 1439 1440 return 0; 1441 1442 err_chunk: 1443 dma_unmap_sg(&dev->pdev->dev, sgl, size_in_pages, DMA_TO_DEVICE); 1444 err_map: 1445 kfree(sgl); 1446 return err; 1447 } 1448 1449 static void pbl_indirect_terminate(struct efa_dev *dev, struct pbl_context *pbl) 1450 { 1451 pbl_chunk_list_destroy(dev, pbl); 1452 dma_unmap_sg(&dev->pdev->dev, pbl->phys.indirect.sgl, 1453 pbl->phys.indirect.pbl_buf_size_in_pages, DMA_TO_DEVICE); 1454 kfree(pbl->phys.indirect.sgl); 1455 } 1456 1457 /* create a page buffer list from a mapped user memory region */ 1458 static int pbl_create(struct efa_dev *dev, 1459 struct pbl_context *pbl, 1460 struct ib_umem *umem, 1461 int hp_cnt, 1462 u8 hp_shift) 1463 { 1464 int err; 1465 1466 pbl->pbl_buf_size_in_bytes = hp_cnt * EFA_CHUNK_PAYLOAD_PTR_SIZE; 1467 pbl->pbl_buf = kvzalloc(pbl->pbl_buf_size_in_bytes, GFP_KERNEL); 1468 if (!pbl->pbl_buf) 1469 return -ENOMEM; 1470 1471 if (is_vmalloc_addr(pbl->pbl_buf)) { 1472 pbl->physically_continuous = 0; 1473 err = umem_to_page_list(dev, umem, pbl->pbl_buf, hp_cnt, 1474 hp_shift); 1475 if (err) 1476 goto err_free; 1477 1478 err = pbl_indirect_initialize(dev, pbl); 1479 if (err) 1480 goto err_free; 1481 } else { 1482 pbl->physically_continuous = 1; 1483 err = umem_to_page_list(dev, umem, pbl->pbl_buf, hp_cnt, 1484 hp_shift); 1485 if (err) 1486 goto err_free; 1487 1488 err = pbl_continuous_initialize(dev, pbl); 1489 if (err) 1490 goto err_free; 1491 } 1492 1493 ibdev_dbg(&dev->ibdev, 1494 "user_pbl_created: user_pages[%u], continuous[%u]\n", 1495 hp_cnt, pbl->physically_continuous); 1496 1497 return 0; 1498 1499 err_free: 1500 kvfree(pbl->pbl_buf); 1501 return err; 1502 } 1503 1504 static void pbl_destroy(struct efa_dev *dev, struct pbl_context *pbl) 1505 { 1506 if (pbl->physically_continuous) 1507 dma_unmap_single(&dev->pdev->dev, pbl->phys.continuous.dma_addr, 1508 pbl->pbl_buf_size_in_bytes, DMA_TO_DEVICE); 1509 else 1510 pbl_indirect_terminate(dev, pbl); 1511 1512 kvfree(pbl->pbl_buf); 1513 } 1514 1515 static int efa_create_inline_pbl(struct efa_dev *dev, struct efa_mr *mr, 1516 struct efa_com_reg_mr_params *params) 1517 { 1518 int err; 1519 1520 params->inline_pbl = 1; 1521 err = umem_to_page_list(dev, mr->umem, params->pbl.inline_pbl_array, 1522 params->page_num, params->page_shift); 1523 if (err) 1524 return err; 1525 1526 ibdev_dbg(&dev->ibdev, 1527 "inline_pbl_array - pages[%u]\n", params->page_num); 1528 1529 return 0; 1530 } 1531 1532 static int efa_create_pbl(struct efa_dev *dev, 1533 struct pbl_context *pbl, 1534 struct efa_mr *mr, 1535 struct efa_com_reg_mr_params *params) 1536 { 1537 int err; 1538 1539 err = pbl_create(dev, pbl, mr->umem, params->page_num, 1540 params->page_shift); 1541 if (err) { 1542 ibdev_dbg(&dev->ibdev, "Failed to create pbl[%d]\n", err); 1543 return err; 1544 } 1545 1546 params->inline_pbl = 0; 1547 params->indirect = !pbl->physically_continuous; 1548 if (pbl->physically_continuous) { 1549 params->pbl.pbl.length = pbl->pbl_buf_size_in_bytes; 1550 1551 efa_com_set_dma_addr(pbl->phys.continuous.dma_addr, 1552 ¶ms->pbl.pbl.address.mem_addr_high, 1553 ¶ms->pbl.pbl.address.mem_addr_low); 1554 } else { 1555 params->pbl.pbl.length = 1556 pbl->phys.indirect.chunk_list.chunks[0].length; 1557 1558 efa_com_set_dma_addr(pbl->phys.indirect.chunk_list.chunks[0].dma_addr, 1559 ¶ms->pbl.pbl.address.mem_addr_high, 1560 ¶ms->pbl.pbl.address.mem_addr_low); 1561 } 1562 1563 return 0; 1564 } 1565 1566 static struct efa_mr *efa_alloc_mr(struct ib_pd *ibpd, int access_flags, 1567 struct ib_udata *udata) 1568 { 1569 struct efa_dev *dev = to_edev(ibpd->device); 1570 int supp_access_flags; 1571 struct efa_mr *mr; 1572 1573 if (udata && udata->inlen && 1574 !ib_is_udata_cleared(udata, 0, sizeof(udata->inlen))) { 1575 ibdev_dbg(&dev->ibdev, 1576 "Incompatible ABI params, udata not cleared\n"); 1577 return ERR_PTR(-EINVAL); 1578 } 1579 1580 supp_access_flags = 1581 IB_ACCESS_LOCAL_WRITE | 1582 (EFA_DEV_CAP(dev, RDMA_READ) ? IB_ACCESS_REMOTE_READ : 0) | 1583 (EFA_DEV_CAP(dev, RDMA_WRITE) ? IB_ACCESS_REMOTE_WRITE : 0); 1584 1585 access_flags &= ~IB_ACCESS_OPTIONAL; 1586 if (access_flags & ~supp_access_flags) { 1587 ibdev_dbg(&dev->ibdev, 1588 "Unsupported access flags[%#x], supported[%#x]\n", 1589 access_flags, supp_access_flags); 1590 return ERR_PTR(-EOPNOTSUPP); 1591 } 1592 1593 mr = kzalloc(sizeof(*mr), GFP_KERNEL); 1594 if (!mr) 1595 return ERR_PTR(-ENOMEM); 1596 1597 return mr; 1598 } 1599 1600 static int efa_register_mr(struct ib_pd *ibpd, struct efa_mr *mr, u64 start, 1601 u64 length, u64 virt_addr, int access_flags) 1602 { 1603 struct efa_dev *dev = to_edev(ibpd->device); 1604 struct efa_com_reg_mr_params params = {}; 1605 struct efa_com_reg_mr_result result = {}; 1606 struct pbl_context pbl; 1607 unsigned int pg_sz; 1608 int inline_size; 1609 int err; 1610 1611 params.pd = to_epd(ibpd)->pdn; 1612 params.iova = virt_addr; 1613 params.mr_length_in_bytes = length; 1614 params.permissions = access_flags; 1615 1616 pg_sz = ib_umem_find_best_pgsz(mr->umem, 1617 dev->dev_attr.page_size_cap, 1618 virt_addr); 1619 if (!pg_sz) { 1620 ibdev_dbg(&dev->ibdev, "Failed to find a suitable page size in page_size_cap %#llx\n", 1621 dev->dev_attr.page_size_cap); 1622 return -EOPNOTSUPP; 1623 } 1624 1625 params.page_shift = order_base_2(pg_sz); 1626 params.page_num = ib_umem_num_dma_blocks(mr->umem, pg_sz); 1627 1628 ibdev_dbg(&dev->ibdev, 1629 "start %#llx length %#llx params.page_shift %u params.page_num %u\n", 1630 start, length, params.page_shift, params.page_num); 1631 1632 inline_size = ARRAY_SIZE(params.pbl.inline_pbl_array); 1633 if (params.page_num <= inline_size) { 1634 err = efa_create_inline_pbl(dev, mr, ¶ms); 1635 if (err) 1636 return err; 1637 1638 err = efa_com_register_mr(&dev->edev, ¶ms, &result); 1639 if (err) 1640 return err; 1641 } else { 1642 err = efa_create_pbl(dev, &pbl, mr, ¶ms); 1643 if (err) 1644 return err; 1645 1646 err = efa_com_register_mr(&dev->edev, ¶ms, &result); 1647 pbl_destroy(dev, &pbl); 1648 1649 if (err) 1650 return err; 1651 } 1652 1653 mr->ibmr.lkey = result.l_key; 1654 mr->ibmr.rkey = result.r_key; 1655 mr->ibmr.length = length; 1656 ibdev_dbg(&dev->ibdev, "Registered mr[%d]\n", mr->ibmr.lkey); 1657 1658 return 0; 1659 } 1660 1661 struct ib_mr *efa_reg_user_mr_dmabuf(struct ib_pd *ibpd, u64 start, 1662 u64 length, u64 virt_addr, 1663 int fd, int access_flags, 1664 struct ib_udata *udata) 1665 { 1666 struct efa_dev *dev = to_edev(ibpd->device); 1667 struct ib_umem_dmabuf *umem_dmabuf; 1668 struct efa_mr *mr; 1669 int err; 1670 1671 mr = efa_alloc_mr(ibpd, access_flags, udata); 1672 if (IS_ERR(mr)) { 1673 err = PTR_ERR(mr); 1674 goto err_out; 1675 } 1676 1677 umem_dmabuf = ib_umem_dmabuf_get_pinned(ibpd->device, start, length, fd, 1678 access_flags); 1679 if (IS_ERR(umem_dmabuf)) { 1680 err = PTR_ERR(umem_dmabuf); 1681 ibdev_dbg(&dev->ibdev, "Failed to get dmabuf umem[%d]\n", err); 1682 goto err_free; 1683 } 1684 1685 mr->umem = &umem_dmabuf->umem; 1686 err = efa_register_mr(ibpd, mr, start, length, virt_addr, access_flags); 1687 if (err) 1688 goto err_release; 1689 1690 return &mr->ibmr; 1691 1692 err_release: 1693 ib_umem_release(mr->umem); 1694 err_free: 1695 kfree(mr); 1696 err_out: 1697 atomic64_inc(&dev->stats.reg_mr_err); 1698 return ERR_PTR(err); 1699 } 1700 1701 struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, 1702 u64 virt_addr, int access_flags, 1703 struct ib_udata *udata) 1704 { 1705 struct efa_dev *dev = to_edev(ibpd->device); 1706 struct efa_mr *mr; 1707 int err; 1708 1709 mr = efa_alloc_mr(ibpd, access_flags, udata); 1710 if (IS_ERR(mr)) { 1711 err = PTR_ERR(mr); 1712 goto err_out; 1713 } 1714 1715 mr->umem = ib_umem_get(ibpd->device, start, length, access_flags); 1716 if (IS_ERR(mr->umem)) { 1717 err = PTR_ERR(mr->umem); 1718 ibdev_dbg(&dev->ibdev, 1719 "Failed to pin and map user space memory[%d]\n", err); 1720 goto err_free; 1721 } 1722 1723 err = efa_register_mr(ibpd, mr, start, length, virt_addr, access_flags); 1724 if (err) 1725 goto err_release; 1726 1727 return &mr->ibmr; 1728 1729 err_release: 1730 ib_umem_release(mr->umem); 1731 err_free: 1732 kfree(mr); 1733 err_out: 1734 atomic64_inc(&dev->stats.reg_mr_err); 1735 return ERR_PTR(err); 1736 } 1737 1738 int efa_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) 1739 { 1740 struct efa_dev *dev = to_edev(ibmr->device); 1741 struct efa_com_dereg_mr_params params; 1742 struct efa_mr *mr = to_emr(ibmr); 1743 int err; 1744 1745 ibdev_dbg(&dev->ibdev, "Deregister mr[%d]\n", ibmr->lkey); 1746 1747 params.l_key = mr->ibmr.lkey; 1748 err = efa_com_dereg_mr(&dev->edev, ¶ms); 1749 if (err) 1750 return err; 1751 1752 ib_umem_release(mr->umem); 1753 kfree(mr); 1754 1755 return 0; 1756 } 1757 1758 int efa_get_port_immutable(struct ib_device *ibdev, u32 port_num, 1759 struct ib_port_immutable *immutable) 1760 { 1761 struct ib_port_attr attr; 1762 int err; 1763 1764 err = ib_query_port(ibdev, port_num, &attr); 1765 if (err) { 1766 ibdev_dbg(ibdev, "Couldn't query port err[%d]\n", err); 1767 return err; 1768 } 1769 1770 immutable->pkey_tbl_len = attr.pkey_tbl_len; 1771 immutable->gid_tbl_len = attr.gid_tbl_len; 1772 1773 return 0; 1774 } 1775 1776 static int efa_dealloc_uar(struct efa_dev *dev, u16 uarn) 1777 { 1778 struct efa_com_dealloc_uar_params params = { 1779 .uarn = uarn, 1780 }; 1781 1782 return efa_com_dealloc_uar(&dev->edev, ¶ms); 1783 } 1784 1785 #define EFA_CHECK_USER_COMP(_dev, _comp_mask, _attr, _mask, _attr_str) \ 1786 (_attr_str = (!(_dev)->dev_attr._attr || ((_comp_mask) & (_mask))) ? \ 1787 NULL : #_attr) 1788 1789 static int efa_user_comp_handshake(const struct ib_ucontext *ibucontext, 1790 const struct efa_ibv_alloc_ucontext_cmd *cmd) 1791 { 1792 struct efa_dev *dev = to_edev(ibucontext->device); 1793 char *attr_str; 1794 1795 if (EFA_CHECK_USER_COMP(dev, cmd->comp_mask, max_tx_batch, 1796 EFA_ALLOC_UCONTEXT_CMD_COMP_TX_BATCH, attr_str)) 1797 goto err; 1798 1799 if (EFA_CHECK_USER_COMP(dev, cmd->comp_mask, min_sq_depth, 1800 EFA_ALLOC_UCONTEXT_CMD_COMP_MIN_SQ_WR, 1801 attr_str)) 1802 goto err; 1803 1804 return 0; 1805 1806 err: 1807 ibdev_dbg(&dev->ibdev, "Userspace handshake failed for %s attribute\n", 1808 attr_str); 1809 return -EOPNOTSUPP; 1810 } 1811 1812 int efa_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata) 1813 { 1814 struct efa_ucontext *ucontext = to_eucontext(ibucontext); 1815 struct efa_dev *dev = to_edev(ibucontext->device); 1816 struct efa_ibv_alloc_ucontext_resp resp = {}; 1817 struct efa_ibv_alloc_ucontext_cmd cmd = {}; 1818 struct efa_com_alloc_uar_result result; 1819 int err; 1820 1821 /* 1822 * it's fine if the driver does not know all request fields, 1823 * we will ack input fields in our response. 1824 */ 1825 1826 err = ib_copy_from_udata(&cmd, udata, 1827 min(sizeof(cmd), udata->inlen)); 1828 if (err) { 1829 ibdev_dbg(&dev->ibdev, 1830 "Cannot copy udata for alloc_ucontext\n"); 1831 goto err_out; 1832 } 1833 1834 err = efa_user_comp_handshake(ibucontext, &cmd); 1835 if (err) 1836 goto err_out; 1837 1838 err = efa_com_alloc_uar(&dev->edev, &result); 1839 if (err) 1840 goto err_out; 1841 1842 ucontext->uarn = result.uarn; 1843 1844 resp.cmds_supp_udata_mask |= EFA_USER_CMDS_SUPP_UDATA_QUERY_DEVICE; 1845 resp.cmds_supp_udata_mask |= EFA_USER_CMDS_SUPP_UDATA_CREATE_AH; 1846 resp.sub_cqs_per_cq = dev->dev_attr.sub_cqs_per_cq; 1847 resp.inline_buf_size = dev->dev_attr.inline_buf_size; 1848 resp.max_llq_size = dev->dev_attr.max_llq_size; 1849 resp.max_tx_batch = dev->dev_attr.max_tx_batch; 1850 resp.min_sq_wr = dev->dev_attr.min_sq_depth; 1851 1852 err = ib_copy_to_udata(udata, &resp, 1853 min(sizeof(resp), udata->outlen)); 1854 if (err) 1855 goto err_dealloc_uar; 1856 1857 return 0; 1858 1859 err_dealloc_uar: 1860 efa_dealloc_uar(dev, result.uarn); 1861 err_out: 1862 atomic64_inc(&dev->stats.alloc_ucontext_err); 1863 return err; 1864 } 1865 1866 void efa_dealloc_ucontext(struct ib_ucontext *ibucontext) 1867 { 1868 struct efa_ucontext *ucontext = to_eucontext(ibucontext); 1869 struct efa_dev *dev = to_edev(ibucontext->device); 1870 1871 efa_dealloc_uar(dev, ucontext->uarn); 1872 } 1873 1874 void efa_mmap_free(struct rdma_user_mmap_entry *rdma_entry) 1875 { 1876 struct efa_user_mmap_entry *entry = to_emmap(rdma_entry); 1877 1878 kfree(entry); 1879 } 1880 1881 static int __efa_mmap(struct efa_dev *dev, struct efa_ucontext *ucontext, 1882 struct vm_area_struct *vma) 1883 { 1884 struct rdma_user_mmap_entry *rdma_entry; 1885 struct efa_user_mmap_entry *entry; 1886 unsigned long va; 1887 int err = 0; 1888 u64 pfn; 1889 1890 rdma_entry = rdma_user_mmap_entry_get(&ucontext->ibucontext, vma); 1891 if (!rdma_entry) { 1892 ibdev_dbg(&dev->ibdev, 1893 "pgoff[%#lx] does not have valid entry\n", 1894 vma->vm_pgoff); 1895 atomic64_inc(&dev->stats.mmap_err); 1896 return -EINVAL; 1897 } 1898 entry = to_emmap(rdma_entry); 1899 1900 ibdev_dbg(&dev->ibdev, 1901 "Mapping address[%#llx], length[%#zx], mmap_flag[%d]\n", 1902 entry->address, rdma_entry->npages * PAGE_SIZE, 1903 entry->mmap_flag); 1904 1905 pfn = entry->address >> PAGE_SHIFT; 1906 switch (entry->mmap_flag) { 1907 case EFA_MMAP_IO_NC: 1908 err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn, 1909 entry->rdma_entry.npages * PAGE_SIZE, 1910 pgprot_noncached(vma->vm_page_prot), 1911 rdma_entry); 1912 break; 1913 case EFA_MMAP_IO_WC: 1914 err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn, 1915 entry->rdma_entry.npages * PAGE_SIZE, 1916 pgprot_writecombine(vma->vm_page_prot), 1917 rdma_entry); 1918 break; 1919 case EFA_MMAP_DMA_PAGE: 1920 for (va = vma->vm_start; va < vma->vm_end; 1921 va += PAGE_SIZE, pfn++) { 1922 err = vm_insert_page(vma, va, pfn_to_page(pfn)); 1923 if (err) 1924 break; 1925 } 1926 break; 1927 default: 1928 err = -EINVAL; 1929 } 1930 1931 if (err) { 1932 ibdev_dbg( 1933 &dev->ibdev, 1934 "Couldn't mmap address[%#llx] length[%#zx] mmap_flag[%d] err[%d]\n", 1935 entry->address, rdma_entry->npages * PAGE_SIZE, 1936 entry->mmap_flag, err); 1937 atomic64_inc(&dev->stats.mmap_err); 1938 } 1939 1940 rdma_user_mmap_entry_put(rdma_entry); 1941 return err; 1942 } 1943 1944 int efa_mmap(struct ib_ucontext *ibucontext, 1945 struct vm_area_struct *vma) 1946 { 1947 struct efa_ucontext *ucontext = to_eucontext(ibucontext); 1948 struct efa_dev *dev = to_edev(ibucontext->device); 1949 size_t length = vma->vm_end - vma->vm_start; 1950 1951 ibdev_dbg(&dev->ibdev, 1952 "start %#lx, end %#lx, length = %#zx, pgoff = %#lx\n", 1953 vma->vm_start, vma->vm_end, length, vma->vm_pgoff); 1954 1955 return __efa_mmap(dev, ucontext, vma); 1956 } 1957 1958 static int efa_ah_destroy(struct efa_dev *dev, struct efa_ah *ah) 1959 { 1960 struct efa_com_destroy_ah_params params = { 1961 .ah = ah->ah, 1962 .pdn = to_epd(ah->ibah.pd)->pdn, 1963 }; 1964 1965 return efa_com_destroy_ah(&dev->edev, ¶ms); 1966 } 1967 1968 int efa_create_ah(struct ib_ah *ibah, 1969 struct rdma_ah_init_attr *init_attr, 1970 struct ib_udata *udata) 1971 { 1972 struct rdma_ah_attr *ah_attr = init_attr->ah_attr; 1973 struct efa_dev *dev = to_edev(ibah->device); 1974 struct efa_com_create_ah_params params = {}; 1975 struct efa_ibv_create_ah_resp resp = {}; 1976 struct efa_com_create_ah_result result; 1977 struct efa_ah *ah = to_eah(ibah); 1978 int err; 1979 1980 if (!(init_attr->flags & RDMA_CREATE_AH_SLEEPABLE)) { 1981 ibdev_dbg(&dev->ibdev, 1982 "Create address handle is not supported in atomic context\n"); 1983 err = -EOPNOTSUPP; 1984 goto err_out; 1985 } 1986 1987 if (udata->inlen && 1988 !ib_is_udata_cleared(udata, 0, udata->inlen)) { 1989 ibdev_dbg(&dev->ibdev, "Incompatible ABI params\n"); 1990 err = -EINVAL; 1991 goto err_out; 1992 } 1993 1994 memcpy(params.dest_addr, ah_attr->grh.dgid.raw, 1995 sizeof(params.dest_addr)); 1996 params.pdn = to_epd(ibah->pd)->pdn; 1997 err = efa_com_create_ah(&dev->edev, ¶ms, &result); 1998 if (err) 1999 goto err_out; 2000 2001 memcpy(ah->id, ah_attr->grh.dgid.raw, sizeof(ah->id)); 2002 ah->ah = result.ah; 2003 2004 resp.efa_address_handle = result.ah; 2005 2006 if (udata->outlen) { 2007 err = ib_copy_to_udata(udata, &resp, 2008 min(sizeof(resp), udata->outlen)); 2009 if (err) { 2010 ibdev_dbg(&dev->ibdev, 2011 "Failed to copy udata for create_ah response\n"); 2012 goto err_destroy_ah; 2013 } 2014 } 2015 ibdev_dbg(&dev->ibdev, "Created ah[%d]\n", ah->ah); 2016 2017 return 0; 2018 2019 err_destroy_ah: 2020 efa_ah_destroy(dev, ah); 2021 err_out: 2022 atomic64_inc(&dev->stats.create_ah_err); 2023 return err; 2024 } 2025 2026 int efa_destroy_ah(struct ib_ah *ibah, u32 flags) 2027 { 2028 struct efa_dev *dev = to_edev(ibah->pd->device); 2029 struct efa_ah *ah = to_eah(ibah); 2030 2031 ibdev_dbg(&dev->ibdev, "Destroy ah[%d]\n", ah->ah); 2032 2033 if (!(flags & RDMA_DESTROY_AH_SLEEPABLE)) { 2034 ibdev_dbg(&dev->ibdev, 2035 "Destroy address handle is not supported in atomic context\n"); 2036 return -EOPNOTSUPP; 2037 } 2038 2039 efa_ah_destroy(dev, ah); 2040 return 0; 2041 } 2042 2043 struct rdma_hw_stats *efa_alloc_hw_port_stats(struct ib_device *ibdev, 2044 u32 port_num) 2045 { 2046 return rdma_alloc_hw_stats_struct(efa_port_stats_descs, 2047 ARRAY_SIZE(efa_port_stats_descs), 2048 RDMA_HW_STATS_DEFAULT_LIFESPAN); 2049 } 2050 2051 struct rdma_hw_stats *efa_alloc_hw_device_stats(struct ib_device *ibdev) 2052 { 2053 return rdma_alloc_hw_stats_struct(efa_device_stats_descs, 2054 ARRAY_SIZE(efa_device_stats_descs), 2055 RDMA_HW_STATS_DEFAULT_LIFESPAN); 2056 } 2057 2058 static int efa_fill_device_stats(struct efa_dev *dev, 2059 struct rdma_hw_stats *stats) 2060 { 2061 struct efa_com_stats_admin *as = &dev->edev.aq.stats; 2062 struct efa_stats *s = &dev->stats; 2063 2064 stats->value[EFA_SUBMITTED_CMDS] = atomic64_read(&as->submitted_cmd); 2065 stats->value[EFA_COMPLETED_CMDS] = atomic64_read(&as->completed_cmd); 2066 stats->value[EFA_CMDS_ERR] = atomic64_read(&as->cmd_err); 2067 stats->value[EFA_NO_COMPLETION_CMDS] = atomic64_read(&as->no_completion); 2068 2069 stats->value[EFA_KEEP_ALIVE_RCVD] = atomic64_read(&s->keep_alive_rcvd); 2070 stats->value[EFA_ALLOC_PD_ERR] = atomic64_read(&s->alloc_pd_err); 2071 stats->value[EFA_CREATE_QP_ERR] = atomic64_read(&s->create_qp_err); 2072 stats->value[EFA_CREATE_CQ_ERR] = atomic64_read(&s->create_cq_err); 2073 stats->value[EFA_REG_MR_ERR] = atomic64_read(&s->reg_mr_err); 2074 stats->value[EFA_ALLOC_UCONTEXT_ERR] = 2075 atomic64_read(&s->alloc_ucontext_err); 2076 stats->value[EFA_CREATE_AH_ERR] = atomic64_read(&s->create_ah_err); 2077 stats->value[EFA_MMAP_ERR] = atomic64_read(&s->mmap_err); 2078 2079 return ARRAY_SIZE(efa_device_stats_descs); 2080 } 2081 2082 static int efa_fill_port_stats(struct efa_dev *dev, struct rdma_hw_stats *stats, 2083 u32 port_num) 2084 { 2085 struct efa_com_get_stats_params params = {}; 2086 union efa_com_get_stats_result result; 2087 struct efa_com_rdma_write_stats *rws; 2088 struct efa_com_rdma_read_stats *rrs; 2089 struct efa_com_messages_stats *ms; 2090 struct efa_com_basic_stats *bs; 2091 int err; 2092 2093 params.scope = EFA_ADMIN_GET_STATS_SCOPE_ALL; 2094 params.type = EFA_ADMIN_GET_STATS_TYPE_BASIC; 2095 2096 err = efa_com_get_stats(&dev->edev, ¶ms, &result); 2097 if (err) 2098 return err; 2099 2100 bs = &result.basic_stats; 2101 stats->value[EFA_TX_BYTES] = bs->tx_bytes; 2102 stats->value[EFA_TX_PKTS] = bs->tx_pkts; 2103 stats->value[EFA_RX_BYTES] = bs->rx_bytes; 2104 stats->value[EFA_RX_PKTS] = bs->rx_pkts; 2105 stats->value[EFA_RX_DROPS] = bs->rx_drops; 2106 2107 params.type = EFA_ADMIN_GET_STATS_TYPE_MESSAGES; 2108 err = efa_com_get_stats(&dev->edev, ¶ms, &result); 2109 if (err) 2110 return err; 2111 2112 ms = &result.messages_stats; 2113 stats->value[EFA_SEND_BYTES] = ms->send_bytes; 2114 stats->value[EFA_SEND_WRS] = ms->send_wrs; 2115 stats->value[EFA_RECV_BYTES] = ms->recv_bytes; 2116 stats->value[EFA_RECV_WRS] = ms->recv_wrs; 2117 2118 params.type = EFA_ADMIN_GET_STATS_TYPE_RDMA_READ; 2119 err = efa_com_get_stats(&dev->edev, ¶ms, &result); 2120 if (err) 2121 return err; 2122 2123 rrs = &result.rdma_read_stats; 2124 stats->value[EFA_RDMA_READ_WRS] = rrs->read_wrs; 2125 stats->value[EFA_RDMA_READ_BYTES] = rrs->read_bytes; 2126 stats->value[EFA_RDMA_READ_WR_ERR] = rrs->read_wr_err; 2127 stats->value[EFA_RDMA_READ_RESP_BYTES] = rrs->read_resp_bytes; 2128 2129 if (EFA_DEV_CAP(dev, RDMA_WRITE)) { 2130 params.type = EFA_ADMIN_GET_STATS_TYPE_RDMA_WRITE; 2131 err = efa_com_get_stats(&dev->edev, ¶ms, &result); 2132 if (err) 2133 return err; 2134 2135 rws = &result.rdma_write_stats; 2136 stats->value[EFA_RDMA_WRITE_WRS] = rws->write_wrs; 2137 stats->value[EFA_RDMA_WRITE_BYTES] = rws->write_bytes; 2138 stats->value[EFA_RDMA_WRITE_WR_ERR] = rws->write_wr_err; 2139 stats->value[EFA_RDMA_WRITE_RECV_BYTES] = rws->write_recv_bytes; 2140 } 2141 2142 return ARRAY_SIZE(efa_port_stats_descs); 2143 } 2144 2145 int efa_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, 2146 u32 port_num, int index) 2147 { 2148 if (port_num) 2149 return efa_fill_port_stats(to_edev(ibdev), stats, port_num); 2150 else 2151 return efa_fill_device_stats(to_edev(ibdev), stats); 2152 } 2153 2154 enum rdma_link_layer efa_port_link_layer(struct ib_device *ibdev, 2155 u32 port_num) 2156 { 2157 return IB_LINK_LAYER_UNSPECIFIED; 2158 } 2159 2160