// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved.
 */

#include <linux/vmalloc.h>

#include <rdma/ib_addr.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_verbs.h>
#include <rdma/uverbs_ioctl.h>

#include "efa.h"

enum {
	EFA_MMAP_DMA_PAGE = 0,
	EFA_MMAP_IO_WC,
	EFA_MMAP_IO_NC,
};

#define EFA_AENQ_ENABLED_GROUPS \
	(BIT(EFA_ADMIN_FATAL_ERROR) | BIT(EFA_ADMIN_WARNING) | \
	 BIT(EFA_ADMIN_NOTIFICATION) | BIT(EFA_ADMIN_KEEP_ALIVE))

struct efa_user_mmap_entry {
	struct rdma_user_mmap_entry rdma_entry;
	u64 address;
	u8 mmap_flag;
};

#define EFA_DEFINE_STATS(op) \
	op(EFA_TX_BYTES, "tx_bytes") \
	op(EFA_TX_PKTS, "tx_pkts") \
	op(EFA_RX_BYTES, "rx_bytes") \
	op(EFA_RX_PKTS, "rx_pkts") \
	op(EFA_RX_DROPS, "rx_drops") \
	op(EFA_SUBMITTED_CMDS, "submitted_cmds") \
	op(EFA_COMPLETED_CMDS, "completed_cmds") \
	op(EFA_NO_COMPLETION_CMDS, "no_completion_cmds") \
	op(EFA_KEEP_ALIVE_RCVD, "keep_alive_rcvd") \
	op(EFA_ALLOC_PD_ERR, "alloc_pd_err") \
	op(EFA_CREATE_QP_ERR, "create_qp_err") \
	op(EFA_REG_MR_ERR, "reg_mr_err") \
	op(EFA_ALLOC_UCONTEXT_ERR, "alloc_ucontext_err") \
	op(EFA_CREATE_AH_ERR, "create_ah_err")

#define EFA_STATS_ENUM(ename, name) ename,
#define EFA_STATS_STR(ename, name) [ename] = name,

enum efa_hw_stats {
	EFA_DEFINE_STATS(EFA_STATS_ENUM)
};

static const char *const efa_stats_names[] = {
	EFA_DEFINE_STATS(EFA_STATS_STR)
};

#define EFA_CHUNK_PAYLOAD_SHIFT       12
#define EFA_CHUNK_PAYLOAD_SIZE        BIT(EFA_CHUNK_PAYLOAD_SHIFT)
#define EFA_CHUNK_PAYLOAD_PTR_SIZE    8

#define EFA_CHUNK_SHIFT               12
#define EFA_CHUNK_SIZE                BIT(EFA_CHUNK_SHIFT)
#define EFA_CHUNK_PTR_SIZE            sizeof(struct efa_com_ctrl_buff_info)

#define EFA_PTRS_PER_CHUNK \
	((EFA_CHUNK_SIZE - EFA_CHUNK_PTR_SIZE) / EFA_CHUNK_PAYLOAD_PTR_SIZE)

#define EFA_CHUNK_USED_SIZE \
	((EFA_PTRS_PER_CHUNK * EFA_CHUNK_PAYLOAD_PTR_SIZE) + EFA_CHUNK_PTR_SIZE)
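
/*
 * Indirect PBLs are built out of EFA_CHUNK_SIZE (4KB) chunks: each chunk
 * carries up to EFA_PTRS_PER_CHUNK page DMA addresses followed by a
 * struct efa_com_ctrl_buff_info that holds the DMA address and length of
 * the next chunk in the list, so the device can walk the chunks on its own.
 */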

struct pbl_chunk {
	dma_addr_t dma_addr;
	u64 *buf;
	u32 length;
};

struct pbl_chunk_list {
	struct pbl_chunk *chunks;
	unsigned int size;
};

struct pbl_context {
	union {
		struct {
			dma_addr_t dma_addr;
		} continuous;
		struct {
			u32 pbl_buf_size_in_pages;
			struct scatterlist *sgl;
			int sg_dma_cnt;
			struct pbl_chunk_list chunk_list;
		} indirect;
	} phys;
	u64 *pbl_buf;
	u32 pbl_buf_size_in_bytes;
	u8 physically_continuous;
};

static inline struct efa_dev *to_edev(struct ib_device *ibdev)
{
	return container_of(ibdev, struct efa_dev, ibdev);
}

static inline struct efa_ucontext *to_eucontext(struct ib_ucontext *ibucontext)
{
	return container_of(ibucontext, struct efa_ucontext, ibucontext);
}

static inline struct efa_pd *to_epd(struct ib_pd *ibpd)
{
	return container_of(ibpd, struct efa_pd, ibpd);
}

static inline struct efa_mr *to_emr(struct ib_mr *ibmr)
{
	return container_of(ibmr, struct efa_mr, ibmr);
}

static inline struct efa_qp *to_eqp(struct ib_qp *ibqp)
{
	return container_of(ibqp, struct efa_qp, ibqp);
}

static inline struct efa_cq *to_ecq(struct ib_cq *ibcq)
{
	return container_of(ibcq, struct efa_cq, ibcq);
}

static inline struct efa_ah *to_eah(struct ib_ah *ibah)
{
	return container_of(ibah, struct efa_ah, ibah);
}

static inline struct efa_user_mmap_entry *
to_emmap(struct rdma_user_mmap_entry *rdma_entry)
{
	return container_of(rdma_entry, struct efa_user_mmap_entry, rdma_entry);
}

static inline bool is_rdma_read_cap(struct efa_dev *dev)
{
	return dev->dev_attr.device_caps & EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RDMA_READ_MASK;
}

#define is_reserved_cleared(reserved) \
	!memchr_inv(reserved, 0, sizeof(reserved))
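
/*
 * Queue and CQ backing memory is allocated with alloc_pages_exact() and
 * DMA-mapped with dma_map_single(); keeping it in whole pages is what later
 * allows EFA_MMAP_DMA_PAGE entries to be mapped into userspace with
 * vm_insert_page() in __efa_mmap().
 */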

static void *efa_zalloc_mapped(struct efa_dev *dev, dma_addr_t *dma_addr,
			       size_t size, enum dma_data_direction dir)
{
	void *addr;

	addr = alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO);
	if (!addr)
		return NULL;

	*dma_addr = dma_map_single(&dev->pdev->dev, addr, size, dir);
	if (dma_mapping_error(&dev->pdev->dev, *dma_addr)) {
		ibdev_err(&dev->ibdev, "Failed to map DMA address\n");
		free_pages_exact(addr, size);
		return NULL;
	}

	return addr;
}

static void efa_free_mapped(struct efa_dev *dev, void *cpu_addr,
			    dma_addr_t dma_addr,
			    size_t size, enum dma_data_direction dir)
{
	dma_unmap_single(&dev->pdev->dev, dma_addr, size, dir);
	free_pages_exact(cpu_addr, size);
}

int efa_query_device(struct ib_device *ibdev,
		     struct ib_device_attr *props,
		     struct ib_udata *udata)
{
	struct efa_com_get_device_attr_result *dev_attr;
	struct efa_ibv_ex_query_device_resp resp = {};
	struct efa_dev *dev = to_edev(ibdev);
	int err;

	if (udata && udata->inlen &&
	    !ib_is_udata_cleared(udata, 0, udata->inlen)) {
		ibdev_dbg(ibdev,
			  "Incompatible ABI params, udata not cleared\n");
		return -EINVAL;
	}

	dev_attr = &dev->dev_attr;

	memset(props, 0, sizeof(*props));
	props->max_mr_size = dev_attr->max_mr_pages * PAGE_SIZE;
	props->page_size_cap = dev_attr->page_size_cap;
	props->vendor_id = dev->pdev->vendor;
	props->vendor_part_id = dev->pdev->device;
	props->hw_ver = dev->pdev->subsystem_device;
	props->max_qp = dev_attr->max_qp;
	props->max_cq = dev_attr->max_cq;
	props->max_pd = dev_attr->max_pd;
	props->max_mr = dev_attr->max_mr;
	props->max_ah = dev_attr->max_ah;
	props->max_cqe = dev_attr->max_cq_depth;
	props->max_qp_wr = min_t(u32, dev_attr->max_sq_depth,
				 dev_attr->max_rq_depth);
	props->max_send_sge = dev_attr->max_sq_sge;
	props->max_recv_sge = dev_attr->max_rq_sge;
	props->max_sge_rd = dev_attr->max_wr_rdma_sge;

	if (udata && udata->outlen) {
		resp.max_sq_sge = dev_attr->max_sq_sge;
		resp.max_rq_sge = dev_attr->max_rq_sge;
		resp.max_sq_wr = dev_attr->max_sq_depth;
		resp.max_rq_wr = dev_attr->max_rq_depth;
		resp.max_rdma_size = dev_attr->max_rdma_size;

		if (is_rdma_read_cap(dev))
			resp.device_caps |= EFA_QUERY_DEVICE_CAPS_RDMA_READ;

		err = ib_copy_to_udata(udata, &resp,
				       min(sizeof(resp), udata->outlen));
		if (err) {
			ibdev_dbg(ibdev,
				  "Failed to copy udata for query_device\n");
			return err;
		}
	}

	return 0;
}

int efa_query_port(struct ib_device *ibdev, u8 port,
		   struct ib_port_attr *props)
{
	struct efa_dev *dev = to_edev(ibdev);

	props->lmc = 1;

	props->state = IB_PORT_ACTIVE;
	props->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
	props->gid_tbl_len = 1;
	props->pkey_tbl_len = 1;
	props->active_speed = IB_SPEED_EDR;
	props->active_width = IB_WIDTH_4X;
	props->max_mtu = ib_mtu_int_to_enum(dev->dev_attr.mtu);
	props->active_mtu = ib_mtu_int_to_enum(dev->dev_attr.mtu);
	props->max_msg_sz = dev->dev_attr.mtu;
	props->max_vl_num = 1;

	return 0;
}

int efa_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
		 int qp_attr_mask,
		 struct ib_qp_init_attr *qp_init_attr)
{
	struct efa_dev *dev = to_edev(ibqp->device);
	struct efa_com_query_qp_params params = {};
	struct efa_com_query_qp_result result;
	struct efa_qp *qp = to_eqp(ibqp);
	int err;

#define EFA_QUERY_QP_SUPP_MASK \
	(IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT | \
	 IB_QP_QKEY | IB_QP_SQ_PSN | IB_QP_CAP)

	if (qp_attr_mask & ~EFA_QUERY_QP_SUPP_MASK) {
		ibdev_dbg(&dev->ibdev,
			  "Unsupported qp_attr_mask[%#x] supported[%#x]\n",
			  qp_attr_mask, EFA_QUERY_QP_SUPP_MASK);
		return -EOPNOTSUPP;
	}

	memset(qp_attr, 0, sizeof(*qp_attr));
	memset(qp_init_attr, 0, sizeof(*qp_init_attr));

	params.qp_handle = qp->qp_handle;
	err = efa_com_query_qp(&dev->edev, &params, &result);
	if (err)
		return err;

	qp_attr->qp_state = result.qp_state;
	qp_attr->qkey = result.qkey;
	qp_attr->sq_psn = result.sq_psn;
	qp_attr->sq_draining = result.sq_draining;
	qp_attr->port_num = 1;

	qp_attr->cap.max_send_wr = qp->max_send_wr;
	qp_attr->cap.max_recv_wr = qp->max_recv_wr;
	qp_attr->cap.max_send_sge = qp->max_send_sge;
	qp_attr->cap.max_recv_sge = qp->max_recv_sge;
	qp_attr->cap.max_inline_data = qp->max_inline_data;

	qp_init_attr->qp_type = ibqp->qp_type;
	qp_init_attr->recv_cq = ibqp->recv_cq;
	qp_init_attr->send_cq = ibqp->send_cq;
	qp_init_attr->qp_context = ibqp->qp_context;
	qp_init_attr->cap = qp_attr->cap;

	return 0;
}

int efa_query_gid(struct ib_device *ibdev, u8 port, int index,
		  union ib_gid *gid)
{
	struct efa_dev *dev = to_edev(ibdev);

	memcpy(gid->raw, dev->dev_attr.addr, sizeof(dev->dev_attr.addr));

	return 0;
}

int efa_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
		   u16 *pkey)
{
	if (index > 0)
		return -EINVAL;

	*pkey = 0xffff;
	return 0;
}

static int efa_pd_dealloc(struct efa_dev *dev, u16 pdn)
{
	struct efa_com_dealloc_pd_params params = {
		.pdn = pdn,
	};

	return efa_com_dealloc_pd(&dev->edev, &params);
}

int efa_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
	struct efa_dev *dev = to_edev(ibpd->device);
	struct efa_ibv_alloc_pd_resp resp = {};
	struct efa_com_alloc_pd_result result;
	struct efa_pd *pd = to_epd(ibpd);
	int err;

	if (udata->inlen &&
	    !ib_is_udata_cleared(udata, 0, udata->inlen)) {
		ibdev_dbg(&dev->ibdev,
			  "Incompatible ABI params, udata not cleared\n");
		err = -EINVAL;
		goto err_out;
	}

	err = efa_com_alloc_pd(&dev->edev, &result);
	if (err)
		goto err_out;

	pd->pdn = result.pdn;
	resp.pdn = result.pdn;

	if (udata->outlen) {
		err = ib_copy_to_udata(udata, &resp,
				       min(sizeof(resp), udata->outlen));
		if (err) {
			ibdev_dbg(&dev->ibdev,
				  "Failed to copy udata for alloc_pd\n");
			goto err_dealloc_pd;
		}
	}

	ibdev_dbg(&dev->ibdev, "Allocated pd[%d]\n", pd->pdn);

	return 0;

err_dealloc_pd:
	efa_pd_dealloc(dev, result.pdn);
err_out:
	atomic64_inc(&dev->stats.sw_stats.alloc_pd_err);
	return err;
}

void efa_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
	struct efa_dev *dev = to_edev(ibpd->device);
	struct efa_pd *pd = to_epd(ibpd);

	ibdev_dbg(&dev->ibdev, "Dealloc pd[%d]\n", pd->pdn);
	efa_pd_dealloc(dev, pd->pdn);
}

static int efa_destroy_qp_handle(struct efa_dev *dev, u32 qp_handle)
{
	struct efa_com_destroy_qp_params params = { .qp_handle = qp_handle };

	return efa_com_destroy_qp(&dev->edev, &params);
}

static void efa_qp_user_mmap_entries_remove(struct efa_qp *qp)
{
	rdma_user_mmap_entry_remove(qp->rq_mmap_entry);
	rdma_user_mmap_entry_remove(qp->rq_db_mmap_entry);
	rdma_user_mmap_entry_remove(qp->llq_desc_mmap_entry);
	rdma_user_mmap_entry_remove(qp->sq_db_mmap_entry);
}

int efa_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
{
	struct efa_dev *dev = to_edev(ibqp->pd->device);
	struct efa_qp *qp = to_eqp(ibqp);
	int err;

	ibdev_dbg(&dev->ibdev, "Destroy qp[%u]\n", ibqp->qp_num);

	efa_qp_user_mmap_entries_remove(qp);

	err = efa_destroy_qp_handle(dev, qp->qp_handle);
	if (err)
		return err;

	if (qp->rq_cpu_addr) {
		ibdev_dbg(&dev->ibdev,
			  "qp->cpu_addr[0x%p] freed: size[%lu], dma[%pad]\n",
			  qp->rq_cpu_addr, qp->rq_size,
			  &qp->rq_dma_addr);
		efa_free_mapped(dev, qp->rq_cpu_addr, qp->rq_dma_addr,
				qp->rq_size, DMA_TO_DEVICE);
	}

	kfree(qp);
	return 0;
}

static struct rdma_user_mmap_entry*
efa_user_mmap_entry_insert(struct ib_ucontext *ucontext,
			   u64 address, size_t length,
			   u8 mmap_flag, u64 *offset)
{
	struct efa_user_mmap_entry *entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	int err;

	if (!entry)
		return NULL;

	entry->address = address;
	entry->mmap_flag = mmap_flag;

	err = rdma_user_mmap_entry_insert(ucontext, &entry->rdma_entry,
					  length);
	if (err) {
		kfree(entry);
		return NULL;
	}
	*offset = rdma_user_mmap_get_offset(&entry->rdma_entry);

	return &entry->rdma_entry;
}
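
/*
 * A user QP exposes up to four mmap regions: the SQ doorbell and (if an RQ
 * exists) the RQ doorbell from the doorbell BAR (mapped non-cached), the LLQ
 * descriptor ring from the memory BAR (mapped write-combined), and the
 * host-memory RQ ring itself (DMA pages). The response carries an mmap key
 * per region, while the *_offset fields are trimmed to in-page offsets.
 */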

static int qp_mmap_entries_setup(struct efa_qp *qp,
				 struct efa_dev *dev,
				 struct efa_ucontext *ucontext,
				 struct efa_com_create_qp_params *params,
				 struct efa_ibv_create_qp_resp *resp)
{
	size_t length;
	u64 address;

	address = dev->db_bar_addr + resp->sq_db_offset;
	qp->sq_db_mmap_entry =
		efa_user_mmap_entry_insert(&ucontext->ibucontext,
					   address,
					   PAGE_SIZE, EFA_MMAP_IO_NC,
					   &resp->sq_db_mmap_key);
	if (!qp->sq_db_mmap_entry)
		return -ENOMEM;

	resp->sq_db_offset &= ~PAGE_MASK;

	address = dev->mem_bar_addr + resp->llq_desc_offset;
	length = PAGE_ALIGN(params->sq_ring_size_in_bytes +
			    (resp->llq_desc_offset & ~PAGE_MASK));

	qp->llq_desc_mmap_entry =
		efa_user_mmap_entry_insert(&ucontext->ibucontext,
					   address, length,
					   EFA_MMAP_IO_WC,
					   &resp->llq_desc_mmap_key);
	if (!qp->llq_desc_mmap_entry)
		goto err_remove_mmap;

	resp->llq_desc_offset &= ~PAGE_MASK;

	if (qp->rq_size) {
		address = dev->db_bar_addr + resp->rq_db_offset;

		qp->rq_db_mmap_entry =
			efa_user_mmap_entry_insert(&ucontext->ibucontext,
						   address, PAGE_SIZE,
						   EFA_MMAP_IO_NC,
						   &resp->rq_db_mmap_key);
		if (!qp->rq_db_mmap_entry)
			goto err_remove_mmap;

		resp->rq_db_offset &= ~PAGE_MASK;

		address = virt_to_phys(qp->rq_cpu_addr);
		qp->rq_mmap_entry =
			efa_user_mmap_entry_insert(&ucontext->ibucontext,
						   address, qp->rq_size,
						   EFA_MMAP_DMA_PAGE,
						   &resp->rq_mmap_key);
		if (!qp->rq_mmap_entry)
			goto err_remove_mmap;

		resp->rq_mmap_size = qp->rq_size;
	}

	return 0;

err_remove_mmap:
	efa_qp_user_mmap_entries_remove(qp);

	return -ENOMEM;
}

static int efa_qp_validate_cap(struct efa_dev *dev,
			       struct ib_qp_init_attr *init_attr)
{
	if (init_attr->cap.max_send_wr > dev->dev_attr.max_sq_depth) {
		ibdev_dbg(&dev->ibdev,
			  "qp: requested send wr[%u] exceeds the max[%u]\n",
			  init_attr->cap.max_send_wr,
			  dev->dev_attr.max_sq_depth);
		return -EINVAL;
	}
	if (init_attr->cap.max_recv_wr > dev->dev_attr.max_rq_depth) {
		ibdev_dbg(&dev->ibdev,
			  "qp: requested receive wr[%u] exceeds the max[%u]\n",
			  init_attr->cap.max_recv_wr,
			  dev->dev_attr.max_rq_depth);
		return -EINVAL;
	}
	if (init_attr->cap.max_send_sge > dev->dev_attr.max_sq_sge) {
		ibdev_dbg(&dev->ibdev,
			  "qp: requested sge send[%u] exceeds the max[%u]\n",
			  init_attr->cap.max_send_sge, dev->dev_attr.max_sq_sge);
		return -EINVAL;
	}
	if (init_attr->cap.max_recv_sge > dev->dev_attr.max_rq_sge) {
		ibdev_dbg(&dev->ibdev,
			  "qp: requested sge recv[%u] exceeds the max[%u]\n",
			  init_attr->cap.max_recv_sge, dev->dev_attr.max_rq_sge);
		return -EINVAL;
	}
	if (init_attr->cap.max_inline_data > dev->dev_attr.inline_buf_size) {
		ibdev_dbg(&dev->ibdev,
			  "qp: requested inline data[%u] exceeds the max[%u]\n",
			  init_attr->cap.max_inline_data,
			  dev->dev_attr.inline_buf_size);
		return -EINVAL;
	}

	return 0;
}

static int efa_qp_validate_attr(struct efa_dev *dev,
				struct ib_qp_init_attr *init_attr)
{
	if (init_attr->qp_type != IB_QPT_DRIVER &&
	    init_attr->qp_type != IB_QPT_UD) {
		ibdev_dbg(&dev->ibdev,
			  "Unsupported qp type %d\n", init_attr->qp_type);
		return -EOPNOTSUPP;
	}

	if (init_attr->srq) {
		ibdev_dbg(&dev->ibdev, "SRQ is not supported\n");
		return -EOPNOTSUPP;
	}

	if (init_attr->create_flags) {
		ibdev_dbg(&dev->ibdev, "Unsupported create flags\n");
		return -EOPNOTSUPP;
	}

	return 0;
}
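
/*
 * QP creation flow: validate the requested capabilities and attributes,
 * parse the ABI command from udata (IB_QPT_UD maps to the device UD type,
 * IB_QPT_DRIVER with EFA_QP_DRIVER_TYPE_SRD to SRD), allocate and DMA-map
 * the RQ ring in host memory if one was requested, issue the admin create
 * command, and finally publish the doorbell/LLQ/RQ mmap entries back to
 * userspace through the create response.
 */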

struct ib_qp *efa_create_qp(struct ib_pd *ibpd,
			    struct ib_qp_init_attr *init_attr,
			    struct ib_udata *udata)
{
	struct efa_com_create_qp_params create_qp_params = {};
	struct efa_com_create_qp_result create_qp_resp;
	struct efa_dev *dev = to_edev(ibpd->device);
	struct efa_ibv_create_qp_resp resp = {};
	struct efa_ibv_create_qp cmd = {};
	struct efa_ucontext *ucontext;
	struct efa_qp *qp;
	int err;

	ucontext = rdma_udata_to_drv_context(udata, struct efa_ucontext,
					     ibucontext);

	err = efa_qp_validate_cap(dev, init_attr);
	if (err)
		goto err_out;

	err = efa_qp_validate_attr(dev, init_attr);
	if (err)
		goto err_out;

	if (offsetofend(typeof(cmd), driver_qp_type) > udata->inlen) {
		ibdev_dbg(&dev->ibdev,
			  "Incompatible ABI params, no input udata\n");
		err = -EINVAL;
		goto err_out;
	}

	if (udata->inlen > sizeof(cmd) &&
	    !ib_is_udata_cleared(udata, sizeof(cmd),
				 udata->inlen - sizeof(cmd))) {
		ibdev_dbg(&dev->ibdev,
			  "Incompatible ABI params, unknown fields in udata\n");
		err = -EINVAL;
		goto err_out;
	}

	err = ib_copy_from_udata(&cmd, udata,
				 min(sizeof(cmd), udata->inlen));
	if (err) {
		ibdev_dbg(&dev->ibdev,
			  "Cannot copy udata for create_qp\n");
		goto err_out;
	}

	if (cmd.comp_mask) {
		ibdev_dbg(&dev->ibdev,
			  "Incompatible ABI params, unknown fields in udata\n");
		err = -EINVAL;
		goto err_out;
	}

	qp = kzalloc(sizeof(*qp), GFP_KERNEL);
	if (!qp) {
		err = -ENOMEM;
		goto err_out;
	}

	create_qp_params.uarn = ucontext->uarn;
	create_qp_params.pd = to_epd(ibpd)->pdn;

	if (init_attr->qp_type == IB_QPT_UD) {
		create_qp_params.qp_type = EFA_ADMIN_QP_TYPE_UD;
	} else if (cmd.driver_qp_type == EFA_QP_DRIVER_TYPE_SRD) {
		create_qp_params.qp_type = EFA_ADMIN_QP_TYPE_SRD;
	} else {
		ibdev_dbg(&dev->ibdev,
			  "Unsupported qp type %d driver qp type %d\n",
			  init_attr->qp_type, cmd.driver_qp_type);
		err = -EOPNOTSUPP;
		goto err_free_qp;
	}

	ibdev_dbg(&dev->ibdev, "Create QP: qp type %d driver qp type %#x\n",
		  init_attr->qp_type, cmd.driver_qp_type);
	create_qp_params.send_cq_idx = to_ecq(init_attr->send_cq)->cq_idx;
	create_qp_params.recv_cq_idx = to_ecq(init_attr->recv_cq)->cq_idx;
	create_qp_params.sq_depth = init_attr->cap.max_send_wr;
	create_qp_params.sq_ring_size_in_bytes = cmd.sq_ring_size;

	create_qp_params.rq_depth = init_attr->cap.max_recv_wr;
	create_qp_params.rq_ring_size_in_bytes = cmd.rq_ring_size;
	qp->rq_size = PAGE_ALIGN(create_qp_params.rq_ring_size_in_bytes);
	if (qp->rq_size) {
		qp->rq_cpu_addr = efa_zalloc_mapped(dev, &qp->rq_dma_addr,
						    qp->rq_size, DMA_TO_DEVICE);
		if (!qp->rq_cpu_addr) {
			err = -ENOMEM;
			goto err_free_qp;
		}

		ibdev_dbg(&dev->ibdev,
			  "qp->cpu_addr[0x%p] allocated: size[%lu], dma[%pad]\n",
			  qp->rq_cpu_addr, qp->rq_size, &qp->rq_dma_addr);
		create_qp_params.rq_base_addr = qp->rq_dma_addr;
	}

	err = efa_com_create_qp(&dev->edev, &create_qp_params,
				&create_qp_resp);
	if (err)
		goto err_free_mapped;

	resp.sq_db_offset = create_qp_resp.sq_db_offset;
	resp.rq_db_offset = create_qp_resp.rq_db_offset;
	resp.llq_desc_offset = create_qp_resp.llq_descriptors_offset;
	resp.send_sub_cq_idx = create_qp_resp.send_sub_cq_idx;
	resp.recv_sub_cq_idx = create_qp_resp.recv_sub_cq_idx;

	err = qp_mmap_entries_setup(qp, dev, ucontext, &create_qp_params,
				    &resp);
	if (err)
		goto err_destroy_qp;

	qp->qp_handle = create_qp_resp.qp_handle;
	qp->ibqp.qp_num = create_qp_resp.qp_num;
	qp->ibqp.qp_type = init_attr->qp_type;
	qp->max_send_wr = init_attr->cap.max_send_wr;
	qp->max_recv_wr = init_attr->cap.max_recv_wr;
	qp->max_send_sge = init_attr->cap.max_send_sge;
	qp->max_recv_sge = init_attr->cap.max_recv_sge;
	qp->max_inline_data = init_attr->cap.max_inline_data;

	if (udata->outlen) {
		err = ib_copy_to_udata(udata, &resp,
				       min(sizeof(resp), udata->outlen));
		if (err) {
			ibdev_dbg(&dev->ibdev,
				  "Failed to copy udata for qp[%u]\n",
				  create_qp_resp.qp_num);
			goto err_remove_mmap_entries;
		}
	}

	ibdev_dbg(&dev->ibdev, "Created qp[%d]\n", qp->ibqp.qp_num);

	return &qp->ibqp;

err_remove_mmap_entries:
	efa_qp_user_mmap_entries_remove(qp);
err_destroy_qp:
	efa_destroy_qp_handle(dev, create_qp_resp.qp_handle);
err_free_mapped:
	if (qp->rq_size)
		efa_free_mapped(dev, qp->rq_cpu_addr, qp->rq_dma_addr,
				qp->rq_size, DMA_TO_DEVICE);
err_free_qp:
	kfree(qp);
err_out:
	atomic64_inc(&dev->stats.sw_stats.create_qp_err);
	return ERR_PTR(err);
}

static int efa_modify_qp_validate(struct efa_dev *dev, struct efa_qp *qp,
				  struct ib_qp_attr *qp_attr, int qp_attr_mask,
				  enum ib_qp_state cur_state,
				  enum ib_qp_state new_state)
{
#define EFA_MODIFY_QP_SUPP_MASK \
	(IB_QP_STATE | IB_QP_CUR_STATE | IB_QP_EN_SQD_ASYNC_NOTIFY | \
	 IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_QKEY | IB_QP_SQ_PSN)

	if (qp_attr_mask & ~EFA_MODIFY_QP_SUPP_MASK) {
		ibdev_dbg(&dev->ibdev,
			  "Unsupported qp_attr_mask[%#x] supported[%#x]\n",
			  qp_attr_mask, EFA_MODIFY_QP_SUPP_MASK);
		return -EOPNOTSUPP;
	}

	if (!ib_modify_qp_is_ok(cur_state, new_state, IB_QPT_UD,
				qp_attr_mask)) {
		ibdev_dbg(&dev->ibdev, "Invalid modify QP parameters\n");
		return -EINVAL;
	}

	if ((qp_attr_mask & IB_QP_PORT) && qp_attr->port_num != 1) {
		ibdev_dbg(&dev->ibdev, "Can't change port num\n");
		return -EOPNOTSUPP;
	}

	if ((qp_attr_mask & IB_QP_PKEY_INDEX) && qp_attr->pkey_index) {
		ibdev_dbg(&dev->ibdev, "Can't change pkey index\n");
		return -EOPNOTSUPP;
	}

	return 0;
}

int efa_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
		  int qp_attr_mask, struct ib_udata *udata)
{
	struct efa_dev *dev = to_edev(ibqp->device);
	struct efa_com_modify_qp_params params = {};
	struct efa_qp *qp = to_eqp(ibqp);
	enum ib_qp_state cur_state;
	enum ib_qp_state new_state;
	int err;

	if (udata->inlen &&
	    !ib_is_udata_cleared(udata, 0, udata->inlen)) {
		ibdev_dbg(&dev->ibdev,
			  "Incompatible ABI params, udata not cleared\n");
		return -EINVAL;
	}

	cur_state = qp_attr_mask & IB_QP_CUR_STATE ? qp_attr->cur_qp_state :
						     qp->state;
	new_state = qp_attr_mask & IB_QP_STATE ? qp_attr->qp_state : cur_state;

	err = efa_modify_qp_validate(dev, qp, qp_attr, qp_attr_mask, cur_state,
				     new_state);
	if (err)
		return err;

	params.qp_handle = qp->qp_handle;

	if (qp_attr_mask & IB_QP_STATE) {
		params.modify_mask |= BIT(EFA_ADMIN_QP_STATE_BIT) |
				      BIT(EFA_ADMIN_CUR_QP_STATE_BIT);
		params.cur_qp_state = qp_attr->cur_qp_state;
		params.qp_state = qp_attr->qp_state;
	}

	if (qp_attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY) {
		params.modify_mask |=
			BIT(EFA_ADMIN_SQ_DRAINED_ASYNC_NOTIFY_BIT);
		params.sq_drained_async_notify = qp_attr->en_sqd_async_notify;
	}

	if (qp_attr_mask & IB_QP_QKEY) {
		params.modify_mask |= BIT(EFA_ADMIN_QKEY_BIT);
		params.qkey = qp_attr->qkey;
	}

	if (qp_attr_mask & IB_QP_SQ_PSN) {
		params.modify_mask |= BIT(EFA_ADMIN_SQ_PSN_BIT);
		params.sq_psn = qp_attr->sq_psn;
	}

	err = efa_com_modify_qp(&dev->edev, &params);
	if (err)
		return err;

	qp->state = new_state;

	return 0;
}

static int efa_destroy_cq_idx(struct efa_dev *dev, int cq_idx)
{
	struct efa_com_destroy_cq_params params = { .cq_idx = cq_idx };

	return efa_com_destroy_cq(&dev->edev, &params);
}

void efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
{
	struct efa_dev *dev = to_edev(ibcq->device);
	struct efa_cq *cq = to_ecq(ibcq);

	ibdev_dbg(&dev->ibdev,
		  "Destroy cq[%d] virt[0x%p] freed: size[%lu], dma[%pad]\n",
		  cq->cq_idx, cq->cpu_addr, cq->size, &cq->dma_addr);

	rdma_user_mmap_entry_remove(cq->mmap_entry);
	efa_destroy_cq_idx(dev, cq->cq_idx);
	efa_free_mapped(dev, cq->cpu_addr, cq->dma_addr, cq->size,
			DMA_FROM_DEVICE);
}

static int cq_mmap_entries_setup(struct efa_dev *dev, struct efa_cq *cq,
				 struct efa_ibv_create_cq_resp *resp)
{
	resp->q_mmap_size = cq->size;
	cq->mmap_entry = efa_user_mmap_entry_insert(&cq->ucontext->ibucontext,
						    virt_to_phys(cq->cpu_addr),
						    cq->size, EFA_MMAP_DMA_PAGE,
						    &resp->q_mmap_key);
	if (!cq->mmap_entry)
		return -ENOMEM;

	return 0;
}
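
/*
 * The CQ buffer lives in host memory: it is sized as
 * cq_entry_size * entries * num_sub_cqs (page aligned), DMA-mapped for the
 * device and exposed to userspace as an EFA_MMAP_DMA_PAGE region through
 * the q_mmap_key/q_mmap_size fields of the create response.
 */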

int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
		  struct ib_udata *udata)
{
	struct efa_ucontext *ucontext = rdma_udata_to_drv_context(
		udata, struct efa_ucontext, ibucontext);
	struct efa_ibv_create_cq_resp resp = {};
	struct efa_com_create_cq_params params;
	struct efa_com_create_cq_result result;
	struct ib_device *ibdev = ibcq->device;
	struct efa_dev *dev = to_edev(ibdev);
	struct efa_ibv_create_cq cmd = {};
	struct efa_cq *cq = to_ecq(ibcq);
	int entries = attr->cqe;
	int err;

	ibdev_dbg(ibdev, "create_cq entries %d\n", entries);

	if (entries < 1 || entries > dev->dev_attr.max_cq_depth) {
		ibdev_dbg(ibdev,
			  "cq: requested entries[%u] non-positive or greater than max[%u]\n",
			  entries, dev->dev_attr.max_cq_depth);
		err = -EINVAL;
		goto err_out;
	}

	if (offsetofend(typeof(cmd), num_sub_cqs) > udata->inlen) {
		ibdev_dbg(ibdev,
			  "Incompatible ABI params, no input udata\n");
		err = -EINVAL;
		goto err_out;
	}

	if (udata->inlen > sizeof(cmd) &&
	    !ib_is_udata_cleared(udata, sizeof(cmd),
				 udata->inlen - sizeof(cmd))) {
		ibdev_dbg(ibdev,
			  "Incompatible ABI params, unknown fields in udata\n");
		err = -EINVAL;
		goto err_out;
	}

	err = ib_copy_from_udata(&cmd, udata,
				 min(sizeof(cmd), udata->inlen));
	if (err) {
		ibdev_dbg(ibdev, "Cannot copy udata for create_cq\n");
		goto err_out;
	}

	if (cmd.comp_mask || !is_reserved_cleared(cmd.reserved_50)) {
		ibdev_dbg(ibdev,
			  "Incompatible ABI params, unknown fields in udata\n");
		err = -EINVAL;
		goto err_out;
	}

	if (!cmd.cq_entry_size) {
		ibdev_dbg(ibdev,
			  "Invalid entry size [%u]\n", cmd.cq_entry_size);
		err = -EINVAL;
		goto err_out;
	}

	if (cmd.num_sub_cqs != dev->dev_attr.sub_cqs_per_cq) {
		ibdev_dbg(ibdev,
			  "Invalid number of sub cqs[%u] expected[%u]\n",
			  cmd.num_sub_cqs, dev->dev_attr.sub_cqs_per_cq);
		err = -EINVAL;
		goto err_out;
	}

	cq->ucontext = ucontext;
	cq->size = PAGE_ALIGN(cmd.cq_entry_size * entries * cmd.num_sub_cqs);
	cq->cpu_addr = efa_zalloc_mapped(dev, &cq->dma_addr, cq->size,
					 DMA_FROM_DEVICE);
	if (!cq->cpu_addr) {
		err = -ENOMEM;
		goto err_out;
	}

	params.uarn = cq->ucontext->uarn;
	params.cq_depth = entries;
	params.dma_addr = cq->dma_addr;
	params.entry_size_in_bytes = cmd.cq_entry_size;
	params.num_sub_cqs = cmd.num_sub_cqs;
	err = efa_com_create_cq(&dev->edev, &params, &result);
	if (err)
		goto err_free_mapped;

	resp.cq_idx = result.cq_idx;
	cq->cq_idx = result.cq_idx;
	cq->ibcq.cqe = result.actual_depth;
	WARN_ON_ONCE(entries != result.actual_depth);

	err = cq_mmap_entries_setup(dev, cq, &resp);
	if (err) {
		ibdev_dbg(ibdev, "Could not setup cq[%u] mmap entries\n",
			  cq->cq_idx);
		goto err_destroy_cq;
	}

	if (udata->outlen) {
		err = ib_copy_to_udata(udata, &resp,
				       min(sizeof(resp), udata->outlen));
		if (err) {
			ibdev_dbg(ibdev,
				  "Failed to copy udata for create_cq\n");
			goto err_remove_mmap;
		}
	}

	ibdev_dbg(ibdev, "Created cq[%d], cq depth[%u]. dma[%pad] virt[0x%p]\n",
		  cq->cq_idx, result.actual_depth, &cq->dma_addr, cq->cpu_addr);

	return 0;

err_remove_mmap:
	rdma_user_mmap_entry_remove(cq->mmap_entry);
err_destroy_cq:
	efa_destroy_cq_idx(dev, cq->cq_idx);
err_free_mapped:
	efa_free_mapped(dev, cq->cpu_addr, cq->dma_addr, cq->size,
			DMA_FROM_DEVICE);

err_out:
	atomic64_inc(&dev->stats.sw_stats.create_cq_err);
	return err;
}
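
/*
 * Walk a pinned umem in hp_shift-sized blocks and record one DMA address
 * per block; the resulting array is the page list handed to the device,
 * either inline in the register-MR command or through a PBL.
 */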

static int umem_to_page_list(struct efa_dev *dev,
			     struct ib_umem *umem,
			     u64 *page_list,
			     u32 hp_cnt,
			     u8 hp_shift)
{
	u32 pages_in_hp = BIT(hp_shift - PAGE_SHIFT);
	struct ib_block_iter biter;
	unsigned int hp_idx = 0;

	ibdev_dbg(&dev->ibdev, "hp_cnt[%u], pages_in_hp[%u]\n",
		  hp_cnt, pages_in_hp);

	rdma_for_each_block(umem->sg_head.sgl, &biter, umem->nmap,
			    BIT(hp_shift))
		page_list[hp_idx++] = rdma_block_iter_dma_address(&biter);

	return 0;
}

static struct scatterlist *efa_vmalloc_buf_to_sg(u64 *buf, int page_cnt)
{
	struct scatterlist *sglist;
	struct page *pg;
	int i;

	sglist = kcalloc(page_cnt, sizeof(*sglist), GFP_KERNEL);
	if (!sglist)
		return NULL;
	sg_init_table(sglist, page_cnt);
	for (i = 0; i < page_cnt; i++) {
		pg = vmalloc_to_page(buf);
		if (!pg)
			goto err;
		sg_set_page(&sglist[i], pg, PAGE_SIZE, 0);
		buf += PAGE_SIZE / sizeof(*buf);
	}
	return sglist;

err:
	kfree(sglist);
	return NULL;
}

/*
 * create a chunk list of physical pages dma addresses from the supplied
 * scatter gather list
 */
static int pbl_chunk_list_create(struct efa_dev *dev, struct pbl_context *pbl)
{
	struct pbl_chunk_list *chunk_list = &pbl->phys.indirect.chunk_list;
	int page_cnt = pbl->phys.indirect.pbl_buf_size_in_pages;
	struct scatterlist *pages_sgl = pbl->phys.indirect.sgl;
	unsigned int chunk_list_size, chunk_idx, payload_idx;
	int sg_dma_cnt = pbl->phys.indirect.sg_dma_cnt;
	struct efa_com_ctrl_buff_info *ctrl_buf;
	u64 *cur_chunk_buf, *prev_chunk_buf;
	struct ib_block_iter biter;
	dma_addr_t dma_addr;
	int i;

	/* allocate a chunk list that consists of 4KB chunks */
	chunk_list_size = DIV_ROUND_UP(page_cnt, EFA_PTRS_PER_CHUNK);

	chunk_list->size = chunk_list_size;
	chunk_list->chunks = kcalloc(chunk_list_size,
				     sizeof(*chunk_list->chunks),
				     GFP_KERNEL);
	if (!chunk_list->chunks)
		return -ENOMEM;

	ibdev_dbg(&dev->ibdev,
		  "chunk_list_size[%u] - pages[%u]\n", chunk_list_size,
		  page_cnt);

	/* allocate chunk buffers: */
	for (i = 0; i < chunk_list_size; i++) {
		chunk_list->chunks[i].buf = kzalloc(EFA_CHUNK_SIZE, GFP_KERNEL);
		if (!chunk_list->chunks[i].buf)
			goto chunk_list_dealloc;

		chunk_list->chunks[i].length = EFA_CHUNK_USED_SIZE;
	}
	chunk_list->chunks[chunk_list_size - 1].length =
		((page_cnt % EFA_PTRS_PER_CHUNK) * EFA_CHUNK_PAYLOAD_PTR_SIZE) +
			EFA_CHUNK_PTR_SIZE;

	/* fill the dma addresses of sg list pages to chunks: */
	chunk_idx = 0;
	payload_idx = 0;
	cur_chunk_buf = chunk_list->chunks[0].buf;
	rdma_for_each_block(pages_sgl, &biter, sg_dma_cnt,
			    EFA_CHUNK_PAYLOAD_SIZE) {
		cur_chunk_buf[payload_idx++] =
			rdma_block_iter_dma_address(&biter);

		if (payload_idx == EFA_PTRS_PER_CHUNK) {
			chunk_idx++;
			cur_chunk_buf = chunk_list->chunks[chunk_idx].buf;
			payload_idx = 0;
		}
	}

	/* map chunks to dma and fill chunks next ptrs */
	for (i = chunk_list_size - 1; i >= 0; i--) {
		dma_addr = dma_map_single(&dev->pdev->dev,
					  chunk_list->chunks[i].buf,
					  chunk_list->chunks[i].length,
					  DMA_TO_DEVICE);
		if (dma_mapping_error(&dev->pdev->dev, dma_addr)) {
			ibdev_err(&dev->ibdev,
				  "chunk[%u] dma_map_failed\n", i);
			goto chunk_list_unmap;
		}

		chunk_list->chunks[i].dma_addr = dma_addr;
		ibdev_dbg(&dev->ibdev,
			  "chunk[%u] mapped at [%pad]\n", i, &dma_addr);

		if (!i)
			break;

		prev_chunk_buf = chunk_list->chunks[i - 1].buf;

		ctrl_buf = (struct efa_com_ctrl_buff_info *)
				&prev_chunk_buf[EFA_PTRS_PER_CHUNK];
		ctrl_buf->length = chunk_list->chunks[i].length;

		efa_com_set_dma_addr(dma_addr,
				     &ctrl_buf->address.mem_addr_high,
				     &ctrl_buf->address.mem_addr_low);
	}

	return 0;

chunk_list_unmap:
	for (; i < chunk_list_size; i++) {
		dma_unmap_single(&dev->pdev->dev, chunk_list->chunks[i].dma_addr,
				 chunk_list->chunks[i].length, DMA_TO_DEVICE);
	}
chunk_list_dealloc:
	for (i = 0; i < chunk_list_size; i++)
		kfree(chunk_list->chunks[i].buf);

	kfree(chunk_list->chunks);
	return -ENOMEM;
}

static void pbl_chunk_list_destroy(struct efa_dev *dev, struct pbl_context *pbl)
{
	struct pbl_chunk_list *chunk_list = &pbl->phys.indirect.chunk_list;
	int i;

	for (i = 0; i < chunk_list->size; i++) {
		dma_unmap_single(&dev->pdev->dev, chunk_list->chunks[i].dma_addr,
				 chunk_list->chunks[i].length, DMA_TO_DEVICE);
		kfree(chunk_list->chunks[i].buf);
	}

	kfree(chunk_list->chunks);
}

/* initialize pbl continuous mode: map pbl buffer to a dma address. */
static int pbl_continuous_initialize(struct efa_dev *dev,
				     struct pbl_context *pbl)
{
	dma_addr_t dma_addr;

	dma_addr = dma_map_single(&dev->pdev->dev, pbl->pbl_buf,
				  pbl->pbl_buf_size_in_bytes, DMA_TO_DEVICE);
	if (dma_mapping_error(&dev->pdev->dev, dma_addr)) {
		ibdev_err(&dev->ibdev, "Unable to map pbl to DMA address\n");
		return -ENOMEM;
	}

	pbl->phys.continuous.dma_addr = dma_addr;
	ibdev_dbg(&dev->ibdev,
		  "pbl continuous - dma_addr = %pad, size[%u]\n",
		  &dma_addr, pbl->pbl_buf_size_in_bytes);

	return 0;
}

/*
 * initialize pbl indirect mode:
 * create a chunk list out of the dma addresses of the physical pages of
 * pbl buffer.
 */
static int pbl_indirect_initialize(struct efa_dev *dev, struct pbl_context *pbl)
{
	u32 size_in_pages = DIV_ROUND_UP(pbl->pbl_buf_size_in_bytes, PAGE_SIZE);
	struct scatterlist *sgl;
	int sg_dma_cnt, err;

	BUILD_BUG_ON(EFA_CHUNK_PAYLOAD_SIZE > PAGE_SIZE);
	sgl = efa_vmalloc_buf_to_sg(pbl->pbl_buf, size_in_pages);
	if (!sgl)
		return -ENOMEM;

	sg_dma_cnt = dma_map_sg(&dev->pdev->dev, sgl, size_in_pages, DMA_TO_DEVICE);
	if (!sg_dma_cnt) {
		err = -EINVAL;
		goto err_map;
	}

	pbl->phys.indirect.pbl_buf_size_in_pages = size_in_pages;
	pbl->phys.indirect.sgl = sgl;
	pbl->phys.indirect.sg_dma_cnt = sg_dma_cnt;
	err = pbl_chunk_list_create(dev, pbl);
	if (err) {
		ibdev_dbg(&dev->ibdev,
			  "chunk_list creation failed[%d]\n", err);
		goto err_chunk;
	}

	ibdev_dbg(&dev->ibdev,
		  "pbl indirect - size[%u], chunks[%u]\n",
		  pbl->pbl_buf_size_in_bytes,
		  pbl->phys.indirect.chunk_list.size);

	return 0;

err_chunk:
	dma_unmap_sg(&dev->pdev->dev, sgl, size_in_pages, DMA_TO_DEVICE);
err_map:
	kfree(sgl);
	return err;
}

static void pbl_indirect_terminate(struct efa_dev *dev, struct pbl_context *pbl)
{
	pbl_chunk_list_destroy(dev, pbl);
	dma_unmap_sg(&dev->pdev->dev, pbl->phys.indirect.sgl,
		     pbl->phys.indirect.pbl_buf_size_in_pages, DMA_TO_DEVICE);
	kfree(pbl->phys.indirect.sgl);
}

/* create a page buffer list from a mapped user memory region */
static int pbl_create(struct efa_dev *dev,
		      struct pbl_context *pbl,
		      struct ib_umem *umem,
		      int hp_cnt,
		      u8 hp_shift)
{
	int err;

	pbl->pbl_buf_size_in_bytes = hp_cnt * EFA_CHUNK_PAYLOAD_PTR_SIZE;
	pbl->pbl_buf = kvzalloc(pbl->pbl_buf_size_in_bytes, GFP_KERNEL);
	if (!pbl->pbl_buf)
		return -ENOMEM;

	if (is_vmalloc_addr(pbl->pbl_buf)) {
		pbl->physically_continuous = 0;
		err = umem_to_page_list(dev, umem, pbl->pbl_buf, hp_cnt,
					hp_shift);
		if (err)
			goto err_free;

		err = pbl_indirect_initialize(dev, pbl);
		if (err)
			goto err_free;
	} else {
		pbl->physically_continuous = 1;
		err = umem_to_page_list(dev, umem, pbl->pbl_buf, hp_cnt,
					hp_shift);
		if (err)
			goto err_free;

		err = pbl_continuous_initialize(dev, pbl);
		if (err)
			goto err_free;
	}

	ibdev_dbg(&dev->ibdev,
		  "user_pbl_created: user_pages[%u], continuous[%u]\n",
		  hp_cnt, pbl->physically_continuous);

	return 0;

err_free:
	kvfree(pbl->pbl_buf);
	return err;
}

static void pbl_destroy(struct efa_dev *dev, struct pbl_context *pbl)
{
	if (pbl->physically_continuous)
		dma_unmap_single(&dev->pdev->dev, pbl->phys.continuous.dma_addr,
				 pbl->pbl_buf_size_in_bytes, DMA_TO_DEVICE);
	else
		pbl_indirect_terminate(dev, pbl);

	kvfree(pbl->pbl_buf);
}

static int efa_create_inline_pbl(struct efa_dev *dev, struct efa_mr *mr,
				 struct efa_com_reg_mr_params *params)
{
	int err;

	params->inline_pbl = 1;
	err = umem_to_page_list(dev, mr->umem, params->pbl.inline_pbl_array,
				params->page_num, params->page_shift);
	if (err)
		return err;

	ibdev_dbg(&dev->ibdev,
		  "inline_pbl_array - pages[%u]\n", params->page_num);

	return 0;
}

static int efa_create_pbl(struct efa_dev *dev,
			  struct pbl_context *pbl,
			  struct efa_mr *mr,
			  struct efa_com_reg_mr_params *params)
{
	int err;

	err = pbl_create(dev, pbl, mr->umem, params->page_num,
			 params->page_shift);
	if (err) {
		ibdev_dbg(&dev->ibdev, "Failed to create pbl[%d]\n", err);
		return err;
	}

	params->inline_pbl = 0;
	params->indirect = !pbl->physically_continuous;
	if (pbl->physically_continuous) {
		params->pbl.pbl.length = pbl->pbl_buf_size_in_bytes;

		efa_com_set_dma_addr(pbl->phys.continuous.dma_addr,
				     &params->pbl.pbl.address.mem_addr_high,
				     &params->pbl.pbl.address.mem_addr_low);
	} else {
		params->pbl.pbl.length =
			pbl->phys.indirect.chunk_list.chunks[0].length;

		efa_com_set_dma_addr(pbl->phys.indirect.chunk_list.chunks[0].dma_addr,
				     &params->pbl.pbl.address.mem_addr_high,
				     &params->pbl.pbl.address.mem_addr_low);
	}

	return 0;
}
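
/*
 * MR registration picks one of three page list layouts: the page DMA
 * addresses go inline in the admin command when they fit in
 * inline_pbl_array, otherwise a PBL is built that is either physically
 * continuous (kmalloc-backed, mapped as one buffer) or indirect
 * (vmalloc-backed, described to the device through the chunk list above).
 */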

struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length,
			 u64 virt_addr, int access_flags,
			 struct ib_udata *udata)
{
	struct efa_dev *dev = to_edev(ibpd->device);
	struct efa_com_reg_mr_params params = {};
	struct efa_com_reg_mr_result result = {};
	struct pbl_context pbl;
	int supp_access_flags;
	unsigned int pg_sz;
	struct efa_mr *mr;
	int inline_size;
	int err;

	if (udata && udata->inlen &&
	    !ib_is_udata_cleared(udata, 0, sizeof(udata->inlen))) {
		ibdev_dbg(&dev->ibdev,
			  "Incompatible ABI params, udata not cleared\n");
		err = -EINVAL;
		goto err_out;
	}

	supp_access_flags =
		IB_ACCESS_LOCAL_WRITE |
		(is_rdma_read_cap(dev) ? IB_ACCESS_REMOTE_READ : 0);

	access_flags &= ~IB_ACCESS_OPTIONAL;
	if (access_flags & ~supp_access_flags) {
		ibdev_dbg(&dev->ibdev,
			  "Unsupported access flags[%#x], supported[%#x]\n",
			  access_flags, supp_access_flags);
		err = -EOPNOTSUPP;
		goto err_out;
	}

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr) {
		err = -ENOMEM;
		goto err_out;
	}

	mr->umem = ib_umem_get(ibpd->device, start, length, access_flags);
	if (IS_ERR(mr->umem)) {
		err = PTR_ERR(mr->umem);
		ibdev_dbg(&dev->ibdev,
			  "Failed to pin and map user space memory[%d]\n", err);
		goto err_free;
	}

	params.pd = to_epd(ibpd)->pdn;
	params.iova = virt_addr;
	params.mr_length_in_bytes = length;
	params.permissions = access_flags;

	pg_sz = ib_umem_find_best_pgsz(mr->umem,
				       dev->dev_attr.page_size_cap,
				       virt_addr);
	if (!pg_sz) {
		err = -EOPNOTSUPP;
		ibdev_dbg(&dev->ibdev, "Failed to find a suitable page size in page_size_cap %#llx\n",
			  dev->dev_attr.page_size_cap);
		goto err_unmap;
	}

	params.page_shift = __ffs(pg_sz);
	params.page_num = DIV_ROUND_UP(length + (start & (pg_sz - 1)),
				       pg_sz);

	ibdev_dbg(&dev->ibdev,
		  "start %#llx length %#llx params.page_shift %u params.page_num %u\n",
		  start, length, params.page_shift, params.page_num);

	inline_size = ARRAY_SIZE(params.pbl.inline_pbl_array);
	if (params.page_num <= inline_size) {
		err = efa_create_inline_pbl(dev, mr, &params);
		if (err)
			goto err_unmap;

		err = efa_com_register_mr(&dev->edev, &params, &result);
		if (err)
			goto err_unmap;
	} else {
		err = efa_create_pbl(dev, &pbl, mr, &params);
		if (err)
			goto err_unmap;

		err = efa_com_register_mr(&dev->edev, &params, &result);
		pbl_destroy(dev, &pbl);

		if (err)
			goto err_unmap;
	}

	mr->ibmr.lkey = result.l_key;
	mr->ibmr.rkey = result.r_key;
	mr->ibmr.length = length;
	ibdev_dbg(&dev->ibdev, "Registered mr[%d]\n", mr->ibmr.lkey);

	return &mr->ibmr;

err_unmap:
	ib_umem_release(mr->umem);
err_free:
	kfree(mr);
err_out:
	atomic64_inc(&dev->stats.sw_stats.reg_mr_err);
	return ERR_PTR(err);
}

int efa_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
	struct efa_dev *dev = to_edev(ibmr->device);
	struct efa_com_dereg_mr_params params;
	struct efa_mr *mr = to_emr(ibmr);
	int err;

	ibdev_dbg(&dev->ibdev, "Deregister mr[%d]\n", ibmr->lkey);

	params.l_key = mr->ibmr.lkey;
	err = efa_com_dereg_mr(&dev->edev, &params);
	if (err)
		return err;

	ib_umem_release(mr->umem);
	kfree(mr);

	return 0;
}

int efa_get_port_immutable(struct ib_device *ibdev, u8 port_num,
			   struct ib_port_immutable *immutable)
{
	struct ib_port_attr attr;
	int err;

	err = ib_query_port(ibdev, port_num, &attr);
	if (err) {
		ibdev_dbg(ibdev, "Couldn't query port err[%d]\n", err);
		return err;
	}

	immutable->pkey_tbl_len = attr.pkey_tbl_len;
	immutable->gid_tbl_len = attr.gid_tbl_len;

	return 0;
}

static int efa_dealloc_uar(struct efa_dev *dev, u16 uarn)
{
	struct efa_com_dealloc_uar_params params = {
		.uarn = uarn,
	};

	return efa_com_dealloc_uar(&dev->edev, &params);
}

int efa_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata)
{
	struct efa_ucontext *ucontext = to_eucontext(ibucontext);
	struct efa_dev *dev = to_edev(ibucontext->device);
	struct efa_ibv_alloc_ucontext_resp resp = {};
	struct efa_com_alloc_uar_result result;
	int err;

	/*
	 * it's fine if the driver does not know all request fields,
	 * we will ack input fields in our response.
	 */

	err = efa_com_alloc_uar(&dev->edev, &result);
	if (err)
		goto err_out;

	ucontext->uarn = result.uarn;

	resp.cmds_supp_udata_mask |= EFA_USER_CMDS_SUPP_UDATA_QUERY_DEVICE;
	resp.cmds_supp_udata_mask |= EFA_USER_CMDS_SUPP_UDATA_CREATE_AH;
	resp.sub_cqs_per_cq = dev->dev_attr.sub_cqs_per_cq;
	resp.inline_buf_size = dev->dev_attr.inline_buf_size;
	resp.max_llq_size = dev->dev_attr.max_llq_size;

	if (udata && udata->outlen) {
		err = ib_copy_to_udata(udata, &resp,
				       min(sizeof(resp), udata->outlen));
		if (err)
			goto err_dealloc_uar;
	}

	return 0;

err_dealloc_uar:
	efa_dealloc_uar(dev, result.uarn);
err_out:
	atomic64_inc(&dev->stats.sw_stats.alloc_ucontext_err);
	return err;
}

void efa_dealloc_ucontext(struct ib_ucontext *ibucontext)
{
	struct efa_ucontext *ucontext = to_eucontext(ibucontext);
	struct efa_dev *dev = to_edev(ibucontext->device);

	efa_dealloc_uar(dev, ucontext->uarn);
}
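
/*
 * mmap entries carry one of the EFA_MMAP_* flags assigned at insert time:
 * EFA_MMAP_IO_NC regions (doorbells) are mapped non-cached, EFA_MMAP_IO_WC
 * regions (LLQ descriptors) write-combined, and EFA_MMAP_DMA_PAGE regions
 * (RQ/CQ rings in host memory) are inserted page by page with
 * vm_insert_page().
 */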

void efa_mmap_free(struct rdma_user_mmap_entry *rdma_entry)
{
	struct efa_user_mmap_entry *entry = to_emmap(rdma_entry);

	kfree(entry);
}

static int __efa_mmap(struct efa_dev *dev, struct efa_ucontext *ucontext,
		      struct vm_area_struct *vma)
{
	struct rdma_user_mmap_entry *rdma_entry;
	struct efa_user_mmap_entry *entry;
	unsigned long va;
	int err = 0;
	u64 pfn;

	rdma_entry = rdma_user_mmap_entry_get(&ucontext->ibucontext, vma);
	if (!rdma_entry) {
		ibdev_dbg(&dev->ibdev,
			  "pgoff[%#lx] does not have valid entry\n",
			  vma->vm_pgoff);
		return -EINVAL;
	}
	entry = to_emmap(rdma_entry);

	ibdev_dbg(&dev->ibdev,
		  "Mapping address[%#llx], length[%#zx], mmap_flag[%d]\n",
		  entry->address, rdma_entry->npages * PAGE_SIZE,
		  entry->mmap_flag);

	pfn = entry->address >> PAGE_SHIFT;
	switch (entry->mmap_flag) {
	case EFA_MMAP_IO_NC:
		err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn,
					entry->rdma_entry.npages * PAGE_SIZE,
					pgprot_noncached(vma->vm_page_prot),
					rdma_entry);
		break;
	case EFA_MMAP_IO_WC:
		err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn,
					entry->rdma_entry.npages * PAGE_SIZE,
					pgprot_writecombine(vma->vm_page_prot),
					rdma_entry);
		break;
	case EFA_MMAP_DMA_PAGE:
		for (va = vma->vm_start; va < vma->vm_end;
		     va += PAGE_SIZE, pfn++) {
			err = vm_insert_page(vma, va, pfn_to_page(pfn));
			if (err)
				break;
		}
		break;
	default:
		err = -EINVAL;
	}

	if (err)
		ibdev_dbg(
			&dev->ibdev,
			"Couldn't mmap address[%#llx] length[%#zx] mmap_flag[%d] err[%d]\n",
			entry->address, rdma_entry->npages * PAGE_SIZE,
			entry->mmap_flag, err);

	rdma_user_mmap_entry_put(rdma_entry);
	return err;
}

int efa_mmap(struct ib_ucontext *ibucontext,
	     struct vm_area_struct *vma)
{
	struct efa_ucontext *ucontext = to_eucontext(ibucontext);
	struct efa_dev *dev = to_edev(ibucontext->device);
	size_t length = vma->vm_end - vma->vm_start;

	ibdev_dbg(&dev->ibdev,
		  "start %#lx, end %#lx, length = %#zx, pgoff = %#lx\n",
		  vma->vm_start, vma->vm_end, length, vma->vm_pgoff);

	return __efa_mmap(dev, ucontext, vma);
}
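
/*
 * Address handle create/destroy go through the admin queue, so both are
 * refused in atomic context (the RDMA_CREATE_AH_SLEEPABLE /
 * RDMA_DESTROY_AH_SLEEPABLE flags must be set).
 */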

static int efa_ah_destroy(struct efa_dev *dev, struct efa_ah *ah)
{
	struct efa_com_destroy_ah_params params = {
		.ah = ah->ah,
		.pdn = to_epd(ah->ibah.pd)->pdn,
	};

	return efa_com_destroy_ah(&dev->edev, &params);
}

int efa_create_ah(struct ib_ah *ibah,
		  struct rdma_ah_attr *ah_attr,
		  u32 flags,
		  struct ib_udata *udata)
{
	struct efa_dev *dev = to_edev(ibah->device);
	struct efa_com_create_ah_params params = {};
	struct efa_ibv_create_ah_resp resp = {};
	struct efa_com_create_ah_result result;
	struct efa_ah *ah = to_eah(ibah);
	int err;

	if (!(flags & RDMA_CREATE_AH_SLEEPABLE)) {
		ibdev_dbg(&dev->ibdev,
			  "Create address handle is not supported in atomic context\n");
		err = -EOPNOTSUPP;
		goto err_out;
	}

	if (udata->inlen &&
	    !ib_is_udata_cleared(udata, 0, udata->inlen)) {
		ibdev_dbg(&dev->ibdev, "Incompatible ABI params\n");
		err = -EINVAL;
		goto err_out;
	}

	memcpy(params.dest_addr, ah_attr->grh.dgid.raw,
	       sizeof(params.dest_addr));
	params.pdn = to_epd(ibah->pd)->pdn;
	err = efa_com_create_ah(&dev->edev, &params, &result);
	if (err)
		goto err_out;

	memcpy(ah->id, ah_attr->grh.dgid.raw, sizeof(ah->id));
	ah->ah = result.ah;

	resp.efa_address_handle = result.ah;

	if (udata->outlen) {
		err = ib_copy_to_udata(udata, &resp,
				       min(sizeof(resp), udata->outlen));
		if (err) {
			ibdev_dbg(&dev->ibdev,
				  "Failed to copy udata for create_ah response\n");
			goto err_destroy_ah;
		}
	}
	ibdev_dbg(&dev->ibdev, "Created ah[%d]\n", ah->ah);

	return 0;

err_destroy_ah:
	efa_ah_destroy(dev, ah);
err_out:
	atomic64_inc(&dev->stats.sw_stats.create_ah_err);
	return err;
}

void efa_destroy_ah(struct ib_ah *ibah, u32 flags)
{
	struct efa_dev *dev = to_edev(ibah->pd->device);
	struct efa_ah *ah = to_eah(ibah);

	ibdev_dbg(&dev->ibdev, "Destroy ah[%d]\n", ah->ah);

	if (!(flags & RDMA_DESTROY_AH_SLEEPABLE)) {
		ibdev_dbg(&dev->ibdev,
			  "Destroy address handle is not supported in atomic context\n");
		return;
	}

	efa_ah_destroy(dev, ah);
}
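
/*
 * rdma_hw_stats entries follow the efa_hw_stats enum order: the first five
 * counters come from the device's basic stats (admin GET_STATS command),
 * the rest are software counters kept by the driver (admin queue activity
 * and verbs error counts).
 */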

struct rdma_hw_stats *efa_alloc_hw_stats(struct ib_device *ibdev, u8 port_num)
{
	return rdma_alloc_hw_stats_struct(efa_stats_names,
					  ARRAY_SIZE(efa_stats_names),
					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
}

int efa_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
		     u8 port_num, int index)
{
	struct efa_com_get_stats_params params = {};
	union efa_com_get_stats_result result;
	struct efa_dev *dev = to_edev(ibdev);
	struct efa_com_basic_stats *bs;
	struct efa_com_stats_admin *as;
	struct efa_stats *s;
	int err;

	params.type = EFA_ADMIN_GET_STATS_TYPE_BASIC;
	params.scope = EFA_ADMIN_GET_STATS_SCOPE_ALL;

	err = efa_com_get_stats(&dev->edev, &params, &result);
	if (err)
		return err;

	bs = &result.basic_stats;
	stats->value[EFA_TX_BYTES] = bs->tx_bytes;
	stats->value[EFA_TX_PKTS] = bs->tx_pkts;
	stats->value[EFA_RX_BYTES] = bs->rx_bytes;
	stats->value[EFA_RX_PKTS] = bs->rx_pkts;
	stats->value[EFA_RX_DROPS] = bs->rx_drops;

	as = &dev->edev.aq.stats;
	stats->value[EFA_SUBMITTED_CMDS] = atomic64_read(&as->submitted_cmd);
	stats->value[EFA_COMPLETED_CMDS] = atomic64_read(&as->completed_cmd);
	stats->value[EFA_NO_COMPLETION_CMDS] = atomic64_read(&as->no_completion);

	s = &dev->stats;
	stats->value[EFA_KEEP_ALIVE_RCVD] = atomic64_read(&s->keep_alive_rcvd);
	stats->value[EFA_ALLOC_PD_ERR] = atomic64_read(&s->sw_stats.alloc_pd_err);
	stats->value[EFA_CREATE_QP_ERR] = atomic64_read(&s->sw_stats.create_qp_err);
	stats->value[EFA_REG_MR_ERR] = atomic64_read(&s->sw_stats.reg_mr_err);
	stats->value[EFA_ALLOC_UCONTEXT_ERR] = atomic64_read(&s->sw_stats.alloc_ucontext_err);
	stats->value[EFA_CREATE_AH_ERR] = atomic64_read(&s->sw_stats.create_ah_err);

	return ARRAY_SIZE(efa_stats_names);
}

enum rdma_link_layer efa_port_link_layer(struct ib_device *ibdev,
					 u8 port_num)
{
	return IB_LINK_LAYER_UNSPECIFIED;
}