1 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 2 3 /* Authors: Cheng Xu <chengyou@linux.alibaba.com> */ 4 /* Kai Shen <kaishen@linux.alibaba.com> */ 5 /* Copyright (c) 2020-2022, Alibaba Group. */ 6 7 #include <linux/errno.h> 8 #include <linux/init.h> 9 #include <linux/kernel.h> 10 #include <linux/list.h> 11 #include <linux/module.h> 12 #include <linux/netdevice.h> 13 #include <linux/pci.h> 14 #include <net/addrconf.h> 15 #include <rdma/erdma-abi.h> 16 #include <rdma/ib_verbs.h> 17 #include <rdma/ib_user_verbs.h> 18 19 #include "erdma.h" 20 #include "erdma_cm.h" 21 #include "erdma_hw.h" 22 #include "erdma_verbs.h" 23 24 MODULE_AUTHOR("Cheng Xu <chengyou@linux.alibaba.com>"); 25 MODULE_DESCRIPTION("Alibaba elasticRDMA adapter driver"); 26 MODULE_LICENSE("Dual BSD/GPL"); 27 28 static int erdma_netdev_event(struct notifier_block *nb, unsigned long event, 29 void *arg) 30 { 31 struct net_device *netdev = netdev_notifier_info_to_dev(arg); 32 struct erdma_dev *dev = container_of(nb, struct erdma_dev, netdev_nb); 33 34 if (dev->netdev == NULL || dev->netdev != netdev) 35 goto done; 36 37 switch (event) { 38 case NETDEV_UP: 39 dev->state = IB_PORT_ACTIVE; 40 erdma_port_event(dev, IB_EVENT_PORT_ACTIVE); 41 break; 42 case NETDEV_DOWN: 43 dev->state = IB_PORT_DOWN; 44 erdma_port_event(dev, IB_EVENT_PORT_ERR); 45 break; 46 case NETDEV_REGISTER: 47 case NETDEV_UNREGISTER: 48 case NETDEV_CHANGEADDR: 49 case NETDEV_CHANGEMTU: 50 case NETDEV_GOING_DOWN: 51 case NETDEV_CHANGE: 52 default: 53 break; 54 } 55 56 done: 57 return NOTIFY_OK; 58 } 59 60 static int erdma_enum_and_get_netdev(struct erdma_dev *dev) 61 { 62 struct net_device *netdev; 63 int ret = -ENODEV; 64 65 /* Already binded to a net_device, so we skip. */ 66 if (dev->netdev) 67 return 0; 68 69 rtnl_lock(); 70 for_each_netdev(&init_net, netdev) { 71 /* 72 * In erdma, the paired netdev and ibdev should have the same 73 * MAC address. erdma can get the value from its PCIe bar 74 * registers. Since erdma can not get the paired netdev 75 * reference directly, we do a traverse here to get the paired 76 * netdev. 77 */ 78 if (ether_addr_equal_unaligned(netdev->perm_addr, 79 dev->attrs.peer_addr)) { 80 ret = ib_device_set_netdev(&dev->ibdev, netdev, 1); 81 if (ret) { 82 rtnl_unlock(); 83 ibdev_warn(&dev->ibdev, 84 "failed (%d) to link netdev", ret); 85 return ret; 86 } 87 88 dev->netdev = netdev; 89 break; 90 } 91 } 92 93 rtnl_unlock(); 94 95 return ret; 96 } 97 98 static int erdma_device_register(struct erdma_dev *dev) 99 { 100 struct ib_device *ibdev = &dev->ibdev; 101 int ret; 102 103 ret = erdma_enum_and_get_netdev(dev); 104 if (ret) 105 return ret; 106 107 addrconf_addr_eui48((u8 *)&ibdev->node_guid, dev->netdev->dev_addr); 108 109 ret = ib_register_device(ibdev, "erdma_%d", &dev->pdev->dev); 110 if (ret) { 111 dev_err(&dev->pdev->dev, 112 "ib_register_device failed: ret = %d\n", ret); 113 return ret; 114 } 115 116 dev->netdev_nb.notifier_call = erdma_netdev_event; 117 ret = register_netdevice_notifier(&dev->netdev_nb); 118 if (ret) { 119 ibdev_err(&dev->ibdev, "failed to register notifier.\n"); 120 ib_unregister_device(ibdev); 121 } 122 123 return ret; 124 } 125 126 static irqreturn_t erdma_comm_irq_handler(int irq, void *data) 127 { 128 struct erdma_dev *dev = data; 129 130 erdma_cmdq_completion_handler(&dev->cmdq); 131 erdma_aeq_event_handler(dev); 132 133 return IRQ_HANDLED; 134 } 135 136 static void erdma_dwqe_resource_init(struct erdma_dev *dev) 137 { 138 int total_pages, type0, type1; 139 140 dev->attrs.grp_num = erdma_reg_read32(dev, ERDMA_REGS_GRP_NUM_REG); 141 142 if (dev->attrs.grp_num < 4) 143 dev->attrs.disable_dwqe = true; 144 else 145 dev->attrs.disable_dwqe = false; 146 147 /* One page contains 4 goups. */ 148 total_pages = dev->attrs.grp_num * 4; 149 150 if (dev->attrs.grp_num >= ERDMA_DWQE_MAX_GRP_CNT) { 151 dev->attrs.grp_num = ERDMA_DWQE_MAX_GRP_CNT; 152 type0 = ERDMA_DWQE_TYPE0_CNT; 153 type1 = ERDMA_DWQE_TYPE1_CNT / ERDMA_DWQE_TYPE1_CNT_PER_PAGE; 154 } else { 155 type1 = total_pages / 3; 156 type0 = total_pages - type1 - 1; 157 } 158 159 dev->attrs.dwqe_pages = type0; 160 dev->attrs.dwqe_entries = type1 * ERDMA_DWQE_TYPE1_CNT_PER_PAGE; 161 } 162 163 static int erdma_request_vectors(struct erdma_dev *dev) 164 { 165 int expect_irq_num = min(num_possible_cpus() + 1, ERDMA_NUM_MSIX_VEC); 166 int ret; 167 168 ret = pci_alloc_irq_vectors(dev->pdev, 1, expect_irq_num, PCI_IRQ_MSIX); 169 if (ret < 0) { 170 dev_err(&dev->pdev->dev, "request irq vectors failed(%d)\n", 171 ret); 172 return ret; 173 } 174 dev->attrs.irq_num = ret; 175 176 return 0; 177 } 178 179 static int erdma_comm_irq_init(struct erdma_dev *dev) 180 { 181 snprintf(dev->comm_irq.name, ERDMA_IRQNAME_SIZE, "erdma-common@pci:%s", 182 pci_name(dev->pdev)); 183 dev->comm_irq.msix_vector = 184 pci_irq_vector(dev->pdev, ERDMA_MSIX_VECTOR_CMDQ); 185 186 cpumask_set_cpu(cpumask_first(cpumask_of_pcibus(dev->pdev->bus)), 187 &dev->comm_irq.affinity_hint_mask); 188 irq_set_affinity_hint(dev->comm_irq.msix_vector, 189 &dev->comm_irq.affinity_hint_mask); 190 191 return request_irq(dev->comm_irq.msix_vector, erdma_comm_irq_handler, 0, 192 dev->comm_irq.name, dev); 193 } 194 195 static void erdma_comm_irq_uninit(struct erdma_dev *dev) 196 { 197 irq_set_affinity_hint(dev->comm_irq.msix_vector, NULL); 198 free_irq(dev->comm_irq.msix_vector, dev); 199 } 200 201 static int erdma_device_init(struct erdma_dev *dev, struct pci_dev *pdev) 202 { 203 int ret; 204 205 erdma_dwqe_resource_init(dev); 206 207 ret = dma_set_mask_and_coherent(&pdev->dev, 208 DMA_BIT_MASK(ERDMA_PCI_WIDTH)); 209 if (ret) 210 return ret; 211 212 dma_set_max_seg_size(&pdev->dev, UINT_MAX); 213 214 return 0; 215 } 216 217 static void erdma_device_uninit(struct erdma_dev *dev) 218 { 219 u32 ctrl = FIELD_PREP(ERDMA_REG_DEV_CTRL_RESET_MASK, 1); 220 221 erdma_reg_write32(dev, ERDMA_REGS_DEV_CTRL_REG, ctrl); 222 } 223 224 static const struct pci_device_id erdma_pci_tbl[] = { 225 { PCI_DEVICE(PCI_VENDOR_ID_ALIBABA, 0x107f) }, 226 {} 227 }; 228 229 static int erdma_probe_dev(struct pci_dev *pdev) 230 { 231 struct erdma_dev *dev; 232 int bars, err; 233 u32 version; 234 235 err = pci_enable_device(pdev); 236 if (err) { 237 dev_err(&pdev->dev, "pci_enable_device failed(%d)\n", err); 238 return err; 239 } 240 241 pci_set_master(pdev); 242 243 dev = ib_alloc_device(erdma_dev, ibdev); 244 if (!dev) { 245 dev_err(&pdev->dev, "ib_alloc_device failed\n"); 246 err = -ENOMEM; 247 goto err_disable_device; 248 } 249 250 pci_set_drvdata(pdev, dev); 251 dev->pdev = pdev; 252 dev->attrs.numa_node = dev_to_node(&pdev->dev); 253 254 bars = pci_select_bars(pdev, IORESOURCE_MEM); 255 err = pci_request_selected_regions(pdev, bars, DRV_MODULE_NAME); 256 if (bars != ERDMA_BAR_MASK || err) { 257 err = err ? err : -EINVAL; 258 goto err_ib_device_release; 259 } 260 261 dev->func_bar_addr = pci_resource_start(pdev, ERDMA_FUNC_BAR); 262 dev->func_bar_len = pci_resource_len(pdev, ERDMA_FUNC_BAR); 263 264 dev->func_bar = 265 devm_ioremap(&pdev->dev, dev->func_bar_addr, dev->func_bar_len); 266 if (!dev->func_bar) { 267 dev_err(&pdev->dev, "devm_ioremap failed.\n"); 268 err = -EFAULT; 269 goto err_release_bars; 270 } 271 272 version = erdma_reg_read32(dev, ERDMA_REGS_VERSION_REG); 273 if (version == 0) { 274 /* we knows that it is a non-functional function. */ 275 err = -ENODEV; 276 goto err_iounmap_func_bar; 277 } 278 279 err = erdma_device_init(dev, pdev); 280 if (err) 281 goto err_iounmap_func_bar; 282 283 err = erdma_request_vectors(dev); 284 if (err) 285 goto err_iounmap_func_bar; 286 287 err = erdma_comm_irq_init(dev); 288 if (err) 289 goto err_free_vectors; 290 291 err = erdma_aeq_init(dev); 292 if (err) 293 goto err_uninit_comm_irq; 294 295 err = erdma_cmdq_init(dev); 296 if (err) 297 goto err_uninit_aeq; 298 299 err = erdma_ceqs_init(dev); 300 if (err) 301 goto err_uninit_cmdq; 302 303 erdma_finish_cmdq_init(dev); 304 305 return 0; 306 307 err_uninit_cmdq: 308 erdma_device_uninit(dev); 309 erdma_cmdq_destroy(dev); 310 311 err_uninit_aeq: 312 erdma_aeq_destroy(dev); 313 314 err_uninit_comm_irq: 315 erdma_comm_irq_uninit(dev); 316 317 err_free_vectors: 318 pci_free_irq_vectors(dev->pdev); 319 320 err_iounmap_func_bar: 321 devm_iounmap(&pdev->dev, dev->func_bar); 322 323 err_release_bars: 324 pci_release_selected_regions(pdev, bars); 325 326 err_ib_device_release: 327 ib_dealloc_device(&dev->ibdev); 328 329 err_disable_device: 330 pci_disable_device(pdev); 331 332 return err; 333 } 334 335 static void erdma_remove_dev(struct pci_dev *pdev) 336 { 337 struct erdma_dev *dev = pci_get_drvdata(pdev); 338 339 erdma_ceqs_uninit(dev); 340 341 erdma_device_uninit(dev); 342 343 erdma_cmdq_destroy(dev); 344 erdma_aeq_destroy(dev); 345 erdma_comm_irq_uninit(dev); 346 pci_free_irq_vectors(dev->pdev); 347 348 devm_iounmap(&pdev->dev, dev->func_bar); 349 pci_release_selected_regions(pdev, ERDMA_BAR_MASK); 350 351 ib_dealloc_device(&dev->ibdev); 352 353 pci_disable_device(pdev); 354 } 355 356 #define ERDMA_GET_CAP(name, cap) FIELD_GET(ERDMA_CMD_DEV_CAP_##name##_MASK, cap) 357 358 static int erdma_dev_attrs_init(struct erdma_dev *dev) 359 { 360 int err; 361 u64 req_hdr, cap0, cap1; 362 363 erdma_cmdq_build_reqhdr(&req_hdr, CMDQ_SUBMOD_RDMA, 364 CMDQ_OPCODE_QUERY_DEVICE); 365 366 err = erdma_post_cmd_wait(&dev->cmdq, &req_hdr, sizeof(req_hdr), &cap0, 367 &cap1); 368 if (err) 369 return err; 370 371 dev->attrs.max_cqe = 1 << ERDMA_GET_CAP(MAX_CQE, cap0); 372 dev->attrs.max_mr_size = 1ULL << ERDMA_GET_CAP(MAX_MR_SIZE, cap0); 373 dev->attrs.max_mw = 1 << ERDMA_GET_CAP(MAX_MW, cap1); 374 dev->attrs.max_recv_wr = 1 << ERDMA_GET_CAP(MAX_RECV_WR, cap0); 375 dev->attrs.local_dma_key = ERDMA_GET_CAP(DMA_LOCAL_KEY, cap1); 376 dev->attrs.cc = ERDMA_GET_CAP(DEFAULT_CC, cap1); 377 dev->attrs.max_qp = ERDMA_NQP_PER_QBLOCK * ERDMA_GET_CAP(QBLOCK, cap1); 378 dev->attrs.max_mr = dev->attrs.max_qp << 1; 379 dev->attrs.max_cq = dev->attrs.max_qp << 1; 380 381 dev->attrs.max_send_wr = ERDMA_MAX_SEND_WR; 382 dev->attrs.max_ord = ERDMA_MAX_ORD; 383 dev->attrs.max_ird = ERDMA_MAX_IRD; 384 dev->attrs.max_send_sge = ERDMA_MAX_SEND_SGE; 385 dev->attrs.max_recv_sge = ERDMA_MAX_RECV_SGE; 386 dev->attrs.max_sge_rd = ERDMA_MAX_SGE_RD; 387 dev->attrs.max_pd = ERDMA_MAX_PD; 388 389 dev->res_cb[ERDMA_RES_TYPE_PD].max_cap = ERDMA_MAX_PD; 390 dev->res_cb[ERDMA_RES_TYPE_STAG_IDX].max_cap = dev->attrs.max_mr; 391 392 erdma_cmdq_build_reqhdr(&req_hdr, CMDQ_SUBMOD_COMMON, 393 CMDQ_OPCODE_QUERY_FW_INFO); 394 395 err = erdma_post_cmd_wait(&dev->cmdq, &req_hdr, sizeof(req_hdr), &cap0, 396 &cap1); 397 if (!err) 398 dev->attrs.fw_version = 399 FIELD_GET(ERDMA_CMD_INFO0_FW_VER_MASK, cap0); 400 401 return err; 402 } 403 404 static int erdma_res_cb_init(struct erdma_dev *dev) 405 { 406 int i, j; 407 408 for (i = 0; i < ERDMA_RES_CNT; i++) { 409 dev->res_cb[i].next_alloc_idx = 1; 410 spin_lock_init(&dev->res_cb[i].lock); 411 dev->res_cb[i].bitmap = 412 bitmap_zalloc(dev->res_cb[i].max_cap, GFP_KERNEL); 413 if (!dev->res_cb[i].bitmap) 414 goto err; 415 } 416 417 return 0; 418 419 err: 420 for (j = 0; j < i; j++) 421 bitmap_free(dev->res_cb[j].bitmap); 422 423 return -ENOMEM; 424 } 425 426 static void erdma_res_cb_free(struct erdma_dev *dev) 427 { 428 int i; 429 430 for (i = 0; i < ERDMA_RES_CNT; i++) 431 bitmap_free(dev->res_cb[i].bitmap); 432 } 433 434 static const struct ib_device_ops erdma_device_ops = { 435 .owner = THIS_MODULE, 436 .driver_id = RDMA_DRIVER_ERDMA, 437 .uverbs_abi_ver = ERDMA_ABI_VERSION, 438 439 .alloc_mr = erdma_ib_alloc_mr, 440 .alloc_pd = erdma_alloc_pd, 441 .alloc_ucontext = erdma_alloc_ucontext, 442 .create_cq = erdma_create_cq, 443 .create_qp = erdma_create_qp, 444 .dealloc_pd = erdma_dealloc_pd, 445 .dealloc_ucontext = erdma_dealloc_ucontext, 446 .dereg_mr = erdma_dereg_mr, 447 .destroy_cq = erdma_destroy_cq, 448 .destroy_qp = erdma_destroy_qp, 449 .get_dma_mr = erdma_get_dma_mr, 450 .get_port_immutable = erdma_get_port_immutable, 451 .iw_accept = erdma_accept, 452 .iw_add_ref = erdma_qp_get_ref, 453 .iw_connect = erdma_connect, 454 .iw_create_listen = erdma_create_listen, 455 .iw_destroy_listen = erdma_destroy_listen, 456 .iw_get_qp = erdma_get_ibqp, 457 .iw_reject = erdma_reject, 458 .iw_rem_ref = erdma_qp_put_ref, 459 .map_mr_sg = erdma_map_mr_sg, 460 .mmap = erdma_mmap, 461 .mmap_free = erdma_mmap_free, 462 .modify_qp = erdma_modify_qp, 463 .post_recv = erdma_post_recv, 464 .post_send = erdma_post_send, 465 .poll_cq = erdma_poll_cq, 466 .query_device = erdma_query_device, 467 .query_gid = erdma_query_gid, 468 .query_port = erdma_query_port, 469 .query_qp = erdma_query_qp, 470 .req_notify_cq = erdma_req_notify_cq, 471 .reg_user_mr = erdma_reg_user_mr, 472 473 INIT_RDMA_OBJ_SIZE(ib_cq, erdma_cq, ibcq), 474 INIT_RDMA_OBJ_SIZE(ib_pd, erdma_pd, ibpd), 475 INIT_RDMA_OBJ_SIZE(ib_ucontext, erdma_ucontext, ibucontext), 476 INIT_RDMA_OBJ_SIZE(ib_qp, erdma_qp, ibqp), 477 }; 478 479 static int erdma_ib_device_add(struct pci_dev *pdev) 480 { 481 struct erdma_dev *dev = pci_get_drvdata(pdev); 482 struct ib_device *ibdev = &dev->ibdev; 483 u64 mac; 484 int ret; 485 486 ret = erdma_dev_attrs_init(dev); 487 if (ret) 488 return ret; 489 490 ibdev->node_type = RDMA_NODE_RNIC; 491 memcpy(ibdev->node_desc, ERDMA_NODE_DESC, sizeof(ERDMA_NODE_DESC)); 492 493 /* 494 * Current model (one-to-one device association): 495 * One ERDMA device per net_device or, equivalently, 496 * per physical port. 497 */ 498 ibdev->phys_port_cnt = 1; 499 ibdev->num_comp_vectors = dev->attrs.irq_num - 1; 500 501 ib_set_device_ops(ibdev, &erdma_device_ops); 502 503 INIT_LIST_HEAD(&dev->cep_list); 504 505 spin_lock_init(&dev->lock); 506 xa_init_flags(&dev->qp_xa, XA_FLAGS_ALLOC1); 507 xa_init_flags(&dev->cq_xa, XA_FLAGS_ALLOC1); 508 dev->next_alloc_cqn = 1; 509 dev->next_alloc_qpn = 1; 510 511 ret = erdma_res_cb_init(dev); 512 if (ret) 513 return ret; 514 515 spin_lock_init(&dev->db_bitmap_lock); 516 bitmap_zero(dev->sdb_page, ERDMA_DWQE_TYPE0_CNT); 517 bitmap_zero(dev->sdb_entry, ERDMA_DWQE_TYPE1_CNT); 518 519 atomic_set(&dev->num_ctx, 0); 520 521 mac = erdma_reg_read32(dev, ERDMA_REGS_NETDEV_MAC_L_REG); 522 mac |= (u64)erdma_reg_read32(dev, ERDMA_REGS_NETDEV_MAC_H_REG) << 32; 523 524 u64_to_ether_addr(mac, dev->attrs.peer_addr); 525 526 ret = erdma_device_register(dev); 527 if (ret) 528 goto err_out; 529 530 return 0; 531 532 err_out: 533 xa_destroy(&dev->qp_xa); 534 xa_destroy(&dev->cq_xa); 535 536 erdma_res_cb_free(dev); 537 538 return ret; 539 } 540 541 static void erdma_ib_device_remove(struct pci_dev *pdev) 542 { 543 struct erdma_dev *dev = pci_get_drvdata(pdev); 544 545 unregister_netdevice_notifier(&dev->netdev_nb); 546 ib_unregister_device(&dev->ibdev); 547 548 erdma_res_cb_free(dev); 549 xa_destroy(&dev->qp_xa); 550 xa_destroy(&dev->cq_xa); 551 } 552 553 static int erdma_probe(struct pci_dev *pdev, const struct pci_device_id *ent) 554 { 555 int ret; 556 557 ret = erdma_probe_dev(pdev); 558 if (ret) 559 return ret; 560 561 ret = erdma_ib_device_add(pdev); 562 if (ret) { 563 erdma_remove_dev(pdev); 564 return ret; 565 } 566 567 return 0; 568 } 569 570 static void erdma_remove(struct pci_dev *pdev) 571 { 572 erdma_ib_device_remove(pdev); 573 erdma_remove_dev(pdev); 574 } 575 576 static struct pci_driver erdma_pci_driver = { 577 .name = DRV_MODULE_NAME, 578 .id_table = erdma_pci_tbl, 579 .probe = erdma_probe, 580 .remove = erdma_remove 581 }; 582 583 MODULE_DEVICE_TABLE(pci, erdma_pci_tbl); 584 585 static __init int erdma_init_module(void) 586 { 587 int ret; 588 589 ret = erdma_cm_init(); 590 if (ret) 591 return ret; 592 593 ret = pci_register_driver(&erdma_pci_driver); 594 if (ret) 595 erdma_cm_exit(); 596 597 return ret; 598 } 599 600 static void __exit erdma_exit_module(void) 601 { 602 pci_unregister_driver(&erdma_pci_driver); 603 604 erdma_cm_exit(); 605 } 606 607 module_init(erdma_init_module); 608 module_exit(erdma_exit_module); 609