1 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
2
3 /* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
4 /* Kai Shen <kaishen@linux.alibaba.com> */
5 /* Copyright (c) 2020-2022, Alibaba Group. */
6
7 #include <linux/module.h>
8 #include <net/addrconf.h>
9 #include <rdma/erdma-abi.h>
10
11 #include "erdma.h"
12 #include "erdma_cm.h"
13 #include "erdma_verbs.h"
14
/* Module metadata: author, one-line description and licence string. */
MODULE_AUTHOR("Cheng Xu <chengyou@linux.alibaba.com>");
MODULE_DESCRIPTION("Alibaba elasticRDMA adapter driver");
MODULE_LICENSE("Dual BSD/GPL");
18
erdma_netdev_event(struct notifier_block * nb,unsigned long event,void * arg)19 static int erdma_netdev_event(struct notifier_block *nb, unsigned long event,
20 void *arg)
21 {
22 struct net_device *netdev = netdev_notifier_info_to_dev(arg);
23 struct erdma_dev *dev = container_of(nb, struct erdma_dev, netdev_nb);
24
25 if (dev->netdev == NULL || dev->netdev != netdev)
26 goto done;
27
28 switch (event) {
29 case NETDEV_UP:
30 dev->state = IB_PORT_ACTIVE;
31 erdma_port_event(dev, IB_EVENT_PORT_ACTIVE);
32 break;
33 case NETDEV_DOWN:
34 dev->state = IB_PORT_DOWN;
35 erdma_port_event(dev, IB_EVENT_PORT_ERR);
36 break;
37 case NETDEV_CHANGEMTU:
38 if (dev->mtu != netdev->mtu) {
39 erdma_set_mtu(dev, netdev->mtu);
40 dev->mtu = netdev->mtu;
41 }
42 break;
43 case NETDEV_REGISTER:
44 case NETDEV_UNREGISTER:
45 case NETDEV_CHANGEADDR:
46 case NETDEV_GOING_DOWN:
47 case NETDEV_CHANGE:
48 default:
49 break;
50 }
51
52 done:
53 return NOTIFY_OK;
54 }
55
erdma_enum_and_get_netdev(struct erdma_dev * dev)56 static int erdma_enum_and_get_netdev(struct erdma_dev *dev)
57 {
58 struct net_device *netdev;
59 int ret = -EPROBE_DEFER;
60
61 /* Already binded to a net_device, so we skip. */
62 if (dev->netdev)
63 return 0;
64
65 rtnl_lock();
66 for_each_netdev(&init_net, netdev) {
67 /*
68 * In erdma, the paired netdev and ibdev should have the same
69 * MAC address. erdma can get the value from its PCIe bar
70 * registers. Since erdma can not get the paired netdev
71 * reference directly, we do a traverse here to get the paired
72 * netdev.
73 */
74 if (ether_addr_equal_unaligned(netdev->perm_addr,
75 dev->attrs.peer_addr)) {
76 ret = ib_device_set_netdev(&dev->ibdev, netdev, 1);
77 if (ret) {
78 rtnl_unlock();
79 ibdev_warn(&dev->ibdev,
80 "failed (%d) to link netdev", ret);
81 return ret;
82 }
83
84 dev->netdev = netdev;
85 break;
86 }
87 }
88
89 rtnl_unlock();
90
91 return ret;
92 }
93
erdma_device_register(struct erdma_dev * dev)94 static int erdma_device_register(struct erdma_dev *dev)
95 {
96 struct ib_device *ibdev = &dev->ibdev;
97 int ret;
98
99 ret = erdma_enum_and_get_netdev(dev);
100 if (ret)
101 return ret;
102
103 dev->mtu = dev->netdev->mtu;
104 addrconf_addr_eui48((u8 *)&ibdev->node_guid, dev->netdev->dev_addr);
105
106 ret = ib_register_device(ibdev, "erdma_%d", &dev->pdev->dev);
107 if (ret) {
108 dev_err(&dev->pdev->dev,
109 "ib_register_device failed: ret = %d\n", ret);
110 return ret;
111 }
112
113 dev->netdev_nb.notifier_call = erdma_netdev_event;
114 ret = register_netdevice_notifier(&dev->netdev_nb);
115 if (ret) {
116 ibdev_err(&dev->ibdev, "failed to register notifier.\n");
117 ib_unregister_device(ibdev);
118 }
119
120 return ret;
121 }
122
erdma_comm_irq_handler(int irq,void * data)123 static irqreturn_t erdma_comm_irq_handler(int irq, void *data)
124 {
125 struct erdma_dev *dev = data;
126
127 erdma_cmdq_completion_handler(&dev->cmdq);
128 erdma_aeq_event_handler(dev);
129
130 return IRQ_HANDLED;
131 }
132
erdma_request_vectors(struct erdma_dev * dev)133 static int erdma_request_vectors(struct erdma_dev *dev)
134 {
135 int expect_irq_num = min(num_possible_cpus() + 1, ERDMA_NUM_MSIX_VEC);
136 int ret;
137
138 ret = pci_alloc_irq_vectors(dev->pdev, 1, expect_irq_num, PCI_IRQ_MSIX);
139 if (ret < 0) {
140 dev_err(&dev->pdev->dev, "request irq vectors failed(%d)\n",
141 ret);
142 return ret;
143 }
144 dev->attrs.irq_num = ret;
145
146 return 0;
147 }
148
erdma_comm_irq_init(struct erdma_dev * dev)149 static int erdma_comm_irq_init(struct erdma_dev *dev)
150 {
151 snprintf(dev->comm_irq.name, ERDMA_IRQNAME_SIZE, "erdma-common@pci:%s",
152 pci_name(dev->pdev));
153 dev->comm_irq.msix_vector =
154 pci_irq_vector(dev->pdev, ERDMA_MSIX_VECTOR_CMDQ);
155
156 cpumask_set_cpu(cpumask_first(cpumask_of_pcibus(dev->pdev->bus)),
157 &dev->comm_irq.affinity_hint_mask);
158 irq_set_affinity_hint(dev->comm_irq.msix_vector,
159 &dev->comm_irq.affinity_hint_mask);
160
161 return request_irq(dev->comm_irq.msix_vector, erdma_comm_irq_handler, 0,
162 dev->comm_irq.name, dev);
163 }
164
erdma_comm_irq_uninit(struct erdma_dev * dev)165 static void erdma_comm_irq_uninit(struct erdma_dev *dev)
166 {
167 irq_set_affinity_hint(dev->comm_irq.msix_vector, NULL);
168 free_irq(dev->comm_irq.msix_vector, dev);
169 }
170
erdma_device_init(struct erdma_dev * dev,struct pci_dev * pdev)171 static int erdma_device_init(struct erdma_dev *dev, struct pci_dev *pdev)
172 {
173 int ret;
174
175 ret = dma_set_mask_and_coherent(&pdev->dev,
176 DMA_BIT_MASK(ERDMA_PCI_WIDTH));
177 if (ret)
178 return ret;
179
180 dma_set_max_seg_size(&pdev->dev, UINT_MAX);
181
182 return 0;
183 }
184
erdma_hw_reset(struct erdma_dev * dev)185 static void erdma_hw_reset(struct erdma_dev *dev)
186 {
187 u32 ctrl = FIELD_PREP(ERDMA_REG_DEV_CTRL_RESET_MASK, 1);
188
189 erdma_reg_write32(dev, ERDMA_REGS_DEV_CTRL_REG, ctrl);
190 }
191
erdma_wait_hw_init_done(struct erdma_dev * dev)192 static int erdma_wait_hw_init_done(struct erdma_dev *dev)
193 {
194 int i;
195
196 erdma_reg_write32(dev, ERDMA_REGS_DEV_CTRL_REG,
197 FIELD_PREP(ERDMA_REG_DEV_CTRL_INIT_MASK, 1));
198
199 for (i = 0; i < ERDMA_WAIT_DEV_DONE_CNT; i++) {
200 if (erdma_reg_read32_filed(dev, ERDMA_REGS_DEV_ST_REG,
201 ERDMA_REG_DEV_ST_INIT_DONE_MASK))
202 break;
203
204 msleep(ERDMA_REG_ACCESS_WAIT_MS);
205 }
206
207 if (i == ERDMA_WAIT_DEV_DONE_CNT) {
208 dev_err(&dev->pdev->dev, "wait init done failed.\n");
209 return -ETIMEDOUT;
210 }
211
212 return 0;
213 }
214
/*
 * PCI IDs claimed by this driver: Alibaba vendor ID with device ID 0x107f.
 * The empty entry terminates the table.
 */
static const struct pci_device_id erdma_pci_tbl[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_ALIBABA, 0x107f) },
	{}
};
219
erdma_probe_dev(struct pci_dev * pdev)220 static int erdma_probe_dev(struct pci_dev *pdev)
221 {
222 struct erdma_dev *dev;
223 int bars, err;
224 u32 version;
225
226 err = pci_enable_device(pdev);
227 if (err) {
228 dev_err(&pdev->dev, "pci_enable_device failed(%d)\n", err);
229 return err;
230 }
231
232 pci_set_master(pdev);
233
234 dev = ib_alloc_device(erdma_dev, ibdev);
235 if (!dev) {
236 dev_err(&pdev->dev, "ib_alloc_device failed\n");
237 err = -ENOMEM;
238 goto err_disable_device;
239 }
240
241 pci_set_drvdata(pdev, dev);
242 dev->pdev = pdev;
243 dev->attrs.numa_node = dev_to_node(&pdev->dev);
244
245 bars = pci_select_bars(pdev, IORESOURCE_MEM);
246 err = pci_request_selected_regions(pdev, bars, DRV_MODULE_NAME);
247 if (bars != ERDMA_BAR_MASK || err) {
248 err = err ? err : -EINVAL;
249 goto err_ib_device_release;
250 }
251
252 dev->func_bar_addr = pci_resource_start(pdev, ERDMA_FUNC_BAR);
253 dev->func_bar_len = pci_resource_len(pdev, ERDMA_FUNC_BAR);
254
255 dev->func_bar =
256 devm_ioremap(&pdev->dev, dev->func_bar_addr, dev->func_bar_len);
257 if (!dev->func_bar) {
258 dev_err(&pdev->dev, "devm_ioremap failed.\n");
259 err = -EFAULT;
260 goto err_release_bars;
261 }
262
263 version = erdma_reg_read32(dev, ERDMA_REGS_VERSION_REG);
264 if (version == 0) {
265 /* we knows that it is a non-functional function. */
266 err = -ENODEV;
267 goto err_iounmap_func_bar;
268 }
269
270 err = erdma_device_init(dev, pdev);
271 if (err)
272 goto err_iounmap_func_bar;
273
274 err = erdma_request_vectors(dev);
275 if (err)
276 goto err_iounmap_func_bar;
277
278 err = erdma_comm_irq_init(dev);
279 if (err)
280 goto err_free_vectors;
281
282 err = erdma_aeq_init(dev);
283 if (err)
284 goto err_uninit_comm_irq;
285
286 err = erdma_cmdq_init(dev);
287 if (err)
288 goto err_uninit_aeq;
289
290 err = erdma_wait_hw_init_done(dev);
291 if (err)
292 goto err_uninit_cmdq;
293
294 err = erdma_ceqs_init(dev);
295 if (err)
296 goto err_reset_hw;
297
298 erdma_finish_cmdq_init(dev);
299
300 return 0;
301
302 err_reset_hw:
303 erdma_hw_reset(dev);
304
305 err_uninit_cmdq:
306 erdma_cmdq_destroy(dev);
307
308 err_uninit_aeq:
309 erdma_aeq_destroy(dev);
310
311 err_uninit_comm_irq:
312 erdma_comm_irq_uninit(dev);
313
314 err_free_vectors:
315 pci_free_irq_vectors(dev->pdev);
316
317 err_iounmap_func_bar:
318 devm_iounmap(&pdev->dev, dev->func_bar);
319
320 err_release_bars:
321 pci_release_selected_regions(pdev, bars);
322
323 err_ib_device_release:
324 ib_dealloc_device(&dev->ibdev);
325
326 err_disable_device:
327 pci_disable_device(pdev);
328
329 return err;
330 }
331
erdma_remove_dev(struct pci_dev * pdev)332 static void erdma_remove_dev(struct pci_dev *pdev)
333 {
334 struct erdma_dev *dev = pci_get_drvdata(pdev);
335
336 erdma_ceqs_uninit(dev);
337 erdma_hw_reset(dev);
338 erdma_cmdq_destroy(dev);
339 erdma_aeq_destroy(dev);
340 erdma_comm_irq_uninit(dev);
341 pci_free_irq_vectors(dev->pdev);
342
343 devm_iounmap(&pdev->dev, dev->func_bar);
344 pci_release_selected_regions(pdev, ERDMA_BAR_MASK);
345
346 ib_dealloc_device(&dev->ibdev);
347
348 pci_disable_device(pdev);
349 }
350
/*
 * Extract capability field @name from capability word @cap; expands to
 * FIELD_GET() with the mask ERDMA_CMD_DEV_CAP_<name>_MASK.
 */
#define ERDMA_GET_CAP(name, cap) FIELD_GET(ERDMA_CMD_DEV_CAP_##name##_MASK, cap)
352
erdma_dev_attrs_init(struct erdma_dev * dev)353 static int erdma_dev_attrs_init(struct erdma_dev *dev)
354 {
355 int err;
356 u64 req_hdr, cap0, cap1;
357
358 erdma_cmdq_build_reqhdr(&req_hdr, CMDQ_SUBMOD_RDMA,
359 CMDQ_OPCODE_QUERY_DEVICE);
360
361 err = erdma_post_cmd_wait(&dev->cmdq, &req_hdr, sizeof(req_hdr), &cap0,
362 &cap1);
363 if (err)
364 return err;
365
366 dev->attrs.max_cqe = 1 << ERDMA_GET_CAP(MAX_CQE, cap0);
367 dev->attrs.max_mr_size = 1ULL << ERDMA_GET_CAP(MAX_MR_SIZE, cap0);
368 dev->attrs.max_mw = 1 << ERDMA_GET_CAP(MAX_MW, cap1);
369 dev->attrs.max_recv_wr = 1 << ERDMA_GET_CAP(MAX_RECV_WR, cap0);
370 dev->attrs.local_dma_key = ERDMA_GET_CAP(DMA_LOCAL_KEY, cap1);
371 dev->attrs.cc = ERDMA_GET_CAP(DEFAULT_CC, cap1);
372 dev->attrs.max_qp = ERDMA_NQP_PER_QBLOCK * ERDMA_GET_CAP(QBLOCK, cap1);
373 dev->attrs.max_mr = dev->attrs.max_qp << 1;
374 dev->attrs.max_cq = dev->attrs.max_qp << 1;
375 dev->attrs.cap_flags = ERDMA_GET_CAP(FLAGS, cap0);
376
377 dev->attrs.max_send_wr = ERDMA_MAX_SEND_WR;
378 dev->attrs.max_ord = ERDMA_MAX_ORD;
379 dev->attrs.max_ird = ERDMA_MAX_IRD;
380 dev->attrs.max_send_sge = ERDMA_MAX_SEND_SGE;
381 dev->attrs.max_recv_sge = ERDMA_MAX_RECV_SGE;
382 dev->attrs.max_sge_rd = ERDMA_MAX_SGE_RD;
383 dev->attrs.max_pd = ERDMA_MAX_PD;
384
385 dev->res_cb[ERDMA_RES_TYPE_PD].max_cap = ERDMA_MAX_PD;
386 dev->res_cb[ERDMA_RES_TYPE_STAG_IDX].max_cap = dev->attrs.max_mr;
387
388 erdma_cmdq_build_reqhdr(&req_hdr, CMDQ_SUBMOD_COMMON,
389 CMDQ_OPCODE_QUERY_FW_INFO);
390
391 err = erdma_post_cmd_wait(&dev->cmdq, &req_hdr, sizeof(req_hdr), &cap0,
392 &cap1);
393 if (!err)
394 dev->attrs.fw_version =
395 FIELD_GET(ERDMA_CMD_INFO0_FW_VER_MASK, cap0);
396
397 return err;
398 }
399
erdma_device_config(struct erdma_dev * dev)400 static int erdma_device_config(struct erdma_dev *dev)
401 {
402 struct erdma_cmdq_config_device_req req = {};
403
404 if (!(dev->attrs.cap_flags & ERDMA_DEV_CAP_FLAGS_EXTEND_DB))
405 return 0;
406
407 erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON,
408 CMDQ_OPCODE_CONF_DEVICE);
409
410 req.cfg = FIELD_PREP(ERDMA_CMD_CONFIG_DEVICE_PGSHIFT_MASK, PAGE_SHIFT) |
411 FIELD_PREP(ERDMA_CMD_CONFIG_DEVICE_PS_EN_MASK, 1);
412
413 return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
414 }
415
erdma_res_cb_init(struct erdma_dev * dev)416 static int erdma_res_cb_init(struct erdma_dev *dev)
417 {
418 int i, j;
419
420 for (i = 0; i < ERDMA_RES_CNT; i++) {
421 dev->res_cb[i].next_alloc_idx = 1;
422 spin_lock_init(&dev->res_cb[i].lock);
423 dev->res_cb[i].bitmap =
424 bitmap_zalloc(dev->res_cb[i].max_cap, GFP_KERNEL);
425 if (!dev->res_cb[i].bitmap)
426 goto err;
427 }
428
429 return 0;
430
431 err:
432 for (j = 0; j < i; j++)
433 bitmap_free(dev->res_cb[j].bitmap);
434
435 return -ENOMEM;
436 }
437
erdma_res_cb_free(struct erdma_dev * dev)438 static void erdma_res_cb_free(struct erdma_dev *dev)
439 {
440 int i;
441
442 for (i = 0; i < ERDMA_RES_CNT; i++)
443 bitmap_free(dev->res_cb[i].bitmap);
444 }
445
/*
 * Callback table installed on the ib_device with ib_set_device_ops() in
 * erdma_ib_device_add(). Entries are kept in alphabetical order of the
 * field name.
 */
static const struct ib_device_ops erdma_device_ops = {
	.owner = THIS_MODULE,
	.driver_id = RDMA_DRIVER_ERDMA,
	.uverbs_abi_ver = ERDMA_ABI_VERSION,

	.alloc_mr = erdma_ib_alloc_mr,
	.alloc_pd = erdma_alloc_pd,
	.alloc_ucontext = erdma_alloc_ucontext,
	.create_cq = erdma_create_cq,
	.create_qp = erdma_create_qp,
	.dealloc_pd = erdma_dealloc_pd,
	.dealloc_ucontext = erdma_dealloc_ucontext,
	.dereg_mr = erdma_dereg_mr,
	.destroy_cq = erdma_destroy_cq,
	.destroy_qp = erdma_destroy_qp,
	.get_dma_mr = erdma_get_dma_mr,
	.get_port_immutable = erdma_get_port_immutable,
	/* iw_*: connection-management entry points. */
	.iw_accept = erdma_accept,
	.iw_add_ref = erdma_qp_get_ref,
	.iw_connect = erdma_connect,
	.iw_create_listen = erdma_create_listen,
	.iw_destroy_listen = erdma_destroy_listen,
	.iw_get_qp = erdma_get_ibqp,
	.iw_reject = erdma_reject,
	.iw_rem_ref = erdma_qp_put_ref,
	.map_mr_sg = erdma_map_mr_sg,
	.mmap = erdma_mmap,
	.mmap_free = erdma_mmap_free,
	.modify_qp = erdma_modify_qp,
	.post_recv = erdma_post_recv,
	.post_send = erdma_post_send,
	.poll_cq = erdma_poll_cq,
	.query_device = erdma_query_device,
	.query_gid = erdma_query_gid,
	.query_port = erdma_query_port,
	.query_qp = erdma_query_qp,
	.req_notify_cq = erdma_req_notify_cq,
	.reg_user_mr = erdma_reg_user_mr,

	/*
	 * Pair each core object type with the driver structure that embeds
	 * it and the name of the embedded member.
	 */
	INIT_RDMA_OBJ_SIZE(ib_cq, erdma_cq, ibcq),
	INIT_RDMA_OBJ_SIZE(ib_pd, erdma_pd, ibpd),
	INIT_RDMA_OBJ_SIZE(ib_ucontext, erdma_ucontext, ibucontext),
	INIT_RDMA_OBJ_SIZE(ib_qp, erdma_qp, ibqp),
};
490
erdma_ib_device_add(struct pci_dev * pdev)491 static int erdma_ib_device_add(struct pci_dev *pdev)
492 {
493 struct erdma_dev *dev = pci_get_drvdata(pdev);
494 struct ib_device *ibdev = &dev->ibdev;
495 u64 mac;
496 int ret;
497
498 ret = erdma_dev_attrs_init(dev);
499 if (ret)
500 return ret;
501
502 ret = erdma_device_config(dev);
503 if (ret)
504 return ret;
505
506 ibdev->node_type = RDMA_NODE_RNIC;
507 memcpy(ibdev->node_desc, ERDMA_NODE_DESC, sizeof(ERDMA_NODE_DESC));
508
509 /*
510 * Current model (one-to-one device association):
511 * One ERDMA device per net_device or, equivalently,
512 * per physical port.
513 */
514 ibdev->phys_port_cnt = 1;
515 ibdev->num_comp_vectors = dev->attrs.irq_num - 1;
516
517 ib_set_device_ops(ibdev, &erdma_device_ops);
518
519 INIT_LIST_HEAD(&dev->cep_list);
520
521 spin_lock_init(&dev->lock);
522 xa_init_flags(&dev->qp_xa, XA_FLAGS_ALLOC1);
523 xa_init_flags(&dev->cq_xa, XA_FLAGS_ALLOC1);
524 dev->next_alloc_cqn = 1;
525 dev->next_alloc_qpn = 1;
526
527 ret = erdma_res_cb_init(dev);
528 if (ret)
529 return ret;
530
531 atomic_set(&dev->num_ctx, 0);
532
533 mac = erdma_reg_read32(dev, ERDMA_REGS_NETDEV_MAC_L_REG);
534 mac |= (u64)erdma_reg_read32(dev, ERDMA_REGS_NETDEV_MAC_H_REG) << 32;
535
536 u64_to_ether_addr(mac, dev->attrs.peer_addr);
537
538 dev->reflush_wq = alloc_workqueue("erdma-reflush-wq", WQ_UNBOUND,
539 WQ_UNBOUND_MAX_ACTIVE);
540 if (!dev->reflush_wq) {
541 ret = -ENOMEM;
542 goto err_alloc_workqueue;
543 }
544
545 ret = erdma_device_register(dev);
546 if (ret)
547 goto err_register;
548
549 return 0;
550
551 err_register:
552 destroy_workqueue(dev->reflush_wq);
553 err_alloc_workqueue:
554 xa_destroy(&dev->qp_xa);
555 xa_destroy(&dev->cq_xa);
556
557 erdma_res_cb_free(dev);
558
559 return ret;
560 }
561
erdma_ib_device_remove(struct pci_dev * pdev)562 static void erdma_ib_device_remove(struct pci_dev *pdev)
563 {
564 struct erdma_dev *dev = pci_get_drvdata(pdev);
565
566 unregister_netdevice_notifier(&dev->netdev_nb);
567 ib_unregister_device(&dev->ibdev);
568
569 destroy_workqueue(dev->reflush_wq);
570 erdma_res_cb_free(dev);
571 xa_destroy(&dev->qp_xa);
572 xa_destroy(&dev->cq_xa);
573 }
574
/*
 * PCI probe entry point.
 *
 * Runs the two probe stages in order: erdma_probe_dev() for the PCI and
 * control-path setup, then erdma_ib_device_add() for the RDMA side. If the
 * second stage fails, the first one is rolled back with
 * erdma_remove_dev().
 *
 * @ent is not used.
 *
 * Returns 0 on success or the negative errno of the failing stage.
 */
static int erdma_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
	int ret = erdma_probe_dev(pdev);

	if (ret)
		return ret;

	ret = erdma_ib_device_add(pdev);
	if (ret)
		erdma_remove_dev(pdev);

	return ret;
}
591
/*
 * PCI remove entry point: tears down the two probe stages in reverse
 * order, the RDMA side first and the PCI/control-path side second.
 */
static void erdma_remove(struct pci_dev *pdev)
{
	erdma_ib_device_remove(pdev);
	erdma_remove_dev(pdev);
}
597
/*
 * PCI driver descriptor: binds the IDs in erdma_pci_tbl to the probe and
 * remove entry points above.
 */
static struct pci_driver erdma_pci_driver = {
	.name = DRV_MODULE_NAME,
	.id_table = erdma_pci_tbl,
	.probe = erdma_probe,
	.remove = erdma_remove
};
604
/* Export the PCI ID table as module device-table information. */
MODULE_DEVICE_TABLE(pci, erdma_pci_tbl);
606
erdma_init_module(void)607 static __init int erdma_init_module(void)
608 {
609 int ret;
610
611 ret = erdma_cm_init();
612 if (ret)
613 return ret;
614
615 ret = pci_register_driver(&erdma_pci_driver);
616 if (ret)
617 erdma_cm_exit();
618
619 return ret;
620 }
621
/*
 * Module exit: reverse of erdma_init_module(). The PCI driver is
 * unregistered first, then the connection manager is shut down.
 */
static void __exit erdma_exit_module(void)
{
	pci_unregister_driver(&erdma_pci_driver);

	erdma_cm_exit();
}
628
/* Register the module's load and unload entry points. */
module_init(erdma_init_module);
module_exit(erdma_exit_module);
631