xref: /openbmc/linux/drivers/infiniband/hw/erdma/erdma_main.c (revision a266ef69b890f099069cf51bb40572611c435a54)
1 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
2 
3 /* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
4 /*          Kai Shen <kaishen@linux.alibaba.com> */
5 /* Copyright (c) 2020-2022, Alibaba Group. */
6 
7 #include <linux/module.h>
8 #include <net/addrconf.h>
9 #include <rdma/erdma-abi.h>
10 
11 #include "erdma.h"
12 #include "erdma_cm.h"
13 #include "erdma_verbs.h"
14 
15 MODULE_AUTHOR("Cheng Xu <chengyou@linux.alibaba.com>");
16 MODULE_DESCRIPTION("Alibaba elasticRDMA adapter driver");
17 MODULE_LICENSE("Dual BSD/GPL");
18 
19 static int erdma_netdev_event(struct notifier_block *nb, unsigned long event,
20 			      void *arg)
21 {
22 	struct net_device *netdev = netdev_notifier_info_to_dev(arg);
23 	struct erdma_dev *dev = container_of(nb, struct erdma_dev, netdev_nb);
24 
25 	if (dev->netdev == NULL || dev->netdev != netdev)
26 		goto done;
27 
28 	switch (event) {
29 	case NETDEV_UP:
30 		dev->state = IB_PORT_ACTIVE;
31 		erdma_port_event(dev, IB_EVENT_PORT_ACTIVE);
32 		break;
33 	case NETDEV_DOWN:
34 		dev->state = IB_PORT_DOWN;
35 		erdma_port_event(dev, IB_EVENT_PORT_ERR);
36 		break;
37 	case NETDEV_CHANGEMTU:
38 		if (dev->mtu != netdev->mtu) {
39 			erdma_set_mtu(dev, netdev->mtu);
40 			dev->mtu = netdev->mtu;
41 		}
42 		break;
43 	case NETDEV_REGISTER:
44 	case NETDEV_UNREGISTER:
45 	case NETDEV_CHANGEADDR:
46 	case NETDEV_GOING_DOWN:
47 	case NETDEV_CHANGE:
48 	default:
49 		break;
50 	}
51 
52 done:
53 	return NOTIFY_OK;
54 }
55 
56 static int erdma_enum_and_get_netdev(struct erdma_dev *dev)
57 {
58 	struct net_device *netdev;
59 	int ret = -ENODEV;
60 
61 	/* Already binded to a net_device, so we skip. */
62 	if (dev->netdev)
63 		return 0;
64 
65 	rtnl_lock();
66 	for_each_netdev(&init_net, netdev) {
67 		/*
68 		 * In erdma, the paired netdev and ibdev should have the same
69 		 * MAC address. erdma can get the value from its PCIe bar
70 		 * registers. Since erdma can not get the paired netdev
71 		 * reference directly, we do a traverse here to get the paired
72 		 * netdev.
73 		 */
74 		if (ether_addr_equal_unaligned(netdev->perm_addr,
75 					       dev->attrs.peer_addr)) {
76 			ret = ib_device_set_netdev(&dev->ibdev, netdev, 1);
77 			if (ret) {
78 				rtnl_unlock();
79 				ibdev_warn(&dev->ibdev,
80 					   "failed (%d) to link netdev", ret);
81 				return ret;
82 			}
83 
84 			dev->netdev = netdev;
85 			break;
86 		}
87 	}
88 
89 	rtnl_unlock();
90 
91 	return ret;
92 }
93 
94 static int erdma_device_register(struct erdma_dev *dev)
95 {
96 	struct ib_device *ibdev = &dev->ibdev;
97 	int ret;
98 
99 	ret = erdma_enum_and_get_netdev(dev);
100 	if (ret)
101 		return ret;
102 
103 	dev->mtu = dev->netdev->mtu;
104 	addrconf_addr_eui48((u8 *)&ibdev->node_guid, dev->netdev->dev_addr);
105 
106 	ret = ib_register_device(ibdev, "erdma_%d", &dev->pdev->dev);
107 	if (ret) {
108 		dev_err(&dev->pdev->dev,
109 			"ib_register_device failed: ret = %d\n", ret);
110 		return ret;
111 	}
112 
113 	dev->netdev_nb.notifier_call = erdma_netdev_event;
114 	ret = register_netdevice_notifier(&dev->netdev_nb);
115 	if (ret) {
116 		ibdev_err(&dev->ibdev, "failed to register notifier.\n");
117 		ib_unregister_device(ibdev);
118 	}
119 
120 	return ret;
121 }
122 
123 static irqreturn_t erdma_comm_irq_handler(int irq, void *data)
124 {
125 	struct erdma_dev *dev = data;
126 
127 	erdma_cmdq_completion_handler(&dev->cmdq);
128 	erdma_aeq_event_handler(dev);
129 
130 	return IRQ_HANDLED;
131 }
132 
133 static void erdma_dwqe_resource_init(struct erdma_dev *dev)
134 {
135 	int total_pages, type0, type1;
136 
137 	dev->attrs.grp_num = erdma_reg_read32(dev, ERDMA_REGS_GRP_NUM_REG);
138 
139 	if (dev->attrs.grp_num < 4)
140 		dev->attrs.disable_dwqe = true;
141 	else
142 		dev->attrs.disable_dwqe = false;
143 
144 	/* One page contains 4 goups. */
145 	total_pages = dev->attrs.grp_num * 4;
146 
147 	if (dev->attrs.grp_num >= ERDMA_DWQE_MAX_GRP_CNT) {
148 		dev->attrs.grp_num = ERDMA_DWQE_MAX_GRP_CNT;
149 		type0 = ERDMA_DWQE_TYPE0_CNT;
150 		type1 = ERDMA_DWQE_TYPE1_CNT / ERDMA_DWQE_TYPE1_CNT_PER_PAGE;
151 	} else {
152 		type1 = total_pages / 3;
153 		type0 = total_pages - type1 - 1;
154 	}
155 
156 	dev->attrs.dwqe_pages = type0;
157 	dev->attrs.dwqe_entries = type1 * ERDMA_DWQE_TYPE1_CNT_PER_PAGE;
158 }
159 
160 static int erdma_request_vectors(struct erdma_dev *dev)
161 {
162 	int expect_irq_num = min(num_possible_cpus() + 1, ERDMA_NUM_MSIX_VEC);
163 	int ret;
164 
165 	ret = pci_alloc_irq_vectors(dev->pdev, 1, expect_irq_num, PCI_IRQ_MSIX);
166 	if (ret < 0) {
167 		dev_err(&dev->pdev->dev, "request irq vectors failed(%d)\n",
168 			ret);
169 		return ret;
170 	}
171 	dev->attrs.irq_num = ret;
172 
173 	return 0;
174 }
175 
176 static int erdma_comm_irq_init(struct erdma_dev *dev)
177 {
178 	snprintf(dev->comm_irq.name, ERDMA_IRQNAME_SIZE, "erdma-common@pci:%s",
179 		 pci_name(dev->pdev));
180 	dev->comm_irq.msix_vector =
181 		pci_irq_vector(dev->pdev, ERDMA_MSIX_VECTOR_CMDQ);
182 
183 	cpumask_set_cpu(cpumask_first(cpumask_of_pcibus(dev->pdev->bus)),
184 			&dev->comm_irq.affinity_hint_mask);
185 	irq_set_affinity_hint(dev->comm_irq.msix_vector,
186 			      &dev->comm_irq.affinity_hint_mask);
187 
188 	return request_irq(dev->comm_irq.msix_vector, erdma_comm_irq_handler, 0,
189 			   dev->comm_irq.name, dev);
190 }
191 
192 static void erdma_comm_irq_uninit(struct erdma_dev *dev)
193 {
194 	irq_set_affinity_hint(dev->comm_irq.msix_vector, NULL);
195 	free_irq(dev->comm_irq.msix_vector, dev);
196 }
197 
198 static int erdma_device_init(struct erdma_dev *dev, struct pci_dev *pdev)
199 {
200 	int ret;
201 
202 	erdma_dwqe_resource_init(dev);
203 
204 	ret = dma_set_mask_and_coherent(&pdev->dev,
205 					DMA_BIT_MASK(ERDMA_PCI_WIDTH));
206 	if (ret)
207 		return ret;
208 
209 	dma_set_max_seg_size(&pdev->dev, UINT_MAX);
210 
211 	return 0;
212 }
213 
214 static void erdma_device_uninit(struct erdma_dev *dev)
215 {
216 	u32 ctrl = FIELD_PREP(ERDMA_REG_DEV_CTRL_RESET_MASK, 1);
217 
218 	erdma_reg_write32(dev, ERDMA_REGS_DEV_CTRL_REG, ctrl);
219 }
220 
221 static const struct pci_device_id erdma_pci_tbl[] = {
222 	{ PCI_DEVICE(PCI_VENDOR_ID_ALIBABA, 0x107f) },
223 	{}
224 };
225 
226 static int erdma_probe_dev(struct pci_dev *pdev)
227 {
228 	struct erdma_dev *dev;
229 	int bars, err;
230 	u32 version;
231 
232 	err = pci_enable_device(pdev);
233 	if (err) {
234 		dev_err(&pdev->dev, "pci_enable_device failed(%d)\n", err);
235 		return err;
236 	}
237 
238 	pci_set_master(pdev);
239 
240 	dev = ib_alloc_device(erdma_dev, ibdev);
241 	if (!dev) {
242 		dev_err(&pdev->dev, "ib_alloc_device failed\n");
243 		err = -ENOMEM;
244 		goto err_disable_device;
245 	}
246 
247 	pci_set_drvdata(pdev, dev);
248 	dev->pdev = pdev;
249 	dev->attrs.numa_node = dev_to_node(&pdev->dev);
250 
251 	bars = pci_select_bars(pdev, IORESOURCE_MEM);
252 	err = pci_request_selected_regions(pdev, bars, DRV_MODULE_NAME);
253 	if (bars != ERDMA_BAR_MASK || err) {
254 		err = err ? err : -EINVAL;
255 		goto err_ib_device_release;
256 	}
257 
258 	dev->func_bar_addr = pci_resource_start(pdev, ERDMA_FUNC_BAR);
259 	dev->func_bar_len = pci_resource_len(pdev, ERDMA_FUNC_BAR);
260 
261 	dev->func_bar =
262 		devm_ioremap(&pdev->dev, dev->func_bar_addr, dev->func_bar_len);
263 	if (!dev->func_bar) {
264 		dev_err(&pdev->dev, "devm_ioremap failed.\n");
265 		err = -EFAULT;
266 		goto err_release_bars;
267 	}
268 
269 	version = erdma_reg_read32(dev, ERDMA_REGS_VERSION_REG);
270 	if (version == 0) {
271 		/* we knows that it is a non-functional function. */
272 		err = -ENODEV;
273 		goto err_iounmap_func_bar;
274 	}
275 
276 	err = erdma_device_init(dev, pdev);
277 	if (err)
278 		goto err_iounmap_func_bar;
279 
280 	err = erdma_request_vectors(dev);
281 	if (err)
282 		goto err_iounmap_func_bar;
283 
284 	err = erdma_comm_irq_init(dev);
285 	if (err)
286 		goto err_free_vectors;
287 
288 	err = erdma_aeq_init(dev);
289 	if (err)
290 		goto err_uninit_comm_irq;
291 
292 	err = erdma_cmdq_init(dev);
293 	if (err)
294 		goto err_uninit_aeq;
295 
296 	err = erdma_ceqs_init(dev);
297 	if (err)
298 		goto err_uninit_cmdq;
299 
300 	erdma_finish_cmdq_init(dev);
301 
302 	return 0;
303 
304 err_uninit_cmdq:
305 	erdma_device_uninit(dev);
306 	erdma_cmdq_destroy(dev);
307 
308 err_uninit_aeq:
309 	erdma_aeq_destroy(dev);
310 
311 err_uninit_comm_irq:
312 	erdma_comm_irq_uninit(dev);
313 
314 err_free_vectors:
315 	pci_free_irq_vectors(dev->pdev);
316 
317 err_iounmap_func_bar:
318 	devm_iounmap(&pdev->dev, dev->func_bar);
319 
320 err_release_bars:
321 	pci_release_selected_regions(pdev, bars);
322 
323 err_ib_device_release:
324 	ib_dealloc_device(&dev->ibdev);
325 
326 err_disable_device:
327 	pci_disable_device(pdev);
328 
329 	return err;
330 }
331 
332 static void erdma_remove_dev(struct pci_dev *pdev)
333 {
334 	struct erdma_dev *dev = pci_get_drvdata(pdev);
335 
336 	erdma_ceqs_uninit(dev);
337 
338 	erdma_device_uninit(dev);
339 
340 	erdma_cmdq_destroy(dev);
341 	erdma_aeq_destroy(dev);
342 	erdma_comm_irq_uninit(dev);
343 	pci_free_irq_vectors(dev->pdev);
344 
345 	devm_iounmap(&pdev->dev, dev->func_bar);
346 	pci_release_selected_regions(pdev, ERDMA_BAR_MASK);
347 
348 	ib_dealloc_device(&dev->ibdev);
349 
350 	pci_disable_device(pdev);
351 }
352 
353 #define ERDMA_GET_CAP(name, cap) FIELD_GET(ERDMA_CMD_DEV_CAP_##name##_MASK, cap)
354 
355 static int erdma_dev_attrs_init(struct erdma_dev *dev)
356 {
357 	int err;
358 	u64 req_hdr, cap0, cap1;
359 
360 	erdma_cmdq_build_reqhdr(&req_hdr, CMDQ_SUBMOD_RDMA,
361 				CMDQ_OPCODE_QUERY_DEVICE);
362 
363 	err = erdma_post_cmd_wait(&dev->cmdq, &req_hdr, sizeof(req_hdr), &cap0,
364 				  &cap1);
365 	if (err)
366 		return err;
367 
368 	dev->attrs.max_cqe = 1 << ERDMA_GET_CAP(MAX_CQE, cap0);
369 	dev->attrs.max_mr_size = 1ULL << ERDMA_GET_CAP(MAX_MR_SIZE, cap0);
370 	dev->attrs.max_mw = 1 << ERDMA_GET_CAP(MAX_MW, cap1);
371 	dev->attrs.max_recv_wr = 1 << ERDMA_GET_CAP(MAX_RECV_WR, cap0);
372 	dev->attrs.local_dma_key = ERDMA_GET_CAP(DMA_LOCAL_KEY, cap1);
373 	dev->attrs.cc = ERDMA_GET_CAP(DEFAULT_CC, cap1);
374 	dev->attrs.max_qp = ERDMA_NQP_PER_QBLOCK * ERDMA_GET_CAP(QBLOCK, cap1);
375 	dev->attrs.max_mr = dev->attrs.max_qp << 1;
376 	dev->attrs.max_cq = dev->attrs.max_qp << 1;
377 	dev->attrs.cap_flags = ERDMA_GET_CAP(FLAGS, cap0);
378 
379 	dev->attrs.max_send_wr = ERDMA_MAX_SEND_WR;
380 	dev->attrs.max_ord = ERDMA_MAX_ORD;
381 	dev->attrs.max_ird = ERDMA_MAX_IRD;
382 	dev->attrs.max_send_sge = ERDMA_MAX_SEND_SGE;
383 	dev->attrs.max_recv_sge = ERDMA_MAX_RECV_SGE;
384 	dev->attrs.max_sge_rd = ERDMA_MAX_SGE_RD;
385 	dev->attrs.max_pd = ERDMA_MAX_PD;
386 
387 	dev->res_cb[ERDMA_RES_TYPE_PD].max_cap = ERDMA_MAX_PD;
388 	dev->res_cb[ERDMA_RES_TYPE_STAG_IDX].max_cap = dev->attrs.max_mr;
389 
390 	erdma_cmdq_build_reqhdr(&req_hdr, CMDQ_SUBMOD_COMMON,
391 				CMDQ_OPCODE_QUERY_FW_INFO);
392 
393 	err = erdma_post_cmd_wait(&dev->cmdq, &req_hdr, sizeof(req_hdr), &cap0,
394 				  &cap1);
395 	if (!err)
396 		dev->attrs.fw_version =
397 			FIELD_GET(ERDMA_CMD_INFO0_FW_VER_MASK, cap0);
398 
399 	return err;
400 }
401 
402 static int erdma_res_cb_init(struct erdma_dev *dev)
403 {
404 	int i, j;
405 
406 	for (i = 0; i < ERDMA_RES_CNT; i++) {
407 		dev->res_cb[i].next_alloc_idx = 1;
408 		spin_lock_init(&dev->res_cb[i].lock);
409 		dev->res_cb[i].bitmap =
410 			bitmap_zalloc(dev->res_cb[i].max_cap, GFP_KERNEL);
411 		if (!dev->res_cb[i].bitmap)
412 			goto err;
413 	}
414 
415 	return 0;
416 
417 err:
418 	for (j = 0; j < i; j++)
419 		bitmap_free(dev->res_cb[j].bitmap);
420 
421 	return -ENOMEM;
422 }
423 
424 static void erdma_res_cb_free(struct erdma_dev *dev)
425 {
426 	int i;
427 
428 	for (i = 0; i < ERDMA_RES_CNT; i++)
429 		bitmap_free(dev->res_cb[i].bitmap);
430 }
431 
432 static const struct ib_device_ops erdma_device_ops = {
433 	.owner = THIS_MODULE,
434 	.driver_id = RDMA_DRIVER_ERDMA,
435 	.uverbs_abi_ver = ERDMA_ABI_VERSION,
436 
437 	.alloc_mr = erdma_ib_alloc_mr,
438 	.alloc_pd = erdma_alloc_pd,
439 	.alloc_ucontext = erdma_alloc_ucontext,
440 	.create_cq = erdma_create_cq,
441 	.create_qp = erdma_create_qp,
442 	.dealloc_pd = erdma_dealloc_pd,
443 	.dealloc_ucontext = erdma_dealloc_ucontext,
444 	.dereg_mr = erdma_dereg_mr,
445 	.destroy_cq = erdma_destroy_cq,
446 	.destroy_qp = erdma_destroy_qp,
447 	.get_dma_mr = erdma_get_dma_mr,
448 	.get_port_immutable = erdma_get_port_immutable,
449 	.iw_accept = erdma_accept,
450 	.iw_add_ref = erdma_qp_get_ref,
451 	.iw_connect = erdma_connect,
452 	.iw_create_listen = erdma_create_listen,
453 	.iw_destroy_listen = erdma_destroy_listen,
454 	.iw_get_qp = erdma_get_ibqp,
455 	.iw_reject = erdma_reject,
456 	.iw_rem_ref = erdma_qp_put_ref,
457 	.map_mr_sg = erdma_map_mr_sg,
458 	.mmap = erdma_mmap,
459 	.mmap_free = erdma_mmap_free,
460 	.modify_qp = erdma_modify_qp,
461 	.post_recv = erdma_post_recv,
462 	.post_send = erdma_post_send,
463 	.poll_cq = erdma_poll_cq,
464 	.query_device = erdma_query_device,
465 	.query_gid = erdma_query_gid,
466 	.query_port = erdma_query_port,
467 	.query_qp = erdma_query_qp,
468 	.req_notify_cq = erdma_req_notify_cq,
469 	.reg_user_mr = erdma_reg_user_mr,
470 
471 	INIT_RDMA_OBJ_SIZE(ib_cq, erdma_cq, ibcq),
472 	INIT_RDMA_OBJ_SIZE(ib_pd, erdma_pd, ibpd),
473 	INIT_RDMA_OBJ_SIZE(ib_ucontext, erdma_ucontext, ibucontext),
474 	INIT_RDMA_OBJ_SIZE(ib_qp, erdma_qp, ibqp),
475 };
476 
477 static int erdma_ib_device_add(struct pci_dev *pdev)
478 {
479 	struct erdma_dev *dev = pci_get_drvdata(pdev);
480 	struct ib_device *ibdev = &dev->ibdev;
481 	u64 mac;
482 	int ret;
483 
484 	ret = erdma_dev_attrs_init(dev);
485 	if (ret)
486 		return ret;
487 
488 	ibdev->node_type = RDMA_NODE_RNIC;
489 	memcpy(ibdev->node_desc, ERDMA_NODE_DESC, sizeof(ERDMA_NODE_DESC));
490 
491 	/*
492 	 * Current model (one-to-one device association):
493 	 * One ERDMA device per net_device or, equivalently,
494 	 * per physical port.
495 	 */
496 	ibdev->phys_port_cnt = 1;
497 	ibdev->num_comp_vectors = dev->attrs.irq_num - 1;
498 
499 	ib_set_device_ops(ibdev, &erdma_device_ops);
500 
501 	INIT_LIST_HEAD(&dev->cep_list);
502 
503 	spin_lock_init(&dev->lock);
504 	xa_init_flags(&dev->qp_xa, XA_FLAGS_ALLOC1);
505 	xa_init_flags(&dev->cq_xa, XA_FLAGS_ALLOC1);
506 	dev->next_alloc_cqn = 1;
507 	dev->next_alloc_qpn = 1;
508 
509 	ret = erdma_res_cb_init(dev);
510 	if (ret)
511 		return ret;
512 
513 	spin_lock_init(&dev->db_bitmap_lock);
514 	bitmap_zero(dev->sdb_page, ERDMA_DWQE_TYPE0_CNT);
515 	bitmap_zero(dev->sdb_entry, ERDMA_DWQE_TYPE1_CNT);
516 
517 	atomic_set(&dev->num_ctx, 0);
518 
519 	mac = erdma_reg_read32(dev, ERDMA_REGS_NETDEV_MAC_L_REG);
520 	mac |= (u64)erdma_reg_read32(dev, ERDMA_REGS_NETDEV_MAC_H_REG) << 32;
521 
522 	u64_to_ether_addr(mac, dev->attrs.peer_addr);
523 
524 	dev->reflush_wq = alloc_workqueue("erdma-reflush-wq", WQ_UNBOUND,
525 					  WQ_UNBOUND_MAX_ACTIVE);
526 	if (!dev->reflush_wq) {
527 		ret = -ENOMEM;
528 		goto err_alloc_workqueue;
529 	}
530 
531 	ret = erdma_device_register(dev);
532 	if (ret)
533 		goto err_register;
534 
535 	return 0;
536 
537 err_register:
538 	destroy_workqueue(dev->reflush_wq);
539 err_alloc_workqueue:
540 	xa_destroy(&dev->qp_xa);
541 	xa_destroy(&dev->cq_xa);
542 
543 	erdma_res_cb_free(dev);
544 
545 	return ret;
546 }
547 
548 static void erdma_ib_device_remove(struct pci_dev *pdev)
549 {
550 	struct erdma_dev *dev = pci_get_drvdata(pdev);
551 
552 	unregister_netdevice_notifier(&dev->netdev_nb);
553 	ib_unregister_device(&dev->ibdev);
554 
555 	destroy_workqueue(dev->reflush_wq);
556 	erdma_res_cb_free(dev);
557 	xa_destroy(&dev->qp_xa);
558 	xa_destroy(&dev->cq_xa);
559 }
560 
/*
 * PCI probe callback: bring up the PCI side of the device, then the IB side.
 * If the IB side fails, the PCI side is torn down again.
 *
 * @ent is not used.
 *
 * Returns 0 on success or the negative errno from the failing stage.
 */
static int erdma_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
	int err;

	err = erdma_probe_dev(pdev);
	if (err)
		return err;

	err = erdma_ib_device_add(pdev);
	if (err)
		erdma_remove_dev(pdev);

	return err;
}
577 
/*
 * PCI remove callback: tear down the IB side first, then the PCI side, i.e.
 * the reverse of erdma_probe().
 */
static void erdma_remove(struct pci_dev *pdev)
{
	erdma_ib_device_remove(pdev);
	erdma_remove_dev(pdev);
}
583 
584 static struct pci_driver erdma_pci_driver = {
585 	.name = DRV_MODULE_NAME,
586 	.id_table = erdma_pci_tbl,
587 	.probe = erdma_probe,
588 	.remove = erdma_remove
589 };
590 
591 MODULE_DEVICE_TABLE(pci, erdma_pci_tbl);
592 
593 static __init int erdma_init_module(void)
594 {
595 	int ret;
596 
597 	ret = erdma_cm_init();
598 	if (ret)
599 		return ret;
600 
601 	ret = pci_register_driver(&erdma_pci_driver);
602 	if (ret)
603 		erdma_cm_exit();
604 
605 	return ret;
606 }
607 
608 static void __exit erdma_exit_module(void)
609 {
610 	pci_unregister_driver(&erdma_pci_driver);
611 
612 	erdma_cm_exit();
613 }
614 
615 module_init(erdma_init_module);
616 module_exit(erdma_exit_module);
617