1 /*******************************************************************
2  * This file is part of the Emulex RoCE Device Driver for          *
3  * RoCE (RDMA over Converged Ethernet) adapters.                   *
4  * Copyright (C) 2008-2012 Emulex. All rights reserved.            *
5  * EMULEX and SLI are trademarks of Emulex.                        *
6  * www.emulex.com                                                  *
7  *                                                                 *
8  * This program is free software; you can redistribute it and/or   *
9  * modify it under the terms of version 2 of the GNU General       *
10  * Public License as published by the Free Software Foundation.    *
11  * This program is distributed in the hope that it will be useful. *
12  * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND          *
13  * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY,  *
14  * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE      *
15  * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
16  * TO BE LEGALLY INVALID.  See the GNU General Public License for  *
17  * more details, a copy of which can be found in the file COPYING  *
18  * included with this package.                                     *
19  *
20  * Contact Information:
21  * linux-drivers@emulex.com
22  *
23  * Emulex
24  * 3333 Susan Street
25  * Costa Mesa, CA 92626
26  *******************************************************************/
27 
28 #include <linux/module.h>
29 #include <linux/idr.h>
30 #include <rdma/ib_verbs.h>
31 #include <rdma/ib_user_verbs.h>
32 #include <rdma/ib_addr.h>
33 
34 #include <linux/netdevice.h>
35 #include <net/addrconf.h>
36 
37 #include "ocrdma.h"
38 #include "ocrdma_verbs.h"
39 #include "ocrdma_ah.h"
40 #include "be_roce.h"
41 #include "ocrdma_hw.h"
42 #include "ocrdma_abi.h"
43 
44 MODULE_VERSION(OCRDMA_ROCE_DEV_VERSION);
45 MODULE_DESCRIPTION("Emulex RoCE HCA Driver");
46 MODULE_AUTHOR("Emulex Corporation");
47 MODULE_LICENSE("GPL");
48 
49 static LIST_HEAD(ocrdma_dev_list);
50 static DEFINE_SPINLOCK(ocrdma_devlist_lock);
51 static DEFINE_IDR(ocrdma_dev_id);
52 
53 static union ib_gid ocrdma_zero_sgid;
54 
55 void ocrdma_get_guid(struct ocrdma_dev *dev, u8 *guid)
56 {
57 	u8 mac_addr[6];
58 
59 	memcpy(&mac_addr[0], &dev->nic_info.mac_addr[0], ETH_ALEN);
60 	guid[0] = mac_addr[0] ^ 2;
61 	guid[1] = mac_addr[1];
62 	guid[2] = mac_addr[2];
63 	guid[3] = 0xff;
64 	guid[4] = 0xfe;
65 	guid[5] = mac_addr[3];
66 	guid[6] = mac_addr[4];
67 	guid[7] = mac_addr[5];
68 }
69 
70 static void ocrdma_build_sgid_mac(union ib_gid *sgid, unsigned char *mac_addr,
71 				  bool is_vlan, u16 vlan_id)
72 {
73 	sgid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
74 	sgid->raw[8] = mac_addr[0] ^ 2;
75 	sgid->raw[9] = mac_addr[1];
76 	sgid->raw[10] = mac_addr[2];
77 	if (is_vlan) {
78 		sgid->raw[11] = vlan_id >> 8;
79 		sgid->raw[12] = vlan_id & 0xff;
80 	} else {
81 		sgid->raw[11] = 0xff;
82 		sgid->raw[12] = 0xfe;
83 	}
84 	sgid->raw[13] = mac_addr[3];
85 	sgid->raw[14] = mac_addr[4];
86 	sgid->raw[15] = mac_addr[5];
87 }
88 
89 static bool ocrdma_add_sgid(struct ocrdma_dev *dev, unsigned char *mac_addr,
90 			    bool is_vlan, u16 vlan_id)
91 {
92 	int i;
93 	union ib_gid new_sgid;
94 	unsigned long flags;
95 
96 	memset(&ocrdma_zero_sgid, 0, sizeof(union ib_gid));
97 
98 	ocrdma_build_sgid_mac(&new_sgid, mac_addr, is_vlan, vlan_id);
99 
100 	spin_lock_irqsave(&dev->sgid_lock, flags);
101 	for (i = 0; i < OCRDMA_MAX_SGID; i++) {
102 		if (!memcmp(&dev->sgid_tbl[i], &ocrdma_zero_sgid,
103 			    sizeof(union ib_gid))) {
104 			/* found free entry */
105 			memcpy(&dev->sgid_tbl[i], &new_sgid,
106 			       sizeof(union ib_gid));
107 			spin_unlock_irqrestore(&dev->sgid_lock, flags);
108 			return true;
109 		} else if (!memcmp(&dev->sgid_tbl[i], &new_sgid,
110 				   sizeof(union ib_gid))) {
111 			/* entry already present, no addition is required. */
112 			spin_unlock_irqrestore(&dev->sgid_lock, flags);
113 			return false;
114 		}
115 	}
116 	spin_unlock_irqrestore(&dev->sgid_lock, flags);
117 	return false;
118 }
119 
120 static bool ocrdma_del_sgid(struct ocrdma_dev *dev, unsigned char *mac_addr,
121 			    bool is_vlan, u16 vlan_id)
122 {
123 	int found = false;
124 	int i;
125 	union ib_gid sgid;
126 	unsigned long flags;
127 
128 	ocrdma_build_sgid_mac(&sgid, mac_addr, is_vlan, vlan_id);
129 
130 	spin_lock_irqsave(&dev->sgid_lock, flags);
131 	/* first is default sgid, which cannot be deleted. */
132 	for (i = 1; i < OCRDMA_MAX_SGID; i++) {
133 		if (!memcmp(&dev->sgid_tbl[i], &sgid, sizeof(union ib_gid))) {
134 			/* found matching entry */
135 			memset(&dev->sgid_tbl[i], 0, sizeof(union ib_gid));
136 			found = true;
137 			break;
138 		}
139 	}
140 	spin_unlock_irqrestore(&dev->sgid_lock, flags);
141 	return found;
142 }
143 
144 static void ocrdma_add_default_sgid(struct ocrdma_dev *dev)
145 {
146 	/* GID Index 0 - Invariant manufacturer-assigned EUI-64 */
147 	union ib_gid *sgid = &dev->sgid_tbl[0];
148 
149 	sgid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
150 	ocrdma_get_guid(dev, &sgid->raw[8]);
151 }
152 
153 #if IS_ENABLED(CONFIG_VLAN_8021Q)
154 static void ocrdma_add_vlan_sgids(struct ocrdma_dev *dev)
155 {
156 	struct net_device *netdev, *tmp;
157 	u16 vlan_id;
158 	bool is_vlan;
159 
160 	netdev = dev->nic_info.netdev;
161 
162 	rcu_read_lock();
163 	for_each_netdev_rcu(&init_net, tmp) {
164 		if (netdev == tmp || vlan_dev_real_dev(tmp) == netdev) {
165 			if (!netif_running(tmp) || !netif_oper_up(tmp))
166 				continue;
167 			if (netdev != tmp) {
168 				vlan_id = vlan_dev_vlan_id(tmp);
169 				is_vlan = true;
170 			} else {
171 				is_vlan = false;
172 				vlan_id = 0;
173 				tmp = netdev;
174 			}
175 			ocrdma_add_sgid(dev, tmp->dev_addr, is_vlan, vlan_id);
176 		}
177 	}
178 	rcu_read_unlock();
179 }
180 #else
181 static void ocrdma_add_vlan_sgids(struct ocrdma_dev *dev)
182 {
183 
184 }
185 #endif /* VLAN */
186 
/* Populate the SGID table: entry 0 gets the invariant GUID-based GID,
 * then one entry per up-and-running VLAN interface stacked on our
 * netdev.  Always returns 0.
 */
static int ocrdma_build_sgid_tbl(struct ocrdma_dev *dev)
{
	ocrdma_add_default_sgid(dev);
	ocrdma_add_vlan_sgids(dev);
	return 0;
}
193 
194 #if IS_ENABLED(CONFIG_IPV6)
195 
/* inet6 address notifier callback: keeps the SGID table in sync with
 * IPv6 link-local addresses added to / removed from our netdev (or a
 * VLAN device stacked on it), and dispatches IB_EVENT_GID_CHANGE to
 * consumers whenever the table actually changes.
 */
static int ocrdma_inet6addr_event(struct notifier_block *notifier,
				  unsigned long event, void *ptr)
{
	struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr;
	struct net_device *netdev = ifa->idev->dev;
	struct ib_event gid_event;
	struct ocrdma_dev *dev;
	bool found = false;
	bool updated = false;
	bool is_vlan = false;
	u16 vid = 0;

	/* For a VLAN device, record the id and work on the real device
	 * underneath, since that is what ocrdma_dev_list tracks.
	 */
	is_vlan = netdev->priv_flags & IFF_802_1Q_VLAN;
	if (is_vlan) {
		vid = vlan_dev_vlan_id(netdev);
		netdev = vlan_dev_real_dev(netdev);
	}

	/* Find the ocrdma device backed by this netdev, if any. */
	rcu_read_lock();
	list_for_each_entry_rcu(dev, &ocrdma_dev_list, entry) {
		if (dev->nic_info.netdev == netdev) {
			found = true;
			break;
		}
	}
	rcu_read_unlock();
	/* NOTE(review): 'dev' is used below after rcu_read_unlock();
	 * this relies on ocrdma_remove() deferring the free via
	 * call_rcu() — confirm no window exists where dev is freed
	 * while this notifier still runs.
	 */

	if (!found)
		return NOTIFY_DONE;
	/* Only link-local addresses map onto RoCE GIDs here. */
	if (!rdma_link_local_addr((struct in6_addr *)&ifa->addr))
		return NOTIFY_DONE;

	mutex_lock(&dev->dev_lock);
	switch (event) {
	case NETDEV_UP:
		updated = ocrdma_add_sgid(dev, netdev->dev_addr, is_vlan, vid);
		break;
	case NETDEV_DOWN:
		updated = ocrdma_del_sgid(dev, netdev->dev_addr, is_vlan, vid);
		break;
	default:
		break;
	}
	if (updated) {
		/* GID table updated, notify the consumers about it */
		gid_event.device = &dev->ibdev;
		gid_event.element.port_num = 1;
		gid_event.event = IB_EVENT_GID_CHANGE;
		ib_dispatch_event(&gid_event);
	}
	mutex_unlock(&dev->dev_lock);
	return NOTIFY_OK;
}
249 
/* Registered in ocrdma_init_module() when CONFIG_IPV6 is enabled. */
static struct notifier_block ocrdma_inet6addr_notifier = {
	.notifier_call = ocrdma_inet6addr_event
};
253 
#endif /* CONFIG_IPV6 */
255 
/* ocrdma is a RoCE driver: every port's link layer is Ethernet. */
static enum rdma_link_layer ocrdma_link_layer(struct ib_device *device,
					      u8 port_num)
{
	return IB_LINK_LAYER_ETHERNET;
}
261 
/* Populate the embedded ib_device (name, node GUID, supported uverbs
 * command mask and verb callbacks) and register it with the IB core.
 * SRQ support is only advertised on GEN2-family hardware.
 * Returns 0 on success or the error from ib_register_device().
 */
static int ocrdma_register_device(struct ocrdma_dev *dev)
{
	strlcpy(dev->ibdev.name, "ocrdma%d", IB_DEVICE_NAME_MAX);
	ocrdma_get_guid(dev, (u8 *)&dev->ibdev.node_guid);
	memcpy(dev->ibdev.node_desc, OCRDMA_NODE_DESC,
	       sizeof(OCRDMA_NODE_DESC));
	dev->ibdev.owner = THIS_MODULE;
	dev->ibdev.uverbs_abi_ver = OCRDMA_ABI_VERSION;
	/* user-space verbs commands supported on all hardware */
	dev->ibdev.uverbs_cmd_mask =
	    OCRDMA_UVERBS(GET_CONTEXT) |
	    OCRDMA_UVERBS(QUERY_DEVICE) |
	    OCRDMA_UVERBS(QUERY_PORT) |
	    OCRDMA_UVERBS(ALLOC_PD) |
	    OCRDMA_UVERBS(DEALLOC_PD) |
	    OCRDMA_UVERBS(REG_MR) |
	    OCRDMA_UVERBS(DEREG_MR) |
	    OCRDMA_UVERBS(CREATE_COMP_CHANNEL) |
	    OCRDMA_UVERBS(CREATE_CQ) |
	    OCRDMA_UVERBS(RESIZE_CQ) |
	    OCRDMA_UVERBS(DESTROY_CQ) |
	    OCRDMA_UVERBS(REQ_NOTIFY_CQ) |
	    OCRDMA_UVERBS(CREATE_QP) |
	    OCRDMA_UVERBS(MODIFY_QP) |
	    OCRDMA_UVERBS(QUERY_QP) |
	    OCRDMA_UVERBS(DESTROY_QP) |
	    OCRDMA_UVERBS(POLL_CQ) |
	    OCRDMA_UVERBS(POST_SEND) |
	    OCRDMA_UVERBS(POST_RECV);

	/* address-handle commands */
	dev->ibdev.uverbs_cmd_mask |=
	    OCRDMA_UVERBS(CREATE_AH) |
	     OCRDMA_UVERBS(MODIFY_AH) |
	     OCRDMA_UVERBS(QUERY_AH) |
	     OCRDMA_UVERBS(DESTROY_AH);

	dev->ibdev.node_type = RDMA_NODE_IB_CA;
	dev->ibdev.phys_port_cnt = 1;
	dev->ibdev.num_comp_vectors = 1;

	/* mandatory verbs. */
	dev->ibdev.query_device = ocrdma_query_device;
	dev->ibdev.query_port = ocrdma_query_port;
	dev->ibdev.modify_port = ocrdma_modify_port;
	dev->ibdev.query_gid = ocrdma_query_gid;
	dev->ibdev.get_link_layer = ocrdma_link_layer;
	dev->ibdev.alloc_pd = ocrdma_alloc_pd;
	dev->ibdev.dealloc_pd = ocrdma_dealloc_pd;

	dev->ibdev.create_cq = ocrdma_create_cq;
	dev->ibdev.destroy_cq = ocrdma_destroy_cq;
	dev->ibdev.resize_cq = ocrdma_resize_cq;

	dev->ibdev.create_qp = ocrdma_create_qp;
	dev->ibdev.modify_qp = ocrdma_modify_qp;
	dev->ibdev.query_qp = ocrdma_query_qp;
	dev->ibdev.destroy_qp = ocrdma_destroy_qp;

	dev->ibdev.query_pkey = ocrdma_query_pkey;
	dev->ibdev.create_ah = ocrdma_create_ah;
	dev->ibdev.destroy_ah = ocrdma_destroy_ah;
	dev->ibdev.query_ah = ocrdma_query_ah;
	dev->ibdev.modify_ah = ocrdma_modify_ah;

	dev->ibdev.poll_cq = ocrdma_poll_cq;
	dev->ibdev.post_send = ocrdma_post_send;
	dev->ibdev.post_recv = ocrdma_post_recv;
	dev->ibdev.req_notify_cq = ocrdma_arm_cq;

	/* memory-region verbs */
	dev->ibdev.get_dma_mr = ocrdma_get_dma_mr;
	dev->ibdev.reg_phys_mr = ocrdma_reg_kernel_mr;
	dev->ibdev.dereg_mr = ocrdma_dereg_mr;
	dev->ibdev.reg_user_mr = ocrdma_reg_user_mr;

	dev->ibdev.alloc_fast_reg_mr = ocrdma_alloc_frmr;
	dev->ibdev.alloc_fast_reg_page_list = ocrdma_alloc_frmr_page_list;
	dev->ibdev.free_fast_reg_page_list = ocrdma_free_frmr_page_list;

	/* mandatory to support user space verbs consumer. */
	dev->ibdev.alloc_ucontext = ocrdma_alloc_ucontext;
	dev->ibdev.dealloc_ucontext = ocrdma_dealloc_ucontext;
	dev->ibdev.mmap = ocrdma_mmap;
	dev->ibdev.dma_device = &dev->nic_info.pdev->dev;

	dev->ibdev.process_mad = ocrdma_process_mad;

	/* SRQ verbs are only available on GEN2-family adapters. */
	if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) {
		dev->ibdev.uverbs_cmd_mask |=
		     OCRDMA_UVERBS(CREATE_SRQ) |
		     OCRDMA_UVERBS(MODIFY_SRQ) |
		     OCRDMA_UVERBS(QUERY_SRQ) |
		     OCRDMA_UVERBS(DESTROY_SRQ) |
		     OCRDMA_UVERBS(POST_SRQ_RECV);

		dev->ibdev.create_srq = ocrdma_create_srq;
		dev->ibdev.modify_srq = ocrdma_modify_srq;
		dev->ibdev.query_srq = ocrdma_query_srq;
		dev->ibdev.destroy_srq = ocrdma_destroy_srq;
		dev->ibdev.post_srq_recv = ocrdma_post_srq_recv;
	}
	return ib_register_device(&dev->ibdev, NULL);
}
363 
364 static int ocrdma_alloc_resources(struct ocrdma_dev *dev)
365 {
366 	mutex_init(&dev->dev_lock);
367 	dev->sgid_tbl = kzalloc(sizeof(union ib_gid) *
368 				OCRDMA_MAX_SGID, GFP_KERNEL);
369 	if (!dev->sgid_tbl)
370 		goto alloc_err;
371 	spin_lock_init(&dev->sgid_lock);
372 
373 	dev->cq_tbl = kzalloc(sizeof(struct ocrdma_cq *) *
374 			      OCRDMA_MAX_CQ, GFP_KERNEL);
375 	if (!dev->cq_tbl)
376 		goto alloc_err;
377 
378 	if (dev->attr.max_qp) {
379 		dev->qp_tbl = kzalloc(sizeof(struct ocrdma_qp *) *
380 				      OCRDMA_MAX_QP, GFP_KERNEL);
381 		if (!dev->qp_tbl)
382 			goto alloc_err;
383 	}
384 	spin_lock_init(&dev->av_tbl.lock);
385 	spin_lock_init(&dev->flush_q_lock);
386 	return 0;
387 alloc_err:
388 	pr_err("%s(%d) error.\n", __func__, dev->id);
389 	return -ENOMEM;
390 }
391 
/* Release the lookup tables allocated by ocrdma_alloc_resources().
 * kfree(NULL) is a no-op, so this is safe on partial allocations.
 */
static void ocrdma_free_resources(struct ocrdma_dev *dev)
{
	kfree(dev->qp_tbl);
	kfree(dev->cq_tbl);
	kfree(dev->sgid_tbl);
}
398 
/* Device-add callback invoked by the be2net NIC driver for each
 * RoCE-capable function: allocates the ib_device, assigns an idr id,
 * brings up the hardware, builds the tables, registers with the IB
 * core and links the device onto ocrdma_dev_list.
 * Returns the new device, or NULL on any failure.
 */
static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info)
{
	int status = 0;
	struct ocrdma_dev *dev;

	dev = (struct ocrdma_dev *)ib_alloc_device(sizeof(struct ocrdma_dev));
	if (!dev) {
		pr_err("Unable to allocate ib device\n");
		return NULL;
	}
	dev->mbx_cmd = kzalloc(sizeof(struct ocrdma_mqe_emb_cmd), GFP_KERNEL);
	if (!dev->mbx_cmd)
		goto idr_err;

	memcpy(&dev->nic_info, dev_info, sizeof(*dev_info));
	dev->id = idr_alloc(&ocrdma_dev_id, NULL, 0, 0, GFP_KERNEL);
	if (dev->id < 0)
		goto idr_err;

	status = ocrdma_init_hw(dev);
	if (status)
		goto init_err;

	status = ocrdma_alloc_resources(dev);
	if (status)
		goto alloc_err;

	status = ocrdma_build_sgid_tbl(dev);
	if (status)
		goto alloc_err;

	status = ocrdma_register_device(dev);
	if (status)
		goto alloc_err;

	/* publish under RCU so the inet6 notifier can walk the list */
	spin_lock(&ocrdma_devlist_lock);
	list_add_tail_rcu(&dev->entry, &ocrdma_dev_list);
	spin_unlock(&ocrdma_devlist_lock);
	return dev;

	/* unwind in reverse order of setup; labels fall through so each
	 * failure point releases only what was actually acquired.
	 */
alloc_err:
	ocrdma_free_resources(dev);
	ocrdma_cleanup_hw(dev);
init_err:
	idr_remove(&ocrdma_dev_id, dev->id);
idr_err:
	kfree(dev->mbx_cmd);
	ib_dealloc_device(&dev->ibdev);
	pr_err("%s() leaving. ret=%d\n", __func__, status);
	return NULL;
}
450 
/* RCU callback: final teardown of a removed device, run only after a
 * grace period so list readers (the inet6 notifier) are done with it.
 */
static void ocrdma_remove_free(struct rcu_head *rcu)
{
	struct ocrdma_dev *dev = container_of(rcu, struct ocrdma_dev, rcu);

	idr_remove(&ocrdma_dev_id, dev->id);
	kfree(dev->mbx_cmd);
	ib_dealloc_device(&dev->ibdev);
}
459 
/* Device-remove callback from the NIC driver: unregister from the IB
 * core, unlink from the RCU-protected device list, tear down hardware
 * state and defer the final free to an RCU grace period.
 */
static void ocrdma_remove(struct ocrdma_dev *dev)
{
	/* first unregister with stack to stop all the active traffic
	 * of the registered clients.
	 */
	ib_unregister_device(&dev->ibdev);

	spin_lock(&ocrdma_devlist_lock);
	list_del_rcu(&dev->entry);
	spin_unlock(&ocrdma_devlist_lock);

	ocrdma_free_resources(dev);
	ocrdma_cleanup_hw(dev);

	/* free the device struct only after RCU readers have finished */
	call_rcu(&dev->rcu, ocrdma_remove_free);
}
476 
477 static int ocrdma_open(struct ocrdma_dev *dev)
478 {
479 	struct ib_event port_event;
480 
481 	port_event.event = IB_EVENT_PORT_ACTIVE;
482 	port_event.element.port_num = 1;
483 	port_event.device = &dev->ibdev;
484 	ib_dispatch_event(&port_event);
485 	return 0;
486 }
487 
/* Link-down handler: force every active QP into the ERROR state,
 * raise IB_EVENT_QP_FATAL per QP, then signal IB_EVENT_PORT_ERR.
 * Always returns 0.
 */
static int ocrdma_close(struct ocrdma_dev *dev)
{
	int i;
	struct ocrdma_qp *qp, **cur_qp;
	struct ib_event err_event;
	struct ib_qp_attr attrs;
	int attr_mask = IB_QP_STATE;

	attrs.qp_state = IB_QPS_ERR;
	/* dev_lock serializes against concurrent QP creation/teardown */
	mutex_lock(&dev->dev_lock);
	if (dev->qp_tbl) {
		cur_qp = dev->qp_tbl;
		for (i = 0; i < OCRDMA_MAX_QP; i++) {
			qp = cur_qp[i];
			if (qp) {
				/* change the QP state to ERROR */
				_ocrdma_modify_qp(&qp->ibqp, &attrs, attr_mask);

				err_event.event = IB_EVENT_QP_FATAL;
				err_event.element.qp = &qp->ibqp;
				err_event.device = &dev->ibdev;
				ib_dispatch_event(&err_event);
			}
		}
	}
	mutex_unlock(&dev->dev_lock);

	/* finally report the port itself as down */
	err_event.event = IB_EVENT_PORT_ERR;
	err_event.element.port_num = 1;
	err_event.device = &dev->ibdev;
	ib_dispatch_event(&err_event);
	return 0;
}
521 
522 /* event handling via NIC driver ensures that all the NIC specific
523  * initialization done before RoCE driver notifies
524  * event to stack.
525  */
526 static void ocrdma_event_handler(struct ocrdma_dev *dev, u32 event)
527 {
528 	switch (event) {
529 	case BE_DEV_UP:
530 		ocrdma_open(dev);
531 		break;
532 	case BE_DEV_DOWN:
533 		ocrdma_close(dev);
534 		break;
535 	}
536 }
537 
/* Callbacks registered with the be2net NIC driver; it invokes these
 * on RoCE function discovery, removal and link state changes.
 */
static struct ocrdma_driver ocrdma_drv = {
	.name			= "ocrdma_driver",
	.add			= ocrdma_add,
	.remove			= ocrdma_remove,
	.state_change_handler	= ocrdma_event_handler,
};
544 
/* Wrapper that is safe to call unconditionally: compiles to an empty
 * function when IPv6 support is not built in.
 */
static void ocrdma_unregister_inet6addr_notifier(void)
{
#if IS_ENABLED(CONFIG_IPV6)
	unregister_inet6addr_notifier(&ocrdma_inet6addr_notifier);
#endif
}
551 
552 static int __init ocrdma_init_module(void)
553 {
554 	int status;
555 
556 #if IS_ENABLED(CONFIG_IPV6)
557 	status = register_inet6addr_notifier(&ocrdma_inet6addr_notifier);
558 	if (status)
559 		return status;
560 #endif
561 
562 	status = be_roce_register_driver(&ocrdma_drv);
563 	if (status)
564 		ocrdma_unregister_inet6addr_notifier();
565 
566 	return status;
567 }
568 
/* Module exit: detach from the NIC driver first (so no new devices
 * can arrive), then drop the inet6 address notifier.
 */
static void __exit ocrdma_exit_module(void)
{
	be_roce_unregister_driver(&ocrdma_drv);
	ocrdma_unregister_inet6addr_notifier();
}
574 
575 module_init(ocrdma_init_module);
576 module_exit(ocrdma_exit_module);
577