1 /*******************************************************************
2  * This file is part of the Emulex RoCE Device Driver for          *
3  * RoCE (RDMA over Converged Ethernet) adapters.                   *
4  * Copyright (C) 2008-2012 Emulex. All rights reserved.            *
5  * EMULEX and SLI are trademarks of Emulex.                        *
6  * www.emulex.com                                                  *
7  *                                                                 *
8  * This program is free software; you can redistribute it and/or   *
9  * modify it under the terms of version 2 of the GNU General       *
10  * Public License as published by the Free Software Foundation.    *
11  * This program is distributed in the hope that it will be useful. *
12  * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND          *
13  * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY,  *
14  * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE      *
15  * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
16  * TO BE LEGALLY INVALID.  See the GNU General Public License for  *
17  * more details, a copy of which can be found in the file COPYING  *
18  * included with this package.                                     *
19  *
20  * Contact Information:
21  * linux-drivers@emulex.com
22  *
23  * Emulex
24  * 3333 Susan Street
25  * Costa Mesa, CA 92626
26  *******************************************************************/
27 
28 #include <linux/module.h>
29 #include <linux/idr.h>
30 #include <rdma/ib_verbs.h>
31 #include <rdma/ib_user_verbs.h>
32 #include <rdma/ib_addr.h>
33 
34 #include <linux/netdevice.h>
35 #include <net/addrconf.h>
36 
37 #include "ocrdma.h"
38 #include "ocrdma_verbs.h"
39 #include "ocrdma_ah.h"
40 #include "be_roce.h"
41 #include "ocrdma_hw.h"
42 
43 MODULE_VERSION(OCRDMA_ROCE_DEV_VERSION);
44 MODULE_DESCRIPTION("Emulex RoCE HCA Driver");
45 MODULE_AUTHOR("Emulex Corporation");
46 MODULE_LICENSE("GPL");
47 
/* Global list of all ocrdma devices. Writers take ocrdma_devlist_lock;
 * readers traverse under RCU (see ocrdma_inet6addr_event()).
 */
static LIST_HEAD(ocrdma_dev_list);
static DEFINE_SPINLOCK(ocrdma_devlist_lock);
/* Hands out unique per-device instance numbers (stored in dev->id). */
static DEFINE_IDR(ocrdma_dev_id);

/* All-zero GID used to recognize free slots in a device's SGID table. */
static union ib_gid ocrdma_zero_sgid;
53 
54 static int ocrdma_get_instance(void)
55 {
56 	int instance = 0;
57 
58 	/* Assign an unused number */
59 	if (!idr_pre_get(&ocrdma_dev_id, GFP_KERNEL))
60 		return -1;
61 	if (idr_get_new(&ocrdma_dev_id, NULL, &instance))
62 		return -1;
63 	return instance;
64 }
65 
66 void ocrdma_get_guid(struct ocrdma_dev *dev, u8 *guid)
67 {
68 	u8 mac_addr[6];
69 
70 	memcpy(&mac_addr[0], &dev->nic_info.mac_addr[0], ETH_ALEN);
71 	guid[0] = mac_addr[0] ^ 2;
72 	guid[1] = mac_addr[1];
73 	guid[2] = mac_addr[2];
74 	guid[3] = 0xff;
75 	guid[4] = 0xfe;
76 	guid[5] = mac_addr[3];
77 	guid[6] = mac_addr[4];
78 	guid[7] = mac_addr[5];
79 }
80 
81 static void ocrdma_build_sgid_mac(union ib_gid *sgid, unsigned char *mac_addr,
82 				  bool is_vlan, u16 vlan_id)
83 {
84 	sgid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
85 	sgid->raw[8] = mac_addr[0] ^ 2;
86 	sgid->raw[9] = mac_addr[1];
87 	sgid->raw[10] = mac_addr[2];
88 	if (is_vlan) {
89 		sgid->raw[11] = vlan_id >> 8;
90 		sgid->raw[12] = vlan_id & 0xff;
91 	} else {
92 		sgid->raw[11] = 0xff;
93 		sgid->raw[12] = 0xfe;
94 	}
95 	sgid->raw[13] = mac_addr[3];
96 	sgid->raw[14] = mac_addr[4];
97 	sgid->raw[15] = mac_addr[5];
98 }
99 
100 static bool ocrdma_add_sgid(struct ocrdma_dev *dev, unsigned char *mac_addr,
101 			    bool is_vlan, u16 vlan_id)
102 {
103 	int i;
104 	union ib_gid new_sgid;
105 	unsigned long flags;
106 
107 	memset(&ocrdma_zero_sgid, 0, sizeof(union ib_gid));
108 
109 	ocrdma_build_sgid_mac(&new_sgid, mac_addr, is_vlan, vlan_id);
110 
111 	spin_lock_irqsave(&dev->sgid_lock, flags);
112 	for (i = 0; i < OCRDMA_MAX_SGID; i++) {
113 		if (!memcmp(&dev->sgid_tbl[i], &ocrdma_zero_sgid,
114 			    sizeof(union ib_gid))) {
115 			/* found free entry */
116 			memcpy(&dev->sgid_tbl[i], &new_sgid,
117 			       sizeof(union ib_gid));
118 			spin_unlock_irqrestore(&dev->sgid_lock, flags);
119 			return true;
120 		} else if (!memcmp(&dev->sgid_tbl[i], &new_sgid,
121 				   sizeof(union ib_gid))) {
122 			/* entry already present, no addition is required. */
123 			spin_unlock_irqrestore(&dev->sgid_lock, flags);
124 			return false;
125 		}
126 	}
127 	spin_unlock_irqrestore(&dev->sgid_lock, flags);
128 	return false;
129 }
130 
131 static bool ocrdma_del_sgid(struct ocrdma_dev *dev, unsigned char *mac_addr,
132 			    bool is_vlan, u16 vlan_id)
133 {
134 	int found = false;
135 	int i;
136 	union ib_gid sgid;
137 	unsigned long flags;
138 
139 	ocrdma_build_sgid_mac(&sgid, mac_addr, is_vlan, vlan_id);
140 
141 	spin_lock_irqsave(&dev->sgid_lock, flags);
142 	/* first is default sgid, which cannot be deleted. */
143 	for (i = 1; i < OCRDMA_MAX_SGID; i++) {
144 		if (!memcmp(&dev->sgid_tbl[i], &sgid, sizeof(union ib_gid))) {
145 			/* found matching entry */
146 			memset(&dev->sgid_tbl[i], 0, sizeof(union ib_gid));
147 			found = true;
148 			break;
149 		}
150 	}
151 	spin_unlock_irqrestore(&dev->sgid_lock, flags);
152 	return found;
153 }
154 
155 static void ocrdma_add_default_sgid(struct ocrdma_dev *dev)
156 {
157 	/* GID Index 0 - Invariant manufacturer-assigned EUI-64 */
158 	union ib_gid *sgid = &dev->sgid_tbl[0];
159 
160 	sgid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
161 	ocrdma_get_guid(dev, &sgid->raw[8]);
162 }
163 
164 #if IS_ENABLED(CONFIG_VLAN_8021Q)
165 static void ocrdma_add_vlan_sgids(struct ocrdma_dev *dev)
166 {
167 	struct net_device *netdev, *tmp;
168 	u16 vlan_id;
169 	bool is_vlan;
170 
171 	netdev = dev->nic_info.netdev;
172 
173 	rcu_read_lock();
174 	for_each_netdev_rcu(&init_net, tmp) {
175 		if (netdev == tmp || vlan_dev_real_dev(tmp) == netdev) {
176 			if (!netif_running(tmp) || !netif_oper_up(tmp))
177 				continue;
178 			if (netdev != tmp) {
179 				vlan_id = vlan_dev_vlan_id(tmp);
180 				is_vlan = true;
181 			} else {
182 				is_vlan = false;
183 				vlan_id = 0;
184 				tmp = netdev;
185 			}
186 			ocrdma_add_sgid(dev, tmp->dev_addr, is_vlan, vlan_id);
187 		}
188 	}
189 	rcu_read_unlock();
190 }
191 #else
192 static void ocrdma_add_vlan_sgids(struct ocrdma_dev *dev)
193 {
194 
195 }
196 #endif /* VLAN */
197 
/* Populate the device SGID table: index 0 gets the EUI-64 based default
 * GID, then one GID per matching (VLAN) interface. Always returns 0; the
 * int return keeps the caller's uniform status-check pattern.
 */
static int ocrdma_build_sgid_tbl(struct ocrdma_dev *dev)
{
	ocrdma_add_default_sgid(dev);
	ocrdma_add_vlan_sgids(dev);
	return 0;
}
204 
205 #if IS_ENABLED(CONFIG_IPV6)
206 
/*
 * inet6addr notifier callback: keep the device SGID table in sync with the
 * IPv6 link-local addresses configured on our netdev (or on VLAN devices
 * stacked on it). Dispatches IB_EVENT_GID_CHANGE when the table changes.
 *
 * Returns NOTIFY_OK when the event concerned one of our devices and was a
 * link-local address, NOTIFY_DONE otherwise.
 */
static int ocrdma_inet6addr_event(struct notifier_block *notifier,
				  unsigned long event, void *ptr)
{
	struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr;
	struct net_device *netdev = ifa->idev->dev;
	struct ib_event gid_event;
	struct ocrdma_dev *dev;
	bool found = false;
	bool updated = false;
	bool is_vlan = false;
	u16 vid = 0;

	/* For a VLAN device, record the tag and switch to the real device. */
	is_vlan = netdev->priv_flags & IFF_802_1Q_VLAN;
	if (is_vlan) {
		vid = vlan_dev_vlan_id(netdev);
		netdev = vlan_dev_real_dev(netdev);
	}

	/* Look up the ocrdma device backing this netdev. */
	rcu_read_lock();
	list_for_each_entry_rcu(dev, &ocrdma_dev_list, entry) {
		if (dev->nic_info.netdev == netdev) {
			found = true;
			break;
		}
	}
	rcu_read_unlock();
	/* NOTE(review): dev is used below after rcu_read_unlock() without a
	 * reference being taken; presumably device removal cannot race with
	 * this notifier — worth confirming.
	 */

	if (!found)
		return NOTIFY_DONE;
	/* Only link-local (fe80::) addresses map to MAC-derived SGIDs. */
	if (!rdma_link_local_addr((struct in6_addr *)&ifa->addr))
		return NOTIFY_DONE;

	mutex_lock(&dev->dev_lock);
	switch (event) {
	case NETDEV_UP:
		updated = ocrdma_add_sgid(dev, netdev->dev_addr, is_vlan, vid);
		break;
	case NETDEV_DOWN:
		updated = ocrdma_del_sgid(dev, netdev->dev_addr, is_vlan, vid);
		break;
	default:
		break;
	}
	if (updated) {
		/* GID table updated, notify the consumers about it */
		gid_event.device = &dev->ibdev;
		gid_event.element.port_num = 1;
		gid_event.event = IB_EVENT_GID_CHANGE;
		ib_dispatch_event(&gid_event);
	}
	mutex_unlock(&dev->dev_lock);
	return NOTIFY_OK;
}
260 
/* Registered in ocrdma_init_module() when CONFIG_IPV6 is enabled. */
static struct notifier_block ocrdma_inet6addr_notifier = {
	.notifier_call = ocrdma_inet6addr_event
};
264 
265 #endif /* IPV6 and VLAN */
266 
/* All ocrdma ports are RoCE, i.e. Ethernet at the link layer. */
static enum rdma_link_layer ocrdma_link_layer(struct ib_device *device,
					      u8 port_num)
{
	return IB_LINK_LAYER_ETHERNET;
}
272 
273 static int ocrdma_register_device(struct ocrdma_dev *dev)
274 {
275 	strlcpy(dev->ibdev.name, "ocrdma%d", IB_DEVICE_NAME_MAX);
276 	ocrdma_get_guid(dev, (u8 *)&dev->ibdev.node_guid);
277 	memcpy(dev->ibdev.node_desc, OCRDMA_NODE_DESC,
278 	       sizeof(OCRDMA_NODE_DESC));
279 	dev->ibdev.owner = THIS_MODULE;
280 	dev->ibdev.uverbs_cmd_mask =
281 	    OCRDMA_UVERBS(GET_CONTEXT) |
282 	    OCRDMA_UVERBS(QUERY_DEVICE) |
283 	    OCRDMA_UVERBS(QUERY_PORT) |
284 	    OCRDMA_UVERBS(ALLOC_PD) |
285 	    OCRDMA_UVERBS(DEALLOC_PD) |
286 	    OCRDMA_UVERBS(REG_MR) |
287 	    OCRDMA_UVERBS(DEREG_MR) |
288 	    OCRDMA_UVERBS(CREATE_COMP_CHANNEL) |
289 	    OCRDMA_UVERBS(CREATE_CQ) |
290 	    OCRDMA_UVERBS(RESIZE_CQ) |
291 	    OCRDMA_UVERBS(DESTROY_CQ) |
292 	    OCRDMA_UVERBS(REQ_NOTIFY_CQ) |
293 	    OCRDMA_UVERBS(CREATE_QP) |
294 	    OCRDMA_UVERBS(MODIFY_QP) |
295 	    OCRDMA_UVERBS(QUERY_QP) |
296 	    OCRDMA_UVERBS(DESTROY_QP) |
297 	    OCRDMA_UVERBS(POLL_CQ) |
298 	    OCRDMA_UVERBS(POST_SEND) |
299 	    OCRDMA_UVERBS(POST_RECV);
300 
301 	dev->ibdev.uverbs_cmd_mask |=
302 	    OCRDMA_UVERBS(CREATE_AH) |
303 	     OCRDMA_UVERBS(MODIFY_AH) |
304 	     OCRDMA_UVERBS(QUERY_AH) |
305 	     OCRDMA_UVERBS(DESTROY_AH);
306 
307 	dev->ibdev.node_type = RDMA_NODE_IB_CA;
308 	dev->ibdev.phys_port_cnt = 1;
309 	dev->ibdev.num_comp_vectors = 1;
310 
311 	/* mandatory verbs. */
312 	dev->ibdev.query_device = ocrdma_query_device;
313 	dev->ibdev.query_port = ocrdma_query_port;
314 	dev->ibdev.modify_port = ocrdma_modify_port;
315 	dev->ibdev.query_gid = ocrdma_query_gid;
316 	dev->ibdev.get_link_layer = ocrdma_link_layer;
317 	dev->ibdev.alloc_pd = ocrdma_alloc_pd;
318 	dev->ibdev.dealloc_pd = ocrdma_dealloc_pd;
319 
320 	dev->ibdev.create_cq = ocrdma_create_cq;
321 	dev->ibdev.destroy_cq = ocrdma_destroy_cq;
322 	dev->ibdev.resize_cq = ocrdma_resize_cq;
323 
324 	dev->ibdev.create_qp = ocrdma_create_qp;
325 	dev->ibdev.modify_qp = ocrdma_modify_qp;
326 	dev->ibdev.query_qp = ocrdma_query_qp;
327 	dev->ibdev.destroy_qp = ocrdma_destroy_qp;
328 
329 	dev->ibdev.query_pkey = ocrdma_query_pkey;
330 	dev->ibdev.create_ah = ocrdma_create_ah;
331 	dev->ibdev.destroy_ah = ocrdma_destroy_ah;
332 	dev->ibdev.query_ah = ocrdma_query_ah;
333 	dev->ibdev.modify_ah = ocrdma_modify_ah;
334 
335 	dev->ibdev.poll_cq = ocrdma_poll_cq;
336 	dev->ibdev.post_send = ocrdma_post_send;
337 	dev->ibdev.post_recv = ocrdma_post_recv;
338 	dev->ibdev.req_notify_cq = ocrdma_arm_cq;
339 
340 	dev->ibdev.get_dma_mr = ocrdma_get_dma_mr;
341 	dev->ibdev.dereg_mr = ocrdma_dereg_mr;
342 	dev->ibdev.reg_user_mr = ocrdma_reg_user_mr;
343 
344 	/* mandatory to support user space verbs consumer. */
345 	dev->ibdev.alloc_ucontext = ocrdma_alloc_ucontext;
346 	dev->ibdev.dealloc_ucontext = ocrdma_dealloc_ucontext;
347 	dev->ibdev.mmap = ocrdma_mmap;
348 	dev->ibdev.dma_device = &dev->nic_info.pdev->dev;
349 
350 	dev->ibdev.process_mad = ocrdma_process_mad;
351 
352 	if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) {
353 		dev->ibdev.uverbs_cmd_mask |=
354 		     OCRDMA_UVERBS(CREATE_SRQ) |
355 		     OCRDMA_UVERBS(MODIFY_SRQ) |
356 		     OCRDMA_UVERBS(QUERY_SRQ) |
357 		     OCRDMA_UVERBS(DESTROY_SRQ) |
358 		     OCRDMA_UVERBS(POST_SRQ_RECV);
359 
360 		dev->ibdev.create_srq = ocrdma_create_srq;
361 		dev->ibdev.modify_srq = ocrdma_modify_srq;
362 		dev->ibdev.query_srq = ocrdma_query_srq;
363 		dev->ibdev.destroy_srq = ocrdma_destroy_srq;
364 		dev->ibdev.post_srq_recv = ocrdma_post_srq_recv;
365 	}
366 	return ib_register_device(&dev->ibdev, NULL);
367 }
368 
369 static int ocrdma_alloc_resources(struct ocrdma_dev *dev)
370 {
371 	mutex_init(&dev->dev_lock);
372 	dev->sgid_tbl = kzalloc(sizeof(union ib_gid) *
373 				OCRDMA_MAX_SGID, GFP_KERNEL);
374 	if (!dev->sgid_tbl)
375 		goto alloc_err;
376 	spin_lock_init(&dev->sgid_lock);
377 
378 	dev->cq_tbl = kzalloc(sizeof(struct ocrdma_cq *) *
379 			      OCRDMA_MAX_CQ, GFP_KERNEL);
380 	if (!dev->cq_tbl)
381 		goto alloc_err;
382 
383 	if (dev->attr.max_qp) {
384 		dev->qp_tbl = kzalloc(sizeof(struct ocrdma_qp *) *
385 				      OCRDMA_MAX_QP, GFP_KERNEL);
386 		if (!dev->qp_tbl)
387 			goto alloc_err;
388 	}
389 	spin_lock_init(&dev->av_tbl.lock);
390 	spin_lock_init(&dev->flush_q_lock);
391 	return 0;
392 alloc_err:
393 	ocrdma_err("%s(%d) error.\n", __func__, dev->id);
394 	return -ENOMEM;
395 }
396 
397 static void ocrdma_free_resources(struct ocrdma_dev *dev)
398 {
399 	kfree(dev->qp_tbl);
400 	kfree(dev->cq_tbl);
401 	kfree(dev->sgid_tbl);
402 }
403 
/*
 * be_roce "add" callback: allocate and bring up an ocrdma device for the
 * NIC function described by @dev_info.
 *
 * Returns the new device, or NULL on failure. Cleanup uses a goto ladder:
 * later failures fall through the earlier labels to unwind in reverse
 * order of acquisition.
 */
static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info)
{
	int status = 0;
	struct ocrdma_dev *dev;

	dev = (struct ocrdma_dev *)ib_alloc_device(sizeof(struct ocrdma_dev));
	if (!dev) {
		ocrdma_err("Unable to allocate ib device\n");
		return NULL;
	}
	dev->mbx_cmd = kzalloc(sizeof(struct ocrdma_mqe_emb_cmd), GFP_KERNEL);
	if (!dev->mbx_cmd)
		goto idr_err;

	memcpy(&dev->nic_info, dev_info, sizeof(*dev_info));
	/* NOTE(review): the "< 0" check assumes dev->id is a signed type —
	 * verify against the field's declaration in ocrdma.h.
	 */
	dev->id = ocrdma_get_instance();
	if (dev->id < 0)
		goto idr_err;

	status = ocrdma_init_hw(dev);
	if (status)
		goto init_err;

	status = ocrdma_alloc_resources(dev);
	if (status)
		goto alloc_err;

	status = ocrdma_build_sgid_tbl(dev);
	if (status)
		goto alloc_err;

	status = ocrdma_register_device(dev);
	if (status)
		goto alloc_err;

	/* Publish on the global list; readers traverse it under RCU. */
	spin_lock(&ocrdma_devlist_lock);
	list_add_tail_rcu(&dev->entry, &ocrdma_dev_list);
	spin_unlock(&ocrdma_devlist_lock);
	return dev;

alloc_err:
	ocrdma_free_resources(dev);
	ocrdma_cleanup_hw(dev);
init_err:
	idr_remove(&ocrdma_dev_id, dev->id);
idr_err:
	kfree(dev->mbx_cmd);
	ib_dealloc_device(&dev->ibdev);
	ocrdma_err("%s() leaving. ret=%d\n", __func__, status);
	return NULL;
}
455 
/* RCU callback scheduled by ocrdma_remove(): releases all device state
 * once every RCU reader of the device list has finished.
 */
static void ocrdma_remove_free(struct rcu_head *rcu)
{
	struct ocrdma_dev *dev = container_of(rcu, struct ocrdma_dev, rcu);

	ocrdma_free_resources(dev);
	ocrdma_cleanup_hw(dev);

	idr_remove(&ocrdma_dev_id, dev->id);
	kfree(dev->mbx_cmd);
	ib_dealloc_device(&dev->ibdev);
}
467 
/* be_roce "remove" callback: unregister from the RDMA stack, unpublish the
 * device from the global list, then free it after an RCU grace period.
 */
static void ocrdma_remove(struct ocrdma_dev *dev)
{
	/* first unregister with stack to stop all the active traffic
	 * of the registered clients.
	 */
	ib_unregister_device(&dev->ibdev);

	spin_lock(&ocrdma_devlist_lock);
	list_del_rcu(&dev->entry);
	spin_unlock(&ocrdma_devlist_lock);
	/* Deferred teardown so RCU list walkers never see freed memory. */
	call_rcu(&dev->rcu, ocrdma_remove_free);
}
480 
481 static int ocrdma_open(struct ocrdma_dev *dev)
482 {
483 	struct ib_event port_event;
484 
485 	port_event.event = IB_EVENT_PORT_ACTIVE;
486 	port_event.element.port_num = 1;
487 	port_event.device = &dev->ibdev;
488 	ib_dispatch_event(&port_event);
489 	return 0;
490 }
491 
/* Port-down handling for BE_DEV_DOWN: move every active QP to the ERROR
 * state and raise IB_EVENT_QP_FATAL for each, then announce
 * IB_EVENT_PORT_ERR on port 1. Always returns 0.
 */
static int ocrdma_close(struct ocrdma_dev *dev)
{
	int i;
	struct ocrdma_qp *qp, **cur_qp;
	struct ib_event err_event;
	struct ib_qp_attr attrs;
	int attr_mask = IB_QP_STATE;

	attrs.qp_state = IB_QPS_ERR;
	mutex_lock(&dev->dev_lock);
	if (dev->qp_tbl) {
		cur_qp = dev->qp_tbl;
		for (i = 0; i < OCRDMA_MAX_QP; i++) {
			qp = cur_qp[i];
			if (qp) {
				/* change the QP state to ERROR */
				_ocrdma_modify_qp(&qp->ibqp, &attrs, attr_mask);

				err_event.event = IB_EVENT_QP_FATAL;
				err_event.element.qp = &qp->ibqp;
				err_event.device = &dev->ibdev;
				ib_dispatch_event(&err_event);
			}
		}
	}
	mutex_unlock(&dev->dev_lock);

	/* Finally tell consumers the port itself has gone down. */
	err_event.event = IB_EVENT_PORT_ERR;
	err_event.element.port_num = 1;
	err_event.device = &dev->ibdev;
	ib_dispatch_event(&err_event);
	return 0;
}
525 
526 /* event handling via NIC driver ensures that all the NIC specific
527  * initialization done before RoCE driver notifies
528  * event to stack.
529  */
530 static void ocrdma_event_handler(struct ocrdma_dev *dev, u32 event)
531 {
532 	switch (event) {
533 	case BE_DEV_UP:
534 		ocrdma_open(dev);
535 		break;
536 	case BE_DEV_DOWN:
537 		ocrdma_close(dev);
538 		break;
539 	};
540 }
541 
/* Callbacks registered with the be2net NIC driver: it invokes add/remove
 * as RoCE-capable functions appear/disappear, and state_change_handler on
 * device state transitions (BE_DEV_UP / BE_DEV_DOWN).
 */
static struct ocrdma_driver ocrdma_drv = {
	.name			= "ocrdma_driver",
	.add			= ocrdma_add,
	.remove			= ocrdma_remove,
	.state_change_handler	= ocrdma_event_handler,
};
548 
/* Helper so callers need not repeat the CONFIG_IPV6 conditional; a no-op
 * when IPv6 support is not built in.
 */
static void ocrdma_unregister_inet6addr_notifier(void)
{
#if IS_ENABLED(CONFIG_IPV6)
	unregister_inet6addr_notifier(&ocrdma_inet6addr_notifier);
#endif
}
555 
556 static int __init ocrdma_init_module(void)
557 {
558 	int status;
559 
560 #if IS_ENABLED(CONFIG_IPV6)
561 	status = register_inet6addr_notifier(&ocrdma_inet6addr_notifier);
562 	if (status)
563 		return status;
564 #endif
565 
566 	status = be_roce_register_driver(&ocrdma_drv);
567 	if (status)
568 		ocrdma_unregister_inet6addr_notifier();
569 
570 	return status;
571 }
572 
/* Module unload: detach from the NIC driver first (which tears down all
 * devices via ocrdma_remove), then drop the IPv6 address notifier.
 */
static void __exit ocrdma_exit_module(void)
{
	be_roce_unregister_driver(&ocrdma_drv);
	ocrdma_unregister_inet6addr_notifier();
}
578 
579 module_init(ocrdma_init_module);
580 module_exit(ocrdma_exit_module);
581