1 /*
2  * Copyright (c) 2012-2016 VMware, Inc.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of EITHER the GNU General Public License
6  * version 2 as published by the Free Software Foundation or the BSD
7  * 2-Clause License. This program is distributed in the hope that it
8  * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED
9  * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
10  * See the GNU General Public License version 2 for more details at
11  * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program available in the file COPYING in the main
15  * directory of this source tree.
16  *
17  * The BSD 2-Clause License
18  *
19  *     Redistribution and use in source and binary forms, with or
20  *     without modification, are permitted provided that the following
21  *     conditions are met:
22  *
23  *      - Redistributions of source code must retain the above
24  *        copyright notice, this list of conditions and the following
25  *        disclaimer.
26  *
27  *      - Redistributions in binary form must reproduce the above
28  *        copyright notice, this list of conditions and the following
29  *        disclaimer in the documentation and/or other materials
30  *        provided with the distribution.
31  *
32  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
33  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
34  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
35  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
36  * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
37  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
38  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
39  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
40  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
41  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
42  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
43  * OF THE POSSIBILITY OF SUCH DAMAGE.
44  */
45 
46 #include <linux/errno.h>
47 #include <linux/inetdevice.h>
48 #include <linux/init.h>
49 #include <linux/module.h>
50 #include <linux/slab.h>
51 #include <rdma/ib_addr.h>
52 #include <rdma/ib_smi.h>
53 #include <rdma/ib_user_verbs.h>
54 #include <net/addrconf.h>
55 
56 #include "pvrdma.h"
57 
/* Driver name; passed as the name for PCI regions and requested IRQs. */
#define DRV_NAME	"vmw_pvrdma"
/* Driver version string reported through the hca_type sysfs attribute. */
#define DRV_VERSION	"1.0.0.0-k"

/* Serializes access to pvrdma_device_list. */
static DEFINE_MUTEX(pvrdma_device_list_lock);
/* All pvrdma devices, linked through pvrdma_dev.device_link. */
static LIST_HEAD(pvrdma_device_list);
/* Workqueue on which deferred netdevice notifier events are queued. */
static struct workqueue_struct *event_wq;
64 
/*
 * Forward declarations: pvrdma_register_device() installs these GID
 * callbacks in the ib_device before their definitions appear in this file.
 */
static int pvrdma_add_gid(struct ib_device *ibdev,
			  u8 port_num,
			  unsigned int index,
			  const union ib_gid *gid,
			  const struct ib_gid_attr *attr,
			  void **context);
static int pvrdma_del_gid(struct ib_device *ibdev,
			  u8 port_num,
			  unsigned int index,
			  void **context);
75 
76 
77 static ssize_t show_hca(struct device *device, struct device_attribute *attr,
78 			char *buf)
79 {
80 	return sprintf(buf, "VMW_PVRDMA-%s\n", DRV_VERSION);
81 }
82 
83 static ssize_t show_rev(struct device *device, struct device_attribute *attr,
84 			char *buf)
85 {
86 	return sprintf(buf, "%d\n", PVRDMA_REV_ID);
87 }
88 
89 static ssize_t show_board(struct device *device, struct device_attribute *attr,
90 			  char *buf)
91 {
92 	return sprintf(buf, "%d\n", PVRDMA_BOARD_ID);
93 }
94 
/* Device attributes with a show callback only (no store callback). */
static DEVICE_ATTR(hw_rev,   S_IRUGO, show_rev,	   NULL);
static DEVICE_ATTR(hca_type, S_IRUGO, show_hca,	   NULL);
static DEVICE_ATTR(board_id, S_IRUGO, show_board,  NULL);

/*
 * Attributes that pvrdma_register_device() creates on the IB device with
 * device_create_file() once ib_register_device() has succeeded.
 */
static struct device_attribute *pvrdma_class_attributes[] = {
	&dev_attr_hw_rev,
	&dev_attr_hca_type,
	&dev_attr_board_id
};
104 
105 static void pvrdma_get_fw_ver_str(struct ib_device *device, char *str,
106 				  size_t str_len)
107 {
108 	struct pvrdma_dev *dev =
109 		container_of(device, struct pvrdma_dev, ib_dev);
110 	snprintf(str, str_len, "%d.%d.%d\n",
111 		 (int) (dev->dsr->caps.fw_ver >> 32),
112 		 (int) (dev->dsr->caps.fw_ver >> 16) & 0xffff,
113 		 (int) dev->dsr->caps.fw_ver & 0xffff);
114 }
115 
116 static int pvrdma_init_device(struct pvrdma_dev *dev)
117 {
118 	/*  Initialize some device related stuff */
119 	spin_lock_init(&dev->cmd_lock);
120 	sema_init(&dev->cmd_sema, 1);
121 	atomic_set(&dev->num_qps, 0);
122 	atomic_set(&dev->num_cqs, 0);
123 	atomic_set(&dev->num_pds, 0);
124 	atomic_set(&dev->num_ahs, 0);
125 
126 	return 0;
127 }
128 
129 static int pvrdma_port_immutable(struct ib_device *ibdev, u8 port_num,
130 				 struct ib_port_immutable *immutable)
131 {
132 	struct ib_port_attr attr;
133 	int err;
134 
135 	err = pvrdma_query_port(ibdev, port_num, &attr);
136 	if (err)
137 		return err;
138 
139 	immutable->pkey_tbl_len = attr.pkey_tbl_len;
140 	immutable->gid_tbl_len = attr.gid_tbl_len;
141 	immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
142 	immutable->max_mad_size = IB_MGMT_MAD_SIZE;
143 	return 0;
144 }
145 
146 static struct net_device *pvrdma_get_netdev(struct ib_device *ibdev,
147 					    u8 port_num)
148 {
149 	struct net_device *netdev;
150 	struct pvrdma_dev *dev = to_vdev(ibdev);
151 
152 	if (port_num != 1)
153 		return NULL;
154 
155 	rcu_read_lock();
156 	netdev = dev->netdev;
157 	if (netdev)
158 		dev_hold(netdev);
159 	rcu_read_unlock();
160 
161 	return netdev;
162 }
163 
164 static int pvrdma_register_device(struct pvrdma_dev *dev)
165 {
166 	int ret = -1;
167 	int i = 0;
168 
169 	strlcpy(dev->ib_dev.name, "vmw_pvrdma%d", IB_DEVICE_NAME_MAX);
170 	dev->ib_dev.node_guid = dev->dsr->caps.node_guid;
171 	dev->sys_image_guid = dev->dsr->caps.sys_image_guid;
172 	dev->flags = 0;
173 	dev->ib_dev.owner = THIS_MODULE;
174 	dev->ib_dev.num_comp_vectors = 1;
175 	dev->ib_dev.dma_device = &dev->pdev->dev;
176 	dev->ib_dev.uverbs_abi_ver = PVRDMA_UVERBS_ABI_VERSION;
177 	dev->ib_dev.uverbs_cmd_mask =
178 		(1ull << IB_USER_VERBS_CMD_GET_CONTEXT)		|
179 		(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)	|
180 		(1ull << IB_USER_VERBS_CMD_QUERY_PORT)		|
181 		(1ull << IB_USER_VERBS_CMD_ALLOC_PD)		|
182 		(1ull << IB_USER_VERBS_CMD_DEALLOC_PD)		|
183 		(1ull << IB_USER_VERBS_CMD_REG_MR)		|
184 		(1ull << IB_USER_VERBS_CMD_DEREG_MR)		|
185 		(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL)	|
186 		(1ull << IB_USER_VERBS_CMD_CREATE_CQ)		|
187 		(1ull << IB_USER_VERBS_CMD_POLL_CQ)		|
188 		(1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ)	|
189 		(1ull << IB_USER_VERBS_CMD_DESTROY_CQ)		|
190 		(1ull << IB_USER_VERBS_CMD_CREATE_QP)		|
191 		(1ull << IB_USER_VERBS_CMD_MODIFY_QP)		|
192 		(1ull << IB_USER_VERBS_CMD_QUERY_QP)		|
193 		(1ull << IB_USER_VERBS_CMD_DESTROY_QP)		|
194 		(1ull << IB_USER_VERBS_CMD_POST_SEND)		|
195 		(1ull << IB_USER_VERBS_CMD_POST_RECV)		|
196 		(1ull << IB_USER_VERBS_CMD_CREATE_AH)		|
197 		(1ull << IB_USER_VERBS_CMD_DESTROY_AH);
198 
199 	dev->ib_dev.node_type = RDMA_NODE_IB_CA;
200 	dev->ib_dev.phys_port_cnt = dev->dsr->caps.phys_port_cnt;
201 
202 	dev->ib_dev.query_device = pvrdma_query_device;
203 	dev->ib_dev.query_port = pvrdma_query_port;
204 	dev->ib_dev.query_gid = pvrdma_query_gid;
205 	dev->ib_dev.query_pkey = pvrdma_query_pkey;
206 	dev->ib_dev.modify_port	= pvrdma_modify_port;
207 	dev->ib_dev.alloc_ucontext = pvrdma_alloc_ucontext;
208 	dev->ib_dev.dealloc_ucontext = pvrdma_dealloc_ucontext;
209 	dev->ib_dev.mmap = pvrdma_mmap;
210 	dev->ib_dev.alloc_pd = pvrdma_alloc_pd;
211 	dev->ib_dev.dealloc_pd = pvrdma_dealloc_pd;
212 	dev->ib_dev.create_ah = pvrdma_create_ah;
213 	dev->ib_dev.destroy_ah = pvrdma_destroy_ah;
214 	dev->ib_dev.create_qp = pvrdma_create_qp;
215 	dev->ib_dev.modify_qp = pvrdma_modify_qp;
216 	dev->ib_dev.query_qp = pvrdma_query_qp;
217 	dev->ib_dev.destroy_qp = pvrdma_destroy_qp;
218 	dev->ib_dev.post_send = pvrdma_post_send;
219 	dev->ib_dev.post_recv = pvrdma_post_recv;
220 	dev->ib_dev.create_cq = pvrdma_create_cq;
221 	dev->ib_dev.modify_cq = pvrdma_modify_cq;
222 	dev->ib_dev.resize_cq = pvrdma_resize_cq;
223 	dev->ib_dev.destroy_cq = pvrdma_destroy_cq;
224 	dev->ib_dev.poll_cq = pvrdma_poll_cq;
225 	dev->ib_dev.req_notify_cq = pvrdma_req_notify_cq;
226 	dev->ib_dev.get_dma_mr = pvrdma_get_dma_mr;
227 	dev->ib_dev.reg_user_mr	= pvrdma_reg_user_mr;
228 	dev->ib_dev.dereg_mr = pvrdma_dereg_mr;
229 	dev->ib_dev.alloc_mr = pvrdma_alloc_mr;
230 	dev->ib_dev.map_mr_sg = pvrdma_map_mr_sg;
231 	dev->ib_dev.add_gid = pvrdma_add_gid;
232 	dev->ib_dev.del_gid = pvrdma_del_gid;
233 	dev->ib_dev.get_netdev = pvrdma_get_netdev;
234 	dev->ib_dev.get_port_immutable = pvrdma_port_immutable;
235 	dev->ib_dev.get_link_layer = pvrdma_port_link_layer;
236 	dev->ib_dev.get_dev_fw_str = pvrdma_get_fw_ver_str;
237 
238 	mutex_init(&dev->port_mutex);
239 	spin_lock_init(&dev->desc_lock);
240 
241 	dev->cq_tbl = kcalloc(dev->dsr->caps.max_cq, sizeof(void *),
242 			      GFP_KERNEL);
243 	if (!dev->cq_tbl)
244 		return ret;
245 	spin_lock_init(&dev->cq_tbl_lock);
246 
247 	dev->qp_tbl = kcalloc(dev->dsr->caps.max_qp, sizeof(void *),
248 			      GFP_KERNEL);
249 	if (!dev->qp_tbl)
250 		goto err_cq_free;
251 	spin_lock_init(&dev->qp_tbl_lock);
252 
253 	ret = ib_register_device(&dev->ib_dev, NULL);
254 	if (ret)
255 		goto err_qp_free;
256 
257 	for (i = 0; i < ARRAY_SIZE(pvrdma_class_attributes); ++i) {
258 		ret = device_create_file(&dev->ib_dev.dev,
259 					 pvrdma_class_attributes[i]);
260 		if (ret)
261 			goto err_class;
262 	}
263 
264 	dev->ib_active = true;
265 
266 	return 0;
267 
268 err_class:
269 	ib_unregister_device(&dev->ib_dev);
270 err_qp_free:
271 	kfree(dev->qp_tbl);
272 err_cq_free:
273 	kfree(dev->cq_tbl);
274 
275 	return ret;
276 }
277 
278 static irqreturn_t pvrdma_intr0_handler(int irq, void *dev_id)
279 {
280 	u32 icr = PVRDMA_INTR_CAUSE_RESPONSE;
281 	struct pvrdma_dev *dev = dev_id;
282 
283 	dev_dbg(&dev->pdev->dev, "interrupt 0 (response) handler\n");
284 
285 	if (dev->intr.type != PVRDMA_INTR_TYPE_MSIX) {
286 		/* Legacy intr */
287 		icr = pvrdma_read_reg(dev, PVRDMA_REG_ICR);
288 		if (icr == 0)
289 			return IRQ_NONE;
290 	}
291 
292 	if (icr == PVRDMA_INTR_CAUSE_RESPONSE)
293 		complete(&dev->cmd_done);
294 
295 	return IRQ_HANDLED;
296 }
297 
298 static void pvrdma_qp_event(struct pvrdma_dev *dev, u32 qpn, int type)
299 {
300 	struct pvrdma_qp *qp;
301 	unsigned long flags;
302 
303 	spin_lock_irqsave(&dev->qp_tbl_lock, flags);
304 	qp = dev->qp_tbl[qpn % dev->dsr->caps.max_qp];
305 	if (qp)
306 		atomic_inc(&qp->refcnt);
307 	spin_unlock_irqrestore(&dev->qp_tbl_lock, flags);
308 
309 	if (qp && qp->ibqp.event_handler) {
310 		struct ib_qp *ibqp = &qp->ibqp;
311 		struct ib_event e;
312 
313 		e.device = ibqp->device;
314 		e.element.qp = ibqp;
315 		e.event = type; /* 1:1 mapping for now. */
316 		ibqp->event_handler(&e, ibqp->qp_context);
317 	}
318 	if (qp) {
319 		atomic_dec(&qp->refcnt);
320 		if (atomic_read(&qp->refcnt) == 0)
321 			wake_up(&qp->wait);
322 	}
323 }
324 
325 static void pvrdma_cq_event(struct pvrdma_dev *dev, u32 cqn, int type)
326 {
327 	struct pvrdma_cq *cq;
328 	unsigned long flags;
329 
330 	spin_lock_irqsave(&dev->cq_tbl_lock, flags);
331 	cq = dev->cq_tbl[cqn % dev->dsr->caps.max_cq];
332 	if (cq)
333 		atomic_inc(&cq->refcnt);
334 	spin_unlock_irqrestore(&dev->cq_tbl_lock, flags);
335 
336 	if (cq && cq->ibcq.event_handler) {
337 		struct ib_cq *ibcq = &cq->ibcq;
338 		struct ib_event e;
339 
340 		e.device = ibcq->device;
341 		e.element.cq = ibcq;
342 		e.event = type; /* 1:1 mapping for now. */
343 		ibcq->event_handler(&e, ibcq->cq_context);
344 	}
345 	if (cq) {
346 		atomic_dec(&cq->refcnt);
347 		if (atomic_read(&cq->refcnt) == 0)
348 			wake_up(&cq->wait);
349 	}
350 }
351 
352 static void pvrdma_dispatch_event(struct pvrdma_dev *dev, int port,
353 				  enum ib_event_type event)
354 {
355 	struct ib_event ib_event;
356 
357 	memset(&ib_event, 0, sizeof(ib_event));
358 	ib_event.device = &dev->ib_dev;
359 	ib_event.element.port_num = port;
360 	ib_event.event = event;
361 	ib_dispatch_event(&ib_event);
362 }
363 
364 static void pvrdma_dev_event(struct pvrdma_dev *dev, u8 port, int type)
365 {
366 	if (port < 1 || port > dev->dsr->caps.phys_port_cnt) {
367 		dev_warn(&dev->pdev->dev, "event on port %d\n", port);
368 		return;
369 	}
370 
371 	pvrdma_dispatch_event(dev, port, type);
372 }
373 
374 static inline struct pvrdma_eqe *get_eqe(struct pvrdma_dev *dev, unsigned int i)
375 {
376 	return (struct pvrdma_eqe *)pvrdma_page_dir_get_ptr(
377 					&dev->async_pdir,
378 					PAGE_SIZE +
379 					sizeof(struct pvrdma_eqe) * i);
380 }
381 
382 static irqreturn_t pvrdma_intr1_handler(int irq, void *dev_id)
383 {
384 	struct pvrdma_dev *dev = dev_id;
385 	struct pvrdma_ring *ring = &dev->async_ring_state->rx;
386 	int ring_slots = (dev->dsr->async_ring_pages.num_pages - 1) *
387 			 PAGE_SIZE / sizeof(struct pvrdma_eqe);
388 	unsigned int head;
389 
390 	dev_dbg(&dev->pdev->dev, "interrupt 1 (async event) handler\n");
391 
392 	/*
393 	 * Don't process events until the IB device is registered. Otherwise
394 	 * we'll try to ib_dispatch_event() on an invalid device.
395 	 */
396 	if (!dev->ib_active)
397 		return IRQ_HANDLED;
398 
399 	while (pvrdma_idx_ring_has_data(ring, ring_slots, &head) > 0) {
400 		struct pvrdma_eqe *eqe;
401 
402 		eqe = get_eqe(dev, head);
403 
404 		switch (eqe->type) {
405 		case PVRDMA_EVENT_QP_FATAL:
406 		case PVRDMA_EVENT_QP_REQ_ERR:
407 		case PVRDMA_EVENT_QP_ACCESS_ERR:
408 		case PVRDMA_EVENT_COMM_EST:
409 		case PVRDMA_EVENT_SQ_DRAINED:
410 		case PVRDMA_EVENT_PATH_MIG:
411 		case PVRDMA_EVENT_PATH_MIG_ERR:
412 		case PVRDMA_EVENT_QP_LAST_WQE_REACHED:
413 			pvrdma_qp_event(dev, eqe->info, eqe->type);
414 			break;
415 
416 		case PVRDMA_EVENT_CQ_ERR:
417 			pvrdma_cq_event(dev, eqe->info, eqe->type);
418 			break;
419 
420 		case PVRDMA_EVENT_SRQ_ERR:
421 		case PVRDMA_EVENT_SRQ_LIMIT_REACHED:
422 			break;
423 
424 		case PVRDMA_EVENT_PORT_ACTIVE:
425 		case PVRDMA_EVENT_PORT_ERR:
426 		case PVRDMA_EVENT_LID_CHANGE:
427 		case PVRDMA_EVENT_PKEY_CHANGE:
428 		case PVRDMA_EVENT_SM_CHANGE:
429 		case PVRDMA_EVENT_CLIENT_REREGISTER:
430 		case PVRDMA_EVENT_GID_CHANGE:
431 			pvrdma_dev_event(dev, eqe->info, eqe->type);
432 			break;
433 
434 		case PVRDMA_EVENT_DEVICE_FATAL:
435 			pvrdma_dev_event(dev, 1, eqe->type);
436 			break;
437 
438 		default:
439 			break;
440 		}
441 
442 		pvrdma_idx_ring_inc(&ring->cons_head, ring_slots);
443 	}
444 
445 	return IRQ_HANDLED;
446 }
447 
448 static inline struct pvrdma_cqne *get_cqne(struct pvrdma_dev *dev,
449 					   unsigned int i)
450 {
451 	return (struct pvrdma_cqne *)pvrdma_page_dir_get_ptr(
452 					&dev->cq_pdir,
453 					PAGE_SIZE +
454 					sizeof(struct pvrdma_cqne) * i);
455 }
456 
457 static irqreturn_t pvrdma_intrx_handler(int irq, void *dev_id)
458 {
459 	struct pvrdma_dev *dev = dev_id;
460 	struct pvrdma_ring *ring = &dev->cq_ring_state->rx;
461 	int ring_slots = (dev->dsr->cq_ring_pages.num_pages - 1) * PAGE_SIZE /
462 			 sizeof(struct pvrdma_cqne);
463 	unsigned int head;
464 	unsigned long flags;
465 
466 	dev_dbg(&dev->pdev->dev, "interrupt x (completion) handler\n");
467 
468 	while (pvrdma_idx_ring_has_data(ring, ring_slots, &head) > 0) {
469 		struct pvrdma_cqne *cqne;
470 		struct pvrdma_cq *cq;
471 
472 		cqne = get_cqne(dev, head);
473 		spin_lock_irqsave(&dev->cq_tbl_lock, flags);
474 		cq = dev->cq_tbl[cqne->info % dev->dsr->caps.max_cq];
475 		if (cq)
476 			atomic_inc(&cq->refcnt);
477 		spin_unlock_irqrestore(&dev->cq_tbl_lock, flags);
478 
479 		if (cq && cq->ibcq.comp_handler)
480 			cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
481 		if (cq) {
482 			atomic_dec(&cq->refcnt);
483 			if (atomic_read(&cq->refcnt))
484 				wake_up(&cq->wait);
485 		}
486 		pvrdma_idx_ring_inc(&ring->cons_head, ring_slots);
487 	}
488 
489 	return IRQ_HANDLED;
490 }
491 
492 static void pvrdma_disable_msi_all(struct pvrdma_dev *dev)
493 {
494 	if (dev->intr.type == PVRDMA_INTR_TYPE_MSIX)
495 		pci_disable_msix(dev->pdev);
496 	else if (dev->intr.type == PVRDMA_INTR_TYPE_MSI)
497 		pci_disable_msi(dev->pdev);
498 }
499 
500 static void pvrdma_free_irq(struct pvrdma_dev *dev)
501 {
502 	int i;
503 
504 	dev_dbg(&dev->pdev->dev, "freeing interrupts\n");
505 
506 	if (dev->intr.type == PVRDMA_INTR_TYPE_MSIX) {
507 		for (i = 0; i < dev->intr.size; i++) {
508 			if (dev->intr.enabled[i]) {
509 				free_irq(dev->intr.msix_entry[i].vector, dev);
510 				dev->intr.enabled[i] = 0;
511 			}
512 		}
513 	} else if (dev->intr.type == PVRDMA_INTR_TYPE_INTX ||
514 		   dev->intr.type == PVRDMA_INTR_TYPE_MSI) {
515 		free_irq(dev->pdev->irq, dev);
516 	}
517 }
518 
519 static void pvrdma_enable_intrs(struct pvrdma_dev *dev)
520 {
521 	dev_dbg(&dev->pdev->dev, "enable interrupts\n");
522 	pvrdma_write_reg(dev, PVRDMA_REG_IMR, 0);
523 }
524 
525 static void pvrdma_disable_intrs(struct pvrdma_dev *dev)
526 {
527 	dev_dbg(&dev->pdev->dev, "disable interrupts\n");
528 	pvrdma_write_reg(dev, PVRDMA_REG_IMR, ~0);
529 }
530 
531 static int pvrdma_enable_msix(struct pci_dev *pdev, struct pvrdma_dev *dev)
532 {
533 	int i;
534 	int ret;
535 
536 	for (i = 0; i < PVRDMA_MAX_INTERRUPTS; i++) {
537 		dev->intr.msix_entry[i].entry = i;
538 		dev->intr.msix_entry[i].vector = i;
539 
540 		switch (i) {
541 		case 0:
542 			/* CMD ring handler */
543 			dev->intr.handler[i] = pvrdma_intr0_handler;
544 			break;
545 		case 1:
546 			/* Async event ring handler */
547 			dev->intr.handler[i] = pvrdma_intr1_handler;
548 			break;
549 		default:
550 			/* Completion queue handler */
551 			dev->intr.handler[i] = pvrdma_intrx_handler;
552 			break;
553 		}
554 	}
555 
556 	ret = pci_enable_msix(pdev, dev->intr.msix_entry,
557 			      PVRDMA_MAX_INTERRUPTS);
558 	if (!ret) {
559 		dev->intr.type = PVRDMA_INTR_TYPE_MSIX;
560 		dev->intr.size = PVRDMA_MAX_INTERRUPTS;
561 	} else if (ret > 0) {
562 		ret = pci_enable_msix(pdev, dev->intr.msix_entry, ret);
563 		if (!ret) {
564 			dev->intr.type = PVRDMA_INTR_TYPE_MSIX;
565 			dev->intr.size = ret;
566 		} else {
567 			dev->intr.size = 0;
568 		}
569 	}
570 
571 	dev_dbg(&pdev->dev, "using interrupt type %d, size %d\n",
572 		dev->intr.type, dev->intr.size);
573 
574 	return ret;
575 }
576 
577 static int pvrdma_alloc_intrs(struct pvrdma_dev *dev)
578 {
579 	int ret = 0;
580 	int i;
581 
582 	if (pci_find_capability(dev->pdev, PCI_CAP_ID_MSIX) &&
583 	    pvrdma_enable_msix(dev->pdev, dev)) {
584 		/* Try MSI */
585 		ret = pci_enable_msi(dev->pdev);
586 		if (!ret) {
587 			dev->intr.type = PVRDMA_INTR_TYPE_MSI;
588 		} else {
589 			/* Legacy INTR */
590 			dev->intr.type = PVRDMA_INTR_TYPE_INTX;
591 		}
592 	}
593 
594 	/* Request First IRQ */
595 	switch (dev->intr.type) {
596 	case PVRDMA_INTR_TYPE_INTX:
597 	case PVRDMA_INTR_TYPE_MSI:
598 		ret = request_irq(dev->pdev->irq, pvrdma_intr0_handler,
599 				  IRQF_SHARED, DRV_NAME, dev);
600 		if (ret) {
601 			dev_err(&dev->pdev->dev,
602 				"failed to request interrupt\n");
603 			goto disable_msi;
604 		}
605 		break;
606 	case PVRDMA_INTR_TYPE_MSIX:
607 		ret = request_irq(dev->intr.msix_entry[0].vector,
608 				  pvrdma_intr0_handler, 0, DRV_NAME, dev);
609 		if (ret) {
610 			dev_err(&dev->pdev->dev,
611 				"failed to request interrupt 0\n");
612 			goto disable_msi;
613 		}
614 		dev->intr.enabled[0] = 1;
615 		break;
616 	default:
617 		/* Not reached */
618 		break;
619 	}
620 
621 	/* For MSIX: request intr for each vector */
622 	if (dev->intr.size > 1) {
623 		ret = request_irq(dev->intr.msix_entry[1].vector,
624 				  pvrdma_intr1_handler, 0, DRV_NAME, dev);
625 		if (ret) {
626 			dev_err(&dev->pdev->dev,
627 				"failed to request interrupt 1\n");
628 			goto free_irq;
629 		}
630 		dev->intr.enabled[1] = 1;
631 
632 		for (i = 2; i < dev->intr.size; i++) {
633 			ret = request_irq(dev->intr.msix_entry[i].vector,
634 					  pvrdma_intrx_handler, 0,
635 					  DRV_NAME, dev);
636 			if (ret) {
637 				dev_err(&dev->pdev->dev,
638 					"failed to request interrupt %d\n", i);
639 				goto free_irq;
640 			}
641 			dev->intr.enabled[i] = 1;
642 		}
643 	}
644 
645 	return 0;
646 
647 free_irq:
648 	pvrdma_free_irq(dev);
649 disable_msi:
650 	pvrdma_disable_msi_all(dev);
651 	return ret;
652 }
653 
654 static void pvrdma_free_slots(struct pvrdma_dev *dev)
655 {
656 	struct pci_dev *pdev = dev->pdev;
657 
658 	if (dev->resp_slot)
659 		dma_free_coherent(&pdev->dev, PAGE_SIZE, dev->resp_slot,
660 				  dev->dsr->resp_slot_dma);
661 	if (dev->cmd_slot)
662 		dma_free_coherent(&pdev->dev, PAGE_SIZE, dev->cmd_slot,
663 				  dev->dsr->cmd_slot_dma);
664 }
665 
666 static int pvrdma_add_gid_at_index(struct pvrdma_dev *dev,
667 				   const union ib_gid *gid,
668 				   int index)
669 {
670 	int ret;
671 	union pvrdma_cmd_req req;
672 	struct pvrdma_cmd_create_bind *cmd_bind = &req.create_bind;
673 
674 	if (!dev->sgid_tbl) {
675 		dev_warn(&dev->pdev->dev, "sgid table not initialized\n");
676 		return -EINVAL;
677 	}
678 
679 	memset(cmd_bind, 0, sizeof(*cmd_bind));
680 	cmd_bind->hdr.cmd = PVRDMA_CMD_CREATE_BIND;
681 	memcpy(cmd_bind->new_gid, gid->raw, 16);
682 	cmd_bind->mtu = ib_mtu_enum_to_int(IB_MTU_1024);
683 	cmd_bind->vlan = 0xfff;
684 	cmd_bind->index = index;
685 	cmd_bind->gid_type = PVRDMA_GID_TYPE_FLAG_ROCE_V1;
686 
687 	ret = pvrdma_cmd_post(dev, &req, NULL, 0);
688 	if (ret < 0) {
689 		dev_warn(&dev->pdev->dev,
690 			 "could not create binding, error: %d\n", ret);
691 		return -EFAULT;
692 	}
693 	memcpy(&dev->sgid_tbl[index], gid, sizeof(*gid));
694 	return 0;
695 }
696 
697 static int pvrdma_add_gid(struct ib_device *ibdev,
698 			  u8 port_num,
699 			  unsigned int index,
700 			  const union ib_gid *gid,
701 			  const struct ib_gid_attr *attr,
702 			  void **context)
703 {
704 	struct pvrdma_dev *dev = to_vdev(ibdev);
705 
706 	return pvrdma_add_gid_at_index(dev, gid, index);
707 }
708 
709 static int pvrdma_del_gid_at_index(struct pvrdma_dev *dev, int index)
710 {
711 	int ret;
712 	union pvrdma_cmd_req req;
713 	struct pvrdma_cmd_destroy_bind *cmd_dest = &req.destroy_bind;
714 
715 	/* Update sgid table. */
716 	if (!dev->sgid_tbl) {
717 		dev_warn(&dev->pdev->dev, "sgid table not initialized\n");
718 		return -EINVAL;
719 	}
720 
721 	memset(cmd_dest, 0, sizeof(*cmd_dest));
722 	cmd_dest->hdr.cmd = PVRDMA_CMD_DESTROY_BIND;
723 	memcpy(cmd_dest->dest_gid, &dev->sgid_tbl[index], 16);
724 	cmd_dest->index = index;
725 
726 	ret = pvrdma_cmd_post(dev, &req, NULL, 0);
727 	if (ret < 0) {
728 		dev_warn(&dev->pdev->dev,
729 			 "could not destroy binding, error: %d\n", ret);
730 		return ret;
731 	}
732 	memset(&dev->sgid_tbl[index], 0, 16);
733 	return 0;
734 }
735 
736 static int pvrdma_del_gid(struct ib_device *ibdev,
737 			  u8 port_num,
738 			  unsigned int index,
739 			  void **context)
740 {
741 	struct pvrdma_dev *dev = to_vdev(ibdev);
742 
743 	dev_dbg(&dev->pdev->dev, "removing gid at index %u from %s",
744 		index, dev->netdev->name);
745 
746 	return pvrdma_del_gid_at_index(dev, index);
747 }
748 
749 static void pvrdma_netdevice_event_handle(struct pvrdma_dev *dev,
750 					  unsigned long event)
751 {
752 	switch (event) {
753 	case NETDEV_REBOOT:
754 	case NETDEV_DOWN:
755 		pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ERR);
756 		break;
757 	case NETDEV_UP:
758 		pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ACTIVE);
759 		break;
760 	default:
761 		dev_dbg(&dev->pdev->dev, "ignore netdevice event %ld on %s\n",
762 			event, dev->ib_dev.name);
763 		break;
764 	}
765 }
766 
767 static void pvrdma_netdevice_event_work(struct work_struct *work)
768 {
769 	struct pvrdma_netdevice_work *netdev_work;
770 	struct pvrdma_dev *dev;
771 
772 	netdev_work = container_of(work, struct pvrdma_netdevice_work, work);
773 
774 	mutex_lock(&pvrdma_device_list_lock);
775 	list_for_each_entry(dev, &pvrdma_device_list, device_link) {
776 		if (dev->netdev == netdev_work->event_netdev) {
777 			pvrdma_netdevice_event_handle(dev, netdev_work->event);
778 			break;
779 		}
780 	}
781 	mutex_unlock(&pvrdma_device_list_lock);
782 
783 	kfree(netdev_work);
784 }
785 
786 static int pvrdma_netdevice_event(struct notifier_block *this,
787 				  unsigned long event, void *ptr)
788 {
789 	struct net_device *event_netdev = netdev_notifier_info_to_dev(ptr);
790 	struct pvrdma_netdevice_work *netdev_work;
791 
792 	netdev_work = kmalloc(sizeof(*netdev_work), GFP_ATOMIC);
793 	if (!netdev_work)
794 		return NOTIFY_BAD;
795 
796 	INIT_WORK(&netdev_work->work, pvrdma_netdevice_event_work);
797 	netdev_work->event_netdev = event_netdev;
798 	netdev_work->event = event;
799 	queue_work(event_wq, &netdev_work->work);
800 
801 	return NOTIFY_DONE;
802 }
803 
804 static int pvrdma_pci_probe(struct pci_dev *pdev,
805 			    const struct pci_device_id *id)
806 {
807 	struct pci_dev *pdev_net;
808 	struct pvrdma_dev *dev;
809 	int ret;
810 	unsigned long start;
811 	unsigned long len;
812 	unsigned int version;
813 	dma_addr_t slot_dma = 0;
814 
815 	dev_dbg(&pdev->dev, "initializing driver %s\n", pci_name(pdev));
816 
817 	/* Allocate zero-out device */
818 	dev = (struct pvrdma_dev *)ib_alloc_device(sizeof(*dev));
819 	if (!dev) {
820 		dev_err(&pdev->dev, "failed to allocate IB device\n");
821 		return -ENOMEM;
822 	}
823 
824 	mutex_lock(&pvrdma_device_list_lock);
825 	list_add(&dev->device_link, &pvrdma_device_list);
826 	mutex_unlock(&pvrdma_device_list_lock);
827 
828 	ret = pvrdma_init_device(dev);
829 	if (ret)
830 		goto err_free_device;
831 
832 	dev->pdev = pdev;
833 	pci_set_drvdata(pdev, dev);
834 
835 	ret = pci_enable_device(pdev);
836 	if (ret) {
837 		dev_err(&pdev->dev, "cannot enable PCI device\n");
838 		goto err_free_device;
839 	}
840 
841 	dev_dbg(&pdev->dev, "PCI resource flags BAR0 %#lx\n",
842 		pci_resource_flags(pdev, 0));
843 	dev_dbg(&pdev->dev, "PCI resource len %#llx\n",
844 		(unsigned long long)pci_resource_len(pdev, 0));
845 	dev_dbg(&pdev->dev, "PCI resource start %#llx\n",
846 		(unsigned long long)pci_resource_start(pdev, 0));
847 	dev_dbg(&pdev->dev, "PCI resource flags BAR1 %#lx\n",
848 		pci_resource_flags(pdev, 1));
849 	dev_dbg(&pdev->dev, "PCI resource len %#llx\n",
850 		(unsigned long long)pci_resource_len(pdev, 1));
851 	dev_dbg(&pdev->dev, "PCI resource start %#llx\n",
852 		(unsigned long long)pci_resource_start(pdev, 1));
853 
854 	if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM) ||
855 	    !(pci_resource_flags(pdev, 1) & IORESOURCE_MEM)) {
856 		dev_err(&pdev->dev, "PCI BAR region not MMIO\n");
857 		ret = -ENOMEM;
858 		goto err_free_device;
859 	}
860 
861 	ret = pci_request_regions(pdev, DRV_NAME);
862 	if (ret) {
863 		dev_err(&pdev->dev, "cannot request PCI resources\n");
864 		goto err_disable_pdev;
865 	}
866 
867 	/* Enable 64-Bit DMA */
868 	if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) == 0) {
869 		ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
870 		if (ret != 0) {
871 			dev_err(&pdev->dev,
872 				"pci_set_consistent_dma_mask failed\n");
873 			goto err_free_resource;
874 		}
875 	} else {
876 		ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
877 		if (ret != 0) {
878 			dev_err(&pdev->dev,
879 				"pci_set_dma_mask failed\n");
880 			goto err_free_resource;
881 		}
882 	}
883 
884 	pci_set_master(pdev);
885 
886 	/* Map register space */
887 	start = pci_resource_start(dev->pdev, PVRDMA_PCI_RESOURCE_REG);
888 	len = pci_resource_len(dev->pdev, PVRDMA_PCI_RESOURCE_REG);
889 	dev->regs = ioremap(start, len);
890 	if (!dev->regs) {
891 		dev_err(&pdev->dev, "register mapping failed\n");
892 		ret = -ENOMEM;
893 		goto err_free_resource;
894 	}
895 
896 	/* Setup per-device UAR. */
897 	dev->driver_uar.index = 0;
898 	dev->driver_uar.pfn =
899 		pci_resource_start(dev->pdev, PVRDMA_PCI_RESOURCE_UAR) >>
900 		PAGE_SHIFT;
901 	dev->driver_uar.map =
902 		ioremap(dev->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
903 	if (!dev->driver_uar.map) {
904 		dev_err(&pdev->dev, "failed to remap UAR pages\n");
905 		ret = -ENOMEM;
906 		goto err_unmap_regs;
907 	}
908 
909 	version = pvrdma_read_reg(dev, PVRDMA_REG_VERSION);
910 	dev_info(&pdev->dev, "device version %d, driver version %d\n",
911 		 version, PVRDMA_VERSION);
912 	if (version < PVRDMA_VERSION) {
913 		dev_err(&pdev->dev, "incompatible device version\n");
914 		goto err_uar_unmap;
915 	}
916 
917 	dev->dsr = dma_alloc_coherent(&pdev->dev, sizeof(*dev->dsr),
918 				      &dev->dsrbase, GFP_KERNEL);
919 	if (!dev->dsr) {
920 		dev_err(&pdev->dev, "failed to allocate shared region\n");
921 		ret = -ENOMEM;
922 		goto err_uar_unmap;
923 	}
924 
925 	/* Setup the shared region */
926 	memset(dev->dsr, 0, sizeof(*dev->dsr));
927 	dev->dsr->driver_version = PVRDMA_VERSION;
928 	dev->dsr->gos_info.gos_bits = sizeof(void *) == 4 ?
929 		PVRDMA_GOS_BITS_32 :
930 		PVRDMA_GOS_BITS_64;
931 	dev->dsr->gos_info.gos_type = PVRDMA_GOS_TYPE_LINUX;
932 	dev->dsr->gos_info.gos_ver = 1;
933 	dev->dsr->uar_pfn = dev->driver_uar.pfn;
934 
935 	/* Command slot. */
936 	dev->cmd_slot = dma_alloc_coherent(&pdev->dev, PAGE_SIZE,
937 					   &slot_dma, GFP_KERNEL);
938 	if (!dev->cmd_slot) {
939 		ret = -ENOMEM;
940 		goto err_free_dsr;
941 	}
942 
943 	dev->dsr->cmd_slot_dma = (u64)slot_dma;
944 
945 	/* Response slot. */
946 	dev->resp_slot = dma_alloc_coherent(&pdev->dev, PAGE_SIZE,
947 					    &slot_dma, GFP_KERNEL);
948 	if (!dev->resp_slot) {
949 		ret = -ENOMEM;
950 		goto err_free_slots;
951 	}
952 
953 	dev->dsr->resp_slot_dma = (u64)slot_dma;
954 
955 	/* Async event ring */
956 	dev->dsr->async_ring_pages.num_pages = 4;
957 	ret = pvrdma_page_dir_init(dev, &dev->async_pdir,
958 				   dev->dsr->async_ring_pages.num_pages, true);
959 	if (ret)
960 		goto err_free_slots;
961 	dev->async_ring_state = dev->async_pdir.pages[0];
962 	dev->dsr->async_ring_pages.pdir_dma = dev->async_pdir.dir_dma;
963 
964 	/* CQ notification ring */
965 	dev->dsr->cq_ring_pages.num_pages = 4;
966 	ret = pvrdma_page_dir_init(dev, &dev->cq_pdir,
967 				   dev->dsr->cq_ring_pages.num_pages, true);
968 	if (ret)
969 		goto err_free_async_ring;
970 	dev->cq_ring_state = dev->cq_pdir.pages[0];
971 	dev->dsr->cq_ring_pages.pdir_dma = dev->cq_pdir.dir_dma;
972 
973 	/*
974 	 * Write the PA of the shared region to the device. The writes must be
975 	 * ordered such that the high bits are written last. When the writes
976 	 * complete, the device will have filled out the capabilities.
977 	 */
978 
979 	pvrdma_write_reg(dev, PVRDMA_REG_DSRLOW, (u32)dev->dsrbase);
980 	pvrdma_write_reg(dev, PVRDMA_REG_DSRHIGH,
981 			 (u32)((u64)(dev->dsrbase) >> 32));
982 
983 	/* Make sure the write is complete before reading status. */
984 	mb();
985 
986 	/* Currently, the driver only supports RoCE mode. */
987 	if (dev->dsr->caps.mode != PVRDMA_DEVICE_MODE_ROCE) {
988 		dev_err(&pdev->dev, "unsupported transport %d\n",
989 			dev->dsr->caps.mode);
990 		ret = -EFAULT;
991 		goto err_free_cq_ring;
992 	}
993 
994 	/* Currently, the driver only supports RoCE V1. */
995 	if (!(dev->dsr->caps.gid_types & PVRDMA_GID_TYPE_FLAG_ROCE_V1)) {
996 		dev_err(&pdev->dev, "driver needs RoCE v1 support\n");
997 		ret = -EFAULT;
998 		goto err_free_cq_ring;
999 	}
1000 
1001 	/* Paired vmxnet3 will have same bus, slot. But func will be 0 */
1002 	pdev_net = pci_get_slot(pdev->bus, PCI_DEVFN(PCI_SLOT(pdev->devfn), 0));
1003 	if (!pdev_net) {
1004 		dev_err(&pdev->dev, "failed to find paired net device\n");
1005 		ret = -ENODEV;
1006 		goto err_free_cq_ring;
1007 	}
1008 
1009 	if (pdev_net->vendor != PCI_VENDOR_ID_VMWARE ||
1010 	    pdev_net->device != PCI_DEVICE_ID_VMWARE_VMXNET3) {
1011 		dev_err(&pdev->dev, "failed to find paired vmxnet3 device\n");
1012 		pci_dev_put(pdev_net);
1013 		ret = -ENODEV;
1014 		goto err_free_cq_ring;
1015 	}
1016 
1017 	dev->netdev = pci_get_drvdata(pdev_net);
1018 	pci_dev_put(pdev_net);
1019 	if (!dev->netdev) {
1020 		dev_err(&pdev->dev, "failed to get vmxnet3 device\n");
1021 		ret = -ENODEV;
1022 		goto err_free_cq_ring;
1023 	}
1024 
1025 	dev_info(&pdev->dev, "paired device to %s\n", dev->netdev->name);
1026 
1027 	/* Interrupt setup */
1028 	ret = pvrdma_alloc_intrs(dev);
1029 	if (ret) {
1030 		dev_err(&pdev->dev, "failed to allocate interrupts\n");
1031 		ret = -ENOMEM;
1032 		goto err_netdevice;
1033 	}
1034 
1035 	/* Allocate UAR table. */
1036 	ret = pvrdma_uar_table_init(dev);
1037 	if (ret) {
1038 		dev_err(&pdev->dev, "failed to allocate UAR table\n");
1039 		ret = -ENOMEM;
1040 		goto err_free_intrs;
1041 	}
1042 
1043 	/* Allocate GID table */
1044 	dev->sgid_tbl = kcalloc(dev->dsr->caps.gid_tbl_len,
1045 				sizeof(union ib_gid), GFP_KERNEL);
1046 	if (!dev->sgid_tbl) {
1047 		ret = -ENOMEM;
1048 		goto err_free_uar_table;
1049 	}
1050 	dev_dbg(&pdev->dev, "gid table len %d\n", dev->dsr->caps.gid_tbl_len);
1051 
1052 	pvrdma_enable_intrs(dev);
1053 
1054 	/* Activate pvrdma device */
1055 	pvrdma_write_reg(dev, PVRDMA_REG_CTL, PVRDMA_DEVICE_CTL_ACTIVATE);
1056 
1057 	/* Make sure the write is complete before reading status. */
1058 	mb();
1059 
1060 	/* Check if device was successfully activated */
1061 	ret = pvrdma_read_reg(dev, PVRDMA_REG_ERR);
1062 	if (ret != 0) {
1063 		dev_err(&pdev->dev, "failed to activate device\n");
1064 		ret = -EFAULT;
1065 		goto err_disable_intr;
1066 	}
1067 
1068 	/* Register IB device */
1069 	ret = pvrdma_register_device(dev);
1070 	if (ret) {
1071 		dev_err(&pdev->dev, "failed to register IB device\n");
1072 		goto err_disable_intr;
1073 	}
1074 
1075 	dev->nb_netdev.notifier_call = pvrdma_netdevice_event;
1076 	ret = register_netdevice_notifier(&dev->nb_netdev);
1077 	if (ret) {
1078 		dev_err(&pdev->dev, "failed to register netdevice events\n");
1079 		goto err_unreg_ibdev;
1080 	}
1081 
1082 	dev_info(&pdev->dev, "attached to device\n");
1083 	return 0;
1084 
1085 err_unreg_ibdev:
1086 	ib_unregister_device(&dev->ib_dev);
1087 err_disable_intr:
1088 	pvrdma_disable_intrs(dev);
1089 	kfree(dev->sgid_tbl);
1090 err_free_uar_table:
1091 	pvrdma_uar_table_cleanup(dev);
1092 err_free_intrs:
1093 	pvrdma_free_irq(dev);
1094 	pvrdma_disable_msi_all(dev);
1095 err_netdevice:
1096 	unregister_netdevice_notifier(&dev->nb_netdev);
1097 err_free_cq_ring:
1098 	pvrdma_page_dir_cleanup(dev, &dev->cq_pdir);
1099 err_free_async_ring:
1100 	pvrdma_page_dir_cleanup(dev, &dev->async_pdir);
1101 err_free_slots:
1102 	pvrdma_free_slots(dev);
1103 err_free_dsr:
1104 	dma_free_coherent(&pdev->dev, sizeof(*dev->dsr), dev->dsr,
1105 			  dev->dsrbase);
1106 err_uar_unmap:
1107 	iounmap(dev->driver_uar.map);
1108 err_unmap_regs:
1109 	iounmap(dev->regs);
1110 err_free_resource:
1111 	pci_release_regions(pdev);
1112 err_disable_pdev:
1113 	pci_disable_device(pdev);
1114 	pci_set_drvdata(pdev, NULL);
1115 err_free_device:
1116 	mutex_lock(&pvrdma_device_list_lock);
1117 	list_del(&dev->device_link);
1118 	mutex_unlock(&pvrdma_device_list_lock);
1119 	ib_dealloc_device(&dev->ib_dev);
1120 	return ret;
1121 }
1122 
1123 static void pvrdma_pci_remove(struct pci_dev *pdev)
1124 {
1125 	struct pvrdma_dev *dev = pci_get_drvdata(pdev);
1126 
1127 	if (!dev)
1128 		return;
1129 
1130 	dev_info(&pdev->dev, "detaching from device\n");
1131 
1132 	unregister_netdevice_notifier(&dev->nb_netdev);
1133 	dev->nb_netdev.notifier_call = NULL;
1134 
1135 	flush_workqueue(event_wq);
1136 
1137 	/* Unregister ib device */
1138 	ib_unregister_device(&dev->ib_dev);
1139 
1140 	mutex_lock(&pvrdma_device_list_lock);
1141 	list_del(&dev->device_link);
1142 	mutex_unlock(&pvrdma_device_list_lock);
1143 
1144 	pvrdma_disable_intrs(dev);
1145 	pvrdma_free_irq(dev);
1146 	pvrdma_disable_msi_all(dev);
1147 
1148 	/* Deactivate pvrdma device */
1149 	pvrdma_write_reg(dev, PVRDMA_REG_CTL, PVRDMA_DEVICE_CTL_RESET);
1150 	pvrdma_page_dir_cleanup(dev, &dev->cq_pdir);
1151 	pvrdma_page_dir_cleanup(dev, &dev->async_pdir);
1152 	pvrdma_free_slots(dev);
1153 
1154 	iounmap(dev->regs);
1155 	kfree(dev->sgid_tbl);
1156 	kfree(dev->cq_tbl);
1157 	kfree(dev->qp_tbl);
1158 	pvrdma_uar_table_cleanup(dev);
1159 	iounmap(dev->driver_uar.map);
1160 
1161 	ib_dealloc_device(&dev->ib_dev);
1162 
1163 	/* Free pci resources */
1164 	pci_release_regions(pdev);
1165 	pci_disable_device(pdev);
1166 	pci_set_drvdata(pdev, NULL);
1167 }
1168 
1169 static struct pci_device_id pvrdma_pci_table[] = {
1170 	{ PCI_DEVICE(PCI_VENDOR_ID_VMWARE, PCI_DEVICE_ID_VMWARE_PVRDMA), },
1171 	{ 0 },
1172 };
1173 
1174 MODULE_DEVICE_TABLE(pci, pvrdma_pci_table);
1175 
1176 static struct pci_driver pvrdma_driver = {
1177 	.name		= DRV_NAME,
1178 	.id_table	= pvrdma_pci_table,
1179 	.probe		= pvrdma_pci_probe,
1180 	.remove		= pvrdma_pci_remove,
1181 };
1182 
1183 static int __init pvrdma_init(void)
1184 {
1185 	int err;
1186 
1187 	event_wq = alloc_ordered_workqueue("pvrdma_event_wq", WQ_MEM_RECLAIM);
1188 	if (!event_wq)
1189 		return -ENOMEM;
1190 
1191 	err = pci_register_driver(&pvrdma_driver);
1192 	if (err)
1193 		destroy_workqueue(event_wq);
1194 
1195 	return err;
1196 }
1197 
1198 static void __exit pvrdma_cleanup(void)
1199 {
1200 	pci_unregister_driver(&pvrdma_driver);
1201 
1202 	destroy_workqueue(event_wq);
1203 }
1204 
1205 module_init(pvrdma_init);
1206 module_exit(pvrdma_cleanup);
1207 
1208 MODULE_AUTHOR("VMware, Inc");
1209 MODULE_DESCRIPTION("VMware Paravirtual RDMA driver");
1210 MODULE_VERSION(DRV_VERSION);
1211 MODULE_LICENSE("Dual BSD/GPL");
1212