// SPDX-License-Identifier: GPL-2.0-only
/*
 * VMware VMCI Driver
 *
 * Copyright (C) 2012 VMware, Inc. All rights reserved.
 */

#include <linux/vmw_vmci_defs.h>
#include <linux/vmw_vmci_api.h>
#include <linux/moduleparam.h>
#include <linux/interrupt.h>
#include <linux/highmem.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/smp.h>
#include <linux/io.h>
#include <linux/vmalloc.h>

#include "vmci_datagram.h"
#include "vmci_doorbell.h"
#include "vmci_context.h"
#include "vmci_driver.h"
#include "vmci_event.h"

#define PCI_DEVICE_ID_VMWARE_VMCI	0x0740

#define VMCI_UTIL_NUM_RESOURCES 1

static bool vmci_disable_msi;
module_param_named(disable_msi, vmci_disable_msi, bool, 0);
MODULE_PARM_DESC(disable_msi, "Disable MSI use in driver - (default=0)");

static bool vmci_disable_msix;
module_param_named(disable_msix, vmci_disable_msix, bool, 0);
MODULE_PARM_DESC(disable_msix, "Disable MSI-X use in driver - (default=0)");

static u32 ctx_update_sub_id = VMCI_INVALID_ID;
static u32 vm_context_id = VMCI_INVALID_ID;

struct vmci_guest_device {
	struct device *dev;	/* PCI device we are attached to */
	void __iomem *iobase;	/* legacy I/O port BAR mapping */
	void __iomem *mmio_base;	/* MMIO BAR mapping, if available */

	bool exclusive_vectors;	/* MSI-X with a dedicated vector per cause */

	struct tasklet_struct datagram_tasklet;
	struct tasklet_struct bm_tasklet;

	void *data_buffer;	/* buffer for incoming datagrams */
	void *notification_bitmap;	/* doorbell notification bitmap */
	dma_addr_t notification_base;	/* DMA address of the bitmap */
};

static bool use_ppn64;

bool vmci_use_ppn64(void)
{
	return use_ppn64;
}

/* vmci_dev singleton device and supporting data */
struct pci_dev *vmci_pdev;
static struct vmci_guest_device *vmci_dev_g;
static DEFINE_SPINLOCK(vmci_dev_spinlock);

static atomic_t vmci_num_guest_devices = ATOMIC_INIT(0);

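/*
 * Returns true if at least one VMCI PCI (guest) device is present.  The
 * common VMCI code uses this, e.g. when deciding how to route requests
 * between the guest and host personalities of the driver.
 */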
bool vmci_guest_code_active(void)
{
	return atomic_read(&vmci_num_guest_devices) != 0;
}

u32 vmci_get_vm_context_id(void)
{
	if (vm_context_id == VMCI_INVALID_ID) {
		struct vmci_datagram get_cid_msg;
		get_cid_msg.dst =
		    vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
				     VMCI_GET_CONTEXT_ID);
		get_cid_msg.src = VMCI_ANON_SRC_HANDLE;
		get_cid_msg.payload_size = 0;
		vm_context_id = vmci_send_datagram(&get_cid_msg);
	}
	return vm_context_id;
}

static unsigned int vmci_read_reg(struct vmci_guest_device *dev, u32 reg)
{
	if (dev->mmio_base != NULL)
		return readl(dev->mmio_base + reg);
	return ioread32(dev->iobase + reg);
}

static void vmci_write_reg(struct vmci_guest_device *dev, u32 val, u32 reg)
{
	if (dev->mmio_base != NULL)
		writel(val, dev->mmio_base + reg);
	else
		iowrite32(val, dev->iobase + reg);
}

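/*
 * All VMCI register reads and writes in this file go through the two
 * helpers above, so the rest of the driver does not care whether the
 * device exposes its registers through the newer MMIO BAR or the legacy
 * I/O port BAR.  For example, interrupts are enabled in the probe path
 * simply with:
 *
 *	vmci_write_reg(vmci_dev, VMCI_CONTROL_INT_ENABLE, VMCI_CONTROL_ADDR);
 *
 * Note that the datagram data-in/data-out ports are still accessed with
 * ioread8_rep()/iowrite8_rep() on the I/O port BAR.
 */
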
/*
 * VM to hypervisor call mechanism. We use the standard VMware naming
 * convention since shared code is calling this function as well.
 */
int vmci_send_datagram(struct vmci_datagram *dg)
{
	unsigned long flags;
	int result;

	/* Check args. */
	if (dg == NULL)
		return VMCI_ERROR_INVALID_ARGS;

	/*
	 * Need to acquire spinlock on the device because the datagram
	 * data may be spread over multiple pages and the monitor may
	 * interleave device user rpc calls from multiple
	 * VCPUs. Acquiring the spinlock precludes that
	 * possibility. Interrupts are disabled so that an incoming
	 * datagram interrupt cannot be serviced in the middle of the
	 * "rep out" and possibly end up back in this function.
	 */
	spin_lock_irqsave(&vmci_dev_spinlock, flags);

	if (vmci_dev_g) {
		iowrite8_rep(vmci_dev_g->iobase + VMCI_DATA_OUT_ADDR,
			     dg, VMCI_DG_SIZE(dg));
		result = vmci_read_reg(vmci_dev_g, VMCI_RESULT_LOW_ADDR);
	} else {
		result = VMCI_ERROR_UNAVAILABLE;
	}

	spin_unlock_irqrestore(&vmci_dev_spinlock, flags);

	return result;
}
EXPORT_SYMBOL_GPL(vmci_send_datagram);

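/*
 * Usage sketch (illustrative only): a caller fills in a struct
 * vmci_datagram header with destination/source handles and a payload
 * size, then passes it to vmci_send_datagram(); the return value is the
 * hypercall result or a negative VMCI error code.
 * vmci_get_vm_context_id() above does exactly this:
 *
 *	struct vmci_datagram dg;
 *
 *	dg.dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
 *				  VMCI_GET_CONTEXT_ID);
 *	dg.src = VMCI_ANON_SRC_HANDLE;
 *	dg.payload_size = 0;
 *	result = vmci_send_datagram(&dg);
 */
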
/*
 * Called with the new context id when the VMCI context id is updated or
 * the VM is resumed.
 */
static void vmci_guest_cid_update(u32 sub_id,
				  const struct vmci_event_data *event_data,
				  void *client_data)
{
	const struct vmci_event_payld_ctx *ev_payload =
				vmci_event_data_const_payload(event_data);

	if (sub_id != ctx_update_sub_id) {
		pr_devel("Invalid subscriber (ID=0x%x)\n", sub_id);
		return;
	}

	if (!event_data || ev_payload->context_id == VMCI_INVALID_ID) {
		pr_devel("Invalid event data\n");
		return;
	}

	pr_devel("Updating context from (ID=0x%x) to (ID=0x%x) on event (type=%d)\n",
		 vm_context_id, ev_payload->context_id, event_data->event);

	vm_context_id = ev_payload->context_id;
}

/*
 * Verify that the host supports the hypercalls we need. If it does not,
 * try to find fallback hypercalls and use those instead.  Returns 0 if
 * the required hypercalls (or fallback hypercalls) are supported by the
 * host, an error code otherwise.
 */
static int vmci_check_host_caps(struct pci_dev *pdev)
{
	bool result;
	struct vmci_resource_query_msg *msg;
	u32 msg_size = sizeof(struct vmci_resource_query_hdr) +
				VMCI_UTIL_NUM_RESOURCES * sizeof(u32);
	struct vmci_datagram *check_msg;

	check_msg = kzalloc(msg_size, GFP_KERNEL);
	if (!check_msg) {
		dev_err(&pdev->dev, "%s: Insufficient memory\n", __func__);
		return -ENOMEM;
	}

	check_msg->dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
					  VMCI_RESOURCES_QUERY);
	check_msg->src = VMCI_ANON_SRC_HANDLE;
	check_msg->payload_size = msg_size - VMCI_DG_HEADERSIZE;
	msg = (struct vmci_resource_query_msg *)VMCI_DG_PAYLOAD(check_msg);

	msg->num_resources = VMCI_UTIL_NUM_RESOURCES;
	msg->resources[0] = VMCI_GET_CONTEXT_ID;

	/* Checks that hypercalls are supported */
	result = vmci_send_datagram(check_msg) == 0x01;
	kfree(check_msg);

	dev_dbg(&pdev->dev, "%s: Host capability check: %s\n",
		__func__, result ? "PASSED" : "FAILED");

	/* We need the vector. There are no fallbacks. */
	return result ? 0 : -ENXIO;
}

/*
 * Reads datagrams from the data in port and dispatches them. We
 * always start reading datagrams into only the first page of the
 * datagram buffer. If the datagrams don't fit into one page, we
 * use the maximum datagram buffer size for the remainder of the
 * invocation. This is a simple heuristic for not penalizing
 * small datagrams.
 *
 * This function assumes that it has exclusive access to the data
 * in port for the duration of the call.
 */
static void vmci_dispatch_dgs(unsigned long data)
{
	struct vmci_guest_device *vmci_dev = (struct vmci_guest_device *)data;
	u8 *dg_in_buffer = vmci_dev->data_buffer;
	struct vmci_datagram *dg;
	size_t dg_in_buffer_size = VMCI_MAX_DG_SIZE;
	size_t current_dg_in_buffer_size = PAGE_SIZE;
	size_t remaining_bytes;

	BUILD_BUG_ON(VMCI_MAX_DG_SIZE < PAGE_SIZE);

	ioread8_rep(vmci_dev->iobase + VMCI_DATA_IN_ADDR,
		    vmci_dev->data_buffer, current_dg_in_buffer_size);
	dg = (struct vmci_datagram *)dg_in_buffer;
	remaining_bytes = current_dg_in_buffer_size;

	while (dg->dst.resource != VMCI_INVALID_ID ||
	       remaining_bytes > PAGE_SIZE) {
		unsigned int dg_in_size;

		/*
		 * When the input buffer spans multiple pages, a datagram can
		 * start on any page boundary in the buffer.
		 */
		if (dg->dst.resource == VMCI_INVALID_ID) {
			dg = (struct vmci_datagram *)roundup(
				(uintptr_t)dg + 1, PAGE_SIZE);
			remaining_bytes =
				(size_t)(dg_in_buffer +
					 current_dg_in_buffer_size -
					 (u8 *)dg);
			continue;
		}

		dg_in_size = VMCI_DG_SIZE_ALIGNED(dg);

		if (dg_in_size <= dg_in_buffer_size) {
			int result;

			/*
			 * If the remaining bytes in the datagram
			 * buffer don't contain the complete
			 * datagram, we first make sure we have enough
			 * room for it and then we read the remainder
			 * of the datagram and possibly any following
			 * datagrams.
			 */
			if (dg_in_size > remaining_bytes) {
				if (remaining_bytes !=
				    current_dg_in_buffer_size) {

					/*
					 * We move the partial
					 * datagram to the front and
					 * read the remainder of the
					 * datagram and possibly
					 * following calls into the
					 * following bytes.
					 */
					memmove(dg_in_buffer, dg_in_buffer +
						current_dg_in_buffer_size -
						remaining_bytes,
						remaining_bytes);
					dg = (struct vmci_datagram *)
					    dg_in_buffer;
				}

				if (current_dg_in_buffer_size !=
				    dg_in_buffer_size)
					current_dg_in_buffer_size =
					    dg_in_buffer_size;

				ioread8_rep(vmci_dev->iobase +
						VMCI_DATA_IN_ADDR,
					vmci_dev->data_buffer +
						remaining_bytes,
					current_dg_in_buffer_size -
						remaining_bytes);
			}

			/*
			 * We special case event datagrams from the
			 * hypervisor.
			 */
			if (dg->src.context == VMCI_HYPERVISOR_CONTEXT_ID &&
			    dg->dst.resource == VMCI_EVENT_HANDLER) {
				result = vmci_event_dispatch(dg);
			} else {
				result = vmci_datagram_invoke_guest_handler(dg);
			}
			if (result < VMCI_SUCCESS)
				dev_dbg(vmci_dev->dev,
					"Datagram with resource (ID=0x%x) failed (err=%d)\n",
					 dg->dst.resource, result);

			/* On to the next datagram. */
			dg = (struct vmci_datagram *)((u8 *)dg +
						      dg_in_size);
		} else {
			size_t bytes_to_skip;

			/*
			 * Datagram doesn't fit in the datagram buffer
			 * of maximal size. We drop it.
			 */
			dev_dbg(vmci_dev->dev,
				"Failed to receive datagram (size=%u bytes)\n",
				 dg_in_size);

			bytes_to_skip = dg_in_size - remaining_bytes;
			if (current_dg_in_buffer_size != dg_in_buffer_size)
				current_dg_in_buffer_size = dg_in_buffer_size;

			for (;;) {
				ioread8_rep(vmci_dev->iobase +
						VMCI_DATA_IN_ADDR,
					vmci_dev->data_buffer,
					current_dg_in_buffer_size);
				if (bytes_to_skip <= current_dg_in_buffer_size)
					break;

				bytes_to_skip -= current_dg_in_buffer_size;
			}
			dg = (struct vmci_datagram *)(dg_in_buffer +
						      bytes_to_skip);
		}

		remaining_bytes =
		    (size_t) (dg_in_buffer + current_dg_in_buffer_size -
			      (u8 *)dg);

		if (remaining_bytes < VMCI_DG_HEADERSIZE) {
			/* Get the next batch of datagrams. */

			ioread8_rep(vmci_dev->iobase + VMCI_DATA_IN_ADDR,
				    vmci_dev->data_buffer,
				    current_dg_in_buffer_size);
			dg = (struct vmci_datagram *)dg_in_buffer;
			remaining_bytes = current_dg_in_buffer_size;
		}
	}
}

/*
 * Scans the notification bitmap for raised flags, clears them
 * and handles the notifications.
 */
static void vmci_process_bitmap(unsigned long data)
{
	struct vmci_guest_device *dev = (struct vmci_guest_device *)data;

	if (!dev->notification_bitmap) {
		dev_dbg(dev->dev, "No bitmap present in %s\n", __func__);
		return;
	}

	vmci_dbell_scan_notification_entries(dev->notification_bitmap);
}

/*
 * Interrupt handler for legacy or MSI interrupt, or for first MSI-X
 * interrupt (vector VMCI_INTR_DATAGRAM).
 */
static irqreturn_t vmci_interrupt(int irq, void *_dev)
{
	struct vmci_guest_device *dev = _dev;

	/*
	 * If we are using MSI-X with exclusive vectors then we simply schedule
	 * the datagram tasklet, since we know the interrupt was meant for us.
	 * Otherwise we must read the ICR to determine what to do.
	 */

	if (dev->exclusive_vectors) {
		tasklet_schedule(&dev->datagram_tasklet);
	} else {
		unsigned int icr;

		/* Acknowledge interrupt and determine what needs doing. */
		icr = vmci_read_reg(dev, VMCI_ICR_ADDR);
		if (icr == 0 || icr == ~0)
			return IRQ_NONE;

		if (icr & VMCI_ICR_DATAGRAM) {
			tasklet_schedule(&dev->datagram_tasklet);
			icr &= ~VMCI_ICR_DATAGRAM;
		}

		if (icr & VMCI_ICR_NOTIFICATION) {
			tasklet_schedule(&dev->bm_tasklet);
			icr &= ~VMCI_ICR_NOTIFICATION;
		}

		if (icr != 0)
			dev_warn(dev->dev,
				 "Ignoring unknown interrupt cause (%d)\n",
				 icr);
	}

	return IRQ_HANDLED;
}

/*
 * Interrupt handler for MSI-X interrupt vector VMCI_INTR_NOTIFICATION,
 * which is for the notification bitmap.  Will only get called if we are
 * using MSI-X with exclusive vectors.
 */
static irqreturn_t vmci_interrupt_bm(int irq, void *_dev)
{
	struct vmci_guest_device *dev = _dev;

	/* For MSI-X we can just assume it was meant for us. */
	tasklet_schedule(&dev->bm_tasklet);

	return IRQ_HANDLED;
}

/*
 * Most of the initialization at module load time is done here.
 */
static int vmci_guest_probe_device(struct pci_dev *pdev,
				   const struct pci_device_id *id)
{
	struct vmci_guest_device *vmci_dev;
	void __iomem *iobase = NULL;
	void __iomem *mmio_base = NULL;
	unsigned int capabilities;
	unsigned int caps_in_use;
	unsigned long cmd;
	int vmci_err;
	int error;

	dev_dbg(&pdev->dev, "Probing for vmci/PCI guest device\n");

	error = pcim_enable_device(pdev);
	if (error) {
		dev_err(&pdev->dev,
			"Failed to enable VMCI device: %d\n", error);
		return error;
	}

	/*
	 * A VMCI device with MMIO access to registers requests 256KB
	 * for BAR1. If that BAR is present, the driver will use the new
	 * VMCI device functionality for register access and datagram
	 * send/recv.
	 */

	if (pci_resource_len(pdev, 1) == VMCI_WITH_MMIO_ACCESS_BAR_SIZE) {
		dev_info(&pdev->dev, "MMIO register access is available\n");
		mmio_base = pci_iomap_range(pdev, 1, VMCI_MMIO_ACCESS_OFFSET,
					    VMCI_MMIO_ACCESS_SIZE);
		/* If the map fails, we fall back to IOIO access. */
		if (!mmio_base)
			dev_warn(&pdev->dev, "Failed to map MMIO register access\n");
	}

	if (!mmio_base) {
		error = pcim_iomap_regions(pdev, BIT(0), KBUILD_MODNAME);
		if (error) {
			dev_err(&pdev->dev, "Failed to reserve/map IO regions\n");
			return error;
		}
		iobase = pcim_iomap_table(pdev)[0];
	}

	vmci_dev = devm_kzalloc(&pdev->dev, sizeof(*vmci_dev), GFP_KERNEL);
	if (!vmci_dev) {
		dev_err(&pdev->dev,
			"Can't allocate memory for VMCI device\n");
		return -ENOMEM;
	}

	vmci_dev->dev = &pdev->dev;
	vmci_dev->exclusive_vectors = false;
	vmci_dev->iobase = iobase;
	vmci_dev->mmio_base = mmio_base;

	tasklet_init(&vmci_dev->datagram_tasklet,
		     vmci_dispatch_dgs, (unsigned long)vmci_dev);
	tasklet_init(&vmci_dev->bm_tasklet,
		     vmci_process_bitmap, (unsigned long)vmci_dev);

	vmci_dev->data_buffer = vmalloc(VMCI_MAX_DG_SIZE);
	if (!vmci_dev->data_buffer) {
		dev_err(&pdev->dev,
			"Can't allocate memory for datagram buffer\n");
		return -ENOMEM;
	}

	pci_set_master(pdev);	/* To enable queue_pair functionality. */

	/*
	 * Verify that the VMCI Device supports the capabilities that
	 * we need. If the device is missing capabilities that we would
	 * like to use, check for fallback capabilities and use those
	 * instead (so we can run a new VM on old hosts). Fail the load if
	 * a required capability is missing and there is no fallback.
	 *
	 * Right now, we need datagrams. There are no fallbacks.
	 */
	capabilities = vmci_read_reg(vmci_dev, VMCI_CAPS_ADDR);
	if (!(capabilities & VMCI_CAPS_DATAGRAM)) {
		dev_err(&pdev->dev, "Device does not support datagrams\n");
		error = -ENXIO;
		goto err_free_data_buffer;
	}
	caps_in_use = VMCI_CAPS_DATAGRAM;

	/*
	 * Use 64-bit PPNs if the device supports them.
	 *
	 * The return value of dma_set_mask_and_coherent() is not checked
	 * since this driver can handle the default mask values if
	 * dma_set_mask_and_coherent() fails.
	 */
	if (capabilities & VMCI_CAPS_PPN64) {
		dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
		use_ppn64 = true;
		caps_in_use |= VMCI_CAPS_PPN64;
	} else {
		dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(44));
		use_ppn64 = false;
	}

	/*
	 * If the hardware supports notifications, we will use those as
	 * well.
	 */
	if (capabilities & VMCI_CAPS_NOTIFICATIONS) {
		vmci_dev->notification_bitmap = dma_alloc_coherent(
			&pdev->dev, PAGE_SIZE, &vmci_dev->notification_base,
			GFP_KERNEL);
		if (!vmci_dev->notification_bitmap) {
			dev_warn(&pdev->dev,
				 "Unable to allocate notification bitmap\n");
		} else {
			memset(vmci_dev->notification_bitmap, 0, PAGE_SIZE);
			caps_in_use |= VMCI_CAPS_NOTIFICATIONS;
		}
	}

	if (mmio_base != NULL) {
		if (capabilities & VMCI_CAPS_DMA_DATAGRAM) {
			caps_in_use |= VMCI_CAPS_DMA_DATAGRAM;
		} else {
			dev_err(&pdev->dev,
				"Missing capability: VMCI_CAPS_DMA_DATAGRAM\n");
			error = -ENXIO;
			goto err_free_data_buffer;
		}
	}

	dev_info(&pdev->dev, "Using capabilities 0x%x\n", caps_in_use);

	/* Let the host know which capabilities we intend to use. */
	vmci_write_reg(vmci_dev, caps_in_use, VMCI_CAPS_ADDR);

	/* Set up global device so that we can start sending datagrams */
	spin_lock_irq(&vmci_dev_spinlock);
	vmci_dev_g = vmci_dev;
	vmci_pdev = pdev;
	spin_unlock_irq(&vmci_dev_spinlock);

	/*
	 * Register notification bitmap with device if that capability is
	 * used.
	 */
	if (caps_in_use & VMCI_CAPS_NOTIFICATIONS) {
		unsigned long bitmap_ppn =
			vmci_dev->notification_base >> PAGE_SHIFT;
		if (!vmci_dbell_register_notification_bitmap(bitmap_ppn)) {
			dev_warn(&pdev->dev,
				 "VMCI device unable to register notification bitmap with PPN 0x%lx\n",
				 bitmap_ppn);
			error = -ENXIO;
			goto err_remove_vmci_dev_g;
		}
	}

	/* Check host capabilities. */
	error = vmci_check_host_caps(pdev);
	if (error)
		goto err_remove_bitmap;

	/* Enable device. */

	/*
	 * We subscribe to the VMCI_EVENT_CTX_ID_UPDATE here so we can
	 * update the internal context id when needed.
	 */
	vmci_err = vmci_event_subscribe(VMCI_EVENT_CTX_ID_UPDATE,
					vmci_guest_cid_update, NULL,
					&ctx_update_sub_id);
	if (vmci_err < VMCI_SUCCESS)
		dev_warn(&pdev->dev,
			 "Failed to subscribe to event (type=%d): %d\n",
			 VMCI_EVENT_CTX_ID_UPDATE, vmci_err);

	/*
	 * Enable interrupts.  Try MSI-X first, then MSI, and then fall
	 * back to legacy interrupts.
	 */
	error = pci_alloc_irq_vectors(pdev, VMCI_MAX_INTRS, VMCI_MAX_INTRS,
			PCI_IRQ_MSIX);
	if (error < 0) {
		error = pci_alloc_irq_vectors(pdev, 1, 1,
				PCI_IRQ_MSIX | PCI_IRQ_MSI | PCI_IRQ_LEGACY);
		if (error < 0)
			goto err_remove_bitmap;
	} else {
		vmci_dev->exclusive_vectors = true;
	}

	/*
	 * Request IRQ for legacy or MSI interrupts, or for first
	 * MSI-X vector.
	 */
	error = request_irq(pci_irq_vector(pdev, 0), vmci_interrupt,
			    IRQF_SHARED, KBUILD_MODNAME, vmci_dev);
	if (error) {
		dev_err(&pdev->dev, "Irq %u in use: %d\n",
			pci_irq_vector(pdev, 0), error);
		goto err_disable_msi;
	}

	/*
	 * For MSI-X with exclusive vectors we need to request an
	 * interrupt for each vector so that we get a separate
	 * interrupt handler routine.  This allows us to distinguish
	 * between the vectors.
	 */
	if (vmci_dev->exclusive_vectors) {
		error = request_irq(pci_irq_vector(pdev, 1),
				    vmci_interrupt_bm, 0, KBUILD_MODNAME,
				    vmci_dev);
		if (error) {
			dev_err(&pdev->dev,
				"Failed to allocate irq %u: %d\n",
				pci_irq_vector(pdev, 1), error);
			goto err_free_irq;
		}
	}

	dev_dbg(&pdev->dev, "Registered device\n");

	atomic_inc(&vmci_num_guest_devices);

	/* Enable specific interrupt bits. */
	cmd = VMCI_IMR_DATAGRAM;
	if (caps_in_use & VMCI_CAPS_NOTIFICATIONS)
		cmd |= VMCI_IMR_NOTIFICATION;
	vmci_write_reg(vmci_dev, cmd, VMCI_IMR_ADDR);

	/* Enable interrupts. */
	vmci_write_reg(vmci_dev, VMCI_CONTROL_INT_ENABLE, VMCI_CONTROL_ADDR);

	pci_set_drvdata(pdev, vmci_dev);

	vmci_call_vsock_callback(false);
	return 0;

err_free_irq:
	free_irq(pci_irq_vector(pdev, 0), vmci_dev);
	tasklet_kill(&vmci_dev->datagram_tasklet);
	tasklet_kill(&vmci_dev->bm_tasklet);

err_disable_msi:
	pci_free_irq_vectors(pdev);

	vmci_err = vmci_event_unsubscribe(ctx_update_sub_id);
	if (vmci_err < VMCI_SUCCESS)
		dev_warn(&pdev->dev,
			 "Failed to unsubscribe from event (type=%d) with subscriber (ID=0x%x): %d\n",
			 VMCI_EVENT_CTX_ID_UPDATE, ctx_update_sub_id, vmci_err);

err_remove_bitmap:
	if (vmci_dev->notification_bitmap) {
		vmci_write_reg(vmci_dev, VMCI_CONTROL_RESET, VMCI_CONTROL_ADDR);
		dma_free_coherent(&pdev->dev, PAGE_SIZE,
				  vmci_dev->notification_bitmap,
				  vmci_dev->notification_base);
	}

err_remove_vmci_dev_g:
	spin_lock_irq(&vmci_dev_spinlock);
	vmci_pdev = NULL;
	vmci_dev_g = NULL;
	spin_unlock_irq(&vmci_dev_spinlock);

err_free_data_buffer:
	vfree(vmci_dev->data_buffer);

	/* The rest are managed resources and will be freed by PCI core */
	return error;
}

static void vmci_guest_remove_device(struct pci_dev *pdev)
{
	struct vmci_guest_device *vmci_dev = pci_get_drvdata(pdev);
	int vmci_err;

	dev_dbg(&pdev->dev, "Removing device\n");

	atomic_dec(&vmci_num_guest_devices);

	vmci_qp_guest_endpoints_exit();

	vmci_err = vmci_event_unsubscribe(ctx_update_sub_id);
	if (vmci_err < VMCI_SUCCESS)
		dev_warn(&pdev->dev,
			 "Failed to unsubscribe from event (type=%d) with subscriber (ID=0x%x): %d\n",
			 VMCI_EVENT_CTX_ID_UPDATE, ctx_update_sub_id, vmci_err);

	spin_lock_irq(&vmci_dev_spinlock);
	vmci_dev_g = NULL;
	vmci_pdev = NULL;
	spin_unlock_irq(&vmci_dev_spinlock);

	dev_dbg(&pdev->dev, "Resetting vmci device\n");
	vmci_write_reg(vmci_dev, VMCI_CONTROL_RESET, VMCI_CONTROL_ADDR);

	/*
	 * Free IRQ and then disable MSI/MSI-X as appropriate.  For
	 * MSI-X, we might have multiple vectors, each with its own
	 * IRQ, which we must free too.
	 */
	if (vmci_dev->exclusive_vectors)
		free_irq(pci_irq_vector(pdev, 1), vmci_dev);
	free_irq(pci_irq_vector(pdev, 0), vmci_dev);
	pci_free_irq_vectors(pdev);

	tasklet_kill(&vmci_dev->datagram_tasklet);
	tasklet_kill(&vmci_dev->bm_tasklet);

	if (vmci_dev->notification_bitmap) {
		/*
		 * The device reset above cleared the bitmap state of the
		 * device, so we can safely free it here.
		 */

		dma_free_coherent(&pdev->dev, PAGE_SIZE,
				  vmci_dev->notification_bitmap,
				  vmci_dev->notification_base);
	}

	vfree(vmci_dev->data_buffer);

	/* The rest are managed resources and will be freed by PCI core */
}

static const struct pci_device_id vmci_ids[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_VMWARE, PCI_DEVICE_ID_VMWARE_VMCI), },
	{ 0 },
};
MODULE_DEVICE_TABLE(pci, vmci_ids);

static struct pci_driver vmci_guest_driver = {
	.name		= KBUILD_MODNAME,
	.id_table	= vmci_ids,
	.probe		= vmci_guest_probe_device,
	.remove		= vmci_guest_remove_device,
};

int __init vmci_guest_init(void)
{
	return pci_register_driver(&vmci_guest_driver);
}

void __exit vmci_guest_exit(void)
{
	pci_unregister_driver(&vmci_guest_driver);
}

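/*
 * Note: vmci_guest_init()/vmci_guest_exit() are not module_init()/
 * module_exit() hooks themselves; they are expected to be called from the
 * common VMCI driver core, which decides whether the guest and/or host
 * personalities of the driver should be brought up.
 */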