xref: /openbmc/linux/drivers/misc/vmw_vmci/vmci_guest.c (revision 15a1fbdcfb519c2bd291ed01c6c94e0b89537a77)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * VMware VMCI Driver
4  *
5  * Copyright (C) 2012 VMware, Inc. All rights reserved.
6  */
7 
8 #include <linux/vmw_vmci_defs.h>
9 #include <linux/vmw_vmci_api.h>
10 #include <linux/moduleparam.h>
11 #include <linux/interrupt.h>
12 #include <linux/highmem.h>
13 #include <linux/kernel.h>
14 #include <linux/mm.h>
15 #include <linux/module.h>
16 #include <linux/sched.h>
17 #include <linux/slab.h>
18 #include <linux/init.h>
19 #include <linux/pci.h>
20 #include <linux/smp.h>
21 #include <linux/io.h>
22 #include <linux/vmalloc.h>
23 
24 #include "vmci_datagram.h"
25 #include "vmci_doorbell.h"
26 #include "vmci_context.h"
27 #include "vmci_driver.h"
28 #include "vmci_event.h"
29 
/* PCI device ID matched by this driver (see vmci_ids). */
#define PCI_DEVICE_ID_VMWARE_VMCI	0x0740

/* Number of resource IDs placed in the host capability query datagram. */
#define VMCI_UTIL_NUM_RESOURCES 1

/*
 * "disable_msi" module parameter. Registered with permission bits 0 and
 * left zero-initialised, so it is off unless set at load time.
 */
static bool vmci_disable_msi;
module_param_named(disable_msi, vmci_disable_msi, bool, 0);
MODULE_PARM_DESC(disable_msi, "Disable MSI use in driver - (default=0)");

/*
 * "disable_msix" module parameter. Registered with permission bits 0 and
 * left zero-initialised, so it is off unless set at load time.
 */
static bool vmci_disable_msix;
module_param_named(disable_msix, vmci_disable_msix, bool, 0);
MODULE_PARM_DESC(disable_msix, "Disable MSI-X use in driver - (default=0)");

/*
 * Subscription ID filled in by vmci_event_subscribe() for
 * VMCI_EVENT_CTX_ID_UPDATE during probe; VMCI_INVALID_ID until then.
 */
static u32 ctx_update_sub_id = VMCI_INVALID_ID;
/*
 * Cached context ID of this VM. Fetched on demand by
 * vmci_get_vm_context_id() and overwritten by vmci_guest_cid_update().
 */
static u32 vm_context_id = VMCI_INVALID_ID;
44 
/*
 * Per-device state of the VMCI guest driver. One instance is allocated in
 * vmci_guest_probe_device() and stored as the PCI driver data.
 */
struct vmci_guest_device {
	struct device *dev;	/* PCI device we are attached to */
	void __iomem *iobase;	/* Mapping of BAR 0, used for all register I/O */

	/* True when probe obtained VMCI_MAX_INTRS dedicated MSI-X vectors. */
	bool exclusive_vectors;

	/* Runs vmci_dispatch_dgs() to drain incoming datagrams. */
	struct tasklet_struct datagram_tasklet;
	/* Runs vmci_process_bitmap() to scan the notification bitmap. */
	struct tasklet_struct bm_tasklet;

	/* vmalloc()ed receive buffer of VMCI_MAX_DG_SIZE bytes. */
	void *data_buffer;
	/*
	 * PAGE_SIZE coherent DMA buffer holding the notification bitmap;
	 * NULL when notifications are not in use.
	 */
	void *notification_bitmap;
	/* DMA address of notification_bitmap. */
	dma_addr_t notification_base;
};
58 
59 static bool use_ppn64;
60 
61 bool vmci_use_ppn64(void)
62 {
63 	return use_ppn64;
64 }
65 
/*
 * Singleton device and supporting data.
 *
 * vmci_pdev and vmci_dev_g are published by probe and cleared by remove
 * (and by the probe error path), always under vmci_dev_spinlock.
 * vmci_send_datagram() reads vmci_dev_g under the same lock.
 */
struct pci_dev *vmci_pdev;
static struct vmci_guest_device *vmci_dev_g;
static DEFINE_SPINLOCK(vmci_dev_spinlock);

/* Incremented at the end of a successful probe, decremented on remove. */
static atomic_t vmci_num_guest_devices = ATOMIC_INIT(0);
72 
73 bool vmci_guest_code_active(void)
74 {
75 	return atomic_read(&vmci_num_guest_devices) != 0;
76 }
77 
78 u32 vmci_get_vm_context_id(void)
79 {
80 	if (vm_context_id == VMCI_INVALID_ID) {
81 		struct vmci_datagram get_cid_msg;
82 		get_cid_msg.dst =
83 		    vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
84 				     VMCI_GET_CONTEXT_ID);
85 		get_cid_msg.src = VMCI_ANON_SRC_HANDLE;
86 		get_cid_msg.payload_size = 0;
87 		vm_context_id = vmci_send_datagram(&get_cid_msg);
88 	}
89 	return vm_context_id;
90 }
91 
92 /*
93  * VM to hypervisor call mechanism. We use the standard VMware naming
94  * convention since shared code is calling this function as well.
95  */
96 int vmci_send_datagram(struct vmci_datagram *dg)
97 {
98 	unsigned long flags;
99 	int result;
100 
101 	/* Check args. */
102 	if (dg == NULL)
103 		return VMCI_ERROR_INVALID_ARGS;
104 
105 	/*
106 	 * Need to acquire spinlock on the device because the datagram
107 	 * data may be spread over multiple pages and the monitor may
108 	 * interleave device user rpc calls from multiple
109 	 * VCPUs. Acquiring the spinlock precludes that
110 	 * possibility. Disabling interrupts to avoid incoming
111 	 * datagrams during a "rep out" and possibly landing up in
112 	 * this function.
113 	 */
114 	spin_lock_irqsave(&vmci_dev_spinlock, flags);
115 
116 	if (vmci_dev_g) {
117 		iowrite8_rep(vmci_dev_g->iobase + VMCI_DATA_OUT_ADDR,
118 			     dg, VMCI_DG_SIZE(dg));
119 		result = ioread32(vmci_dev_g->iobase + VMCI_RESULT_LOW_ADDR);
120 	} else {
121 		result = VMCI_ERROR_UNAVAILABLE;
122 	}
123 
124 	spin_unlock_irqrestore(&vmci_dev_spinlock, flags);
125 
126 	return result;
127 }
128 EXPORT_SYMBOL_GPL(vmci_send_datagram);
129 
130 /*
131  * Gets called with the new context id if updated or resumed.
132  * Context id.
133  */
134 static void vmci_guest_cid_update(u32 sub_id,
135 				  const struct vmci_event_data *event_data,
136 				  void *client_data)
137 {
138 	const struct vmci_event_payld_ctx *ev_payload =
139 				vmci_event_data_const_payload(event_data);
140 
141 	if (sub_id != ctx_update_sub_id) {
142 		pr_devel("Invalid subscriber (ID=0x%x)\n", sub_id);
143 		return;
144 	}
145 
146 	if (!event_data || ev_payload->context_id == VMCI_INVALID_ID) {
147 		pr_devel("Invalid event data\n");
148 		return;
149 	}
150 
151 	pr_devel("Updating context from (ID=0x%x) to (ID=0x%x) on event (type=%d)\n",
152 		 vm_context_id, ev_payload->context_id, event_data->event);
153 
154 	vm_context_id = ev_payload->context_id;
155 }
156 
157 /*
158  * Verify that the host supports the hypercalls we need. If it does not,
159  * try to find fallback hypercalls and use those instead.  Returns
160  * true if required hypercalls (or fallback hypercalls) are
161  * supported by the host, false otherwise.
162  */
163 static int vmci_check_host_caps(struct pci_dev *pdev)
164 {
165 	bool result;
166 	struct vmci_resource_query_msg *msg;
167 	u32 msg_size = sizeof(struct vmci_resource_query_hdr) +
168 				VMCI_UTIL_NUM_RESOURCES * sizeof(u32);
169 	struct vmci_datagram *check_msg;
170 
171 	check_msg = kmalloc(msg_size, GFP_KERNEL);
172 	if (!check_msg) {
173 		dev_err(&pdev->dev, "%s: Insufficient memory\n", __func__);
174 		return -ENOMEM;
175 	}
176 
177 	check_msg->dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
178 					  VMCI_RESOURCES_QUERY);
179 	check_msg->src = VMCI_ANON_SRC_HANDLE;
180 	check_msg->payload_size = msg_size - VMCI_DG_HEADERSIZE;
181 	msg = (struct vmci_resource_query_msg *)VMCI_DG_PAYLOAD(check_msg);
182 
183 	msg->num_resources = VMCI_UTIL_NUM_RESOURCES;
184 	msg->resources[0] = VMCI_GET_CONTEXT_ID;
185 
186 	/* Checks that hyper calls are supported */
187 	result = vmci_send_datagram(check_msg) == 0x01;
188 	kfree(check_msg);
189 
190 	dev_dbg(&pdev->dev, "%s: Host capability check: %s\n",
191 		__func__, result ? "PASSED" : "FAILED");
192 
193 	/* We need the vector. There are no fallbacks. */
194 	return result ? 0 : -ENXIO;
195 }
196 
/*
 * Tasklet body: reads datagrams from the data in port and dispatches them.
 * We always start reading datagrams into only the first page of the
 * datagram buffer. If the datagrams don't fit into one page, we
 * use the maximum datagram buffer size for the remainder of the
 * invocation. This is a simple heuristic for not penalizing
 * small datagrams.
 *
 * @data is the struct vmci_guest_device pointer passed to tasklet_init().
 *
 * This function assumes that it has exclusive access to the data
 * in port for the duration of the call.
 */
static void vmci_dispatch_dgs(unsigned long data)
{
	struct vmci_guest_device *vmci_dev = (struct vmci_guest_device *)data;
	u8 *dg_in_buffer = vmci_dev->data_buffer;
	struct vmci_datagram *dg;
	/* Full capacity of the receive buffer. */
	size_t dg_in_buffer_size = VMCI_MAX_DG_SIZE;
	/* Portion of the buffer filled per read; grows to the full size on demand. */
	size_t current_dg_in_buffer_size = PAGE_SIZE;
	/* Bytes between dg and the end of the currently used buffer portion. */
	size_t remaining_bytes;

	BUILD_BUG_ON(VMCI_MAX_DG_SIZE < PAGE_SIZE);

	ioread8_rep(vmci_dev->iobase + VMCI_DATA_IN_ADDR,
		    vmci_dev->data_buffer, current_dg_in_buffer_size);
	dg = (struct vmci_datagram *)dg_in_buffer;
	remaining_bytes = current_dg_in_buffer_size;

	/*
	 * Keep going while the current position holds a datagram (destination
	 * resource is not VMCI_INVALID_ID) or more than a page of buffered
	 * data is still unexamined.
	 */
	while (dg->dst.resource != VMCI_INVALID_ID ||
	       remaining_bytes > PAGE_SIZE) {
		unsigned dg_in_size;

		/*
		 * When the input buffer spans multiple pages, a datagram can
		 * start on any page boundary in the buffer.
		 */
		if (dg->dst.resource == VMCI_INVALID_ID) {
			/* Nothing here: skip ahead to the next page boundary. */
			dg = (struct vmci_datagram *)roundup(
				(uintptr_t)dg + 1, PAGE_SIZE);
			remaining_bytes =
				(size_t)(dg_in_buffer +
					 current_dg_in_buffer_size -
					 (u8 *)dg);
			continue;
		}

		dg_in_size = VMCI_DG_SIZE_ALIGNED(dg);

		if (dg_in_size <= dg_in_buffer_size) {
			int result;

			/*
			 * If the remaining bytes in the datagram
			 * buffer doesn't contain the complete
			 * datagram, we first make sure we have enough
			 * room for it and then we read the remainder
			 * of the datagram and possibly any following
			 * datagrams.
			 */
			if (dg_in_size > remaining_bytes) {
				if (remaining_bytes !=
				    current_dg_in_buffer_size) {

					/*
					 * We move the partial
					 * datagram to the front and
					 * read the remainder of the
					 * datagram and possibly
					 * following calls into the
					 * following bytes.
					 */
					memmove(dg_in_buffer, dg_in_buffer +
						current_dg_in_buffer_size -
						remaining_bytes,
						remaining_bytes);
					dg = (struct vmci_datagram *)
					    dg_in_buffer;
				}

				/* From here on use the whole buffer for reads. */
				if (current_dg_in_buffer_size !=
				    dg_in_buffer_size)
					current_dg_in_buffer_size =
					    dg_in_buffer_size;

				/* Fill the buffer behind the partial datagram. */
				ioread8_rep(vmci_dev->iobase +
						VMCI_DATA_IN_ADDR,
					vmci_dev->data_buffer +
						remaining_bytes,
					current_dg_in_buffer_size -
						remaining_bytes);
			}

			/*
			 * We special case event datagrams from the
			 * hypervisor.
			 */
			if (dg->src.context == VMCI_HYPERVISOR_CONTEXT_ID &&
			    dg->dst.resource == VMCI_EVENT_HANDLER) {
				result = vmci_event_dispatch(dg);
			} else {
				result = vmci_datagram_invoke_guest_handler(dg);
			}
			/* A failed dispatch is only logged; processing continues. */
			if (result < VMCI_SUCCESS)
				dev_dbg(vmci_dev->dev,
					"Datagram with resource (ID=0x%x) failed (err=%d)\n",
					 dg->dst.resource, result);

			/* On to the next datagram. */
			dg = (struct vmci_datagram *)((u8 *)dg +
						      dg_in_size);
		} else {
			size_t bytes_to_skip;

			/*
			 * Datagram doesn't fit in datagram buffer of maximal
			 * size. We drop it.
			 */
			dev_dbg(vmci_dev->dev,
				"Failed to receive datagram (size=%u bytes)\n",
				 dg_in_size);

			/* Part of the datagram not yet read from the port. */
			bytes_to_skip = dg_in_size - remaining_bytes;
			if (current_dg_in_buffer_size != dg_in_buffer_size)
				current_dg_in_buffer_size = dg_in_buffer_size;

			/*
			 * Read and discard full buffers until the end of the
			 * dropped datagram lies inside the buffer just read.
			 */
			for (;;) {
				ioread8_rep(vmci_dev->iobase +
						VMCI_DATA_IN_ADDR,
					vmci_dev->data_buffer,
					current_dg_in_buffer_size);
				if (bytes_to_skip <= current_dg_in_buffer_size)
					break;

				bytes_to_skip -= current_dg_in_buffer_size;
			}
			/* Resume right after the dropped datagram. */
			dg = (struct vmci_datagram *)(dg_in_buffer +
						      bytes_to_skip);
		}

		remaining_bytes =
		    (size_t) (dg_in_buffer + current_dg_in_buffer_size -
			      (u8 *)dg);

		if (remaining_bytes < VMCI_DG_HEADERSIZE) {
			/* Get the next batch of datagrams. */

			ioread8_rep(vmci_dev->iobase + VMCI_DATA_IN_ADDR,
				    vmci_dev->data_buffer,
				    current_dg_in_buffer_size);
			dg = (struct vmci_datagram *)dg_in_buffer;
			remaining_bytes = current_dg_in_buffer_size;
		}
	}
}
350 
351 /*
352  * Scans the notification bitmap for raised flags, clears them
353  * and handles the notifications.
354  */
355 static void vmci_process_bitmap(unsigned long data)
356 {
357 	struct vmci_guest_device *dev = (struct vmci_guest_device *)data;
358 
359 	if (!dev->notification_bitmap) {
360 		dev_dbg(dev->dev, "No bitmap present in %s\n", __func__);
361 		return;
362 	}
363 
364 	vmci_dbell_scan_notification_entries(dev->notification_bitmap);
365 }
366 
367 /*
368  * Interrupt handler for legacy or MSI interrupt, or for first MSI-X
369  * interrupt (vector VMCI_INTR_DATAGRAM).
370  */
371 static irqreturn_t vmci_interrupt(int irq, void *_dev)
372 {
373 	struct vmci_guest_device *dev = _dev;
374 
375 	/*
376 	 * If we are using MSI-X with exclusive vectors then we simply schedule
377 	 * the datagram tasklet, since we know the interrupt was meant for us.
378 	 * Otherwise we must read the ICR to determine what to do.
379 	 */
380 
381 	if (dev->exclusive_vectors) {
382 		tasklet_schedule(&dev->datagram_tasklet);
383 	} else {
384 		unsigned int icr;
385 
386 		/* Acknowledge interrupt and determine what needs doing. */
387 		icr = ioread32(dev->iobase + VMCI_ICR_ADDR);
388 		if (icr == 0 || icr == ~0)
389 			return IRQ_NONE;
390 
391 		if (icr & VMCI_ICR_DATAGRAM) {
392 			tasklet_schedule(&dev->datagram_tasklet);
393 			icr &= ~VMCI_ICR_DATAGRAM;
394 		}
395 
396 		if (icr & VMCI_ICR_NOTIFICATION) {
397 			tasklet_schedule(&dev->bm_tasklet);
398 			icr &= ~VMCI_ICR_NOTIFICATION;
399 		}
400 
401 		if (icr != 0)
402 			dev_warn(dev->dev,
403 				 "Ignoring unknown interrupt cause (%d)\n",
404 				 icr);
405 	}
406 
407 	return IRQ_HANDLED;
408 }
409 
410 /*
411  * Interrupt handler for MSI-X interrupt vector VMCI_INTR_NOTIFICATION,
412  * which is for the notification bitmap.  Will only get called if we are
413  * using MSI-X with exclusive vectors.
414  */
415 static irqreturn_t vmci_interrupt_bm(int irq, void *_dev)
416 {
417 	struct vmci_guest_device *dev = _dev;
418 
419 	/* For MSI-X we can just assume it was meant for us. */
420 	tasklet_schedule(&dev->bm_tasklet);
421 
422 	return IRQ_HANDLED;
423 }
424 
425 /*
426  * Most of the initialization at module load time is done here.
427  */
428 static int vmci_guest_probe_device(struct pci_dev *pdev,
429 				   const struct pci_device_id *id)
430 {
431 	struct vmci_guest_device *vmci_dev;
432 	void __iomem *iobase;
433 	unsigned int capabilities;
434 	unsigned int caps_in_use;
435 	unsigned long cmd;
436 	int vmci_err;
437 	int error;
438 
439 	dev_dbg(&pdev->dev, "Probing for vmci/PCI guest device\n");
440 
441 	error = pcim_enable_device(pdev);
442 	if (error) {
443 		dev_err(&pdev->dev,
444 			"Failed to enable VMCI device: %d\n", error);
445 		return error;
446 	}
447 
448 	error = pcim_iomap_regions(pdev, 1 << 0, KBUILD_MODNAME);
449 	if (error) {
450 		dev_err(&pdev->dev, "Failed to reserve/map IO regions\n");
451 		return error;
452 	}
453 
454 	iobase = pcim_iomap_table(pdev)[0];
455 
456 	dev_info(&pdev->dev, "Found VMCI PCI device at %#lx, irq %u\n",
457 		 (unsigned long)iobase, pdev->irq);
458 
459 	vmci_dev = devm_kzalloc(&pdev->dev, sizeof(*vmci_dev), GFP_KERNEL);
460 	if (!vmci_dev) {
461 		dev_err(&pdev->dev,
462 			"Can't allocate memory for VMCI device\n");
463 		return -ENOMEM;
464 	}
465 
466 	vmci_dev->dev = &pdev->dev;
467 	vmci_dev->exclusive_vectors = false;
468 	vmci_dev->iobase = iobase;
469 
470 	tasklet_init(&vmci_dev->datagram_tasklet,
471 		     vmci_dispatch_dgs, (unsigned long)vmci_dev);
472 	tasklet_init(&vmci_dev->bm_tasklet,
473 		     vmci_process_bitmap, (unsigned long)vmci_dev);
474 
475 	vmci_dev->data_buffer = vmalloc(VMCI_MAX_DG_SIZE);
476 	if (!vmci_dev->data_buffer) {
477 		dev_err(&pdev->dev,
478 			"Can't allocate memory for datagram buffer\n");
479 		return -ENOMEM;
480 	}
481 
482 	pci_set_master(pdev);	/* To enable queue_pair functionality. */
483 
484 	/*
485 	 * Verify that the VMCI Device supports the capabilities that
486 	 * we need. If the device is missing capabilities that we would
487 	 * like to use, check for fallback capabilities and use those
488 	 * instead (so we can run a new VM on old hosts). Fail the load if
489 	 * a required capability is missing and there is no fallback.
490 	 *
491 	 * Right now, we need datagrams. There are no fallbacks.
492 	 */
493 	capabilities = ioread32(vmci_dev->iobase + VMCI_CAPS_ADDR);
494 	if (!(capabilities & VMCI_CAPS_DATAGRAM)) {
495 		dev_err(&pdev->dev, "Device does not support datagrams\n");
496 		error = -ENXIO;
497 		goto err_free_data_buffer;
498 	}
499 	caps_in_use = VMCI_CAPS_DATAGRAM;
500 
501 	/*
502 	 * Use 64-bit PPNs if the device supports.
503 	 *
504 	 * There is no check for the return value of dma_set_mask_and_coherent
505 	 * since this driver can handle the default mask values if
506 	 * dma_set_mask_and_coherent fails.
507 	 */
508 	if (capabilities & VMCI_CAPS_PPN64) {
509 		dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
510 		use_ppn64 = true;
511 		caps_in_use |= VMCI_CAPS_PPN64;
512 	} else {
513 		dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(44));
514 		use_ppn64 = false;
515 	}
516 
517 	/*
518 	 * If the hardware supports notifications, we will use that as
519 	 * well.
520 	 */
521 	if (capabilities & VMCI_CAPS_NOTIFICATIONS) {
522 		vmci_dev->notification_bitmap = dma_alloc_coherent(
523 			&pdev->dev, PAGE_SIZE, &vmci_dev->notification_base,
524 			GFP_KERNEL);
525 		if (!vmci_dev->notification_bitmap) {
526 			dev_warn(&pdev->dev,
527 				 "Unable to allocate notification bitmap\n");
528 		} else {
529 			memset(vmci_dev->notification_bitmap, 0, PAGE_SIZE);
530 			caps_in_use |= VMCI_CAPS_NOTIFICATIONS;
531 		}
532 	}
533 
534 	dev_info(&pdev->dev, "Using capabilities 0x%x\n", caps_in_use);
535 
536 	/* Let the host know which capabilities we intend to use. */
537 	iowrite32(caps_in_use, vmci_dev->iobase + VMCI_CAPS_ADDR);
538 
539 	/* Set up global device so that we can start sending datagrams */
540 	spin_lock_irq(&vmci_dev_spinlock);
541 	vmci_dev_g = vmci_dev;
542 	vmci_pdev = pdev;
543 	spin_unlock_irq(&vmci_dev_spinlock);
544 
545 	/*
546 	 * Register notification bitmap with device if that capability is
547 	 * used.
548 	 */
549 	if (caps_in_use & VMCI_CAPS_NOTIFICATIONS) {
550 		unsigned long bitmap_ppn =
551 			vmci_dev->notification_base >> PAGE_SHIFT;
552 		if (!vmci_dbell_register_notification_bitmap(bitmap_ppn)) {
553 			dev_warn(&pdev->dev,
554 				 "VMCI device unable to register notification bitmap with PPN 0x%lx\n",
555 				 bitmap_ppn);
556 			error = -ENXIO;
557 			goto err_remove_vmci_dev_g;
558 		}
559 	}
560 
561 	/* Check host capabilities. */
562 	error = vmci_check_host_caps(pdev);
563 	if (error)
564 		goto err_remove_bitmap;
565 
566 	/* Enable device. */
567 
568 	/*
569 	 * We subscribe to the VMCI_EVENT_CTX_ID_UPDATE here so we can
570 	 * update the internal context id when needed.
571 	 */
572 	vmci_err = vmci_event_subscribe(VMCI_EVENT_CTX_ID_UPDATE,
573 					vmci_guest_cid_update, NULL,
574 					&ctx_update_sub_id);
575 	if (vmci_err < VMCI_SUCCESS)
576 		dev_warn(&pdev->dev,
577 			 "Failed to subscribe to event (type=%d): %d\n",
578 			 VMCI_EVENT_CTX_ID_UPDATE, vmci_err);
579 
580 	/*
581 	 * Enable interrupts.  Try MSI-X first, then MSI, and then fallback on
582 	 * legacy interrupts.
583 	 */
584 	error = pci_alloc_irq_vectors(pdev, VMCI_MAX_INTRS, VMCI_MAX_INTRS,
585 			PCI_IRQ_MSIX);
586 	if (error < 0) {
587 		error = pci_alloc_irq_vectors(pdev, 1, 1,
588 				PCI_IRQ_MSIX | PCI_IRQ_MSI | PCI_IRQ_LEGACY);
589 		if (error < 0)
590 			goto err_remove_bitmap;
591 	} else {
592 		vmci_dev->exclusive_vectors = true;
593 	}
594 
595 	/*
596 	 * Request IRQ for legacy or MSI interrupts, or for first
597 	 * MSI-X vector.
598 	 */
599 	error = request_irq(pci_irq_vector(pdev, 0), vmci_interrupt,
600 			    IRQF_SHARED, KBUILD_MODNAME, vmci_dev);
601 	if (error) {
602 		dev_err(&pdev->dev, "Irq %u in use: %d\n",
603 			pci_irq_vector(pdev, 0), error);
604 		goto err_disable_msi;
605 	}
606 
607 	/*
608 	 * For MSI-X with exclusive vectors we need to request an
609 	 * interrupt for each vector so that we get a separate
610 	 * interrupt handler routine.  This allows us to distinguish
611 	 * between the vectors.
612 	 */
613 	if (vmci_dev->exclusive_vectors) {
614 		error = request_irq(pci_irq_vector(pdev, 1),
615 				    vmci_interrupt_bm, 0, KBUILD_MODNAME,
616 				    vmci_dev);
617 		if (error) {
618 			dev_err(&pdev->dev,
619 				"Failed to allocate irq %u: %d\n",
620 				pci_irq_vector(pdev, 1), error);
621 			goto err_free_irq;
622 		}
623 	}
624 
625 	dev_dbg(&pdev->dev, "Registered device\n");
626 
627 	atomic_inc(&vmci_num_guest_devices);
628 
629 	/* Enable specific interrupt bits. */
630 	cmd = VMCI_IMR_DATAGRAM;
631 	if (caps_in_use & VMCI_CAPS_NOTIFICATIONS)
632 		cmd |= VMCI_IMR_NOTIFICATION;
633 	iowrite32(cmd, vmci_dev->iobase + VMCI_IMR_ADDR);
634 
635 	/* Enable interrupts. */
636 	iowrite32(VMCI_CONTROL_INT_ENABLE,
637 		  vmci_dev->iobase + VMCI_CONTROL_ADDR);
638 
639 	pci_set_drvdata(pdev, vmci_dev);
640 
641 	vmci_call_vsock_callback(false);
642 	return 0;
643 
644 err_free_irq:
645 	free_irq(pci_irq_vector(pdev, 0), vmci_dev);
646 	tasklet_kill(&vmci_dev->datagram_tasklet);
647 	tasklet_kill(&vmci_dev->bm_tasklet);
648 
649 err_disable_msi:
650 	pci_free_irq_vectors(pdev);
651 
652 	vmci_err = vmci_event_unsubscribe(ctx_update_sub_id);
653 	if (vmci_err < VMCI_SUCCESS)
654 		dev_warn(&pdev->dev,
655 			 "Failed to unsubscribe from event (type=%d) with subscriber (ID=0x%x): %d\n",
656 			 VMCI_EVENT_CTX_ID_UPDATE, ctx_update_sub_id, vmci_err);
657 
658 err_remove_bitmap:
659 	if (vmci_dev->notification_bitmap) {
660 		iowrite32(VMCI_CONTROL_RESET,
661 			  vmci_dev->iobase + VMCI_CONTROL_ADDR);
662 		dma_free_coherent(&pdev->dev, PAGE_SIZE,
663 				  vmci_dev->notification_bitmap,
664 				  vmci_dev->notification_base);
665 	}
666 
667 err_remove_vmci_dev_g:
668 	spin_lock_irq(&vmci_dev_spinlock);
669 	vmci_pdev = NULL;
670 	vmci_dev_g = NULL;
671 	spin_unlock_irq(&vmci_dev_spinlock);
672 
673 err_free_data_buffer:
674 	vfree(vmci_dev->data_buffer);
675 
676 	/* The rest are managed resources and will be freed by PCI core */
677 	return error;
678 }
679 
680 static void vmci_guest_remove_device(struct pci_dev *pdev)
681 {
682 	struct vmci_guest_device *vmci_dev = pci_get_drvdata(pdev);
683 	int vmci_err;
684 
685 	dev_dbg(&pdev->dev, "Removing device\n");
686 
687 	atomic_dec(&vmci_num_guest_devices);
688 
689 	vmci_qp_guest_endpoints_exit();
690 
691 	vmci_err = vmci_event_unsubscribe(ctx_update_sub_id);
692 	if (vmci_err < VMCI_SUCCESS)
693 		dev_warn(&pdev->dev,
694 			 "Failed to unsubscribe from event (type=%d) with subscriber (ID=0x%x): %d\n",
695 			 VMCI_EVENT_CTX_ID_UPDATE, ctx_update_sub_id, vmci_err);
696 
697 	spin_lock_irq(&vmci_dev_spinlock);
698 	vmci_dev_g = NULL;
699 	vmci_pdev = NULL;
700 	spin_unlock_irq(&vmci_dev_spinlock);
701 
702 	dev_dbg(&pdev->dev, "Resetting vmci device\n");
703 	iowrite32(VMCI_CONTROL_RESET, vmci_dev->iobase + VMCI_CONTROL_ADDR);
704 
705 	/*
706 	 * Free IRQ and then disable MSI/MSI-X as appropriate.  For
707 	 * MSI-X, we might have multiple vectors, each with their own
708 	 * IRQ, which we must free too.
709 	 */
710 	if (vmci_dev->exclusive_vectors)
711 		free_irq(pci_irq_vector(pdev, 1), vmci_dev);
712 	free_irq(pci_irq_vector(pdev, 0), vmci_dev);
713 	pci_free_irq_vectors(pdev);
714 
715 	tasklet_kill(&vmci_dev->datagram_tasklet);
716 	tasklet_kill(&vmci_dev->bm_tasklet);
717 
718 	if (vmci_dev->notification_bitmap) {
719 		/*
720 		 * The device reset above cleared the bitmap state of the
721 		 * device, so we can safely free it here.
722 		 */
723 
724 		dma_free_coherent(&pdev->dev, PAGE_SIZE,
725 				  vmci_dev->notification_bitmap,
726 				  vmci_dev->notification_base);
727 	}
728 
729 	vfree(vmci_dev->data_buffer);
730 
731 	/* The rest are managed resources and will be freed by PCI core */
732 }
733 
/*
 * PCI IDs this driver binds to: the VMware VMCI device. The all-zero entry
 * terminates the table.
 */
static const struct pci_device_id vmci_ids[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_VMWARE, PCI_DEVICE_ID_VMWARE_VMCI), },
	{ 0 },
};
MODULE_DEVICE_TABLE(pci, vmci_ids);
739 
/*
 * PCI driver description: ties the ID table above to the probe and remove
 * callbacks of this file. Registered by vmci_guest_init().
 */
static struct pci_driver vmci_guest_driver = {
	.name		= KBUILD_MODNAME,
	.id_table	= vmci_ids,
	.probe		= vmci_guest_probe_device,
	.remove		= vmci_guest_remove_device,
};
746 
747 int __init vmci_guest_init(void)
748 {
749 	return pci_register_driver(&vmci_guest_driver);
750 }
751 
752 void __exit vmci_guest_exit(void)
753 {
754 	pci_unregister_driver(&vmci_guest_driver);
755 }
756