// SPDX-License-Identifier: GPL-2.0-only
/*
 * VMware VMCI Driver
 *
 * Copyright (C) 2012 VMware, Inc. All rights reserved.
 */

#include <linux/vmw_vmci_defs.h>
#include <linux/vmw_vmci_api.h>
#include <linux/moduleparam.h>
#include <linux/interrupt.h>
#include <linux/highmem.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/processor.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/smp.h>
#include <linux/io.h>
#include <linux/vmalloc.h>

#include "vmci_datagram.h"
#include "vmci_doorbell.h"
#include "vmci_context.h"
#include "vmci_driver.h"
#include "vmci_event.h"

#define PCI_DEVICE_ID_VMWARE_VMCI	0x0740

#define VMCI_UTIL_NUM_RESOURCES 1

/*
 * Datagram buffers for DMA send/receive must accommodate at least
 * a maximum sized datagram and the header.
 */
#define VMCI_DMA_DG_BUFFER_SIZE (VMCI_MAX_DG_SIZE + PAGE_SIZE)
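
/*
 * Layout of a DMA datagram buffer (one coherent allocation of
 * VMCI_DMA_DG_BUFFER_SIZE bytes):
 *
 * - receive (data_buffer): a struct vmci_data_in_out_header plus an
 *   S/G element on the first page; the element points at the
 *   following pages, where incoming datagrams are placed.
 * - send (tx_buffer): a struct vmci_data_in_out_header immediately
 *   followed by the outgoing datagram as inline data.
 *
 * See vmci_read_data() and vmci_write_data() below.
 */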

static bool vmci_disable_msi;
module_param_named(disable_msi, vmci_disable_msi, bool, 0);
MODULE_PARM_DESC(disable_msi, "Disable MSI use in driver - (default=0)");

static bool vmci_disable_msix;
module_param_named(disable_msix, vmci_disable_msix, bool, 0);
MODULE_PARM_DESC(disable_msix, "Disable MSI-X use in driver - (default=0)");

static u32 ctx_update_sub_id = VMCI_INVALID_ID;
static u32 vm_context_id = VMCI_INVALID_ID;

struct vmci_guest_device {
	struct device *dev;	/* PCI device we are attached to */
	void __iomem *iobase;
	void __iomem *mmio_base;

	bool exclusive_vectors;

	struct wait_queue_head inout_wq;

	void *data_buffer;
	dma_addr_t data_buffer_base;
	void *tx_buffer;
	dma_addr_t tx_buffer_base;
	void *notification_bitmap;
	dma_addr_t notification_base;
};

static bool use_ppn64;

bool vmci_use_ppn64(void)
{
	return use_ppn64;
}

/* vmci_dev singleton device and supporting data */
struct pci_dev *vmci_pdev;
static struct vmci_guest_device *vmci_dev_g;
static DEFINE_SPINLOCK(vmci_dev_spinlock);

static atomic_t vmci_num_guest_devices = ATOMIC_INIT(0);

bool vmci_guest_code_active(void)
{
	return atomic_read(&vmci_num_guest_devices) != 0;
}

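/*
 * Returns the context ID of this VM, querying the hypervisor on first
 * use. The cached value is refreshed by vmci_guest_cid_update() when
 * the VM's context ID changes on update or resume (see the
 * VMCI_EVENT_CTX_ID_UPDATE subscription in vmci_guest_probe_device()).
 */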
u32 vmci_get_vm_context_id(void)
{
	if (vm_context_id == VMCI_INVALID_ID) {
		struct vmci_datagram get_cid_msg;
		get_cid_msg.dst =
		    vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
				     VMCI_GET_CONTEXT_ID);
		get_cid_msg.src = VMCI_ANON_SRC_HANDLE;
		get_cid_msg.payload_size = 0;
		vm_context_id = vmci_send_datagram(&get_cid_msg);
	}
	return vm_context_id;
}

static unsigned int vmci_read_reg(struct vmci_guest_device *dev, u32 reg)
{
	if (dev->mmio_base != NULL)
		return readl(dev->mmio_base + reg);
	return ioread32(dev->iobase + reg);
}

static void vmci_write_reg(struct vmci_guest_device *dev, u32 val, u32 reg)
{
	if (dev->mmio_base != NULL)
		writel(val, dev->mmio_base + reg);
	else
		iowrite32(val, dev->iobase + reg);
}

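/*
 * Reads a batch of incoming datagrams into dest. With port I/O, the
 * datagrams are transferred synchronously with a string read from
 * VMCI_DATA_IN_ADDR. With MMIO, a receive is posted by writing the
 * buffer's bus address to VMCI_DATA_IN_LOW_ADDR; the device DMAs the
 * datagrams into the buffer, sets the header's busy flag and raises
 * an interrupt (VMCI_ICR_DMA_DATAGRAM), which wakes inout_wq.
 */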
static void vmci_read_data(struct vmci_guest_device *vmci_dev,
			   void *dest, size_t size)
{
	if (vmci_dev->mmio_base == NULL)
		ioread8_rep(vmci_dev->iobase + VMCI_DATA_IN_ADDR,
			    dest, size);
	else {
		/*
		 * For DMA datagrams, the data_buffer will contain the header on the
		 * first page, followed by the incoming datagram(s) on the following
		 * pages. An S/G element immediately following the header on the
		 * first page points to the data area.
		 */
		struct vmci_data_in_out_header *buffer_header = vmci_dev->data_buffer;
		struct vmci_sg_elem *sg_array = (struct vmci_sg_elem *)(buffer_header + 1);
		size_t buffer_offset = dest - vmci_dev->data_buffer;

		buffer_header->opcode = 1;
		buffer_header->size = 1;
		buffer_header->busy = 0;
		sg_array[0].addr = vmci_dev->data_buffer_base + buffer_offset;
		sg_array[0].size = size;

		vmci_write_reg(vmci_dev, lower_32_bits(vmci_dev->data_buffer_base),
			       VMCI_DATA_IN_LOW_ADDR);

		wait_event(vmci_dev->inout_wq, buffer_header->busy == 1);
	}
}

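/*
 * Sends a datagram to the device. With port I/O, the datagram is
 * written synchronously to VMCI_DATA_OUT_ADDR. With MMIO, the
 * datagram is staged in tx_buffer and the send is posted by writing
 * the buffer's bus address to VMCI_DATA_OUT_LOW_ADDR; the device
 * clears the header's busy flag on completion. Called with
 * vmci_dev_spinlock held (see vmci_send_datagram()), so the
 * completion is spun on rather than slept on.
 */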
static int vmci_write_data(struct vmci_guest_device *dev,
			   struct vmci_datagram *dg)
{
	int result;

	if (dev->mmio_base != NULL) {
		struct vmci_data_in_out_header *buffer_header = dev->tx_buffer;
		u8 *dg_out_buffer = (u8 *)(buffer_header + 1);

		if (VMCI_DG_SIZE(dg) > VMCI_MAX_DG_SIZE)
			return VMCI_ERROR_INVALID_ARGS;

		/*
		 * Initialize send buffer with outgoing datagram
		 * and set up header for inline data. Device will
		 * not access buffer asynchronously - only after
		 * the write to VMCI_DATA_OUT_LOW_ADDR.
		 */
		memcpy(dg_out_buffer, dg, VMCI_DG_SIZE(dg));
		buffer_header->opcode = 0;
		buffer_header->size = VMCI_DG_SIZE(dg);
		buffer_header->busy = 1;

		vmci_write_reg(dev, lower_32_bits(dev->tx_buffer_base),
			       VMCI_DATA_OUT_LOW_ADDR);

		/* Caller holds a spinlock, so cannot block. */
		spin_until_cond(buffer_header->busy == 0);

		result = vmci_read_reg(vmci_dev_g, VMCI_RESULT_LOW_ADDR);
		if (result == VMCI_SUCCESS)
			result = (int)buffer_header->result;
	} else {
		iowrite8_rep(dev->iobase + VMCI_DATA_OUT_ADDR,
			     dg, VMCI_DG_SIZE(dg));
		result = vmci_read_reg(vmci_dev_g, VMCI_RESULT_LOW_ADDR);
	}

	return result;
}


/*
 * VM to hypervisor call mechanism. We use the standard VMware naming
 * convention since shared code is calling this function as well.
 */
int vmci_send_datagram(struct vmci_datagram *dg)
{
	unsigned long flags;
	int result;

	/* Check args. */
	if (dg == NULL)
		return VMCI_ERROR_INVALID_ARGS;

	/*
	 * Need to acquire spinlock on the device because the datagram
	 * data may be spread over multiple pages and the monitor may
	 * interleave device user rpc calls from multiple
	 * VCPUs. Acquiring the spinlock precludes that
	 * possibility. Disabling interrupts to avoid incoming
	 * datagrams during a "rep out" and possibly landing up in
	 * this function.
	 */
	spin_lock_irqsave(&vmci_dev_spinlock, flags);

	if (vmci_dev_g) {
		/*
		 * vmci_write_data() already reads back the result of
		 * the send, including the device result for DMA
		 * datagrams, so don't read VMCI_RESULT_LOW_ADDR again.
		 */
		result = vmci_write_data(vmci_dev_g, dg);
	} else {
		result = VMCI_ERROR_UNAVAILABLE;
	}

	spin_unlock_irqrestore(&vmci_dev_spinlock, flags);

	return result;
}
EXPORT_SYMBOL_GPL(vmci_send_datagram);
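
/*
 * Example (sketch) of a synchronous hypervisor call using the above,
 * mirroring vmci_get_vm_context_id():
 *
 *	struct vmci_datagram dg = {
 *		.dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
 *					resource),
 *		.src = VMCI_ANON_SRC_HANDLE,
 *		.payload_size = 0,
 *	};
 *	int retval = vmci_send_datagram(&dg);
 */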

/*
 * Gets called with the new context id if the context id was updated
 * or the VM was resumed.
 */
static void vmci_guest_cid_update(u32 sub_id,
				  const struct vmci_event_data *event_data,
				  void *client_data)
{
	const struct vmci_event_payld_ctx *ev_payload =
				vmci_event_data_const_payload(event_data);

	if (sub_id != ctx_update_sub_id) {
		pr_devel("Invalid subscriber (ID=0x%x)\n", sub_id);
		return;
	}

	if (!event_data || ev_payload->context_id == VMCI_INVALID_ID) {
		pr_devel("Invalid event data\n");
		return;
	}

	pr_devel("Updating context from (ID=0x%x) to (ID=0x%x) on event (type=%d)\n",
		 vm_context_id, ev_payload->context_id, event_data->event);

	vm_context_id = ev_payload->context_id;
}

/*
 * Verify that the host supports the hypercalls we need. If it does not,
 * try to find fallback hypercalls and use those instead.  Returns 0 if
 * required hypercalls (or fallback hypercalls) are supported by the host,
 * an error code otherwise.
 */
static int vmci_check_host_caps(struct pci_dev *pdev)
{
	bool result;
	struct vmci_resource_query_msg *msg;
	u32 msg_size = sizeof(struct vmci_resource_query_hdr) +
				VMCI_UTIL_NUM_RESOURCES * sizeof(u32);
	struct vmci_datagram *check_msg;

	check_msg = kzalloc(msg_size, GFP_KERNEL);
	if (!check_msg) {
		dev_err(&pdev->dev, "%s: Insufficient memory\n", __func__);
		return -ENOMEM;
	}

	check_msg->dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
					  VMCI_RESOURCES_QUERY);
	check_msg->src = VMCI_ANON_SRC_HANDLE;
	check_msg->payload_size = msg_size - VMCI_DG_HEADERSIZE;
	msg = (struct vmci_resource_query_msg *)VMCI_DG_PAYLOAD(check_msg);

	msg->num_resources = VMCI_UTIL_NUM_RESOURCES;
	msg->resources[0] = VMCI_GET_CONTEXT_ID;

	/*
	 * Checks that hypercalls are supported. A reply of 0x01 means
	 * the single resource queried above (VMCI_GET_CONTEXT_ID) is
	 * supported.
	 */
	result = vmci_send_datagram(check_msg) == 0x01;
	kfree(check_msg);

	dev_dbg(&pdev->dev, "%s: Host capability check: %s\n",
		__func__, result ? "PASSED" : "FAILED");

	/* We need the vector. There are no fallbacks. */
	return result ? 0 : -ENXIO;
}


/*
 * Reads datagrams from the device and dispatches them. For IO port
 * based access to the device, we always start reading datagrams into
 * only the first page of the datagram buffer. If the datagrams don't
 * fit into one page, we use the maximum datagram buffer size for the
 * remainder of the invocation. This is a simple heuristic for not
 * penalizing small datagrams. For DMA-based datagrams, we always
 * use the maximum datagram buffer size, since there is no performance
 * penalty for doing so.
 *
 * This function assumes that it has exclusive access to the data
 * in register(s) for the duration of the call.
 */
static void vmci_dispatch_dgs(struct vmci_guest_device *vmci_dev)
{
	u8 *dg_in_buffer = vmci_dev->data_buffer;
	struct vmci_datagram *dg;
	size_t dg_in_buffer_size = VMCI_MAX_DG_SIZE;
	size_t current_dg_in_buffer_size;
	size_t remaining_bytes;
	bool is_io_port = vmci_dev->mmio_base == NULL;

	BUILD_BUG_ON(VMCI_MAX_DG_SIZE < PAGE_SIZE);

	if (!is_io_port) {
		/* For mmio, the first page is used for the header. */
		dg_in_buffer += PAGE_SIZE;

		/*
		 * For DMA-based datagram operations, there is no performance
		 * penalty for reading the maximum buffer size.
		 */
		current_dg_in_buffer_size = VMCI_MAX_DG_SIZE;
	} else {
		current_dg_in_buffer_size = PAGE_SIZE;
	}
	vmci_read_data(vmci_dev, dg_in_buffer, current_dg_in_buffer_size);
	dg = (struct vmci_datagram *)dg_in_buffer;
	remaining_bytes = current_dg_in_buffer_size;

	/*
	 * Read through the buffer until an invalid datagram header is
	 * encountered. The exit condition for datagrams read through
	 * VMCI_DATA_IN_ADDR is a bit more complicated, since a datagram
	 * can start on any page boundary in the buffer.
	 */
	while (dg->dst.resource != VMCI_INVALID_ID ||
	       (is_io_port && remaining_bytes > PAGE_SIZE)) {
		unsigned int dg_in_size;

		/*
		 * If using VMCI_DATA_IN_ADDR, skip to the next page
		 * as a datagram can start on any page boundary.
		 */
		if (dg->dst.resource == VMCI_INVALID_ID) {
			dg = (struct vmci_datagram *)roundup(
				(uintptr_t)dg + 1, PAGE_SIZE);
			remaining_bytes =
				(size_t)(dg_in_buffer +
					 current_dg_in_buffer_size -
					 (u8 *)dg);
			continue;
		}

		dg_in_size = VMCI_DG_SIZE_ALIGNED(dg);

		if (dg_in_size <= dg_in_buffer_size) {
			int result;

			/*
			 * If the remaining bytes in the datagram
			 * buffer don't contain the complete
			 * datagram, we first make sure we have enough
			 * room for it and then we read the remainder
			 * of the datagram and possibly any following
			 * datagrams.
			 */
			if (dg_in_size > remaining_bytes) {
				if (remaining_bytes !=
				    current_dg_in_buffer_size) {

					/*
					 * We move the partial
					 * datagram to the front and
					 * read the remainder of the
					 * datagram, and possibly any
					 * following datagrams, into
					 * the following bytes.
					 */
					memmove(dg_in_buffer, dg_in_buffer +
						current_dg_in_buffer_size -
						remaining_bytes,
						remaining_bytes);
					dg = (struct vmci_datagram *)
					    dg_in_buffer;
				}

				if (current_dg_in_buffer_size !=
				    dg_in_buffer_size)
					current_dg_in_buffer_size =
					    dg_in_buffer_size;

				vmci_read_data(vmci_dev,
					       dg_in_buffer +
						remaining_bytes,
					       current_dg_in_buffer_size -
						remaining_bytes);
			}

			/*
			 * We special case event datagrams from the
			 * hypervisor.
			 */
			if (dg->src.context == VMCI_HYPERVISOR_CONTEXT_ID &&
			    dg->dst.resource == VMCI_EVENT_HANDLER) {
				result = vmci_event_dispatch(dg);
			} else {
				result = vmci_datagram_invoke_guest_handler(dg);
			}
			if (result < VMCI_SUCCESS)
				dev_dbg(vmci_dev->dev,
					"Datagram with resource (ID=0x%x) failed (err=%d)\n",
					 dg->dst.resource, result);

			/* On to the next datagram. */
			dg = (struct vmci_datagram *)((u8 *)dg +
						      dg_in_size);
		} else {
			size_t bytes_to_skip;

			/*
			 * Datagram doesn't fit in datagram buffer of maximal
			 * size. We drop it.
			 */
			dev_dbg(vmci_dev->dev,
				"Failed to receive datagram (size=%u bytes)\n",
				 dg_in_size);

			bytes_to_skip = dg_in_size - remaining_bytes;
			if (current_dg_in_buffer_size != dg_in_buffer_size)
				current_dg_in_buffer_size = dg_in_buffer_size;

			for (;;) {
				vmci_read_data(vmci_dev, dg_in_buffer,
					       current_dg_in_buffer_size);
				if (bytes_to_skip <= current_dg_in_buffer_size)
					break;

				bytes_to_skip -= current_dg_in_buffer_size;
			}
			dg = (struct vmci_datagram *)(dg_in_buffer +
						      bytes_to_skip);
		}

		remaining_bytes =
		    (size_t) (dg_in_buffer + current_dg_in_buffer_size -
			      (u8 *)dg);

		if (remaining_bytes < VMCI_DG_HEADERSIZE) {
			/* Get the next batch of datagrams. */

			vmci_read_data(vmci_dev, dg_in_buffer,
				    current_dg_in_buffer_size);
			dg = (struct vmci_datagram *)dg_in_buffer;
			remaining_bytes = current_dg_in_buffer_size;
		}
	}
}

/*
 * Scans the notification bitmap for raised flags, clears them
 * and handles the notifications.
 */
static void vmci_process_bitmap(struct vmci_guest_device *dev)
{
	if (!dev->notification_bitmap) {
		dev_dbg(dev->dev, "No bitmap present in %s\n", __func__);
		return;
	}

	vmci_dbell_scan_notification_entries(dev->notification_bitmap);
}

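/*
 * Interrupt sources and their MSI-X vectors, when exclusive vectors
 * are in use:
 *
 *	vector 0 (VMCI_INTR_DATAGRAM)     - incoming datagrams
 *	vector 1 (VMCI_INTR_NOTIFICATION) - doorbell notification bitmap
 *	vector 2 (VMCI_INTR_DMA_DATAGRAM) - DMA datagram send/receive
 *	                                    completion
 *
 * With a shared MSI-X vector, MSI or a legacy interrupt, a single
 * handler reads VMCI_ICR_ADDR to determine which sources fired.
 */
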
/*
 * Interrupt handler for legacy or MSI interrupt, or for first MSI-X
 * interrupt (vector VMCI_INTR_DATAGRAM).
 */
static irqreturn_t vmci_interrupt(int irq, void *_dev)
{
	struct vmci_guest_device *dev = _dev;

	/*
	 * If we are using MSI-X with exclusive vectors then we simply call
	 * vmci_dispatch_dgs(), since we know the interrupt was meant for us.
	 * Otherwise we must read the ICR to determine what to do.
	 */

	if (dev->exclusive_vectors) {
		vmci_dispatch_dgs(dev);
	} else {
		unsigned int icr;

		/* Acknowledge interrupt and determine what needs doing. */
		icr = vmci_read_reg(dev, VMCI_ICR_ADDR);
		if (icr == 0 || icr == ~0)
			return IRQ_NONE;

		if (icr & VMCI_ICR_DATAGRAM) {
			vmci_dispatch_dgs(dev);
			icr &= ~VMCI_ICR_DATAGRAM;
		}

		if (icr & VMCI_ICR_NOTIFICATION) {
			vmci_process_bitmap(dev);
			icr &= ~VMCI_ICR_NOTIFICATION;
		}

		if (icr & VMCI_ICR_DMA_DATAGRAM) {
			wake_up_all(&dev->inout_wq);
			icr &= ~VMCI_ICR_DMA_DATAGRAM;
		}

		if (icr != 0)
			dev_warn(dev->dev,
				 "Ignoring unknown interrupt cause (%d)\n",
				 icr);
	}

	return IRQ_HANDLED;
}

/*
 * Interrupt handler for MSI-X interrupt vector VMCI_INTR_NOTIFICATION,
 * which is for the notification bitmap.  Will only get called if we are
 * using MSI-X with exclusive vectors.
 */
static irqreturn_t vmci_interrupt_bm(int irq, void *_dev)
{
	struct vmci_guest_device *dev = _dev;

	/* For MSI-X we can just assume it was meant for us. */
	vmci_process_bitmap(dev);

	return IRQ_HANDLED;
}

/*
 * Interrupt handler for MSI-X interrupt vector VMCI_INTR_DMA_DATAGRAM,
 * which is for the completion of a DMA datagram send or receive operation.
 * Will only get called if we are using MSI-X with exclusive vectors.
 */
static irqreturn_t vmci_interrupt_dma_datagram(int irq, void *_dev)
{
	struct vmci_guest_device *dev = _dev;

	wake_up_all(&dev->inout_wq);

	return IRQ_HANDLED;
}

static void vmci_free_dg_buffers(struct vmci_guest_device *vmci_dev)
{
	if (vmci_dev->mmio_base != NULL) {
		if (vmci_dev->tx_buffer != NULL)
			dma_free_coherent(vmci_dev->dev,
					  VMCI_DMA_DG_BUFFER_SIZE,
					  vmci_dev->tx_buffer,
					  vmci_dev->tx_buffer_base);
		if (vmci_dev->data_buffer != NULL)
			dma_free_coherent(vmci_dev->dev,
					  VMCI_DMA_DG_BUFFER_SIZE,
					  vmci_dev->data_buffer,
					  vmci_dev->data_buffer_base);
	} else {
		vfree(vmci_dev->data_buffer);
	}
}

/*
 * Most of the initialization at module load time is done here.
 */
static int vmci_guest_probe_device(struct pci_dev *pdev,
				   const struct pci_device_id *id)
{
	struct vmci_guest_device *vmci_dev;
	void __iomem *iobase = NULL;
	void __iomem *mmio_base = NULL;
	unsigned int num_irq_vectors;
	unsigned int capabilities;
	unsigned int caps_in_use;
	unsigned long cmd;
	int vmci_err;
	int error;

	dev_dbg(&pdev->dev, "Probing for vmci/PCI guest device\n");

	error = pcim_enable_device(pdev);
	if (error) {
		dev_err(&pdev->dev,
			"Failed to enable VMCI device: %d\n", error);
		return error;
	}

	/*
	 * The VMCI device with mmio access to registers requests 256KB
	 * for BAR1. If present, driver will use new VMCI device
	 * functionality for register access and datagram send/recv.
	 */

	if (pci_resource_len(pdev, 1) == VMCI_WITH_MMIO_ACCESS_BAR_SIZE) {
		dev_info(&pdev->dev, "MMIO register access is available\n");
		mmio_base = pci_iomap_range(pdev, 1, VMCI_MMIO_ACCESS_OFFSET,
					    VMCI_MMIO_ACCESS_SIZE);
		/* If the map fails, we fall back to IOIO access. */
		if (!mmio_base)
			dev_warn(&pdev->dev, "Failed to map MMIO register access\n");
	}

	if (!mmio_base) {
		if (IS_ENABLED(CONFIG_ARM64)) {
			dev_err(&pdev->dev, "MMIO base is invalid\n");
			return -ENXIO;
		}
		error = pcim_iomap_regions(pdev, BIT(0), KBUILD_MODNAME);
		if (error) {
			dev_err(&pdev->dev, "Failed to reserve/map IO regions\n");
			return error;
		}
		iobase = pcim_iomap_table(pdev)[0];
	}

	vmci_dev = devm_kzalloc(&pdev->dev, sizeof(*vmci_dev), GFP_KERNEL);
	if (!vmci_dev) {
		dev_err(&pdev->dev,
			"Can't allocate memory for VMCI device\n");
		error = -ENOMEM;
		goto err_unmap_mmio_base;
	}

	vmci_dev->dev = &pdev->dev;
	vmci_dev->exclusive_vectors = false;
	vmci_dev->iobase = iobase;
	vmci_dev->mmio_base = mmio_base;

	init_waitqueue_head(&vmci_dev->inout_wq);

	if (mmio_base != NULL) {
		vmci_dev->tx_buffer = dma_alloc_coherent(&pdev->dev, VMCI_DMA_DG_BUFFER_SIZE,
							 &vmci_dev->tx_buffer_base,
							 GFP_KERNEL);
		if (!vmci_dev->tx_buffer) {
			dev_err(&pdev->dev,
				"Can't allocate memory for datagram tx buffer\n");
			error = -ENOMEM;
			goto err_unmap_mmio_base;
		}

		vmci_dev->data_buffer = dma_alloc_coherent(&pdev->dev, VMCI_DMA_DG_BUFFER_SIZE,
							   &vmci_dev->data_buffer_base,
							   GFP_KERNEL);
	} else {
		vmci_dev->data_buffer = vmalloc(VMCI_MAX_DG_SIZE);
	}
	if (!vmci_dev->data_buffer) {
		dev_err(&pdev->dev,
			"Can't allocate memory for datagram buffer\n");
		error = -ENOMEM;
		goto err_free_data_buffers;
	}

	pci_set_master(pdev);	/* To enable queue_pair functionality. */

	/*
	 * Verify that the VMCI Device supports the capabilities that
	 * we need. If the device is missing capabilities that we would
	 * like to use, check for fallback capabilities and use those
	 * instead (so we can run a new VM on old hosts). Fail the load if
	 * a required capability is missing and there is no fallback.
	 *
	 * Right now, we need datagrams. There are no fallbacks.
	 */
	capabilities = vmci_read_reg(vmci_dev, VMCI_CAPS_ADDR);
	if (!(capabilities & VMCI_CAPS_DATAGRAM)) {
		dev_err(&pdev->dev, "Device does not support datagrams\n");
		error = -ENXIO;
		goto err_free_data_buffers;
	}
	caps_in_use = VMCI_CAPS_DATAGRAM;

	/*
	 * Use 64-bit PPNs if the device supports them.
	 *
	 * There is no check for the return value of dma_set_mask_and_coherent
	 * since this driver can handle the default mask values if
	 * dma_set_mask_and_coherent fails.
	 */
	if (capabilities & VMCI_CAPS_PPN64) {
		dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
		use_ppn64 = true;
		caps_in_use |= VMCI_CAPS_PPN64;
	} else {
		dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(44));
		use_ppn64 = false;
	}

	/*
	 * If the hardware supports notifications, we will use that as
	 * well.
	 */
	if (capabilities & VMCI_CAPS_NOTIFICATIONS) {
		vmci_dev->notification_bitmap = dma_alloc_coherent(
			&pdev->dev, PAGE_SIZE, &vmci_dev->notification_base,
			GFP_KERNEL);
		if (!vmci_dev->notification_bitmap)
			dev_warn(&pdev->dev,
				 "Unable to allocate notification bitmap\n");
		else
			caps_in_use |= VMCI_CAPS_NOTIFICATIONS;
	}

	if (mmio_base != NULL) {
		if (capabilities & VMCI_CAPS_DMA_DATAGRAM) {
			caps_in_use |= VMCI_CAPS_DMA_DATAGRAM;
		} else {
			dev_err(&pdev->dev,
				"Missing capability: VMCI_CAPS_DMA_DATAGRAM\n");
			error = -ENXIO;
			goto err_free_notification_bitmap;
		}
	}

	dev_info(&pdev->dev, "Using capabilities 0x%x\n", caps_in_use);

	/* Let the host know which capabilities we intend to use. */
	vmci_write_reg(vmci_dev, caps_in_use, VMCI_CAPS_ADDR);

	if (caps_in_use & VMCI_CAPS_DMA_DATAGRAM) {
		/* Let the device know the size for pages passed down. */
		vmci_write_reg(vmci_dev, PAGE_SHIFT, VMCI_GUEST_PAGE_SHIFT);

		/*
		 * Configure the high order parts of the data in/out buffer
		 * addresses once here; the low order parts are written at
		 * send/receive time to initiate each transfer (see
		 * vmci_read_data() and vmci_write_data()).
		 */
		vmci_write_reg(vmci_dev, upper_32_bits(vmci_dev->data_buffer_base),
			       VMCI_DATA_IN_HIGH_ADDR);
		vmci_write_reg(vmci_dev, upper_32_bits(vmci_dev->tx_buffer_base),
			       VMCI_DATA_OUT_HIGH_ADDR);
	}

	/* Set up global device so that we can start sending datagrams */
	spin_lock_irq(&vmci_dev_spinlock);
	vmci_dev_g = vmci_dev;
	vmci_pdev = pdev;
	spin_unlock_irq(&vmci_dev_spinlock);

	/*
	 * Register notification bitmap with device if that capability is
	 * used.
	 */
	if (caps_in_use & VMCI_CAPS_NOTIFICATIONS) {
		unsigned long bitmap_ppn =
			vmci_dev->notification_base >> PAGE_SHIFT;
		if (!vmci_dbell_register_notification_bitmap(bitmap_ppn)) {
			dev_warn(&pdev->dev,
				 "VMCI device unable to register notification bitmap with PPN 0x%lx\n",
				 bitmap_ppn);
			error = -ENXIO;
			goto err_remove_vmci_dev_g;
		}
	}

	/* Check host capabilities. */
	error = vmci_check_host_caps(pdev);
	if (error)
		goto err_remove_vmci_dev_g;

	/* Enable device. */

	/*
	 * We subscribe to the VMCI_EVENT_CTX_ID_UPDATE here so we can
	 * update the internal context id when needed.
	 */
	vmci_err = vmci_event_subscribe(VMCI_EVENT_CTX_ID_UPDATE,
					vmci_guest_cid_update, NULL,
					&ctx_update_sub_id);
	if (vmci_err < VMCI_SUCCESS)
		dev_warn(&pdev->dev,
			 "Failed to subscribe to event (type=%d): %d\n",
			 VMCI_EVENT_CTX_ID_UPDATE, vmci_err);

	/*
	 * Enable interrupts.  Try MSI-X first, then MSI, and then fallback on
	 * legacy interrupts.
	 */
	if (vmci_dev->mmio_base != NULL)
		num_irq_vectors = VMCI_MAX_INTRS;
	else
		num_irq_vectors = VMCI_MAX_INTRS_NOTIFICATION;
	error = pci_alloc_irq_vectors(pdev, num_irq_vectors, num_irq_vectors,
				      PCI_IRQ_MSIX);
	if (error < 0) {
		error = pci_alloc_irq_vectors(pdev, 1, 1,
				PCI_IRQ_MSIX | PCI_IRQ_MSI | PCI_IRQ_LEGACY);
		if (error < 0)
			goto err_unsubscribe_event;
	} else {
		vmci_dev->exclusive_vectors = true;
	}

	/*
	 * Request IRQ for legacy or MSI interrupts, or for first
	 * MSI-X vector.
	 */
	error = request_threaded_irq(pci_irq_vector(pdev, 0), NULL,
				     vmci_interrupt, IRQF_SHARED,
				     KBUILD_MODNAME, vmci_dev);
	if (error) {
		dev_err(&pdev->dev, "Irq %u in use: %d\n",
			pci_irq_vector(pdev, 0), error);
		goto err_disable_msi;
	}

	/*
	 * For MSI-X with exclusive vectors we need to request an
	 * interrupt for each vector so that we get a separate
	 * interrupt handler routine.  This allows us to distinguish
	 * between the vectors.
	 */
	if (vmci_dev->exclusive_vectors) {
		error = request_threaded_irq(pci_irq_vector(pdev, 1), NULL,
					     vmci_interrupt_bm, 0,
					     KBUILD_MODNAME, vmci_dev);
		if (error) {
			dev_err(&pdev->dev,
				"Failed to allocate irq %u: %d\n",
				pci_irq_vector(pdev, 1), error);
			goto err_free_irq;
		}
		if (caps_in_use & VMCI_CAPS_DMA_DATAGRAM) {
			error = request_threaded_irq(pci_irq_vector(pdev, 2),
						     NULL,
						     vmci_interrupt_dma_datagram,
						     0, KBUILD_MODNAME,
						     vmci_dev);
			if (error) {
				dev_err(&pdev->dev,
					"Failed to allocate irq %u: %d\n",
					pci_irq_vector(pdev, 2), error);
				goto err_free_bm_irq;
			}
		}
	}

	dev_dbg(&pdev->dev, "Registered device\n");

	atomic_inc(&vmci_num_guest_devices);

	/* Enable specific interrupt bits. */
	cmd = VMCI_IMR_DATAGRAM;
	if (caps_in_use & VMCI_CAPS_NOTIFICATIONS)
		cmd |= VMCI_IMR_NOTIFICATION;
	if (caps_in_use & VMCI_CAPS_DMA_DATAGRAM)
		cmd |= VMCI_IMR_DMA_DATAGRAM;
	vmci_write_reg(vmci_dev, cmd, VMCI_IMR_ADDR);

	/* Enable interrupts. */
	vmci_write_reg(vmci_dev, VMCI_CONTROL_INT_ENABLE, VMCI_CONTROL_ADDR);

	pci_set_drvdata(pdev, vmci_dev);

	vmci_call_vsock_callback(false);
	return 0;

err_free_bm_irq:
	if (vmci_dev->exclusive_vectors)
		free_irq(pci_irq_vector(pdev, 1), vmci_dev);

err_free_irq:
	free_irq(pci_irq_vector(pdev, 0), vmci_dev);

err_disable_msi:
	pci_free_irq_vectors(pdev);

err_unsubscribe_event:
	vmci_err = vmci_event_unsubscribe(ctx_update_sub_id);
	if (vmci_err < VMCI_SUCCESS)
		dev_warn(&pdev->dev,
			 "Failed to unsubscribe from event (type=%d) with subscriber (ID=0x%x): %d\n",
			 VMCI_EVENT_CTX_ID_UPDATE, ctx_update_sub_id, vmci_err);

err_remove_vmci_dev_g:
	spin_lock_irq(&vmci_dev_spinlock);
	vmci_pdev = NULL;
	vmci_dev_g = NULL;
	spin_unlock_irq(&vmci_dev_spinlock);

err_free_notification_bitmap:
	if (vmci_dev->notification_bitmap) {
		vmci_write_reg(vmci_dev, VMCI_CONTROL_RESET, VMCI_CONTROL_ADDR);
		dma_free_coherent(&pdev->dev, PAGE_SIZE,
				  vmci_dev->notification_bitmap,
				  vmci_dev->notification_base);
	}

err_free_data_buffers:
	vmci_free_dg_buffers(vmci_dev);

err_unmap_mmio_base:
	if (mmio_base != NULL)
		pci_iounmap(pdev, mmio_base);

	/* The rest are managed resources and will be freed by PCI core */
	return error;
}

static void vmci_guest_remove_device(struct pci_dev *pdev)
{
	struct vmci_guest_device *vmci_dev = pci_get_drvdata(pdev);
	int vmci_err;

	dev_dbg(&pdev->dev, "Removing device\n");

	atomic_dec(&vmci_num_guest_devices);

	vmci_qp_guest_endpoints_exit();

	vmci_err = vmci_event_unsubscribe(ctx_update_sub_id);
	if (vmci_err < VMCI_SUCCESS)
		dev_warn(&pdev->dev,
			 "Failed to unsubscribe from event (type=%d) with subscriber (ID=0x%x): %d\n",
			 VMCI_EVENT_CTX_ID_UPDATE, ctx_update_sub_id, vmci_err);

	spin_lock_irq(&vmci_dev_spinlock);
	vmci_dev_g = NULL;
	vmci_pdev = NULL;
	spin_unlock_irq(&vmci_dev_spinlock);

	dev_dbg(&pdev->dev, "Resetting vmci device\n");
	vmci_write_reg(vmci_dev, VMCI_CONTROL_RESET, VMCI_CONTROL_ADDR);

	/*
	 * Free IRQ and then disable MSI/MSI-X as appropriate.  For
	 * MSI-X, we might have multiple vectors, each with their own
	 * IRQ, which we must free too.
	 */
	if (vmci_dev->exclusive_vectors) {
		free_irq(pci_irq_vector(pdev, 1), vmci_dev);
		if (vmci_dev->mmio_base != NULL)
			free_irq(pci_irq_vector(pdev, 2), vmci_dev);
	}
	free_irq(pci_irq_vector(pdev, 0), vmci_dev);
	pci_free_irq_vectors(pdev);

	if (vmci_dev->notification_bitmap) {
		/*
		 * The device reset above cleared the bitmap state of the
		 * device, so we can safely free it here.
		 */

		dma_free_coherent(&pdev->dev, PAGE_SIZE,
				  vmci_dev->notification_bitmap,
				  vmci_dev->notification_base);
	}

	vmci_free_dg_buffers(vmci_dev);

	if (vmci_dev->mmio_base != NULL)
		pci_iounmap(pdev, vmci_dev->mmio_base);

	/* The rest are managed resources and will be freed by PCI core */
}

static const struct pci_device_id vmci_ids[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_VMWARE, PCI_DEVICE_ID_VMWARE_VMCI), },
	{ 0 },
};
MODULE_DEVICE_TABLE(pci, vmci_ids);

static struct pci_driver vmci_guest_driver = {
	.name		= KBUILD_MODNAME,
	.id_table	= vmci_ids,
	.probe		= vmci_guest_probe_device,
	.remove		= vmci_guest_remove_device,
};

int __init vmci_guest_init(void)
{
	return pci_register_driver(&vmci_guest_driver);
}

void __exit vmci_guest_exit(void)
{
	pci_unregister_driver(&vmci_guest_driver);
}