1685a6bf8SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only
21f166439SGeorge Zhang /*
31f166439SGeorge Zhang  * VMware VMCI Driver
41f166439SGeorge Zhang  *
51f166439SGeorge Zhang  * Copyright (C) 2012 VMware, Inc. All rights reserved.
61f166439SGeorge Zhang  */
71f166439SGeorge Zhang 
81f166439SGeorge Zhang #include <linux/vmw_vmci_defs.h>
91f166439SGeorge Zhang #include <linux/vmw_vmci_api.h>
101f166439SGeorge Zhang #include <linux/moduleparam.h>
111f166439SGeorge Zhang #include <linux/interrupt.h>
121f166439SGeorge Zhang #include <linux/highmem.h>
131f166439SGeorge Zhang #include <linux/kernel.h>
14ea8a83a4SDmitry Torokhov #include <linux/mm.h>
151f166439SGeorge Zhang #include <linux/module.h>
1622aa5c7fSJorgen Hansen #include <linux/processor.h>
171f166439SGeorge Zhang #include <linux/sched.h>
18ea8a83a4SDmitry Torokhov #include <linux/slab.h>
191f166439SGeorge Zhang #include <linux/init.h>
201f166439SGeorge Zhang #include <linux/pci.h>
211f166439SGeorge Zhang #include <linux/smp.h>
221f166439SGeorge Zhang #include <linux/io.h>
23ea8a83a4SDmitry Torokhov #include <linux/vmalloc.h>
241f166439SGeorge Zhang 
251f166439SGeorge Zhang #include "vmci_datagram.h"
261f166439SGeorge Zhang #include "vmci_doorbell.h"
271f166439SGeorge Zhang #include "vmci_context.h"
281f166439SGeorge Zhang #include "vmci_driver.h"
291f166439SGeorge Zhang #include "vmci_event.h"
301f166439SGeorge Zhang 
311f166439SGeorge Zhang #define PCI_DEVICE_ID_VMWARE_VMCI	0x0740
321f166439SGeorge Zhang 
331f166439SGeorge Zhang #define VMCI_UTIL_NUM_RESOURCES 1
341f166439SGeorge Zhang 
355ee10982SJorgen Hansen /*
365ee10982SJorgen Hansen  * Datagram buffers for DMA send/receive must accommodate at least
375ee10982SJorgen Hansen  * a maximum sized datagram and the header.
385ee10982SJorgen Hansen  */
395ee10982SJorgen Hansen #define VMCI_DMA_DG_BUFFER_SIZE (VMCI_MAX_DG_SIZE + PAGE_SIZE)
405ee10982SJorgen Hansen 
411f166439SGeorge Zhang static bool vmci_disable_msi;
421f166439SGeorge Zhang module_param_named(disable_msi, vmci_disable_msi, bool, 0);
431f166439SGeorge Zhang MODULE_PARM_DESC(disable_msi, "Disable MSI use in driver - (default=0)");
441f166439SGeorge Zhang 
451f166439SGeorge Zhang static bool vmci_disable_msix;
461f166439SGeorge Zhang module_param_named(disable_msix, vmci_disable_msix, bool, 0);
471f166439SGeorge Zhang MODULE_PARM_DESC(disable_msix, "Disable MSI-X use in driver - (default=0)");
481f166439SGeorge Zhang 
491f166439SGeorge Zhang static u32 ctx_update_sub_id = VMCI_INVALID_ID;
501f166439SGeorge Zhang static u32 vm_context_id = VMCI_INVALID_ID;
511f166439SGeorge Zhang 
521f166439SGeorge Zhang struct vmci_guest_device {
531f166439SGeorge Zhang 	struct device *dev;	/* PCI device we are attached to */
541f166439SGeorge Zhang 	void __iomem *iobase;
55e283a0e8SJorgen Hansen 	void __iomem *mmio_base;
561f166439SGeorge Zhang 
571f166439SGeorge Zhang 	bool exclusive_vectors;
581f166439SGeorge Zhang 
591f166439SGeorge Zhang 	struct tasklet_struct datagram_tasklet;
601f166439SGeorge Zhang 	struct tasklet_struct bm_tasklet;
61463713ebSJorgen Hansen 	struct wait_queue_head inout_wq;
621f166439SGeorge Zhang 
631f166439SGeorge Zhang 	void *data_buffer;
645ee10982SJorgen Hansen 	dma_addr_t data_buffer_base;
655ee10982SJorgen Hansen 	void *tx_buffer;
665ee10982SJorgen Hansen 	dma_addr_t tx_buffer_base;
671f166439SGeorge Zhang 	void *notification_bitmap;
686d6dfb4fSAndy King 	dma_addr_t notification_base;
691f166439SGeorge Zhang };
701f166439SGeorge Zhang 
/* Flag reported by vmci_use_ppn64(); false until set elsewhere. */
static bool use_ppn64;

/*
 * Reports the current value of the use_ppn64 flag.
 */
bool vmci_use_ppn64(void)
{
	const bool enabled = use_ppn64;

	return enabled;
}
77f2db7361SVishnu DASA 
781f166439SGeorge Zhang /* vmci_dev singleton device and supporting data*/
796d6dfb4fSAndy King struct pci_dev *vmci_pdev;
801f166439SGeorge Zhang static struct vmci_guest_device *vmci_dev_g;
811f166439SGeorge Zhang static DEFINE_SPINLOCK(vmci_dev_spinlock);
821f166439SGeorge Zhang 
831f166439SGeorge Zhang static atomic_t vmci_num_guest_devices = ATOMIC_INIT(0);
841f166439SGeorge Zhang 
851f166439SGeorge Zhang bool vmci_guest_code_active(void)
861f166439SGeorge Zhang {
871f166439SGeorge Zhang 	return atomic_read(&vmci_num_guest_devices) != 0;
881f166439SGeorge Zhang }
891f166439SGeorge Zhang 
901f166439SGeorge Zhang u32 vmci_get_vm_context_id(void)
911f166439SGeorge Zhang {
921f166439SGeorge Zhang 	if (vm_context_id == VMCI_INVALID_ID) {
931f166439SGeorge Zhang 		struct vmci_datagram get_cid_msg;
941f166439SGeorge Zhang 		get_cid_msg.dst =
951f166439SGeorge Zhang 		    vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
961f166439SGeorge Zhang 				     VMCI_GET_CONTEXT_ID);
971f166439SGeorge Zhang 		get_cid_msg.src = VMCI_ANON_SRC_HANDLE;
981f166439SGeorge Zhang 		get_cid_msg.payload_size = 0;
995a19b789SAndy King 		vm_context_id = vmci_send_datagram(&get_cid_msg);
1001f166439SGeorge Zhang 	}
1011f166439SGeorge Zhang 	return vm_context_id;
1021f166439SGeorge Zhang }
1031f166439SGeorge Zhang 
104e283a0e8SJorgen Hansen static unsigned int vmci_read_reg(struct vmci_guest_device *dev, u32 reg)
105e283a0e8SJorgen Hansen {
106e283a0e8SJorgen Hansen 	if (dev->mmio_base != NULL)
107e283a0e8SJorgen Hansen 		return readl(dev->mmio_base + reg);
108e283a0e8SJorgen Hansen 	return ioread32(dev->iobase + reg);
109e283a0e8SJorgen Hansen }
110e283a0e8SJorgen Hansen 
111e283a0e8SJorgen Hansen static void vmci_write_reg(struct vmci_guest_device *dev, u32 val, u32 reg)
112e283a0e8SJorgen Hansen {
113e283a0e8SJorgen Hansen 	if (dev->mmio_base != NULL)
114e283a0e8SJorgen Hansen 		writel(val, dev->mmio_base + reg);
115e283a0e8SJorgen Hansen 	else
116e283a0e8SJorgen Hansen 		iowrite32(val, dev->iobase + reg);
117e283a0e8SJorgen Hansen }
118e283a0e8SJorgen Hansen 
119463713ebSJorgen Hansen static void vmci_read_data(struct vmci_guest_device *vmci_dev,
120463713ebSJorgen Hansen 			   void *dest, size_t size)
121463713ebSJorgen Hansen {
122463713ebSJorgen Hansen 	if (vmci_dev->mmio_base == NULL)
123463713ebSJorgen Hansen 		ioread8_rep(vmci_dev->iobase + VMCI_DATA_IN_ADDR,
124463713ebSJorgen Hansen 			    dest, size);
125463713ebSJorgen Hansen 	else {
126463713ebSJorgen Hansen 		/*
127463713ebSJorgen Hansen 		 * For DMA datagrams, the data_buffer will contain the header on the
128463713ebSJorgen Hansen 		 * first page, followed by the incoming datagram(s) on the following
129463713ebSJorgen Hansen 		 * pages. The header uses an S/G element immediately following the
130463713ebSJorgen Hansen 		 * header on the first page to point to the data area.
131463713ebSJorgen Hansen 		 */
132463713ebSJorgen Hansen 		struct vmci_data_in_out_header *buffer_header = vmci_dev->data_buffer;
133463713ebSJorgen Hansen 		struct vmci_sg_elem *sg_array = (struct vmci_sg_elem *)(buffer_header + 1);
134463713ebSJorgen Hansen 		size_t buffer_offset = dest - vmci_dev->data_buffer;
135463713ebSJorgen Hansen 
136463713ebSJorgen Hansen 		buffer_header->opcode = 1;
137463713ebSJorgen Hansen 		buffer_header->size = 1;
138463713ebSJorgen Hansen 		buffer_header->busy = 0;
139463713ebSJorgen Hansen 		sg_array[0].addr = vmci_dev->data_buffer_base + buffer_offset;
140463713ebSJorgen Hansen 		sg_array[0].size = size;
141463713ebSJorgen Hansen 
142463713ebSJorgen Hansen 		vmci_write_reg(vmci_dev, lower_32_bits(vmci_dev->data_buffer_base),
143463713ebSJorgen Hansen 			       VMCI_DATA_IN_LOW_ADDR);
144463713ebSJorgen Hansen 
145463713ebSJorgen Hansen 		wait_event(vmci_dev->inout_wq, buffer_header->busy == 1);
146463713ebSJorgen Hansen 	}
147463713ebSJorgen Hansen }
148463713ebSJorgen Hansen 
14922aa5c7fSJorgen Hansen static int vmci_write_data(struct vmci_guest_device *dev,
15022aa5c7fSJorgen Hansen 			   struct vmci_datagram *dg)
15122aa5c7fSJorgen Hansen {
15222aa5c7fSJorgen Hansen 	int result;
15322aa5c7fSJorgen Hansen 
15422aa5c7fSJorgen Hansen 	if (dev->mmio_base != NULL) {
15522aa5c7fSJorgen Hansen 		struct vmci_data_in_out_header *buffer_header = dev->tx_buffer;
15622aa5c7fSJorgen Hansen 		u8 *dg_out_buffer = (u8 *)(buffer_header + 1);
15722aa5c7fSJorgen Hansen 
15822aa5c7fSJorgen Hansen 		if (VMCI_DG_SIZE(dg) > VMCI_MAX_DG_SIZE)
15922aa5c7fSJorgen Hansen 			return VMCI_ERROR_INVALID_ARGS;
16022aa5c7fSJorgen Hansen 
16122aa5c7fSJorgen Hansen 		/*
16222aa5c7fSJorgen Hansen 		 * Initialize send buffer with outgoing datagram
16322aa5c7fSJorgen Hansen 		 * and set up header for inline data. Device will
16422aa5c7fSJorgen Hansen 		 * not access buffer asynchronously - only after
16522aa5c7fSJorgen Hansen 		 * the write to VMCI_DATA_OUT_LOW_ADDR.
16622aa5c7fSJorgen Hansen 		 */
16722aa5c7fSJorgen Hansen 		memcpy(dg_out_buffer, dg, VMCI_DG_SIZE(dg));
16822aa5c7fSJorgen Hansen 		buffer_header->opcode = 0;
16922aa5c7fSJorgen Hansen 		buffer_header->size = VMCI_DG_SIZE(dg);
17022aa5c7fSJorgen Hansen 		buffer_header->busy = 1;
17122aa5c7fSJorgen Hansen 
17222aa5c7fSJorgen Hansen 		vmci_write_reg(dev, lower_32_bits(dev->tx_buffer_base),
17322aa5c7fSJorgen Hansen 			       VMCI_DATA_OUT_LOW_ADDR);
17422aa5c7fSJorgen Hansen 
17522aa5c7fSJorgen Hansen 		/* Caller holds a spinlock, so cannot block. */
17622aa5c7fSJorgen Hansen 		spin_until_cond(buffer_header->busy == 0);
17722aa5c7fSJorgen Hansen 
17822aa5c7fSJorgen Hansen 		result = vmci_read_reg(vmci_dev_g, VMCI_RESULT_LOW_ADDR);
17922aa5c7fSJorgen Hansen 		if (result == VMCI_SUCCESS)
18022aa5c7fSJorgen Hansen 			result = (int)buffer_header->result;
18122aa5c7fSJorgen Hansen 	} else {
18222aa5c7fSJorgen Hansen 		iowrite8_rep(dev->iobase + VMCI_DATA_OUT_ADDR,
18322aa5c7fSJorgen Hansen 			     dg, VMCI_DG_SIZE(dg));
18422aa5c7fSJorgen Hansen 		result = vmci_read_reg(vmci_dev_g, VMCI_RESULT_LOW_ADDR);
18522aa5c7fSJorgen Hansen 	}
18622aa5c7fSJorgen Hansen 
18722aa5c7fSJorgen Hansen 	return result;
18822aa5c7fSJorgen Hansen }
18922aa5c7fSJorgen Hansen 
1901f166439SGeorge Zhang /*
1911f166439SGeorge Zhang  * VM to hypervisor call mechanism. We use the standard VMware naming
1921f166439SGeorge Zhang  * convention since shared code is calling this function as well.
1931f166439SGeorge Zhang  */
1941f166439SGeorge Zhang int vmci_send_datagram(struct vmci_datagram *dg)
1951f166439SGeorge Zhang {
1961f166439SGeorge Zhang 	unsigned long flags;
1971f166439SGeorge Zhang 	int result;
1981f166439SGeorge Zhang 
1991f166439SGeorge Zhang 	/* Check args. */
2001f166439SGeorge Zhang 	if (dg == NULL)
2011f166439SGeorge Zhang 		return VMCI_ERROR_INVALID_ARGS;
2021f166439SGeorge Zhang 
2031f166439SGeorge Zhang 	/*
2041f166439SGeorge Zhang 	 * Need to acquire spinlock on the device because the datagram
2051f166439SGeorge Zhang 	 * data may be spread over multiple pages and the monitor may
2061f166439SGeorge Zhang 	 * interleave device user rpc calls from multiple
2071f166439SGeorge Zhang 	 * VCPUs. Acquiring the spinlock precludes that
2081f166439SGeorge Zhang 	 * possibility. Disabling interrupts to avoid incoming
2091f166439SGeorge Zhang 	 * datagrams during a "rep out" and possibly landing up in
2101f166439SGeorge Zhang 	 * this function.
2111f166439SGeorge Zhang 	 */
2121f166439SGeorge Zhang 	spin_lock_irqsave(&vmci_dev_spinlock, flags);
2131f166439SGeorge Zhang 
2141f166439SGeorge Zhang 	if (vmci_dev_g) {
21522aa5c7fSJorgen Hansen 		vmci_write_data(vmci_dev_g, dg);
216e283a0e8SJorgen Hansen 		result = vmci_read_reg(vmci_dev_g, VMCI_RESULT_LOW_ADDR);
2171f166439SGeorge Zhang 	} else {
2181f166439SGeorge Zhang 		result = VMCI_ERROR_UNAVAILABLE;
2191f166439SGeorge Zhang 	}
2201f166439SGeorge Zhang 
2211f166439SGeorge Zhang 	spin_unlock_irqrestore(&vmci_dev_spinlock, flags);
2221f166439SGeorge Zhang 
2231f166439SGeorge Zhang 	return result;
2241f166439SGeorge Zhang }
2251f166439SGeorge Zhang EXPORT_SYMBOL_GPL(vmci_send_datagram);
2261f166439SGeorge Zhang 
2271f166439SGeorge Zhang /*
2281f166439SGeorge Zhang  * Gets called with the new context id if updated or resumed.
2291f166439SGeorge Zhang  * Context id.
2301f166439SGeorge Zhang  */
2311f166439SGeorge Zhang static void vmci_guest_cid_update(u32 sub_id,
2321f166439SGeorge Zhang 				  const struct vmci_event_data *event_data,
2331f166439SGeorge Zhang 				  void *client_data)
2341f166439SGeorge Zhang {
2351f166439SGeorge Zhang 	const struct vmci_event_payld_ctx *ev_payload =
2361f166439SGeorge Zhang 				vmci_event_data_const_payload(event_data);
2371f166439SGeorge Zhang 
2381f166439SGeorge Zhang 	if (sub_id != ctx_update_sub_id) {
2391f166439SGeorge Zhang 		pr_devel("Invalid subscriber (ID=0x%x)\n", sub_id);
2401f166439SGeorge Zhang 		return;
2411f166439SGeorge Zhang 	}
2421f166439SGeorge Zhang 
2431f166439SGeorge Zhang 	if (!event_data || ev_payload->context_id == VMCI_INVALID_ID) {
2441f166439SGeorge Zhang 		pr_devel("Invalid event data\n");
2451f166439SGeorge Zhang 		return;
2461f166439SGeorge Zhang 	}
2471f166439SGeorge Zhang 
2481f166439SGeorge Zhang 	pr_devel("Updating context from (ID=0x%x) to (ID=0x%x) on event (type=%d)\n",
2491f166439SGeorge Zhang 		 vm_context_id, ev_payload->context_id, event_data->event);
2501f166439SGeorge Zhang 
2511f166439SGeorge Zhang 	vm_context_id = ev_payload->context_id;
2521f166439SGeorge Zhang }
2531f166439SGeorge Zhang 
2541f166439SGeorge Zhang /*
2551f166439SGeorge Zhang  * Verify that the host supports the hypercalls we need. If it does not,
256b791da23SChristophe JAILLET  * try to find fallback hypercalls and use those instead.  Returns 0 if
257b791da23SChristophe JAILLET  * required hypercalls (or fallback hypercalls) are supported by the host,
258b791da23SChristophe JAILLET  * an error code otherwise.
2591f166439SGeorge Zhang  */
260782f2445SDmitry Torokhov static int vmci_check_host_caps(struct pci_dev *pdev)
2611f166439SGeorge Zhang {
2621f166439SGeorge Zhang 	bool result;
2631f166439SGeorge Zhang 	struct vmci_resource_query_msg *msg;
2641f166439SGeorge Zhang 	u32 msg_size = sizeof(struct vmci_resource_query_hdr) +
2651f166439SGeorge Zhang 				VMCI_UTIL_NUM_RESOURCES * sizeof(u32);
2661f166439SGeorge Zhang 	struct vmci_datagram *check_msg;
2671f166439SGeorge Zhang 
268b2192cfeSTetsuo Handa 	check_msg = kzalloc(msg_size, GFP_KERNEL);
2691f166439SGeorge Zhang 	if (!check_msg) {
2701f166439SGeorge Zhang 		dev_err(&pdev->dev, "%s: Insufficient memory\n", __func__);
271782f2445SDmitry Torokhov 		return -ENOMEM;
2721f166439SGeorge Zhang 	}
2731f166439SGeorge Zhang 
2741f166439SGeorge Zhang 	check_msg->dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
2751f166439SGeorge Zhang 					  VMCI_RESOURCES_QUERY);
2761f166439SGeorge Zhang 	check_msg->src = VMCI_ANON_SRC_HANDLE;
2771f166439SGeorge Zhang 	check_msg->payload_size = msg_size - VMCI_DG_HEADERSIZE;
2781f166439SGeorge Zhang 	msg = (struct vmci_resource_query_msg *)VMCI_DG_PAYLOAD(check_msg);
2791f166439SGeorge Zhang 
2801f166439SGeorge Zhang 	msg->num_resources = VMCI_UTIL_NUM_RESOURCES;
2811f166439SGeorge Zhang 	msg->resources[0] = VMCI_GET_CONTEXT_ID;
2821f166439SGeorge Zhang 
2831f166439SGeorge Zhang 	/* Checks that hyper calls are supported */
2841f166439SGeorge Zhang 	result = vmci_send_datagram(check_msg) == 0x01;
2851f166439SGeorge Zhang 	kfree(check_msg);
2861f166439SGeorge Zhang 
2871f166439SGeorge Zhang 	dev_dbg(&pdev->dev, "%s: Host capability check: %s\n",
2881f166439SGeorge Zhang 		__func__, result ? "PASSED" : "FAILED");
2891f166439SGeorge Zhang 
2901f166439SGeorge Zhang 	/* We need the vector. There are no fallbacks. */
291782f2445SDmitry Torokhov 	return result ? 0 : -ENXIO;
2921f166439SGeorge Zhang }
2931f166439SGeorge Zhang 
2941f166439SGeorge Zhang /*
295463713ebSJorgen Hansen  * Reads datagrams from the device and dispatches them. For IO port
296463713ebSJorgen Hansen  * based access to the device, we always start reading datagrams into
297463713ebSJorgen Hansen  * only the first page of the datagram buffer. If the datagrams don't
298463713ebSJorgen Hansen  * fit into one page, we use the maximum datagram buffer size for the
299463713ebSJorgen Hansen  * remainder of the invocation. This is a simple heuristic for not
300463713ebSJorgen Hansen  * penalizing small datagrams. For DMA-based datagrams, we always
301463713ebSJorgen Hansen  * use the maximum datagram buffer size, since there is no performance
302463713ebSJorgen Hansen  * penalty for doing so.
3031f166439SGeorge Zhang  *
3041f166439SGeorge Zhang  * This function assumes that it has exclusive access to the data
305463713ebSJorgen Hansen  * in register(s) for the duration of the call.
3061f166439SGeorge Zhang  */
3071f166439SGeorge Zhang static void vmci_dispatch_dgs(unsigned long data)
3081f166439SGeorge Zhang {
3091f166439SGeorge Zhang 	struct vmci_guest_device *vmci_dev = (struct vmci_guest_device *)data;
3101f166439SGeorge Zhang 	u8 *dg_in_buffer = vmci_dev->data_buffer;
3111f166439SGeorge Zhang 	struct vmci_datagram *dg;
3121f166439SGeorge Zhang 	size_t dg_in_buffer_size = VMCI_MAX_DG_SIZE;
313463713ebSJorgen Hansen 	size_t current_dg_in_buffer_size;
3141f166439SGeorge Zhang 	size_t remaining_bytes;
315463713ebSJorgen Hansen 	bool is_io_port = vmci_dev->mmio_base == NULL;
3161f166439SGeorge Zhang 
3171f166439SGeorge Zhang 	BUILD_BUG_ON(VMCI_MAX_DG_SIZE < PAGE_SIZE);
3181f166439SGeorge Zhang 
319463713ebSJorgen Hansen 	if (!is_io_port) {
320463713ebSJorgen Hansen 		/* For mmio, the first page is used for the header. */
321463713ebSJorgen Hansen 		dg_in_buffer += PAGE_SIZE;
322463713ebSJorgen Hansen 
323463713ebSJorgen Hansen 		/*
324463713ebSJorgen Hansen 		 * For DMA-based datagram operations, there is no performance
325463713ebSJorgen Hansen 		 * penalty for reading the maximum buffer size.
326463713ebSJorgen Hansen 		 */
327463713ebSJorgen Hansen 		current_dg_in_buffer_size = VMCI_MAX_DG_SIZE;
328463713ebSJorgen Hansen 	} else {
329463713ebSJorgen Hansen 		current_dg_in_buffer_size = PAGE_SIZE;
330463713ebSJorgen Hansen 	}
331463713ebSJorgen Hansen 	vmci_read_data(vmci_dev, dg_in_buffer, current_dg_in_buffer_size);
3321f166439SGeorge Zhang 	dg = (struct vmci_datagram *)dg_in_buffer;
3331f166439SGeorge Zhang 	remaining_bytes = current_dg_in_buffer_size;
3341f166439SGeorge Zhang 
335463713ebSJorgen Hansen 	/*
336463713ebSJorgen Hansen 	 * Read through the buffer until an invalid datagram header is
337463713ebSJorgen Hansen 	 * encountered. The exit condition for datagrams read through
338463713ebSJorgen Hansen 	 * VMCI_DATA_IN_ADDR is a bit more complicated, since a datagram
339463713ebSJorgen Hansen 	 * can start on any page boundary in the buffer.
340463713ebSJorgen Hansen 	 */
3411f166439SGeorge Zhang 	while (dg->dst.resource != VMCI_INVALID_ID ||
342463713ebSJorgen Hansen 	       (is_io_port && remaining_bytes > PAGE_SIZE)) {
3431f166439SGeorge Zhang 		unsigned dg_in_size;
3441f166439SGeorge Zhang 
3451f166439SGeorge Zhang 		/*
346463713ebSJorgen Hansen 		 * If using VMCI_DATA_IN_ADDR, skip to the next page
347463713ebSJorgen Hansen 		 * as a datagram can start on any page boundary.
3481f166439SGeorge Zhang 		 */
3491f166439SGeorge Zhang 		if (dg->dst.resource == VMCI_INVALID_ID) {
3501f166439SGeorge Zhang 			dg = (struct vmci_datagram *)roundup(
3511f166439SGeorge Zhang 				(uintptr_t)dg + 1, PAGE_SIZE);
3521f166439SGeorge Zhang 			remaining_bytes =
3531f166439SGeorge Zhang 				(size_t)(dg_in_buffer +
3541f166439SGeorge Zhang 					 current_dg_in_buffer_size -
3551f166439SGeorge Zhang 					 (u8 *)dg);
3561f166439SGeorge Zhang 			continue;
3571f166439SGeorge Zhang 		}
3581f166439SGeorge Zhang 
3591f166439SGeorge Zhang 		dg_in_size = VMCI_DG_SIZE_ALIGNED(dg);
3601f166439SGeorge Zhang 
3611f166439SGeorge Zhang 		if (dg_in_size <= dg_in_buffer_size) {
3621f166439SGeorge Zhang 			int result;
3631f166439SGeorge Zhang 
3641f166439SGeorge Zhang 			/*
3651f166439SGeorge Zhang 			 * If the remaining bytes in the datagram
3661f166439SGeorge Zhang 			 * buffer doesn't contain the complete
3671f166439SGeorge Zhang 			 * datagram, we first make sure we have enough
3681f166439SGeorge Zhang 			 * room for it and then we read the reminder
3691f166439SGeorge Zhang 			 * of the datagram and possibly any following
3701f166439SGeorge Zhang 			 * datagrams.
3711f166439SGeorge Zhang 			 */
3721f166439SGeorge Zhang 			if (dg_in_size > remaining_bytes) {
3731f166439SGeorge Zhang 				if (remaining_bytes !=
3741f166439SGeorge Zhang 				    current_dg_in_buffer_size) {
3751f166439SGeorge Zhang 
3761f166439SGeorge Zhang 					/*
3771f166439SGeorge Zhang 					 * We move the partial
3781f166439SGeorge Zhang 					 * datagram to the front and
3791f166439SGeorge Zhang 					 * read the reminder of the
3801f166439SGeorge Zhang 					 * datagram and possibly
3811f166439SGeorge Zhang 					 * following calls into the
3821f166439SGeorge Zhang 					 * following bytes.
3831f166439SGeorge Zhang 					 */
3841f166439SGeorge Zhang 					memmove(dg_in_buffer, dg_in_buffer +
3851f166439SGeorge Zhang 						current_dg_in_buffer_size -
3861f166439SGeorge Zhang 						remaining_bytes,
3871f166439SGeorge Zhang 						remaining_bytes);
3881f166439SGeorge Zhang 					dg = (struct vmci_datagram *)
3891f166439SGeorge Zhang 					    dg_in_buffer;
3901f166439SGeorge Zhang 				}
3911f166439SGeorge Zhang 
3921f166439SGeorge Zhang 				if (current_dg_in_buffer_size !=
3931f166439SGeorge Zhang 				    dg_in_buffer_size)
3941f166439SGeorge Zhang 					current_dg_in_buffer_size =
3951f166439SGeorge Zhang 					    dg_in_buffer_size;
3961f166439SGeorge Zhang 
397463713ebSJorgen Hansen 				vmci_read_data(vmci_dev,
398463713ebSJorgen Hansen 					       dg_in_buffer +
3991f166439SGeorge Zhang 						remaining_bytes,
4001f166439SGeorge Zhang 					       current_dg_in_buffer_size -
4011f166439SGeorge Zhang 						remaining_bytes);
4021f166439SGeorge Zhang 			}
4031f166439SGeorge Zhang 
4041f166439SGeorge Zhang 			/*
4051f166439SGeorge Zhang 			 * We special case event datagrams from the
4061f166439SGeorge Zhang 			 * hypervisor.
4071f166439SGeorge Zhang 			 */
4081f166439SGeorge Zhang 			if (dg->src.context == VMCI_HYPERVISOR_CONTEXT_ID &&
4091f166439SGeorge Zhang 			    dg->dst.resource == VMCI_EVENT_HANDLER) {
4101f166439SGeorge Zhang 				result = vmci_event_dispatch(dg);
4111f166439SGeorge Zhang 			} else {
4121f166439SGeorge Zhang 				result = vmci_datagram_invoke_guest_handler(dg);
4131f166439SGeorge Zhang 			}
4141f166439SGeorge Zhang 			if (result < VMCI_SUCCESS)
4151f166439SGeorge Zhang 				dev_dbg(vmci_dev->dev,
4161f166439SGeorge Zhang 					"Datagram with resource (ID=0x%x) failed (err=%d)\n",
4171f166439SGeorge Zhang 					 dg->dst.resource, result);
4181f166439SGeorge Zhang 
4191f166439SGeorge Zhang 			/* On to the next datagram. */
4201f166439SGeorge Zhang 			dg = (struct vmci_datagram *)((u8 *)dg +
4211f166439SGeorge Zhang 						      dg_in_size);
4221f166439SGeorge Zhang 		} else {
4231f166439SGeorge Zhang 			size_t bytes_to_skip;
4241f166439SGeorge Zhang 
4251f166439SGeorge Zhang 			/*
4261f166439SGeorge Zhang 			 * Datagram doesn't fit in datagram buffer of maximal
4271f166439SGeorge Zhang 			 * size. We drop it.
4281f166439SGeorge Zhang 			 */
4291f166439SGeorge Zhang 			dev_dbg(vmci_dev->dev,
4301f166439SGeorge Zhang 				"Failed to receive datagram (size=%u bytes)\n",
4311f166439SGeorge Zhang 				 dg_in_size);
4321f166439SGeorge Zhang 
4331f166439SGeorge Zhang 			bytes_to_skip = dg_in_size - remaining_bytes;
4341f166439SGeorge Zhang 			if (current_dg_in_buffer_size != dg_in_buffer_size)
4351f166439SGeorge Zhang 				current_dg_in_buffer_size = dg_in_buffer_size;
4361f166439SGeorge Zhang 
4371f166439SGeorge Zhang 			for (;;) {
438463713ebSJorgen Hansen 				vmci_read_data(vmci_dev, dg_in_buffer,
4391f166439SGeorge Zhang 					       current_dg_in_buffer_size);
4401f166439SGeorge Zhang 				if (bytes_to_skip <= current_dg_in_buffer_size)
4411f166439SGeorge Zhang 					break;
4421f166439SGeorge Zhang 
4431f166439SGeorge Zhang 				bytes_to_skip -= current_dg_in_buffer_size;
4441f166439SGeorge Zhang 			}
4451f166439SGeorge Zhang 			dg = (struct vmci_datagram *)(dg_in_buffer +
4461f166439SGeorge Zhang 						      bytes_to_skip);
4471f166439SGeorge Zhang 		}
4481f166439SGeorge Zhang 
4491f166439SGeorge Zhang 		remaining_bytes =
4501f166439SGeorge Zhang 		    (size_t) (dg_in_buffer + current_dg_in_buffer_size -
4511f166439SGeorge Zhang 			      (u8 *)dg);
4521f166439SGeorge Zhang 
4531f166439SGeorge Zhang 		if (remaining_bytes < VMCI_DG_HEADERSIZE) {
4541f166439SGeorge Zhang 			/* Get the next batch of datagrams. */
4551f166439SGeorge Zhang 
456463713ebSJorgen Hansen 			vmci_read_data(vmci_dev, dg_in_buffer,
4571f166439SGeorge Zhang 				    current_dg_in_buffer_size);
4581f166439SGeorge Zhang 			dg = (struct vmci_datagram *)dg_in_buffer;
4591f166439SGeorge Zhang 			remaining_bytes = current_dg_in_buffer_size;
4601f166439SGeorge Zhang 		}
4611f166439SGeorge Zhang 	}
4621f166439SGeorge Zhang }
4631f166439SGeorge Zhang 
4641f166439SGeorge Zhang /*
4651f166439SGeorge Zhang  * Scans the notification bitmap for raised flags, clears them
4661f166439SGeorge Zhang  * and handles the notifications.
4671f166439SGeorge Zhang  */
4681f166439SGeorge Zhang static void vmci_process_bitmap(unsigned long data)
4691f166439SGeorge Zhang {
4701f166439SGeorge Zhang 	struct vmci_guest_device *dev = (struct vmci_guest_device *)data;
4711f166439SGeorge Zhang 
4721f166439SGeorge Zhang 	if (!dev->notification_bitmap) {
4731f166439SGeorge Zhang 		dev_dbg(dev->dev, "No bitmap present in %s\n", __func__);
4741f166439SGeorge Zhang 		return;
4751f166439SGeorge Zhang 	}
4761f166439SGeorge Zhang 
4771f166439SGeorge Zhang 	vmci_dbell_scan_notification_entries(dev->notification_bitmap);
4781f166439SGeorge Zhang }
4791f166439SGeorge Zhang 
4801f166439SGeorge Zhang /*
4811f166439SGeorge Zhang  * Interrupt handler for legacy or MSI interrupt, or for first MSI-X
4821f166439SGeorge Zhang  * interrupt (vector VMCI_INTR_DATAGRAM).
4831f166439SGeorge Zhang  */
4841f166439SGeorge Zhang static irqreturn_t vmci_interrupt(int irq, void *_dev)
4851f166439SGeorge Zhang {
4861f166439SGeorge Zhang 	struct vmci_guest_device *dev = _dev;
4871f166439SGeorge Zhang 
4881f166439SGeorge Zhang 	/*
4891f166439SGeorge Zhang 	 * If we are using MSI-X with exclusive vectors then we simply schedule
4901f166439SGeorge Zhang 	 * the datagram tasklet, since we know the interrupt was meant for us.
4911f166439SGeorge Zhang 	 * Otherwise we must read the ICR to determine what to do.
4921f166439SGeorge Zhang 	 */
4931f166439SGeorge Zhang 
4943bb434cdSChristoph Hellwig 	if (dev->exclusive_vectors) {
4951f166439SGeorge Zhang 		tasklet_schedule(&dev->datagram_tasklet);
4961f166439SGeorge Zhang 	} else {
4971f166439SGeorge Zhang 		unsigned int icr;
4981f166439SGeorge Zhang 
4991f166439SGeorge Zhang 		/* Acknowledge interrupt and determine what needs doing. */
500e283a0e8SJorgen Hansen 		icr = vmci_read_reg(dev, VMCI_ICR_ADDR);
5011f166439SGeorge Zhang 		if (icr == 0 || icr == ~0)
5021f166439SGeorge Zhang 			return IRQ_NONE;
5031f166439SGeorge Zhang 
5041f166439SGeorge Zhang 		if (icr & VMCI_ICR_DATAGRAM) {
5051f166439SGeorge Zhang 			tasklet_schedule(&dev->datagram_tasklet);
5061f166439SGeorge Zhang 			icr &= ~VMCI_ICR_DATAGRAM;
5071f166439SGeorge Zhang 		}
5081f166439SGeorge Zhang 
5091f166439SGeorge Zhang 		if (icr & VMCI_ICR_NOTIFICATION) {
5101f166439SGeorge Zhang 			tasklet_schedule(&dev->bm_tasklet);
5111f166439SGeorge Zhang 			icr &= ~VMCI_ICR_NOTIFICATION;
5121f166439SGeorge Zhang 		}
5131f166439SGeorge Zhang 
514463713ebSJorgen Hansen 
515463713ebSJorgen Hansen 		if (icr & VMCI_ICR_DMA_DATAGRAM) {
516463713ebSJorgen Hansen 			wake_up_all(&dev->inout_wq);
517cc68f217SJorgen Hansen 			icr &= ~VMCI_ICR_DMA_DATAGRAM;
518463713ebSJorgen Hansen 		}
519cc68f217SJorgen Hansen 
5201f166439SGeorge Zhang 		if (icr != 0)
5211f166439SGeorge Zhang 			dev_warn(dev->dev,
5221f166439SGeorge Zhang 				 "Ignoring unknown interrupt cause (%d)\n",
5231f166439SGeorge Zhang 				 icr);
5241f166439SGeorge Zhang 	}
5251f166439SGeorge Zhang 
5261f166439SGeorge Zhang 	return IRQ_HANDLED;
5271f166439SGeorge Zhang }
5281f166439SGeorge Zhang 
5291f166439SGeorge Zhang /*
5301f166439SGeorge Zhang  * Interrupt handler for MSI-X interrupt vector VMCI_INTR_NOTIFICATION,
5311f166439SGeorge Zhang  * which is for the notification bitmap.  Will only get called if we are
5321f166439SGeorge Zhang  * using MSI-X with exclusive vectors.
5331f166439SGeorge Zhang  */
5341f166439SGeorge Zhang static irqreturn_t vmci_interrupt_bm(int irq, void *_dev)
5351f166439SGeorge Zhang {
5361f166439SGeorge Zhang 	struct vmci_guest_device *dev = _dev;
5371f166439SGeorge Zhang 
5381f166439SGeorge Zhang 	/* For MSI-X we can just assume it was meant for us. */
5391f166439SGeorge Zhang 	tasklet_schedule(&dev->bm_tasklet);
5401f166439SGeorge Zhang 
5411f166439SGeorge Zhang 	return IRQ_HANDLED;
5421f166439SGeorge Zhang }
5431f166439SGeorge Zhang 
5441f166439SGeorge Zhang /*
545cc68f217SJorgen Hansen  * Interrupt handler for MSI-X interrupt vector VMCI_INTR_DMA_DATAGRAM,
546cc68f217SJorgen Hansen  * which is for the completion of a DMA datagram send or receive operation.
547cc68f217SJorgen Hansen  * Will only get called if we are using MSI-X with exclusive vectors.
548cc68f217SJorgen Hansen  */
549cc68f217SJorgen Hansen static irqreturn_t vmci_interrupt_dma_datagram(int irq, void *_dev)
550cc68f217SJorgen Hansen {
551463713ebSJorgen Hansen 	struct vmci_guest_device *dev = _dev;
552463713ebSJorgen Hansen 
553463713ebSJorgen Hansen 	wake_up_all(&dev->inout_wq);
554463713ebSJorgen Hansen 
555cc68f217SJorgen Hansen 	return IRQ_HANDLED;
556cc68f217SJorgen Hansen }
557cc68f217SJorgen Hansen 
5585ee10982SJorgen Hansen static void vmci_free_dg_buffers(struct vmci_guest_device *vmci_dev)
5595ee10982SJorgen Hansen {
5605ee10982SJorgen Hansen 	if (vmci_dev->mmio_base != NULL) {
5615ee10982SJorgen Hansen 		if (vmci_dev->tx_buffer != NULL)
5625ee10982SJorgen Hansen 			dma_free_coherent(vmci_dev->dev,
5635ee10982SJorgen Hansen 					  VMCI_DMA_DG_BUFFER_SIZE,
5645ee10982SJorgen Hansen 					  vmci_dev->tx_buffer,
5655ee10982SJorgen Hansen 					  vmci_dev->tx_buffer_base);
5665ee10982SJorgen Hansen 		if (vmci_dev->data_buffer != NULL)
5675ee10982SJorgen Hansen 			dma_free_coherent(vmci_dev->dev,
5685ee10982SJorgen Hansen 					  VMCI_DMA_DG_BUFFER_SIZE,
5695ee10982SJorgen Hansen 					  vmci_dev->data_buffer,
5705ee10982SJorgen Hansen 					  vmci_dev->data_buffer_base);
5715ee10982SJorgen Hansen 	} else {
5725ee10982SJorgen Hansen 		vfree(vmci_dev->data_buffer);
5735ee10982SJorgen Hansen 	}
5745ee10982SJorgen Hansen }
5755ee10982SJorgen Hansen 
576cc68f217SJorgen Hansen /*
5771f166439SGeorge Zhang  * Most of the initialization at module load time is done here.
5781f166439SGeorge Zhang  */
5791f166439SGeorge Zhang static int vmci_guest_probe_device(struct pci_dev *pdev,
5801f166439SGeorge Zhang 				   const struct pci_device_id *id)
5811f166439SGeorge Zhang {
5821f166439SGeorge Zhang 	struct vmci_guest_device *vmci_dev;
583e283a0e8SJorgen Hansen 	void __iomem *iobase = NULL;
584e283a0e8SJorgen Hansen 	void __iomem *mmio_base = NULL;
585cc68f217SJorgen Hansen 	unsigned int num_irq_vectors;
5861f166439SGeorge Zhang 	unsigned int capabilities;
587f2db7361SVishnu DASA 	unsigned int caps_in_use;
5881f166439SGeorge Zhang 	unsigned long cmd;
5891f166439SGeorge Zhang 	int vmci_err;
5901f166439SGeorge Zhang 	int error;
5911f166439SGeorge Zhang 
5921f166439SGeorge Zhang 	dev_dbg(&pdev->dev, "Probing for vmci/PCI guest device\n");
5931f166439SGeorge Zhang 
5941f166439SGeorge Zhang 	error = pcim_enable_device(pdev);
5951f166439SGeorge Zhang 	if (error) {
5961f166439SGeorge Zhang 		dev_err(&pdev->dev,
5971f166439SGeorge Zhang 			"Failed to enable VMCI device: %d\n", error);
5981f166439SGeorge Zhang 		return error;
5991f166439SGeorge Zhang 	}
6001f166439SGeorge Zhang 
601e283a0e8SJorgen Hansen 	/*
602e283a0e8SJorgen Hansen 	 * The VMCI device with mmio access to registers requests 256KB
603e283a0e8SJorgen Hansen 	 * for BAR1. If present, driver will use new VMCI device
604e283a0e8SJorgen Hansen 	 * functionality for register access and datagram send/recv.
605e283a0e8SJorgen Hansen 	 */
606e283a0e8SJorgen Hansen 
607e283a0e8SJorgen Hansen 	if (pci_resource_len(pdev, 1) == VMCI_WITH_MMIO_ACCESS_BAR_SIZE) {
608e283a0e8SJorgen Hansen 		dev_info(&pdev->dev, "MMIO register access is available\n");
609e283a0e8SJorgen Hansen 		mmio_base = pci_iomap_range(pdev, 1, VMCI_MMIO_ACCESS_OFFSET,
610e283a0e8SJorgen Hansen 					    VMCI_MMIO_ACCESS_SIZE);
611e283a0e8SJorgen Hansen 		/* If the map fails, we fall back to IOIO access. */
612e283a0e8SJorgen Hansen 		if (!mmio_base)
613e283a0e8SJorgen Hansen 			dev_warn(&pdev->dev, "Failed to map MMIO register access\n");
614e283a0e8SJorgen Hansen 	}
615e283a0e8SJorgen Hansen 
616e283a0e8SJorgen Hansen 	if (!mmio_base) {
617e283a0e8SJorgen Hansen 		error = pcim_iomap_regions(pdev, BIT(0), KBUILD_MODNAME);
6181f166439SGeorge Zhang 		if (error) {
6191f166439SGeorge Zhang 			dev_err(&pdev->dev, "Failed to reserve/map IO regions\n");
6201f166439SGeorge Zhang 			return error;
6211f166439SGeorge Zhang 		}
6221f166439SGeorge Zhang 		iobase = pcim_iomap_table(pdev)[0];
623e283a0e8SJorgen Hansen 	}
6241f166439SGeorge Zhang 
6251f166439SGeorge Zhang 	vmci_dev = devm_kzalloc(&pdev->dev, sizeof(*vmci_dev), GFP_KERNEL);
6261f166439SGeorge Zhang 	if (!vmci_dev) {
6271f166439SGeorge Zhang 		dev_err(&pdev->dev,
6281f166439SGeorge Zhang 			"Can't allocate memory for VMCI device\n");
6291f166439SGeorge Zhang 		return -ENOMEM;
6301f166439SGeorge Zhang 	}
6311f166439SGeorge Zhang 
6321f166439SGeorge Zhang 	vmci_dev->dev = &pdev->dev;
6331f166439SGeorge Zhang 	vmci_dev->exclusive_vectors = false;
6341f166439SGeorge Zhang 	vmci_dev->iobase = iobase;
635e283a0e8SJorgen Hansen 	vmci_dev->mmio_base = mmio_base;
6361f166439SGeorge Zhang 
6371f166439SGeorge Zhang 	tasklet_init(&vmci_dev->datagram_tasklet,
6381f166439SGeorge Zhang 		     vmci_dispatch_dgs, (unsigned long)vmci_dev);
6391f166439SGeorge Zhang 	tasklet_init(&vmci_dev->bm_tasklet,
6401f166439SGeorge Zhang 		     vmci_process_bitmap, (unsigned long)vmci_dev);
641463713ebSJorgen Hansen 	init_waitqueue_head(&vmci_dev->inout_wq);
6421f166439SGeorge Zhang 
6435ee10982SJorgen Hansen 	if (mmio_base != NULL) {
6445ee10982SJorgen Hansen 		vmci_dev->tx_buffer = dma_alloc_coherent(&pdev->dev, VMCI_DMA_DG_BUFFER_SIZE,
6455ee10982SJorgen Hansen 							 &vmci_dev->tx_buffer_base,
6465ee10982SJorgen Hansen 							 GFP_KERNEL);
6475ee10982SJorgen Hansen 		if (!vmci_dev->tx_buffer) {
6485ee10982SJorgen Hansen 			dev_err(&pdev->dev,
6495ee10982SJorgen Hansen 				"Can't allocate memory for datagram tx buffer\n");
6505ee10982SJorgen Hansen 			return -ENOMEM;
6515ee10982SJorgen Hansen 		}
6525ee10982SJorgen Hansen 
6535ee10982SJorgen Hansen 		vmci_dev->data_buffer = dma_alloc_coherent(&pdev->dev, VMCI_DMA_DG_BUFFER_SIZE,
6545ee10982SJorgen Hansen 							   &vmci_dev->data_buffer_base,
6555ee10982SJorgen Hansen 							   GFP_KERNEL);
6565ee10982SJorgen Hansen 	} else {
6571f166439SGeorge Zhang 		vmci_dev->data_buffer = vmalloc(VMCI_MAX_DG_SIZE);
6585ee10982SJorgen Hansen 	}
6591f166439SGeorge Zhang 	if (!vmci_dev->data_buffer) {
6601f166439SGeorge Zhang 		dev_err(&pdev->dev,
6611f166439SGeorge Zhang 			"Can't allocate memory for datagram buffer\n");
6625ee10982SJorgen Hansen 		error = -ENOMEM;
6635ee10982SJorgen Hansen 		goto err_free_data_buffers;
6641f166439SGeorge Zhang 	}
6651f166439SGeorge Zhang 
6661f166439SGeorge Zhang 	pci_set_master(pdev);	/* To enable queue_pair functionality. */
6671f166439SGeorge Zhang 
6681f166439SGeorge Zhang 	/*
6691f166439SGeorge Zhang 	 * Verify that the VMCI Device supports the capabilities that
6701f166439SGeorge Zhang 	 * we need. If the device is missing capabilities that we would
6711f166439SGeorge Zhang 	 * like to use, check for fallback capabilities and use those
6721f166439SGeorge Zhang 	 * instead (so we can run a new VM on old hosts). Fail the load if
6731f166439SGeorge Zhang 	 * a required capability is missing and there is no fallback.
6741f166439SGeorge Zhang 	 *
6751f166439SGeorge Zhang 	 * Right now, we need datagrams. There are no fallbacks.
6761f166439SGeorge Zhang 	 */
677e283a0e8SJorgen Hansen 	capabilities = vmci_read_reg(vmci_dev, VMCI_CAPS_ADDR);
6781f166439SGeorge Zhang 	if (!(capabilities & VMCI_CAPS_DATAGRAM)) {
6791f166439SGeorge Zhang 		dev_err(&pdev->dev, "Device does not support datagrams\n");
6801f166439SGeorge Zhang 		error = -ENXIO;
6815ee10982SJorgen Hansen 		goto err_free_data_buffers;
6821f166439SGeorge Zhang 	}
683f2db7361SVishnu DASA 	caps_in_use = VMCI_CAPS_DATAGRAM;
684f2db7361SVishnu DASA 
685f2db7361SVishnu DASA 	/*
686f2db7361SVishnu DASA 	 * Use 64-bit PPNs if the device supports.
687f2db7361SVishnu DASA 	 *
688f2db7361SVishnu DASA 	 * There is no check for the return value of dma_set_mask_and_coherent
689f2db7361SVishnu DASA 	 * since this driver can handle the default mask values if
690f2db7361SVishnu DASA 	 * dma_set_mask_and_coherent fails.
691f2db7361SVishnu DASA 	 */
692f2db7361SVishnu DASA 	if (capabilities & VMCI_CAPS_PPN64) {
693f2db7361SVishnu DASA 		dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
694f2db7361SVishnu DASA 		use_ppn64 = true;
695f2db7361SVishnu DASA 		caps_in_use |= VMCI_CAPS_PPN64;
696f2db7361SVishnu DASA 	} else {
697f2db7361SVishnu DASA 		dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(44));
698f2db7361SVishnu DASA 		use_ppn64 = false;
699f2db7361SVishnu DASA 	}
7001f166439SGeorge Zhang 
7011f166439SGeorge Zhang 	/*
7021f166439SGeorge Zhang 	 * If the hardware supports notifications, we will use that as
7031f166439SGeorge Zhang 	 * well.
7041f166439SGeorge Zhang 	 */
7051f166439SGeorge Zhang 	if (capabilities & VMCI_CAPS_NOTIFICATIONS) {
7066d6dfb4fSAndy King 		vmci_dev->notification_bitmap = dma_alloc_coherent(
7076d6dfb4fSAndy King 			&pdev->dev, PAGE_SIZE, &vmci_dev->notification_base,
7086d6dfb4fSAndy King 			GFP_KERNEL);
709cc312da2SChristophe JAILLET 		if (!vmci_dev->notification_bitmap)
7101f166439SGeorge Zhang 			dev_warn(&pdev->dev,
7111f166439SGeorge Zhang 				 "Unable to allocate notification bitmap\n");
712cc312da2SChristophe JAILLET 		else
713f2db7361SVishnu DASA 			caps_in_use |= VMCI_CAPS_NOTIFICATIONS;
7141f166439SGeorge Zhang 	}
7151f166439SGeorge Zhang 
716eed2298dSJorgen Hansen 	if (mmio_base != NULL) {
717eed2298dSJorgen Hansen 		if (capabilities & VMCI_CAPS_DMA_DATAGRAM) {
718eed2298dSJorgen Hansen 			caps_in_use |= VMCI_CAPS_DMA_DATAGRAM;
719eed2298dSJorgen Hansen 		} else {
720eed2298dSJorgen Hansen 			dev_err(&pdev->dev,
721eed2298dSJorgen Hansen 				"Missing capability: VMCI_CAPS_DMA_DATAGRAM\n");
722eed2298dSJorgen Hansen 			error = -ENXIO;
723*c8e9b30cSVishnu Dasa 			goto err_free_notification_bitmap;
724eed2298dSJorgen Hansen 		}
725eed2298dSJorgen Hansen 	}
726eed2298dSJorgen Hansen 
727f2db7361SVishnu DASA 	dev_info(&pdev->dev, "Using capabilities 0x%x\n", caps_in_use);
7281f166439SGeorge Zhang 
7291f166439SGeorge Zhang 	/* Let the host know which capabilities we intend to use. */
730e283a0e8SJorgen Hansen 	vmci_write_reg(vmci_dev, caps_in_use, VMCI_CAPS_ADDR);
7311f166439SGeorge Zhang 
7325ee10982SJorgen Hansen 	if (caps_in_use & VMCI_CAPS_DMA_DATAGRAM) {
7338cb520beSJorgen Hansen 		/* Let the device know the size for pages passed down. */
7348cb520beSJorgen Hansen 		vmci_write_reg(vmci_dev, PAGE_SHIFT, VMCI_GUEST_PAGE_SHIFT);
7358cb520beSJorgen Hansen 
7365ee10982SJorgen Hansen 		/* Configure the high order parts of the data in/out buffers. */
7375ee10982SJorgen Hansen 		vmci_write_reg(vmci_dev, upper_32_bits(vmci_dev->data_buffer_base),
7385ee10982SJorgen Hansen 			       VMCI_DATA_IN_HIGH_ADDR);
7395ee10982SJorgen Hansen 		vmci_write_reg(vmci_dev, upper_32_bits(vmci_dev->tx_buffer_base),
7405ee10982SJorgen Hansen 			       VMCI_DATA_OUT_HIGH_ADDR);
7415ee10982SJorgen Hansen 	}
7425ee10982SJorgen Hansen 
7431f166439SGeorge Zhang 	/* Set up global device so that we can start sending datagrams */
7441f166439SGeorge Zhang 	spin_lock_irq(&vmci_dev_spinlock);
7451f166439SGeorge Zhang 	vmci_dev_g = vmci_dev;
7466d6dfb4fSAndy King 	vmci_pdev = pdev;
7471f166439SGeorge Zhang 	spin_unlock_irq(&vmci_dev_spinlock);
7481f166439SGeorge Zhang 
7491f166439SGeorge Zhang 	/*
7501f166439SGeorge Zhang 	 * Register notification bitmap with device if that capability is
7511f166439SGeorge Zhang 	 * used.
7521f166439SGeorge Zhang 	 */
753f2db7361SVishnu DASA 	if (caps_in_use & VMCI_CAPS_NOTIFICATIONS) {
7546d6dfb4fSAndy King 		unsigned long bitmap_ppn =
7556d6dfb4fSAndy King 			vmci_dev->notification_base >> PAGE_SHIFT;
7561f166439SGeorge Zhang 		if (!vmci_dbell_register_notification_bitmap(bitmap_ppn)) {
7571f166439SGeorge Zhang 			dev_warn(&pdev->dev,
758f2db7361SVishnu DASA 				 "VMCI device unable to register notification bitmap with PPN 0x%lx\n",
759f2db7361SVishnu DASA 				 bitmap_ppn);
760782f2445SDmitry Torokhov 			error = -ENXIO;
7611f166439SGeorge Zhang 			goto err_remove_vmci_dev_g;
7621f166439SGeorge Zhang 		}
7631f166439SGeorge Zhang 	}
7641f166439SGeorge Zhang 
7651f166439SGeorge Zhang 	/* Check host capabilities. */
766782f2445SDmitry Torokhov 	error = vmci_check_host_caps(pdev);
767782f2445SDmitry Torokhov 	if (error)
76877e86161SChristophe JAILLET 		goto err_remove_vmci_dev_g;
7691f166439SGeorge Zhang 
7701f166439SGeorge Zhang 	/* Enable device. */
7711f166439SGeorge Zhang 
7721f166439SGeorge Zhang 	/*
7731f166439SGeorge Zhang 	 * We subscribe to the VMCI_EVENT_CTX_ID_UPDATE here so we can
7741f166439SGeorge Zhang 	 * update the internal context id when needed.
7751f166439SGeorge Zhang 	 */
7761f166439SGeorge Zhang 	vmci_err = vmci_event_subscribe(VMCI_EVENT_CTX_ID_UPDATE,
7771f166439SGeorge Zhang 					vmci_guest_cid_update, NULL,
7781f166439SGeorge Zhang 					&ctx_update_sub_id);
7791f166439SGeorge Zhang 	if (vmci_err < VMCI_SUCCESS)
7801f166439SGeorge Zhang 		dev_warn(&pdev->dev,
7811f166439SGeorge Zhang 			 "Failed to subscribe to event (type=%d): %d\n",
7821f166439SGeorge Zhang 			 VMCI_EVENT_CTX_ID_UPDATE, vmci_err);
7831f166439SGeorge Zhang 
7841f166439SGeorge Zhang 	/*
7851f166439SGeorge Zhang 	 * Enable interrupts.  Try MSI-X first, then MSI, and then fallback on
7861f166439SGeorge Zhang 	 * legacy interrupts.
7871f166439SGeorge Zhang 	 */
788cc68f217SJorgen Hansen 	if (vmci_dev->mmio_base != NULL)
789cc68f217SJorgen Hansen 		num_irq_vectors = VMCI_MAX_INTRS;
790cc68f217SJorgen Hansen 	else
791cc68f217SJorgen Hansen 		num_irq_vectors = VMCI_MAX_INTRS_NOTIFICATION;
792cc68f217SJorgen Hansen 	error = pci_alloc_irq_vectors(pdev, num_irq_vectors, num_irq_vectors,
7933bb434cdSChristoph Hellwig 				      PCI_IRQ_MSIX);
794c3423563SChristoph Hellwig 	if (error < 0) {
7953bb434cdSChristoph Hellwig 		error = pci_alloc_irq_vectors(pdev, 1, 1,
7963bb434cdSChristoph Hellwig 				PCI_IRQ_MSIX | PCI_IRQ_MSI | PCI_IRQ_LEGACY);
797c3423563SChristoph Hellwig 		if (error < 0)
79877e86161SChristophe JAILLET 			goto err_unsubscribe_event;
7991f166439SGeorge Zhang 	} else {
8003bb434cdSChristoph Hellwig 		vmci_dev->exclusive_vectors = true;
8011f166439SGeorge Zhang 	}
8021f166439SGeorge Zhang 
8031f166439SGeorge Zhang 	/*
8041f166439SGeorge Zhang 	 * Request IRQ for legacy or MSI interrupts, or for first
8051f166439SGeorge Zhang 	 * MSI-X vector.
8061f166439SGeorge Zhang 	 */
8073bb434cdSChristoph Hellwig 	error = request_irq(pci_irq_vector(pdev, 0), vmci_interrupt,
8083bb434cdSChristoph Hellwig 			    IRQF_SHARED, KBUILD_MODNAME, vmci_dev);
8091f166439SGeorge Zhang 	if (error) {
8101f166439SGeorge Zhang 		dev_err(&pdev->dev, "Irq %u in use: %d\n",
8113bb434cdSChristoph Hellwig 			pci_irq_vector(pdev, 0), error);
8121f166439SGeorge Zhang 		goto err_disable_msi;
8131f166439SGeorge Zhang 	}
8141f166439SGeorge Zhang 
8151f166439SGeorge Zhang 	/*
8161f166439SGeorge Zhang 	 * For MSI-X with exclusive vectors we need to request an
8171f166439SGeorge Zhang 	 * interrupt for each vector so that we get a separate
8181f166439SGeorge Zhang 	 * interrupt handler routine.  This allows us to distinguish
8191f166439SGeorge Zhang 	 * between the vectors.
8201f166439SGeorge Zhang 	 */
8211f166439SGeorge Zhang 	if (vmci_dev->exclusive_vectors) {
8223bb434cdSChristoph Hellwig 		error = request_irq(pci_irq_vector(pdev, 1),
8231f166439SGeorge Zhang 				    vmci_interrupt_bm, 0, KBUILD_MODNAME,
8241f166439SGeorge Zhang 				    vmci_dev);
8251f166439SGeorge Zhang 		if (error) {
8261f166439SGeorge Zhang 			dev_err(&pdev->dev,
8271f166439SGeorge Zhang 				"Failed to allocate irq %u: %d\n",
8283bb434cdSChristoph Hellwig 				pci_irq_vector(pdev, 1), error);
8291f166439SGeorge Zhang 			goto err_free_irq;
8301f166439SGeorge Zhang 		}
831cc68f217SJorgen Hansen 		if (caps_in_use & VMCI_CAPS_DMA_DATAGRAM) {
832cc68f217SJorgen Hansen 			error = request_irq(pci_irq_vector(pdev, 2),
833cc68f217SJorgen Hansen 					    vmci_interrupt_dma_datagram,
834cc68f217SJorgen Hansen 					    0, KBUILD_MODNAME, vmci_dev);
835cc68f217SJorgen Hansen 			if (error) {
836cc68f217SJorgen Hansen 				dev_err(&pdev->dev,
837cc68f217SJorgen Hansen 					"Failed to allocate irq %u: %d\n",
838cc68f217SJorgen Hansen 					pci_irq_vector(pdev, 2), error);
839cc68f217SJorgen Hansen 				goto err_free_bm_irq;
840cc68f217SJorgen Hansen 			}
841cc68f217SJorgen Hansen 		}
8421f166439SGeorge Zhang 	}
8431f166439SGeorge Zhang 
8441f166439SGeorge Zhang 	dev_dbg(&pdev->dev, "Registered device\n");
8451f166439SGeorge Zhang 
8461f166439SGeorge Zhang 	atomic_inc(&vmci_num_guest_devices);
8471f166439SGeorge Zhang 
8481f166439SGeorge Zhang 	/* Enable specific interrupt bits. */
8491f166439SGeorge Zhang 	cmd = VMCI_IMR_DATAGRAM;
850f2db7361SVishnu DASA 	if (caps_in_use & VMCI_CAPS_NOTIFICATIONS)
8511f166439SGeorge Zhang 		cmd |= VMCI_IMR_NOTIFICATION;
852cc68f217SJorgen Hansen 	if (caps_in_use & VMCI_CAPS_DMA_DATAGRAM)
853cc68f217SJorgen Hansen 		cmd |= VMCI_IMR_DMA_DATAGRAM;
854e283a0e8SJorgen Hansen 	vmci_write_reg(vmci_dev, cmd, VMCI_IMR_ADDR);
8551f166439SGeorge Zhang 
8561f166439SGeorge Zhang 	/* Enable interrupts. */
857e283a0e8SJorgen Hansen 	vmci_write_reg(vmci_dev, VMCI_CONTROL_INT_ENABLE, VMCI_CONTROL_ADDR);
8581f166439SGeorge Zhang 
8591f166439SGeorge Zhang 	pci_set_drvdata(pdev, vmci_dev);
860b1bba80aSStefano Garzarella 
861b1bba80aSStefano Garzarella 	vmci_call_vsock_callback(false);
8621f166439SGeorge Zhang 	return 0;
8631f166439SGeorge Zhang 
864cc68f217SJorgen Hansen err_free_bm_irq:
8655df0e734SVishnu Dasa 	if (vmci_dev->exclusive_vectors)
866cc68f217SJorgen Hansen 		free_irq(pci_irq_vector(pdev, 1), vmci_dev);
8675df0e734SVishnu Dasa 
8681f166439SGeorge Zhang err_free_irq:
8693bb434cdSChristoph Hellwig 	free_irq(pci_irq_vector(pdev, 0), vmci_dev);
8701f166439SGeorge Zhang 	tasklet_kill(&vmci_dev->datagram_tasklet);
8711f166439SGeorge Zhang 	tasklet_kill(&vmci_dev->bm_tasklet);
8721f166439SGeorge Zhang 
8731f166439SGeorge Zhang err_disable_msi:
8743bb434cdSChristoph Hellwig 	pci_free_irq_vectors(pdev);
8751f166439SGeorge Zhang 
87677e86161SChristophe JAILLET err_unsubscribe_event:
8771f166439SGeorge Zhang 	vmci_err = vmci_event_unsubscribe(ctx_update_sub_id);
8781f166439SGeorge Zhang 	if (vmci_err < VMCI_SUCCESS)
8791f166439SGeorge Zhang 		dev_warn(&pdev->dev,
8801f166439SGeorge Zhang 			 "Failed to unsubscribe from event (type=%d) with subscriber (ID=0x%x): %d\n",
8811f166439SGeorge Zhang 			 VMCI_EVENT_CTX_ID_UPDATE, ctx_update_sub_id, vmci_err);
8821f166439SGeorge Zhang 
88377e86161SChristophe JAILLET err_remove_vmci_dev_g:
88477e86161SChristophe JAILLET 	spin_lock_irq(&vmci_dev_spinlock);
88577e86161SChristophe JAILLET 	vmci_pdev = NULL;
88677e86161SChristophe JAILLET 	vmci_dev_g = NULL;
88777e86161SChristophe JAILLET 	spin_unlock_irq(&vmci_dev_spinlock);
88877e86161SChristophe JAILLET 
889*c8e9b30cSVishnu Dasa err_free_notification_bitmap:
8901f166439SGeorge Zhang 	if (vmci_dev->notification_bitmap) {
891e283a0e8SJorgen Hansen 		vmci_write_reg(vmci_dev, VMCI_CONTROL_RESET, VMCI_CONTROL_ADDR);
8926d6dfb4fSAndy King 		dma_free_coherent(&pdev->dev, PAGE_SIZE,
8936d6dfb4fSAndy King 				  vmci_dev->notification_bitmap,
8946d6dfb4fSAndy King 				  vmci_dev->notification_base);
8951f166439SGeorge Zhang 	}
8961f166439SGeorge Zhang 
8975ee10982SJorgen Hansen err_free_data_buffers:
8985ee10982SJorgen Hansen 	vmci_free_dg_buffers(vmci_dev);
8991f166439SGeorge Zhang 
9001f166439SGeorge Zhang 	/* The rest are managed resources and will be freed by PCI core */
9011f166439SGeorge Zhang 	return error;
9021f166439SGeorge Zhang }
9031f166439SGeorge Zhang 
9041f166439SGeorge Zhang static void vmci_guest_remove_device(struct pci_dev *pdev)
9051f166439SGeorge Zhang {
9061f166439SGeorge Zhang 	struct vmci_guest_device *vmci_dev = pci_get_drvdata(pdev);
9071f166439SGeorge Zhang 	int vmci_err;
9081f166439SGeorge Zhang 
9091f166439SGeorge Zhang 	dev_dbg(&pdev->dev, "Removing device\n");
9101f166439SGeorge Zhang 
9111f166439SGeorge Zhang 	atomic_dec(&vmci_num_guest_devices);
9121f166439SGeorge Zhang 
9131f166439SGeorge Zhang 	vmci_qp_guest_endpoints_exit();
9141f166439SGeorge Zhang 
9151f166439SGeorge Zhang 	vmci_err = vmci_event_unsubscribe(ctx_update_sub_id);
9161f166439SGeorge Zhang 	if (vmci_err < VMCI_SUCCESS)
9171f166439SGeorge Zhang 		dev_warn(&pdev->dev,
9181f166439SGeorge Zhang 			 "Failed to unsubscribe from event (type=%d) with subscriber (ID=0x%x): %d\n",
9191f166439SGeorge Zhang 			 VMCI_EVENT_CTX_ID_UPDATE, ctx_update_sub_id, vmci_err);
9201f166439SGeorge Zhang 
9211f166439SGeorge Zhang 	spin_lock_irq(&vmci_dev_spinlock);
9221f166439SGeorge Zhang 	vmci_dev_g = NULL;
9236d6dfb4fSAndy King 	vmci_pdev = NULL;
9241f166439SGeorge Zhang 	spin_unlock_irq(&vmci_dev_spinlock);
9251f166439SGeorge Zhang 
9261f166439SGeorge Zhang 	dev_dbg(&pdev->dev, "Resetting vmci device\n");
927e283a0e8SJorgen Hansen 	vmci_write_reg(vmci_dev, VMCI_CONTROL_RESET, VMCI_CONTROL_ADDR);
9281f166439SGeorge Zhang 
9291f166439SGeorge Zhang 	/*
9301f166439SGeorge Zhang 	 * Free IRQ and then disable MSI/MSI-X as appropriate.  For
9311f166439SGeorge Zhang 	 * MSI-X, we might have multiple vectors, each with their own
9321f166439SGeorge Zhang 	 * IRQ, which we must free too.
9331f166439SGeorge Zhang 	 */
934cc68f217SJorgen Hansen 	if (vmci_dev->exclusive_vectors) {
9353bb434cdSChristoph Hellwig 		free_irq(pci_irq_vector(pdev, 1), vmci_dev);
936cc68f217SJorgen Hansen 		if (vmci_dev->mmio_base != NULL)
937cc68f217SJorgen Hansen 			free_irq(pci_irq_vector(pdev, 2), vmci_dev);
938cc68f217SJorgen Hansen 	}
9393bb434cdSChristoph Hellwig 	free_irq(pci_irq_vector(pdev, 0), vmci_dev);
9403bb434cdSChristoph Hellwig 	pci_free_irq_vectors(pdev);
9411f166439SGeorge Zhang 
9421f166439SGeorge Zhang 	tasklet_kill(&vmci_dev->datagram_tasklet);
9431f166439SGeorge Zhang 	tasklet_kill(&vmci_dev->bm_tasklet);
9441f166439SGeorge Zhang 
9451f166439SGeorge Zhang 	if (vmci_dev->notification_bitmap) {
9461f166439SGeorge Zhang 		/*
9471f166439SGeorge Zhang 		 * The device reset above cleared the bitmap state of the
9481f166439SGeorge Zhang 		 * device, so we can safely free it here.
9491f166439SGeorge Zhang 		 */
9501f166439SGeorge Zhang 
9516d6dfb4fSAndy King 		dma_free_coherent(&pdev->dev, PAGE_SIZE,
9526d6dfb4fSAndy King 				  vmci_dev->notification_bitmap,
9536d6dfb4fSAndy King 				  vmci_dev->notification_base);
9541f166439SGeorge Zhang 	}
9551f166439SGeorge Zhang 
9565ee10982SJorgen Hansen 	vmci_free_dg_buffers(vmci_dev);
9575ee10982SJorgen Hansen 
9585ee10982SJorgen Hansen 	if (vmci_dev->mmio_base != NULL)
9595ee10982SJorgen Hansen 		pci_iounmap(pdev, vmci_dev->mmio_base);
9601f166439SGeorge Zhang 
9611f166439SGeorge Zhang 	/* The rest are managed resources and will be freed by PCI core */
9621f166439SGeorge Zhang }
9631f166439SGeorge Zhang 
96432182cd3SGreg Kroah-Hartman static const struct pci_device_id vmci_ids[] = {
9651f166439SGeorge Zhang 	{ PCI_DEVICE(PCI_VENDOR_ID_VMWARE, PCI_DEVICE_ID_VMWARE_VMCI), },
9661f166439SGeorge Zhang 	{ 0 },
9671f166439SGeorge Zhang };
9681f166439SGeorge Zhang MODULE_DEVICE_TABLE(pci, vmci_ids);
9691f166439SGeorge Zhang 
9701f166439SGeorge Zhang static struct pci_driver vmci_guest_driver = {
9711f166439SGeorge Zhang 	.name		= KBUILD_MODNAME,
9721f166439SGeorge Zhang 	.id_table	= vmci_ids,
9731f166439SGeorge Zhang 	.probe		= vmci_guest_probe_device,
9741f166439SGeorge Zhang 	.remove		= vmci_guest_remove_device,
9751f166439SGeorge Zhang };
9761f166439SGeorge Zhang 
9771f166439SGeorge Zhang int __init vmci_guest_init(void)
9781f166439SGeorge Zhang {
9791f166439SGeorge Zhang 	return pci_register_driver(&vmci_guest_driver);
9801f166439SGeorge Zhang }
9811f166439SGeorge Zhang 
9821f166439SGeorge Zhang void __exit vmci_guest_exit(void)
9831f166439SGeorge Zhang {
9841f166439SGeorge Zhang 	pci_unregister_driver(&vmci_guest_driver);
9851f166439SGeorge Zhang }
986