xref: /openbmc/linux/arch/um/drivers/virt-pci.c (revision e5ab9eff)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2020 Intel Corporation
4  * Author: Johannes Berg <johannes@sipsolutions.net>
5  */
6 #include <linux/module.h>
7 #include <linux/pci.h>
8 #include <linux/virtio.h>
9 #include <linux/virtio_config.h>
10 #include <linux/logic_iomem.h>
11 #include <linux/of_platform.h>
12 #include <linux/irqdomain.h>
13 #include <linux/virtio_pcidev.h>
14 #include <linux/virtio-uml.h>
15 #include <linux/delay.h>
16 #include <linux/msi.h>
17 #include <asm/unaligned.h>
18 #include <irq_kern.h>
19 
/* number of entries in um_pci_devices[], i.e. device slots on the bus */
#define MAX_DEVICES		8
/* size of the um_pci_msi_used bitmap and of the inner IRQ domain */
#define MAX_MSI_VECTORS		32
/* bytes of config space mapped per device slot */
#define CFG_SPACE_SIZE		4096

/* for MSI-X we have a 32-bit payload */
#define MAX_IRQ_MSG_SIZE	(sizeof(struct virtio_pcidev_msg) + sizeof(u32))
/* number of receive buffers queued on the IRQ virtqueue */
#define NUM_IRQ_MSGS		10

/*
 * Virtqueue tokens for the command queue: a token with bit 0 set refers
 * to a buffer that must not be kfree()d when it completes, any other
 * token is a kmalloc()ed buffer that is freed on completion.
 */
#define HANDLE_NO_FREE(ptr)	((void *)((unsigned long)(ptr) | 1UL))
#define HANDLE_IS_NO_FREE(ptr)	((unsigned long)(ptr) & 1UL)
30 
31 struct um_pci_device {
32 	struct virtio_device *vdev;
33 
34 	/* for now just standard BARs */
35 	u8 resptr[PCI_STD_NUM_BARS];
36 
37 	struct virtqueue *cmd_vq, *irq_vq;
38 
39 #define UM_PCI_STAT_WAITING	0
40 	unsigned long status;
41 
42 	int irq;
43 
44 	bool platform;
45 };
46 
47 struct um_pci_device_reg {
48 	struct um_pci_device *dev;
49 	void __iomem *iomem;
50 };
51 
52 static struct pci_host_bridge *bridge;
53 static DEFINE_MUTEX(um_pci_mtx);
54 static struct um_pci_device *um_pci_platform_device;
55 static struct um_pci_device_reg um_pci_devices[MAX_DEVICES];
56 static struct fwnode_handle *um_pci_fwnode;
57 static struct irq_domain *um_pci_inner_domain;
58 static struct irq_domain *um_pci_msi_domain;
59 static unsigned long um_pci_msi_used[BITS_TO_LONGS(MAX_MSI_VECTORS)];
60 
61 static unsigned int um_pci_max_delay_us = 40000;
62 module_param_named(max_delay_us, um_pci_max_delay_us, uint, 0644);
63 
64 struct um_pci_message_buffer {
65 	struct virtio_pcidev_msg hdr;
66 	u8 data[8];
67 };
68 
69 static struct um_pci_message_buffer __percpu *um_pci_msg_bufs;
70 
71 static int um_pci_send_cmd(struct um_pci_device *dev,
72 			   struct virtio_pcidev_msg *cmd,
73 			   unsigned int cmd_size,
74 			   const void *extra, unsigned int extra_size,
75 			   void *out, unsigned int out_size)
76 {
77 	struct scatterlist out_sg, extra_sg, in_sg;
78 	struct scatterlist *sgs_list[] = {
79 		[0] = &out_sg,
80 		[1] = extra ? &extra_sg : &in_sg,
81 		[2] = extra ? &in_sg : NULL,
82 	};
83 	struct um_pci_message_buffer *buf;
84 	int delay_count = 0;
85 	int ret, len;
86 	bool posted;
87 
88 	if (WARN_ON(cmd_size < sizeof(*cmd) || cmd_size > sizeof(*buf)))
89 		return -EINVAL;
90 
91 	switch (cmd->op) {
92 	case VIRTIO_PCIDEV_OP_CFG_WRITE:
93 	case VIRTIO_PCIDEV_OP_MMIO_WRITE:
94 	case VIRTIO_PCIDEV_OP_MMIO_MEMSET:
95 		/* in PCI, writes are posted, so don't wait */
96 		posted = !out;
97 		WARN_ON(!posted);
98 		break;
99 	default:
100 		posted = false;
101 		break;
102 	}
103 
104 	buf = get_cpu_var(um_pci_msg_bufs);
105 	if (buf)
106 		memcpy(buf, cmd, cmd_size);
107 
108 	if (posted) {
109 		u8 *ncmd = kmalloc(cmd_size + extra_size, GFP_ATOMIC);
110 
111 		if (ncmd) {
112 			memcpy(ncmd, cmd, cmd_size);
113 			if (extra)
114 				memcpy(ncmd + cmd_size, extra, extra_size);
115 			cmd = (void *)ncmd;
116 			cmd_size += extra_size;
117 			extra = NULL;
118 			extra_size = 0;
119 		} else {
120 			/* try without allocating memory */
121 			posted = false;
122 			cmd = (void *)buf;
123 		}
124 	} else {
125 		cmd = (void *)buf;
126 	}
127 
128 	sg_init_one(&out_sg, cmd, cmd_size);
129 	if (extra)
130 		sg_init_one(&extra_sg, extra, extra_size);
131 	if (out)
132 		sg_init_one(&in_sg, out, out_size);
133 
134 	/* add to internal virtio queue */
135 	ret = virtqueue_add_sgs(dev->cmd_vq, sgs_list,
136 				extra ? 2 : 1,
137 				out ? 1 : 0,
138 				posted ? cmd : HANDLE_NO_FREE(cmd),
139 				GFP_ATOMIC);
140 	if (ret) {
141 		if (posted)
142 			kfree(cmd);
143 		goto out;
144 	}
145 
146 	if (posted) {
147 		virtqueue_kick(dev->cmd_vq);
148 		ret = 0;
149 		goto out;
150 	}
151 
152 	/* kick and poll for getting a response on the queue */
153 	set_bit(UM_PCI_STAT_WAITING, &dev->status);
154 	virtqueue_kick(dev->cmd_vq);
155 
156 	while (1) {
157 		void *completed = virtqueue_get_buf(dev->cmd_vq, &len);
158 
159 		if (completed == HANDLE_NO_FREE(cmd))
160 			break;
161 
162 		if (completed && !HANDLE_IS_NO_FREE(completed))
163 			kfree(completed);
164 
165 		if (WARN_ONCE(virtqueue_is_broken(dev->cmd_vq) ||
166 			      ++delay_count > um_pci_max_delay_us,
167 			      "um virt-pci delay: %d", delay_count)) {
168 			ret = -EIO;
169 			break;
170 		}
171 		udelay(1);
172 	}
173 	clear_bit(UM_PCI_STAT_WAITING, &dev->status);
174 
175 out:
176 	put_cpu_var(um_pci_msg_bufs);
177 	return ret;
178 }
179 
180 static unsigned long um_pci_cfgspace_read(void *priv, unsigned int offset,
181 					  int size)
182 {
183 	struct um_pci_device_reg *reg = priv;
184 	struct um_pci_device *dev = reg->dev;
185 	struct virtio_pcidev_msg hdr = {
186 		.op = VIRTIO_PCIDEV_OP_CFG_READ,
187 		.size = size,
188 		.addr = offset,
189 	};
190 	/* buf->data is maximum size - we may only use parts of it */
191 	struct um_pci_message_buffer *buf;
192 	u8 *data;
193 	unsigned long ret = ULONG_MAX;
194 	size_t bytes = sizeof(buf->data);
195 
196 	if (!dev)
197 		return ULONG_MAX;
198 
199 	buf = get_cpu_var(um_pci_msg_bufs);
200 	data = buf->data;
201 
202 	if (buf)
203 		memset(data, 0xff, bytes);
204 
205 	switch (size) {
206 	case 1:
207 	case 2:
208 	case 4:
209 #ifdef CONFIG_64BIT
210 	case 8:
211 #endif
212 		break;
213 	default:
214 		WARN(1, "invalid config space read size %d\n", size);
215 		goto out;
216 	}
217 
218 	if (um_pci_send_cmd(dev, &hdr, sizeof(hdr), NULL, 0, data, bytes))
219 		goto out;
220 
221 	switch (size) {
222 	case 1:
223 		ret = data[0];
224 		break;
225 	case 2:
226 		ret = le16_to_cpup((void *)data);
227 		break;
228 	case 4:
229 		ret = le32_to_cpup((void *)data);
230 		break;
231 #ifdef CONFIG_64BIT
232 	case 8:
233 		ret = le64_to_cpup((void *)data);
234 		break;
235 #endif
236 	default:
237 		break;
238 	}
239 
240 out:
241 	put_cpu_var(um_pci_msg_bufs);
242 	return ret;
243 }
244 
245 static void um_pci_cfgspace_write(void *priv, unsigned int offset, int size,
246 				  unsigned long val)
247 {
248 	struct um_pci_device_reg *reg = priv;
249 	struct um_pci_device *dev = reg->dev;
250 	struct {
251 		struct virtio_pcidev_msg hdr;
252 		/* maximum size - we may only use parts of it */
253 		u8 data[8];
254 	} msg = {
255 		.hdr = {
256 			.op = VIRTIO_PCIDEV_OP_CFG_WRITE,
257 			.size = size,
258 			.addr = offset,
259 		},
260 	};
261 
262 	if (!dev)
263 		return;
264 
265 	switch (size) {
266 	case 1:
267 		msg.data[0] = (u8)val;
268 		break;
269 	case 2:
270 		put_unaligned_le16(val, (void *)msg.data);
271 		break;
272 	case 4:
273 		put_unaligned_le32(val, (void *)msg.data);
274 		break;
275 #ifdef CONFIG_64BIT
276 	case 8:
277 		put_unaligned_le64(val, (void *)msg.data);
278 		break;
279 #endif
280 	default:
281 		WARN(1, "invalid config space write size %d\n", size);
282 		return;
283 	}
284 
285 	WARN_ON(um_pci_send_cmd(dev, &msg.hdr, sizeof(msg), NULL, 0, NULL, 0));
286 }
287 
288 static const struct logic_iomem_ops um_pci_device_cfgspace_ops = {
289 	.read = um_pci_cfgspace_read,
290 	.write = um_pci_cfgspace_write,
291 };
292 
293 static void um_pci_bar_copy_from(void *priv, void *buffer,
294 				 unsigned int offset, int size)
295 {
296 	u8 *resptr = priv;
297 	struct um_pci_device *dev = container_of(resptr - *resptr,
298 						 struct um_pci_device,
299 						 resptr[0]);
300 	struct virtio_pcidev_msg hdr = {
301 		.op = VIRTIO_PCIDEV_OP_MMIO_READ,
302 		.bar = *resptr,
303 		.size = size,
304 		.addr = offset,
305 	};
306 
307 	memset(buffer, 0xff, size);
308 
309 	um_pci_send_cmd(dev, &hdr, sizeof(hdr), NULL, 0, buffer, size);
310 }
311 
312 static unsigned long um_pci_bar_read(void *priv, unsigned int offset,
313 				     int size)
314 {
315 	/* buf->data is maximum size - we may only use parts of it */
316 	struct um_pci_message_buffer *buf;
317 	u8 *data;
318 	unsigned long ret = ULONG_MAX;
319 
320 	buf = get_cpu_var(um_pci_msg_bufs);
321 	data = buf->data;
322 
323 	switch (size) {
324 	case 1:
325 	case 2:
326 	case 4:
327 #ifdef CONFIG_64BIT
328 	case 8:
329 #endif
330 		break;
331 	default:
332 		WARN(1, "invalid config space read size %d\n", size);
333 		goto out;
334 	}
335 
336 	um_pci_bar_copy_from(priv, data, offset, size);
337 
338 	switch (size) {
339 	case 1:
340 		ret = data[0];
341 		break;
342 	case 2:
343 		ret = le16_to_cpup((void *)data);
344 		break;
345 	case 4:
346 		ret = le32_to_cpup((void *)data);
347 		break;
348 #ifdef CONFIG_64BIT
349 	case 8:
350 		ret = le64_to_cpup((void *)data);
351 		break;
352 #endif
353 	default:
354 		break;
355 	}
356 
357 out:
358 	put_cpu_var(um_pci_msg_bufs);
359 	return ret;
360 }
361 
362 static void um_pci_bar_copy_to(void *priv, unsigned int offset,
363 			       const void *buffer, int size)
364 {
365 	u8 *resptr = priv;
366 	struct um_pci_device *dev = container_of(resptr - *resptr,
367 						 struct um_pci_device,
368 						 resptr[0]);
369 	struct virtio_pcidev_msg hdr = {
370 		.op = VIRTIO_PCIDEV_OP_MMIO_WRITE,
371 		.bar = *resptr,
372 		.size = size,
373 		.addr = offset,
374 	};
375 
376 	um_pci_send_cmd(dev, &hdr, sizeof(hdr), buffer, size, NULL, 0);
377 }
378 
379 static void um_pci_bar_write(void *priv, unsigned int offset, int size,
380 			     unsigned long val)
381 {
382 	/* maximum size - we may only use parts of it */
383 	u8 data[8];
384 
385 	switch (size) {
386 	case 1:
387 		data[0] = (u8)val;
388 		break;
389 	case 2:
390 		put_unaligned_le16(val, (void *)data);
391 		break;
392 	case 4:
393 		put_unaligned_le32(val, (void *)data);
394 		break;
395 #ifdef CONFIG_64BIT
396 	case 8:
397 		put_unaligned_le64(val, (void *)data);
398 		break;
399 #endif
400 	default:
401 		WARN(1, "invalid config space write size %d\n", size);
402 		return;
403 	}
404 
405 	um_pci_bar_copy_to(priv, offset, data, size);
406 }
407 
408 static void um_pci_bar_set(void *priv, unsigned int offset, u8 value, int size)
409 {
410 	u8 *resptr = priv;
411 	struct um_pci_device *dev = container_of(resptr - *resptr,
412 						 struct um_pci_device,
413 						 resptr[0]);
414 	struct {
415 		struct virtio_pcidev_msg hdr;
416 		u8 data;
417 	} msg = {
418 		.hdr = {
419 			.op = VIRTIO_PCIDEV_OP_CFG_WRITE,
420 			.bar = *resptr,
421 			.size = size,
422 			.addr = offset,
423 		},
424 		.data = value,
425 	};
426 
427 	um_pci_send_cmd(dev, &msg.hdr, sizeof(msg), NULL, 0, NULL, 0);
428 }
429 
430 static const struct logic_iomem_ops um_pci_device_bar_ops = {
431 	.read = um_pci_bar_read,
432 	.write = um_pci_bar_write,
433 	.set = um_pci_bar_set,
434 	.copy_from = um_pci_bar_copy_from,
435 	.copy_to = um_pci_bar_copy_to,
436 };
437 
438 static void __iomem *um_pci_map_bus(struct pci_bus *bus, unsigned int devfn,
439 				    int where)
440 {
441 	struct um_pci_device_reg *dev;
442 	unsigned int busn = bus->number;
443 
444 	if (busn > 0)
445 		return NULL;
446 
447 	/* not allowing functions for now ... */
448 	if (devfn % 8)
449 		return NULL;
450 
451 	if (devfn / 8 >= ARRAY_SIZE(um_pci_devices))
452 		return NULL;
453 
454 	dev = &um_pci_devices[devfn / 8];
455 	if (!dev)
456 		return NULL;
457 
458 	return (void __iomem *)((unsigned long)dev->iomem + where);
459 }
460 
461 static struct pci_ops um_pci_ops = {
462 	.map_bus = um_pci_map_bus,
463 	.read = pci_generic_config_read,
464 	.write = pci_generic_config_write,
465 };
466 
467 static void um_pci_rescan(void)
468 {
469 	pci_lock_rescan_remove();
470 	pci_rescan_bus(bridge->bus);
471 	pci_unlock_rescan_remove();
472 }
473 
474 static void um_pci_irq_vq_addbuf(struct virtqueue *vq, void *buf, bool kick)
475 {
476 	struct scatterlist sg[1];
477 
478 	sg_init_one(sg, buf, MAX_IRQ_MSG_SIZE);
479 	if (virtqueue_add_inbuf(vq, sg, 1, buf, GFP_ATOMIC))
480 		kfree(buf);
481 	else if (kick)
482 		virtqueue_kick(vq);
483 }
484 
485 static void um_pci_handle_irq_message(struct virtqueue *vq,
486 				      struct virtio_pcidev_msg *msg)
487 {
488 	struct virtio_device *vdev = vq->vdev;
489 	struct um_pci_device *dev = vdev->priv;
490 
491 	if (!dev->irq)
492 		return;
493 
494 	/* we should properly chain interrupts, but on ARCH=um we don't care */
495 
496 	switch (msg->op) {
497 	case VIRTIO_PCIDEV_OP_INT:
498 		generic_handle_irq(dev->irq);
499 		break;
500 	case VIRTIO_PCIDEV_OP_MSI:
501 		/* our MSI message is just the interrupt number */
502 		if (msg->size == sizeof(u32))
503 			generic_handle_irq(le32_to_cpup((void *)msg->data));
504 		else
505 			generic_handle_irq(le16_to_cpup((void *)msg->data));
506 		break;
507 	case VIRTIO_PCIDEV_OP_PME:
508 		/* nothing to do - we already woke up due to the message */
509 		break;
510 	default:
511 		dev_err(&vdev->dev, "unexpected virt-pci message %d\n", msg->op);
512 		break;
513 	}
514 }
515 
516 static void um_pci_cmd_vq_cb(struct virtqueue *vq)
517 {
518 	struct virtio_device *vdev = vq->vdev;
519 	struct um_pci_device *dev = vdev->priv;
520 	void *cmd;
521 	int len;
522 
523 	if (test_bit(UM_PCI_STAT_WAITING, &dev->status))
524 		return;
525 
526 	while ((cmd = virtqueue_get_buf(vq, &len))) {
527 		if (WARN_ON(HANDLE_IS_NO_FREE(cmd)))
528 			continue;
529 		kfree(cmd);
530 	}
531 }
532 
533 static void um_pci_irq_vq_cb(struct virtqueue *vq)
534 {
535 	struct virtio_pcidev_msg *msg;
536 	int len;
537 
538 	while ((msg = virtqueue_get_buf(vq, &len))) {
539 		if (len >= sizeof(*msg))
540 			um_pci_handle_irq_message(vq, msg);
541 
542 		/* recycle the message buffer */
543 		um_pci_irq_vq_addbuf(vq, msg, true);
544 	}
545 }
546 
547 /* Copied from arch/x86/kernel/devicetree.c */
548 struct device_node *pcibios_get_phb_of_node(struct pci_bus *bus)
549 {
550 	struct device_node *np;
551 
552 	for_each_node_by_type(np, "pci") {
553 		const void *prop;
554 		unsigned int bus_min;
555 
556 		prop = of_get_property(np, "bus-range", NULL);
557 		if (!prop)
558 			continue;
559 		bus_min = be32_to_cpup(prop);
560 		if (bus->number == bus_min)
561 			return np;
562 	}
563 	return NULL;
564 }
565 
566 static int um_pci_init_vqs(struct um_pci_device *dev)
567 {
568 	struct virtqueue *vqs[2];
569 	static const char *const names[2] = { "cmd", "irq" };
570 	vq_callback_t *cbs[2] = { um_pci_cmd_vq_cb, um_pci_irq_vq_cb };
571 	int err, i;
572 
573 	err = virtio_find_vqs(dev->vdev, 2, vqs, cbs, names, NULL);
574 	if (err)
575 		return err;
576 
577 	dev->cmd_vq = vqs[0];
578 	dev->irq_vq = vqs[1];
579 
580 	virtio_device_ready(dev->vdev);
581 
582 	for (i = 0; i < NUM_IRQ_MSGS; i++) {
583 		void *msg = kzalloc(MAX_IRQ_MSG_SIZE, GFP_KERNEL);
584 
585 		if (msg)
586 			um_pci_irq_vq_addbuf(dev->irq_vq, msg, false);
587 	}
588 
589 	virtqueue_kick(dev->irq_vq);
590 
591 	return 0;
592 }
593 
594 static void __um_pci_virtio_platform_remove(struct virtio_device *vdev,
595 					    struct um_pci_device *dev)
596 {
597 	virtio_reset_device(vdev);
598 	vdev->config->del_vqs(vdev);
599 
600 	mutex_lock(&um_pci_mtx);
601 	um_pci_platform_device = NULL;
602 	mutex_unlock(&um_pci_mtx);
603 
604 	kfree(dev);
605 }
606 
607 static int um_pci_virtio_platform_probe(struct virtio_device *vdev,
608 					struct um_pci_device *dev)
609 {
610 	int ret;
611 
612 	dev->platform = true;
613 
614 	mutex_lock(&um_pci_mtx);
615 
616 	if (um_pci_platform_device) {
617 		mutex_unlock(&um_pci_mtx);
618 		ret = -EBUSY;
619 		goto out_free;
620 	}
621 
622 	ret = um_pci_init_vqs(dev);
623 	if (ret) {
624 		mutex_unlock(&um_pci_mtx);
625 		goto out_free;
626 	}
627 
628 	um_pci_platform_device = dev;
629 
630 	mutex_unlock(&um_pci_mtx);
631 
632 	ret = of_platform_default_populate(vdev->dev.of_node, NULL, &vdev->dev);
633 	if (ret)
634 		__um_pci_virtio_platform_remove(vdev, dev);
635 
636 	return ret;
637 
638 out_free:
639 	kfree(dev);
640 	return ret;
641 }
642 
643 static int um_pci_virtio_probe(struct virtio_device *vdev)
644 {
645 	struct um_pci_device *dev;
646 	int i, free = -1;
647 	int err = -ENOSPC;
648 
649 	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
650 	if (!dev)
651 		return -ENOMEM;
652 
653 	dev->vdev = vdev;
654 	vdev->priv = dev;
655 
656 	if (of_device_is_compatible(vdev->dev.of_node, "simple-bus"))
657 		return um_pci_virtio_platform_probe(vdev, dev);
658 
659 	mutex_lock(&um_pci_mtx);
660 	for (i = 0; i < MAX_DEVICES; i++) {
661 		if (um_pci_devices[i].dev)
662 			continue;
663 		free = i;
664 		break;
665 	}
666 
667 	if (free < 0)
668 		goto error;
669 
670 	err = um_pci_init_vqs(dev);
671 	if (err)
672 		goto error;
673 
674 	dev->irq = irq_alloc_desc(numa_node_id());
675 	if (dev->irq < 0) {
676 		err = dev->irq;
677 		goto err_reset;
678 	}
679 	um_pci_devices[free].dev = dev;
680 	vdev->priv = dev;
681 
682 	mutex_unlock(&um_pci_mtx);
683 
684 	device_set_wakeup_enable(&vdev->dev, true);
685 
686 	/*
687 	 * In order to do suspend-resume properly, don't allow VQs
688 	 * to be suspended.
689 	 */
690 	virtio_uml_set_no_vq_suspend(vdev, true);
691 
692 	um_pci_rescan();
693 	return 0;
694 err_reset:
695 	virtio_reset_device(vdev);
696 	vdev->config->del_vqs(vdev);
697 error:
698 	mutex_unlock(&um_pci_mtx);
699 	kfree(dev);
700 	return err;
701 }
702 
703 static void um_pci_virtio_remove(struct virtio_device *vdev)
704 {
705 	struct um_pci_device *dev = vdev->priv;
706 	int i;
707 
708 	if (dev->platform) {
709 		of_platform_depopulate(&vdev->dev);
710 		__um_pci_virtio_platform_remove(vdev, dev);
711 		return;
712 	}
713 
714 	device_set_wakeup_enable(&vdev->dev, false);
715 
716 	mutex_lock(&um_pci_mtx);
717 	for (i = 0; i < MAX_DEVICES; i++) {
718 		if (um_pci_devices[i].dev != dev)
719 			continue;
720 
721 		um_pci_devices[i].dev = NULL;
722 		irq_free_desc(dev->irq);
723 
724 		break;
725 	}
726 	mutex_unlock(&um_pci_mtx);
727 
728 	if (i < MAX_DEVICES) {
729 		struct pci_dev *pci_dev;
730 
731 		pci_dev = pci_get_slot(bridge->bus, i);
732 		if (pci_dev)
733 			pci_stop_and_remove_bus_device_locked(pci_dev);
734 	}
735 
736 	/* Stop all virtqueues */
737 	virtio_reset_device(vdev);
738 	dev->cmd_vq = NULL;
739 	dev->irq_vq = NULL;
740 	vdev->config->del_vqs(vdev);
741 
742 	kfree(dev);
743 }
744 
745 static struct virtio_device_id id_table[] = {
746 	{ CONFIG_UML_PCI_OVER_VIRTIO_DEVICE_ID, VIRTIO_DEV_ANY_ID },
747 	{ 0 },
748 };
749 MODULE_DEVICE_TABLE(virtio, id_table);
750 
751 static struct virtio_driver um_pci_virtio_driver = {
752 	.driver.name = "virtio-pci",
753 	.driver.owner = THIS_MODULE,
754 	.id_table = id_table,
755 	.probe = um_pci_virtio_probe,
756 	.remove = um_pci_virtio_remove,
757 };
758 
759 static struct resource virt_cfgspace_resource = {
760 	.name = "PCI config space",
761 	.start = 0xf0000000 - MAX_DEVICES * CFG_SPACE_SIZE,
762 	.end = 0xf0000000 - 1,
763 	.flags = IORESOURCE_MEM,
764 };
765 
766 static long um_pci_map_cfgspace(unsigned long offset, size_t size,
767 				const struct logic_iomem_ops **ops,
768 				void **priv)
769 {
770 	if (WARN_ON(size > CFG_SPACE_SIZE || offset % CFG_SPACE_SIZE))
771 		return -EINVAL;
772 
773 	if (offset / CFG_SPACE_SIZE < MAX_DEVICES) {
774 		*ops = &um_pci_device_cfgspace_ops;
775 		*priv = &um_pci_devices[offset / CFG_SPACE_SIZE];
776 		return 0;
777 	}
778 
779 	WARN(1, "cannot map offset 0x%lx/0x%zx\n", offset, size);
780 	return -ENOENT;
781 }
782 
783 static const struct logic_iomem_region_ops um_pci_cfgspace_ops = {
784 	.map = um_pci_map_cfgspace,
785 };
786 
787 static struct resource virt_iomem_resource = {
788 	.name = "PCI iomem",
789 	.start = 0xf0000000,
790 	.end = 0xffffffff,
791 	.flags = IORESOURCE_MEM,
792 };
793 
794 struct um_pci_map_iomem_data {
795 	unsigned long offset;
796 	size_t size;
797 	const struct logic_iomem_ops **ops;
798 	void **priv;
799 	long ret;
800 };
801 
802 static int um_pci_map_iomem_walk(struct pci_dev *pdev, void *_data)
803 {
804 	struct um_pci_map_iomem_data *data = _data;
805 	struct um_pci_device_reg *reg = &um_pci_devices[pdev->devfn / 8];
806 	struct um_pci_device *dev;
807 	int i;
808 
809 	if (!reg->dev)
810 		return 0;
811 
812 	for (i = 0; i < ARRAY_SIZE(dev->resptr); i++) {
813 		struct resource *r = &pdev->resource[i];
814 
815 		if ((r->flags & IORESOURCE_TYPE_BITS) != IORESOURCE_MEM)
816 			continue;
817 
818 		/*
819 		 * must be the whole or part of the resource,
820 		 * not allowed to only overlap
821 		 */
822 		if (data->offset < r->start || data->offset > r->end)
823 			continue;
824 		if (data->offset + data->size - 1 > r->end)
825 			continue;
826 
827 		dev = reg->dev;
828 		*data->ops = &um_pci_device_bar_ops;
829 		dev->resptr[i] = i;
830 		*data->priv = &dev->resptr[i];
831 		data->ret = data->offset - r->start;
832 
833 		/* no need to continue */
834 		return 1;
835 	}
836 
837 	return 0;
838 }
839 
840 static long um_pci_map_iomem(unsigned long offset, size_t size,
841 			     const struct logic_iomem_ops **ops,
842 			     void **priv)
843 {
844 	struct um_pci_map_iomem_data data = {
845 		/* we want the full address here */
846 		.offset = offset + virt_iomem_resource.start,
847 		.size = size,
848 		.ops = ops,
849 		.priv = priv,
850 		.ret = -ENOENT,
851 	};
852 
853 	pci_walk_bus(bridge->bus, um_pci_map_iomem_walk, &data);
854 	return data.ret;
855 }
856 
857 static const struct logic_iomem_region_ops um_pci_iomem_ops = {
858 	.map = um_pci_map_iomem,
859 };
860 
861 static void um_pci_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
862 {
863 	/*
864 	 * This is a very low address and not actually valid 'physical' memory
865 	 * in UML, so we can simply map MSI(-X) vectors to there, it cannot be
866 	 * legitimately written to by the device in any other way.
867 	 * We use the (virtual) IRQ number here as the message to simplify the
868 	 * code that receives the message, where for now we simply trust the
869 	 * device to send the correct message.
870 	 */
871 	msg->address_hi = 0;
872 	msg->address_lo = 0xa0000;
873 	msg->data = data->irq;
874 }
875 
876 static struct irq_chip um_pci_msi_bottom_irq_chip = {
877 	.name = "UM virtio MSI",
878 	.irq_compose_msi_msg = um_pci_compose_msi_msg,
879 };
880 
881 static int um_pci_inner_domain_alloc(struct irq_domain *domain,
882 				     unsigned int virq, unsigned int nr_irqs,
883 				     void *args)
884 {
885 	unsigned long bit;
886 
887 	WARN_ON(nr_irqs != 1);
888 
889 	mutex_lock(&um_pci_mtx);
890 	bit = find_first_zero_bit(um_pci_msi_used, MAX_MSI_VECTORS);
891 	if (bit >= MAX_MSI_VECTORS) {
892 		mutex_unlock(&um_pci_mtx);
893 		return -ENOSPC;
894 	}
895 
896 	set_bit(bit, um_pci_msi_used);
897 	mutex_unlock(&um_pci_mtx);
898 
899 	irq_domain_set_info(domain, virq, bit, &um_pci_msi_bottom_irq_chip,
900 			    domain->host_data, handle_simple_irq,
901 			    NULL, NULL);
902 
903 	return 0;
904 }
905 
906 static void um_pci_inner_domain_free(struct irq_domain *domain,
907 				     unsigned int virq, unsigned int nr_irqs)
908 {
909 	struct irq_data *d = irq_domain_get_irq_data(domain, virq);
910 
911 	mutex_lock(&um_pci_mtx);
912 
913 	if (!test_bit(d->hwirq, um_pci_msi_used))
914 		pr_err("trying to free unused MSI#%lu\n", d->hwirq);
915 	else
916 		__clear_bit(d->hwirq, um_pci_msi_used);
917 
918 	mutex_unlock(&um_pci_mtx);
919 }
920 
921 static const struct irq_domain_ops um_pci_inner_domain_ops = {
922 	.alloc = um_pci_inner_domain_alloc,
923 	.free = um_pci_inner_domain_free,
924 };
925 
926 static struct irq_chip um_pci_msi_irq_chip = {
927 	.name = "UM virtio PCIe MSI",
928 	.irq_mask = pci_msi_mask_irq,
929 	.irq_unmask = pci_msi_unmask_irq,
930 };
931 
932 static struct msi_domain_info um_pci_msi_domain_info = {
933 	.flags	= MSI_FLAG_USE_DEF_DOM_OPS |
934 		  MSI_FLAG_USE_DEF_CHIP_OPS |
935 		  MSI_FLAG_PCI_MSIX,
936 	.chip	= &um_pci_msi_irq_chip,
937 };
938 
939 static struct resource busn_resource = {
940 	.name	= "PCI busn",
941 	.start	= 0,
942 	.end	= 0,
943 	.flags	= IORESOURCE_BUS,
944 };
945 
946 static int um_pci_map_irq(const struct pci_dev *pdev, u8 slot, u8 pin)
947 {
948 	struct um_pci_device_reg *reg = &um_pci_devices[pdev->devfn / 8];
949 
950 	if (WARN_ON(!reg->dev))
951 		return -EINVAL;
952 
953 	/* Yes, we map all pins to the same IRQ ... doesn't matter for now. */
954 	return reg->dev->irq;
955 }
956 
957 void *pci_root_bus_fwnode(struct pci_bus *bus)
958 {
959 	return um_pci_fwnode;
960 }
961 
962 static long um_pci_map_platform(unsigned long offset, size_t size,
963 				const struct logic_iomem_ops **ops,
964 				void **priv)
965 {
966 	if (!um_pci_platform_device)
967 		return -ENOENT;
968 
969 	*ops = &um_pci_device_bar_ops;
970 	*priv = &um_pci_platform_device->resptr[0];
971 
972 	return 0;
973 }
974 
975 static const struct logic_iomem_region_ops um_pci_platform_ops = {
976 	.map = um_pci_map_platform,
977 };
978 
979 static struct resource virt_platform_resource = {
980 	.name = "platform",
981 	.start = 0x10000000,
982 	.end = 0x1fffffff,
983 	.flags = IORESOURCE_MEM,
984 };
985 
986 static int __init um_pci_init(void)
987 {
988 	int err, i;
989 
990 	WARN_ON(logic_iomem_add_region(&virt_cfgspace_resource,
991 				       &um_pci_cfgspace_ops));
992 	WARN_ON(logic_iomem_add_region(&virt_iomem_resource,
993 				       &um_pci_iomem_ops));
994 	WARN_ON(logic_iomem_add_region(&virt_platform_resource,
995 				       &um_pci_platform_ops));
996 
997 	if (WARN(CONFIG_UML_PCI_OVER_VIRTIO_DEVICE_ID < 0,
998 		 "No virtio device ID configured for PCI - no PCI support\n"))
999 		return 0;
1000 
1001 	um_pci_msg_bufs = alloc_percpu(struct um_pci_message_buffer);
1002 	if (!um_pci_msg_bufs)
1003 		return -ENOMEM;
1004 
1005 	bridge = pci_alloc_host_bridge(0);
1006 	if (!bridge) {
1007 		err = -ENOMEM;
1008 		goto free;
1009 	}
1010 
1011 	um_pci_fwnode = irq_domain_alloc_named_fwnode("um-pci");
1012 	if (!um_pci_fwnode) {
1013 		err = -ENOMEM;
1014 		goto free;
1015 	}
1016 
1017 	um_pci_inner_domain = __irq_domain_add(um_pci_fwnode, MAX_MSI_VECTORS,
1018 					       MAX_MSI_VECTORS, 0,
1019 					       &um_pci_inner_domain_ops, NULL);
1020 	if (!um_pci_inner_domain) {
1021 		err = -ENOMEM;
1022 		goto free;
1023 	}
1024 
1025 	um_pci_msi_domain = pci_msi_create_irq_domain(um_pci_fwnode,
1026 						      &um_pci_msi_domain_info,
1027 						      um_pci_inner_domain);
1028 	if (!um_pci_msi_domain) {
1029 		err = -ENOMEM;
1030 		goto free;
1031 	}
1032 
1033 	pci_add_resource(&bridge->windows, &virt_iomem_resource);
1034 	pci_add_resource(&bridge->windows, &busn_resource);
1035 	bridge->ops = &um_pci_ops;
1036 	bridge->map_irq = um_pci_map_irq;
1037 
1038 	for (i = 0; i < MAX_DEVICES; i++) {
1039 		resource_size_t start;
1040 
1041 		start = virt_cfgspace_resource.start + i * CFG_SPACE_SIZE;
1042 		um_pci_devices[i].iomem = ioremap(start, CFG_SPACE_SIZE);
1043 		if (WARN(!um_pci_devices[i].iomem, "failed to map %d\n", i)) {
1044 			err = -ENOMEM;
1045 			goto free;
1046 		}
1047 	}
1048 
1049 	err = pci_host_probe(bridge);
1050 	if (err)
1051 		goto free;
1052 
1053 	err = register_virtio_driver(&um_pci_virtio_driver);
1054 	if (err)
1055 		goto free;
1056 	return 0;
1057 free:
1058 	if (um_pci_inner_domain)
1059 		irq_domain_remove(um_pci_inner_domain);
1060 	if (um_pci_fwnode)
1061 		irq_domain_free_fwnode(um_pci_fwnode);
1062 	if (bridge) {
1063 		pci_free_resource_list(&bridge->windows);
1064 		pci_free_host_bridge(bridge);
1065 	}
1066 	free_percpu(um_pci_msg_bufs);
1067 	return err;
1068 }
1069 module_init(um_pci_init);
1070 
1071 static void __exit um_pci_exit(void)
1072 {
1073 	unregister_virtio_driver(&um_pci_virtio_driver);
1074 	irq_domain_remove(um_pci_msi_domain);
1075 	irq_domain_remove(um_pci_inner_domain);
1076 	pci_free_resource_list(&bridge->windows);
1077 	pci_free_host_bridge(bridge);
1078 	free_percpu(um_pci_msg_bufs);
1079 }
1080 module_exit(um_pci_exit);
1081