xref: /openbmc/linux/drivers/xen/xen-pciback/pci_stub.c (revision 23cb0767f0544858169c02cec445d066d4e02e2b)
1 /*
2  * PCI Stub Driver - Grabs devices in backend to be exported later
3  *
4  * Ryan Wilson <hap9@epoch.ncsc.mil>
5  * Chris Bookholt <hap10@epoch.ncsc.mil>
6  */
7 
8 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
9 #define dev_fmt pr_fmt
10 
11 #include <linux/module.h>
12 #include <linux/init.h>
13 #include <linux/rwsem.h>
14 #include <linux/list.h>
15 #include <linux/spinlock.h>
16 #include <linux/kref.h>
17 #include <linux/pci.h>
18 #include <linux/wait.h>
19 #include <linux/sched.h>
20 #include <linux/atomic.h>
21 #include <xen/events.h>
22 #include <xen/pci.h>
23 #include <xen/xen.h>
24 #include <asm/xen/hypervisor.h>
25 #include <xen/interface/physdev.h>
26 #include "pciback.h"
27 #include "conf_space.h"
28 #include "conf_space_quirks.h"
29 
30 #define PCISTUB_DRIVER_NAME "pciback"
31 
/* Backing storage for the read-only (0444) "hide" module parameter. */
static char *pci_devs_to_hide;
/* Wait queue the AER handlers sleep on while waiting for pcifront to answer. */
wait_queue_head_t xen_pcibk_aer_wait_queue;
/*
 * Semaphore that synchronizes AER handling with xen_pcibk remove/reconfigure
 * operations: a xen_pcibk device must not be removed while an AER operation
 * on it is in progress.
 */
static DECLARE_RWSEM(pcistub_sem);
module_param_named(hide, pci_devs_to_hide, charp, 0444);
39 
/*
 * One entry of the pcistub_device_ids list: the address of a PCI device that
 * pcistub_match() compares probed devices (and their parent bridges) against.
 */
struct pcistub_device_id {
	struct list_head slot_list;	/* link in pcistub_device_ids */
	int domain;			/* PCI domain number */
	unsigned char bus;		/* bus number */
	unsigned int devfn;		/* combined slot/function (PCI_DEVFN) */
};
/* List of struct pcistub_device_id entries, guarded by device_ids_lock. */
static LIST_HEAD(pcistub_device_ids);
static DEFINE_SPINLOCK(device_ids_lock);
48 
/*
 * Reference-counted wrapper around a PCI device that has been seized by this
 * driver. Freed by pcistub_device_release() when the last reference is put.
 */
struct pcistub_device {
	struct kref kref;		/* reference count, see pcistub_device_put() */
	struct list_head dev_list;	/* link in pcistub_devices or seized_devices */
	spinlock_t lock;		/* protects the pdev field */

	struct pci_dev *dev;		/* the seized device (reference held) */
	struct xen_pcibk_device *pdev;/* non-NULL if struct pci_dev is in use */
};
57 
/* Access to the pcistub_devices & seized_devices lists and to the
 * initialize_devices flag must be done with pcistub_devices_lock held.
 */
static DEFINE_SPINLOCK(pcistub_devices_lock);
/* Devices that have been seized and fully initialized. */
static LIST_HEAD(pcistub_devices);

/* Wait for device_initcall before initializing our devices
 * (see pcistub_init_devices_late). While initialize_devices is 0, newly
 * seized devices are parked on seized_devices instead of being initialized.
 */
static int initialize_devices;
static LIST_HEAD(seized_devices);
69 
70 static struct pcistub_device *pcistub_device_alloc(struct pci_dev *dev)
71 {
72 	struct pcistub_device *psdev;
73 
74 	dev_dbg(&dev->dev, "pcistub_device_alloc\n");
75 
76 	psdev = kzalloc(sizeof(*psdev), GFP_KERNEL);
77 	if (!psdev)
78 		return NULL;
79 
80 	psdev->dev = pci_dev_get(dev);
81 	if (!psdev->dev) {
82 		kfree(psdev);
83 		return NULL;
84 	}
85 
86 	kref_init(&psdev->kref);
87 	spin_lock_init(&psdev->lock);
88 
89 	return psdev;
90 }
91 
/*
 * kref release callback: tears down a stub device once its last reference
 * is dropped.
 *
 * Don't call this directly as it's called by pcistub_device_put.
 *
 * @kref: the kref embedded in the struct pcistub_device being released.
 */
static void pcistub_device_release(struct kref *kref)
{
	struct pcistub_device *psdev;
	struct pci_dev *dev;
	struct xen_pcibk_dev_data *dev_data;

	psdev = container_of(kref, struct pcistub_device, kref);
	dev = psdev->dev;
	dev_data = pci_get_drvdata(dev);

	dev_dbg(&dev->dev, "pcistub_device_release\n");

	xen_unregister_device_domain_owner(dev);

	/* Call the reset function which does not take lock as this
	 * is called from "unbind" which takes a device_lock mutex.
	 */
	__pci_reset_function_locked(dev);
	/*
	 * Reload the state stored in dev_data (if any) and restore it; the
	 * restore also runs when there is no dev_data at all.
	 */
	if (dev_data &&
	    pci_load_and_free_saved_state(dev, &dev_data->pci_saved_state))
		dev_info(&dev->dev, "Could not reload PCI state\n");
	else
		pci_restore_state(dev);

	if (dev->msix_cap) {
		struct physdev_pci_device ppdev = {
			.seg = pci_domain_nr(dev->bus),
			.bus = dev->bus->number,
			.devfn = dev->devfn
		};
		int err = HYPERVISOR_physdev_op(PHYSDEVOP_release_msix,
						&ppdev);

		/* -ENOSYS is tolerated silently; other errors are only logged. */
		if (err && err != -ENOSYS)
			dev_warn(&dev->dev, "MSI-X release failed (%d)\n",
				 err);
	}

	/* Disable the device */
	xen_pcibk_reset_device(dev);

	kfree(dev_data);
	pci_set_drvdata(dev, NULL);

	/* Clean-up the device */
	xen_pcibk_config_free_dyn_fields(dev);
	xen_pcibk_config_free_dev(dev);

	pci_clear_dev_assigned(dev);
	/* Drop the device reference held in psdev->dev. */
	pci_dev_put(dev);

	kfree(psdev);
}
146 
/* Take an additional reference on @psdev. */
static inline void pcistub_device_get(struct pcistub_device *psdev)
{
	kref_get(&psdev->kref);
}
151 
/*
 * Drop a reference on @psdev; pcistub_device_release() runs when the last
 * reference goes away.
 */
static inline void pcistub_device_put(struct pcistub_device *psdev)
{
	kref_put(&psdev->kref, pcistub_device_release);
}
156 
157 static struct pcistub_device *pcistub_device_find_locked(int domain, int bus,
158 							 int slot, int func)
159 {
160 	struct pcistub_device *psdev;
161 
162 	list_for_each_entry(psdev, &pcistub_devices, dev_list) {
163 		if (psdev->dev != NULL
164 		    && domain == pci_domain_nr(psdev->dev->bus)
165 		    && bus == psdev->dev->bus->number
166 		    && slot == PCI_SLOT(psdev->dev->devfn)
167 		    && func == PCI_FUNC(psdev->dev->devfn)) {
168 			return psdev;
169 		}
170 	}
171 
172 	return NULL;
173 }
174 
175 static struct pcistub_device *pcistub_device_find(int domain, int bus,
176 						  int slot, int func)
177 {
178 	struct pcistub_device *psdev;
179 	unsigned long flags;
180 
181 	spin_lock_irqsave(&pcistub_devices_lock, flags);
182 
183 	psdev = pcistub_device_find_locked(domain, bus, slot, func);
184 	if (psdev)
185 		pcistub_device_get(psdev);
186 
187 	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
188 	return psdev;
189 }
190 
191 static struct pci_dev *pcistub_device_get_pci_dev(struct xen_pcibk_device *pdev,
192 						  struct pcistub_device *psdev)
193 {
194 	struct pci_dev *pci_dev = NULL;
195 	unsigned long flags;
196 
197 	pcistub_device_get(psdev);
198 
199 	spin_lock_irqsave(&psdev->lock, flags);
200 	if (!psdev->pdev) {
201 		psdev->pdev = pdev;
202 		pci_dev = psdev->dev;
203 	}
204 	spin_unlock_irqrestore(&psdev->lock, flags);
205 
206 	if (!pci_dev)
207 		pcistub_device_put(psdev);
208 
209 	return pci_dev;
210 }
211 
212 struct pci_dev *pcistub_get_pci_dev_by_slot(struct xen_pcibk_device *pdev,
213 					    int domain, int bus,
214 					    int slot, int func)
215 {
216 	struct pcistub_device *psdev;
217 	struct pci_dev *found_dev = NULL;
218 	unsigned long flags;
219 
220 	spin_lock_irqsave(&pcistub_devices_lock, flags);
221 
222 	psdev = pcistub_device_find_locked(domain, bus, slot, func);
223 	if (psdev)
224 		found_dev = pcistub_device_get_pci_dev(pdev, psdev);
225 
226 	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
227 	return found_dev;
228 }
229 
230 struct pci_dev *pcistub_get_pci_dev(struct xen_pcibk_device *pdev,
231 				    struct pci_dev *dev)
232 {
233 	struct pcistub_device *psdev;
234 	struct pci_dev *found_dev = NULL;
235 	unsigned long flags;
236 
237 	spin_lock_irqsave(&pcistub_devices_lock, flags);
238 
239 	list_for_each_entry(psdev, &pcistub_devices, dev_list) {
240 		if (psdev->dev == dev) {
241 			found_dev = pcistub_device_get_pci_dev(pdev, psdev);
242 			break;
243 		}
244 	}
245 
246 	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
247 	return found_dev;
248 }
249 
/*
 * Give a device back to the stub so it is ready for the next domain.
 *
 * Called when:
 *  - XenBus state has been reconfigured (pci unplug). See xen_pcibk_remove_device
 *  - XenBus state has been disconnected (guest shutdown). See xen_pcibk_xenbus_remove
 *  - 'echo BDF > unbind' on pciback module with no guest attached. See pcistub_remove
 *  - 'echo BDF > unbind' with a guest still using it. See pcistub_remove
 *
 *  As such we have to be careful.
 *
 *  To make this easier, the caller has to hold the device lock.
 */
void pcistub_put_pci_dev(struct pci_dev *dev)
{
	struct pcistub_device *psdev, *found_psdev = NULL;
	unsigned long flags;
	struct xen_pcibk_dev_data *dev_data;
	int ret;

	spin_lock_irqsave(&pcistub_devices_lock, flags);

	list_for_each_entry(psdev, &pcistub_devices, dev_list) {
		if (psdev->dev == dev) {
			found_psdev = psdev;
			break;
		}
	}

	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
	/* A device that is not on our list is a caller bug: warn and bail. */
	if (WARN_ON(!found_psdev))
		return;

	/* Hold this lock to avoid breaking the link between
	 * pcistub and xen_pcibk while AER is being processed.
	 */
	down_write(&pcistub_sem);
	/* Cleanup our device
	 * (so it's ready for the next domain)
	 */
	device_lock_assert(&dev->dev);
	__pci_reset_function_locked(dev);

	dev_data = pci_get_drvdata(dev);
	/* Unlike the release path, the saved state is loaded but not freed here. */
	ret = pci_load_saved_state(dev, dev_data->pci_saved_state);
	if (!ret) {
		/*
		 * The usual sequence is pci_save_state & pci_restore_state
		 * but the guest might have messed the configuration space up.
		 * Use the initial version (when device was bound to us).
		 */
		pci_restore_state(dev);
	} else
		dev_info(&dev->dev, "Could not reload PCI state\n");
	/* This disables the device. */
	xen_pcibk_reset_device(dev);

	/* And clean up our emulated fields. */
	xen_pcibk_config_reset_dev(dev);
	xen_pcibk_config_free_dyn_fields(dev);

	dev_data->allow_interrupt_control = 0;

	xen_unregister_device_domain_owner(dev);

	/* Mark the stub as no longer in use by any backend. */
	spin_lock_irqsave(&found_psdev->lock, flags);
	found_psdev->pdev = NULL;
	spin_unlock_irqrestore(&found_psdev->lock, flags);

	/* Drop the reference that was kept when the device was handed out. */
	pcistub_device_put(found_psdev);
	up_write(&pcistub_sem);
}
320 
321 static int pcistub_match_one(struct pci_dev *dev,
322 			     struct pcistub_device_id *pdev_id)
323 {
324 	/* Match the specified device by domain, bus, slot, func and also if
325 	 * any of the device's parent bridges match.
326 	 */
327 	for (; dev != NULL; dev = dev->bus->self) {
328 		if (pci_domain_nr(dev->bus) == pdev_id->domain
329 		    && dev->bus->number == pdev_id->bus
330 		    && dev->devfn == pdev_id->devfn)
331 			return 1;
332 
333 		/* Sometimes topmost bridge links to itself. */
334 		if (dev == dev->bus->self)
335 			break;
336 	}
337 
338 	return 0;
339 }
340 
341 static int pcistub_match(struct pci_dev *dev)
342 {
343 	struct pcistub_device_id *pdev_id;
344 	unsigned long flags;
345 	int found = 0;
346 
347 	spin_lock_irqsave(&device_ids_lock, flags);
348 	list_for_each_entry(pdev_id, &pcistub_device_ids, slot_list) {
349 		if (pcistub_match_one(dev, pdev_id)) {
350 			found = 1;
351 			break;
352 		}
353 	}
354 	spin_unlock_irqrestore(&device_ids_lock, flags);
355 
356 	return found;
357 }
358 
/*
 * Prepare a seized device for export: allocate its driver data, set up the
 * emulated config space, enable the device, save its initial PCI state,
 * reset it and mark it as assigned.
 *
 * Returns 0 on success or a negative errno; on failure the driver data is
 * freed and the drvdata pointer cleared.
 */
static int pcistub_init_device(struct pci_dev *dev)
{
	struct xen_pcibk_dev_data *dev_data;
	int err = 0;

	dev_dbg(&dev->dev, "initializing...\n");

	/* The PCI backend is not intended to be a module (or to work with
	 * removable PCI devices (yet). If it were, xen_pcibk_config_free()
	 * would need to be called somewhere to free the memory allocated
	 * here and then to call kfree(pci_get_drvdata(psdev->dev)).
	 */
	/* Extra bytes hold the "DRV_NAME[<pci name>]" string written below. */
	dev_data = kzalloc(sizeof(*dev_data) +  strlen(DRV_NAME "[]")
				+ strlen(pci_name(dev)) + 1, GFP_KERNEL);
	if (!dev_data) {
		err = -ENOMEM;
		goto out;
	}
	pci_set_drvdata(dev, dev_data);

	/*
	 * Setup name for fake IRQ handler. It will only be enabled
	 * once the device is turned on by the guest.
	 */
	sprintf(dev_data->irq_name, DRV_NAME "[%s]", pci_name(dev));

	dev_dbg(&dev->dev, "initializing config\n");

	/* NOTE(review): this re-initializes the global AER wait queue on
	 * every device init - confirm no waiter can be queued at that time.
	 */
	init_waitqueue_head(&xen_pcibk_aer_wait_queue);
	err = xen_pcibk_config_init_dev(dev);
	if (err)
		goto out;

	/* HACK: Force device (& ACPI) to determine what IRQ it's on - we
	 * must do this here because pcibios_enable_device may specify
	 * the pci device's true irq (and possibly its other resources)
	 * if they differ from what's in the configuration space.
	 * This makes the assumption that the device's resources won't
	 * change after this point (otherwise this code may break!)
	 */
	dev_dbg(&dev->dev, "enabling device\n");
	err = pci_enable_device(dev);
	if (err)
		goto config_release;

	if (dev->msix_cap) {
		struct physdev_pci_device ppdev = {
			.seg = pci_domain_nr(dev->bus),
			.bus = dev->bus->number,
			.devfn = dev->devfn
		};

		/* A failure here is only logged; initialization continues. */
		err = HYPERVISOR_physdev_op(PHYSDEVOP_prepare_msix, &ppdev);
		if (err && err != -ENOSYS)
			dev_err(&dev->dev, "MSI-X preparation failed (%d)\n",
				err);
	}

	/* We need the device active to save the state. */
	dev_dbg(&dev->dev, "save state of device\n");
	pci_save_state(dev);
	dev_data->pci_saved_state = pci_store_saved_state(dev);
	if (!dev_data->pci_saved_state)
		dev_err(&dev->dev, "Could not store PCI conf saved state!\n");
	else {
		dev_dbg(&dev->dev, "resetting (FLR, D3, etc) the device\n");
		__pci_reset_function_locked(dev);
		pci_restore_state(dev);
	}
	/* Now disable the device (this also ensures some private device
	 * data is setup before we export)
	 */
	dev_dbg(&dev->dev, "reset device\n");
	xen_pcibk_reset_device(dev);

	pci_set_dev_assigned(dev);
	/* Explicit 0: err may still hold a tolerated MSI-X preparation error. */
	return 0;

config_release:
	xen_pcibk_config_free_dev(dev);

out:
	pci_set_drvdata(dev, NULL);
	kfree(dev_data);
	return err;
}
445 
446 /*
447  * Because some initialization still happens on
448  * devices during fs_initcall, we need to defer
449  * full initialization of our devices until
450  * device_initcall.
451  */
452 static int __init pcistub_init_devices_late(void)
453 {
454 	struct pcistub_device *psdev;
455 	unsigned long flags;
456 	int err = 0;
457 
458 	spin_lock_irqsave(&pcistub_devices_lock, flags);
459 
460 	while (!list_empty(&seized_devices)) {
461 		psdev = container_of(seized_devices.next,
462 				     struct pcistub_device, dev_list);
463 		list_del(&psdev->dev_list);
464 
465 		spin_unlock_irqrestore(&pcistub_devices_lock, flags);
466 
467 		err = pcistub_init_device(psdev->dev);
468 		if (err) {
469 			dev_err(&psdev->dev->dev,
470 				"error %d initializing device\n", err);
471 			kfree(psdev);
472 			psdev = NULL;
473 		}
474 
475 		spin_lock_irqsave(&pcistub_devices_lock, flags);
476 
477 		if (psdev)
478 			list_add_tail(&psdev->dev_list, &pcistub_devices);
479 	}
480 
481 	initialize_devices = 1;
482 
483 	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
484 
485 	return 0;
486 }
487 
488 static void pcistub_device_id_add_list(struct pcistub_device_id *new,
489 				       int domain, int bus, unsigned int devfn)
490 {
491 	struct pcistub_device_id *pci_dev_id;
492 	unsigned long flags;
493 	int found = 0;
494 
495 	spin_lock_irqsave(&device_ids_lock, flags);
496 
497 	list_for_each_entry(pci_dev_id, &pcistub_device_ids, slot_list) {
498 		if (pci_dev_id->domain == domain && pci_dev_id->bus == bus &&
499 		    pci_dev_id->devfn == devfn) {
500 			found = 1;
501 			break;
502 		}
503 	}
504 
505 	if (!found) {
506 		new->domain = domain;
507 		new->bus = bus;
508 		new->devfn = devfn;
509 		list_add_tail(&new->slot_list, &pcistub_device_ids);
510 	}
511 
512 	spin_unlock_irqrestore(&device_ids_lock, flags);
513 
514 	if (found)
515 		kfree(new);
516 }
517 
518 static int pcistub_seize(struct pci_dev *dev,
519 			 struct pcistub_device_id *pci_dev_id)
520 {
521 	struct pcistub_device *psdev;
522 	unsigned long flags;
523 	int err = 0;
524 
525 	psdev = pcistub_device_alloc(dev);
526 	if (!psdev) {
527 		kfree(pci_dev_id);
528 		return -ENOMEM;
529 	}
530 
531 	spin_lock_irqsave(&pcistub_devices_lock, flags);
532 
533 	if (initialize_devices) {
534 		spin_unlock_irqrestore(&pcistub_devices_lock, flags);
535 
536 		/* don't want irqs disabled when calling pcistub_init_device */
537 		err = pcistub_init_device(psdev->dev);
538 
539 		spin_lock_irqsave(&pcistub_devices_lock, flags);
540 
541 		if (!err)
542 			list_add(&psdev->dev_list, &pcistub_devices);
543 	} else {
544 		dev_dbg(&dev->dev, "deferring initialization\n");
545 		list_add(&psdev->dev_list, &seized_devices);
546 	}
547 
548 	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
549 
550 	if (err) {
551 		kfree(pci_dev_id);
552 		pcistub_device_put(psdev);
553 	} else if (pci_dev_id)
554 		pcistub_device_id_add_list(pci_dev_id, pci_domain_nr(dev->bus),
555 					   dev->bus->number, dev->devfn);
556 
557 	return err;
558 }
559 
560 /* Called when 'bind'. This means we must _NOT_ call pci_reset_function or
561  * other functions that take the sysfs lock. */
562 static int pcistub_probe(struct pci_dev *dev, const struct pci_device_id *id)
563 {
564 	int err = 0, match;
565 	struct pcistub_device_id *pci_dev_id = NULL;
566 
567 	dev_dbg(&dev->dev, "probing...\n");
568 
569 	match = pcistub_match(dev);
570 
571 	if ((dev->driver_override &&
572 	     !strcmp(dev->driver_override, PCISTUB_DRIVER_NAME)) ||
573 	    match) {
574 
575 		if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL
576 		    && dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) {
577 			dev_err(&dev->dev, "can't export pci devices that "
578 				"don't have a normal (0) or bridge (1) "
579 				"header type!\n");
580 			err = -ENODEV;
581 			goto out;
582 		}
583 
584 		if (!match) {
585 			pci_dev_id = kmalloc(sizeof(*pci_dev_id), GFP_KERNEL);
586 			if (!pci_dev_id) {
587 				err = -ENOMEM;
588 				goto out;
589 			}
590 		}
591 
592 		dev_info(&dev->dev, "seizing device\n");
593 		err = pcistub_seize(dev, pci_dev_id);
594 	} else
595 		/* Didn't find the device */
596 		err = -ENODEV;
597 
598 out:
599 	return err;
600 }
601 
/*
 * PCI driver remove callback: takes @dev off the pcistub_devices list and
 * drops the list's reference on its stub. If a backend is still using the
 * device, loud warnings are printed and the device is first released from
 * that backend.
 *
 * Called when 'unbind'. This means we must _NOT_ call pci_reset_function or
 * other functions that take the sysfs lock.
 */
static void pcistub_remove(struct pci_dev *dev)
{
	struct pcistub_device *psdev, *found_psdev = NULL;
	unsigned long flags;

	dev_dbg(&dev->dev, "removing\n");

	spin_lock_irqsave(&pcistub_devices_lock, flags);

	xen_pcibk_config_quirk_release(dev);

	list_for_each_entry(psdev, &pcistub_devices, dev_list) {
		if (psdev->dev == dev) {
			found_psdev = psdev;
			break;
		}
	}

	spin_unlock_irqrestore(&pcistub_devices_lock, flags);

	if (found_psdev) {
		dev_dbg(&dev->dev, "found device to remove %s\n",
			found_psdev->pdev ? "- in-use" : "");

		if (found_psdev->pdev) {
			int domid = xen_find_device_domain_owner(dev);

			dev_warn(&dev->dev, "****** removing device %s while still in-use by domain %d! ******\n",
			       pci_name(found_psdev->dev), domid);
			dev_warn(&dev->dev, "****** driver domain may still access this device's i/o resources!\n");
			dev_warn(&dev->dev, "****** shutdown driver domain before binding device\n");
			dev_warn(&dev->dev, "****** to other drivers or domains\n");

			/* N.B. This ends up calling pcistub_put_pci_dev which ends up
			 * doing the FLR. */
			xen_pcibk_release_pci_dev(found_psdev->pdev,
						found_psdev->dev,
						false /* caller holds the lock. */);
		}

		/* Unlink under the list lock so lookups can no longer find it. */
		spin_lock_irqsave(&pcistub_devices_lock, flags);
		list_del(&found_psdev->dev_list);
		spin_unlock_irqrestore(&pcistub_devices_lock, flags);

		/* the final put for releasing from the list */
		pcistub_device_put(found_psdev);
	}
}
652 
/*
 * Match table for the PCI core: a single wildcard entry (PCI_ANY_ID for
 * vendor, device, subvendor and subdevice) followed by the terminator.
 * pcistub_probe() is what actually decides whether a device is seized.
 */
static const struct pci_device_id pcistub_ids[] = {
	{
	 .vendor = PCI_ANY_ID,
	 .device = PCI_ANY_ID,
	 .subvendor = PCI_ANY_ID,
	 .subdevice = PCI_ANY_ID,
	 },
	{0,},
};
662 
663 #define PCI_NODENAME_MAX 40
664 static void kill_domain_by_device(struct pcistub_device *psdev)
665 {
666 	struct xenbus_transaction xbt;
667 	int err;
668 	char nodename[PCI_NODENAME_MAX];
669 
670 	BUG_ON(!psdev);
671 	snprintf(nodename, PCI_NODENAME_MAX, "/local/domain/0/backend/pci/%d/0",
672 		psdev->pdev->xdev->otherend_id);
673 
674 again:
675 	err = xenbus_transaction_start(&xbt);
676 	if (err) {
677 		dev_err(&psdev->dev->dev,
678 			"error %d when start xenbus transaction\n", err);
679 		return;
680 	}
681 	/*PV AER handlers will set this flag*/
682 	xenbus_printf(xbt, nodename, "aerState" , "aerfail");
683 	err = xenbus_transaction_end(xbt, 0);
684 	if (err) {
685 		if (err == -EAGAIN)
686 			goto again;
687 		dev_err(&psdev->dev->dev,
688 			"error %d when end xenbus transaction\n", err);
689 		return;
690 	}
691 }
692 
/* For each AER recovery step (error_detected, mmio_enabled, etc.) the frontend
 * and the backend need to cooperate. In xen_pcibk those steps all do a similar
 * job: send a service request and wait for the frontend's response.
 *
 * @psdev:   stub device the error was reported for (psdev->pdev is used)
 * @state:   channel state, stored in the shared aer_op->err field
 * @aer_cmd: AER operation code stored in aer_op->cmd
 * @result:  value returned if the frontend does not answer in time
 *
 * Returns the frontend's answer read back from aer_op->err, @result on
 * timeout, or PCI_ERS_RESULT_NONE if the frontend device cannot be found.
 */
static pci_ers_result_t common_process(struct pcistub_device *psdev,
				       pci_channel_state_t state, int aer_cmd,
				       pci_ers_result_t result)
{
	pci_ers_result_t res = result;
	struct xen_pcie_aer_op *aer_op;
	struct xen_pcibk_device *pdev = psdev->pdev;
	struct xen_pci_sharedinfo *sh_info = pdev->sh_info;
	int ret;

	/*with PV AER drivers*/
	aer_op = &(sh_info->aer_op);
	aer_op->cmd = aer_cmd ;
	/*useful for error_detected callback*/
	aer_op->err = state;
	/*pcifront_end BDF*/
	ret = xen_pcibk_get_pcifront_dev(psdev->dev, psdev->pdev,
		&aer_op->domain, &aer_op->bus, &aer_op->devfn);
	if (!ret) {
		dev_err(&psdev->dev->dev, "failed to get pcifront device\n");
		return PCI_ERS_RESULT_NONE;
	}
	/* Order the aer_op writes above before the flag updates below. */
	wmb();

	dev_dbg(&psdev->dev->dev, "aer_op %x dom %x bus %x devfn %x\n",
			aer_cmd, aer_op->domain, aer_op->bus, aer_op->devfn);
	/* Local flag marking that an AER request is outstanding; the
	 * xen_pcibk callback uses it to decide whether it needs to check for
	 * pcifront's AER service ack signal.
	 */
	set_bit(_PCIB_op_pending, (unsigned long *)&pdev->flags);

	/* It is possible that a pcifront conf_read_write ops request invokes
	 * the callback, which causes a spurious wake_up.
	 * Yet it is harmless and better than a spinlock here.
	 */
	set_bit(_XEN_PCIB_active,
		(unsigned long *)&sh_info->flags);
	wmb();
	notify_remote_via_irq(pdev->evtchn_irq);

	/* Enable IRQ to signal "request done". */
	xen_pcibk_lateeoi(pdev, 0);

	/* Sleep until _XEN_PCIB_active is cleared, or 300 seconds pass. */
	ret = wait_event_timeout(xen_pcibk_aer_wait_queue,
				 !(test_bit(_XEN_PCIB_active, (unsigned long *)
				 &sh_info->flags)), 300*HZ);

	/* Enable IRQ for pcifront request if not already active. */
	if (!test_bit(_PDEVF_op_active, &pdev->flags))
		xen_pcibk_lateeoi(pdev, 0);

	if (!ret) {
		if (test_bit(_XEN_PCIB_active,
			(unsigned long *)&sh_info->flags)) {
			dev_err(&psdev->dev->dev,
				"pcifront aer process not responding!\n");
			clear_bit(_XEN_PCIB_active,
			  (unsigned long *)&sh_info->flags);
			aer_op->err = PCI_ERS_RESULT_NONE;
			/* NOTE(review): _PCIB_op_pending stays set on this
			 * timeout path - confirm that is intended.
			 */
			return res;
		}
	}
	clear_bit(_PCIB_op_pending, (unsigned long *)&pdev->flags);

	res = (pci_ers_result_t)aer_op->err;
	return res;
}
765 
766 /*
767 * xen_pcibk_slot_reset: it will send the slot_reset request to  pcifront in case
768 * of the device driver could provide this service, and then wait for pcifront
769 * ack.
770 * @dev: pointer to PCI devices
771 * return value is used by aer_core do_recovery policy
772 */
773 static pci_ers_result_t xen_pcibk_slot_reset(struct pci_dev *dev)
774 {
775 	struct pcistub_device *psdev;
776 	pci_ers_result_t result;
777 
778 	result = PCI_ERS_RESULT_RECOVERED;
779 	dev_dbg(&dev->dev, "xen_pcibk_slot_reset(bus:%x,devfn:%x)\n",
780 		dev->bus->number, dev->devfn);
781 
782 	down_write(&pcistub_sem);
783 	psdev = pcistub_device_find(pci_domain_nr(dev->bus),
784 				dev->bus->number,
785 				PCI_SLOT(dev->devfn),
786 				PCI_FUNC(dev->devfn));
787 
788 	if (!psdev || !psdev->pdev) {
789 		dev_err(&dev->dev, "device is not found/assigned\n");
790 		goto end;
791 	}
792 
793 	if (!psdev->pdev->sh_info) {
794 		dev_err(&dev->dev, "device is not connected or owned"
795 			" by HVM, kill it\n");
796 		kill_domain_by_device(psdev);
797 		goto end;
798 	}
799 
800 	if (!test_bit(_XEN_PCIB_AERHANDLER,
801 		(unsigned long *)&psdev->pdev->sh_info->flags)) {
802 		dev_err(&dev->dev,
803 			"guest with no AER driver should have been killed\n");
804 		goto end;
805 	}
806 	result = common_process(psdev, pci_channel_io_normal, XEN_PCI_OP_aer_slotreset, result);
807 
808 	if (result == PCI_ERS_RESULT_NONE ||
809 		result == PCI_ERS_RESULT_DISCONNECT) {
810 		dev_dbg(&dev->dev,
811 			"No AER slot_reset service or disconnected!\n");
812 		kill_domain_by_device(psdev);
813 	}
814 end:
815 	if (psdev)
816 		pcistub_device_put(psdev);
817 	up_write(&pcistub_sem);
818 	return result;
819 
820 }
821 
822 
823 /*xen_pcibk_mmio_enabled: it will send the mmio_enabled request to  pcifront
824 * in case of the device driver could provide this service, and then wait
825 * for pcifront ack
826 * @dev: pointer to PCI devices
827 * return value is used by aer_core do_recovery policy
828 */
829 
830 static pci_ers_result_t xen_pcibk_mmio_enabled(struct pci_dev *dev)
831 {
832 	struct pcistub_device *psdev;
833 	pci_ers_result_t result;
834 
835 	result = PCI_ERS_RESULT_RECOVERED;
836 	dev_dbg(&dev->dev, "xen_pcibk_mmio_enabled(bus:%x,devfn:%x)\n",
837 		dev->bus->number, dev->devfn);
838 
839 	down_write(&pcistub_sem);
840 	psdev = pcistub_device_find(pci_domain_nr(dev->bus),
841 				dev->bus->number,
842 				PCI_SLOT(dev->devfn),
843 				PCI_FUNC(dev->devfn));
844 
845 	if (!psdev || !psdev->pdev) {
846 		dev_err(&dev->dev, "device is not found/assigned\n");
847 		goto end;
848 	}
849 
850 	if (!psdev->pdev->sh_info) {
851 		dev_err(&dev->dev, "device is not connected or owned"
852 			" by HVM, kill it\n");
853 		kill_domain_by_device(psdev);
854 		goto end;
855 	}
856 
857 	if (!test_bit(_XEN_PCIB_AERHANDLER,
858 		(unsigned long *)&psdev->pdev->sh_info->flags)) {
859 		dev_err(&dev->dev,
860 			"guest with no AER driver should have been killed\n");
861 		goto end;
862 	}
863 	result = common_process(psdev, pci_channel_io_normal, XEN_PCI_OP_aer_mmio, result);
864 
865 	if (result == PCI_ERS_RESULT_NONE ||
866 		result == PCI_ERS_RESULT_DISCONNECT) {
867 		dev_dbg(&dev->dev,
868 			"No AER mmio_enabled service or disconnected!\n");
869 		kill_domain_by_device(psdev);
870 	}
871 end:
872 	if (psdev)
873 		pcistub_device_put(psdev);
874 	up_write(&pcistub_sem);
875 	return result;
876 }
877 
878 /*xen_pcibk_error_detected: it will send the error_detected request to  pcifront
879 * in case of the device driver could provide this service, and then wait
880 * for pcifront ack.
881 * @dev: pointer to PCI devices
882 * @error: the current PCI connection state
883 * return value is used by aer_core do_recovery policy
884 */
885 
886 static pci_ers_result_t xen_pcibk_error_detected(struct pci_dev *dev,
887 	pci_channel_state_t error)
888 {
889 	struct pcistub_device *psdev;
890 	pci_ers_result_t result;
891 
892 	result = PCI_ERS_RESULT_CAN_RECOVER;
893 	dev_dbg(&dev->dev, "xen_pcibk_error_detected(bus:%x,devfn:%x)\n",
894 		dev->bus->number, dev->devfn);
895 
896 	down_write(&pcistub_sem);
897 	psdev = pcistub_device_find(pci_domain_nr(dev->bus),
898 				dev->bus->number,
899 				PCI_SLOT(dev->devfn),
900 				PCI_FUNC(dev->devfn));
901 
902 	if (!psdev || !psdev->pdev) {
903 		dev_err(&dev->dev, "device is not found/assigned\n");
904 		goto end;
905 	}
906 
907 	if (!psdev->pdev->sh_info) {
908 		dev_err(&dev->dev, "device is not connected or owned"
909 			" by HVM, kill it\n");
910 		kill_domain_by_device(psdev);
911 		goto end;
912 	}
913 
914 	/*Guest owns the device yet no aer handler regiested, kill guest*/
915 	if (!test_bit(_XEN_PCIB_AERHANDLER,
916 		(unsigned long *)&psdev->pdev->sh_info->flags)) {
917 		dev_dbg(&dev->dev, "guest may have no aer driver, kill it\n");
918 		kill_domain_by_device(psdev);
919 		goto end;
920 	}
921 	result = common_process(psdev, error, XEN_PCI_OP_aer_detected, result);
922 
923 	if (result == PCI_ERS_RESULT_NONE ||
924 		result == PCI_ERS_RESULT_DISCONNECT) {
925 		dev_dbg(&dev->dev,
926 			"No AER error_detected service or disconnected!\n");
927 		kill_domain_by_device(psdev);
928 	}
929 end:
930 	if (psdev)
931 		pcistub_device_put(psdev);
932 	up_write(&pcistub_sem);
933 	return result;
934 }
935 
936 /*xen_pcibk_error_resume: it will send the error_resume request to  pcifront
937 * in case of the device driver could provide this service, and then wait
938 * for pcifront ack.
939 * @dev: pointer to PCI devices
940 */
941 
942 static void xen_pcibk_error_resume(struct pci_dev *dev)
943 {
944 	struct pcistub_device *psdev;
945 
946 	dev_dbg(&dev->dev, "xen_pcibk_error_resume(bus:%x,devfn:%x)\n",
947 		dev->bus->number, dev->devfn);
948 
949 	down_write(&pcistub_sem);
950 	psdev = pcistub_device_find(pci_domain_nr(dev->bus),
951 				dev->bus->number,
952 				PCI_SLOT(dev->devfn),
953 				PCI_FUNC(dev->devfn));
954 
955 	if (!psdev || !psdev->pdev) {
956 		dev_err(&dev->dev, "device is not found/assigned\n");
957 		goto end;
958 	}
959 
960 	if (!psdev->pdev->sh_info) {
961 		dev_err(&dev->dev, "device is not connected or owned"
962 			" by HVM, kill it\n");
963 		kill_domain_by_device(psdev);
964 		goto end;
965 	}
966 
967 	if (!test_bit(_XEN_PCIB_AERHANDLER,
968 		(unsigned long *)&psdev->pdev->sh_info->flags)) {
969 		dev_err(&dev->dev,
970 			"guest with no AER driver should have been killed\n");
971 		kill_domain_by_device(psdev);
972 		goto end;
973 	}
974 	common_process(psdev, pci_channel_io_normal, XEN_PCI_OP_aer_resume,
975 		       PCI_ERS_RESULT_RECOVERED);
976 end:
977 	if (psdev)
978 		pcistub_device_put(psdev);
979 	up_write(&pcistub_sem);
980 	return;
981 }
982 
/*
 * xen_pcibk AER handling: error-recovery callbacks installed through
 * xen_pcibk_pci_driver.err_handler. Each one forwards the corresponding
 * recovery step to pcifront.
 */
static const struct pci_error_handlers xen_pcibk_error_handler = {
	.error_detected = xen_pcibk_error_detected,
	.mmio_enabled = xen_pcibk_mmio_enabled,
	.slot_reset = xen_pcibk_slot_reset,
	.resume = xen_pcibk_error_resume,
};
990 
991 /*
992  * Note: There is no MODULE_DEVICE_TABLE entry here because this isn't
993  * for a normal device. I don't want it to be loaded automatically.
994  */
995 
/* PCI driver description: probe/remove callbacks plus the AER handlers. */
static struct pci_driver xen_pcibk_pci_driver = {
	/* The name should be xen_pciback, but until the tools are updated
	 * we will keep it as pciback. */
	.name = PCISTUB_DRIVER_NAME,
	.id_table = pcistub_ids,
	.probe = pcistub_probe,
	.remove = pcistub_remove,
	.err_handler = &xen_pcibk_error_handler,
};
1005 
/*
 * Parse a PCI address of the form "DDDD:BB:SS.F" or "BB:SS.F" (all fields
 * hexadecimal) from @buf. A '*' may replace the function, or both the slot
 * and the function; wildcarded fields are reported as -1. When the domain is
 * omitted, *domain is set to 0. Leading and trailing whitespace is accepted,
 * any other trailing character is not.
 *
 * Returns 0 on success, -EINVAL if @buf does not match any accepted form.
 */
static inline int str_to_slot(const char *buf, int *domain, int *bus,
			      int *slot, int *func)
{
	int consumed = 0;
	int nfields;

	nfields = sscanf(buf, " %x:%x:%x.%x %n", domain, bus, slot, func,
			 &consumed);
	if (nfields == 3) {
		/* function was not a number: accept "D:B:S.*" */
		*func = -1;
		sscanf(buf, " %x:%x:%x.* %n", domain, bus, slot, &consumed);
	} else if (nfields == 2) {
		/* slot was not a number: accept "D:B:*.*" */
		*func = -1;
		*slot = -1;
		sscanf(buf, " %x:%x:*.* %n", domain, bus, &consumed);
	}
	if (consumed != 0 && buf[consumed] == '\0')
		return 0;

	/* try again without domain */
	*domain = 0;
	nfields = sscanf(buf, " %x:%x.%x %n", bus, slot, func, &consumed);
	if (nfields == 2) {
		*func = -1;
		sscanf(buf, " %x:%x.* %n", bus, slot, &consumed);
	} else if (nfields == 1) {
		*func = -1;
		*slot = -1;
		sscanf(buf, " %x:*.* %n", bus, &consumed);
	}
	if (consumed != 0 && buf[consumed] == '\0')
		return 0;

	return -EINVAL;
}
1042 
/*
 * Parse a config-space quirk description of the form
 * "DDDD:BB:SS.F-REG:SIZE:MASK" or "BB:SS.F-REG:SIZE:MASK" (all fields
 * hexadecimal) from @buf. When the domain is omitted, *domain is set to 0.
 * Leading and trailing whitespace is accepted, any other trailing character
 * is not.
 *
 * Returns 0 on success, -EINVAL if @buf matches neither form.
 */
static inline int str_to_quirk(const char *buf, int *domain, int *bus, int
			       *slot, int *func, int *reg, int *size, int *mask)
{
	int consumed = 0;

	/* %n is only stored once every preceding field has matched */
	sscanf(buf, " %x:%x:%x.%x-%x:%x:%x %n", domain, bus, slot, func,
	       reg, size, mask, &consumed);
	if (consumed != 0 && buf[consumed] == '\0')
		return 0;

	/* try again without domain */
	*domain = 0;
	sscanf(buf, " %x:%x.%x-%x:%x:%x %n", bus, slot, func, reg, size,
	       mask, &consumed);
	if (consumed != 0 && buf[consumed] == '\0')
		return 0;

	return -EINVAL;
}
1062 
/*
 * Add domain:bus:slot.func to the list of device ids to seize.
 *
 * A negative @slot expands to all 32 slots and a negative @func to all 8
 * functions, by recursing once per concrete value and stopping at the first
 * error.
 *
 * Returns 0 on success, -EINVAL for an out-of-range address, or -ENOMEM.
 */
static int pcistub_device_id_add(int domain, int bus, int slot, int func)
{
	struct pcistub_device_id *pci_dev_id;
	/* devfn is only meaningful once slot and func are known to be >= 0 */
	int rc = 0, devfn = PCI_DEVFN(slot, func);

	if (slot < 0) {
		for (slot = 0; !rc && slot < 32; ++slot)
			rc = pcistub_device_id_add(domain, bus, slot, func);
		return rc;
	}

	if (func < 0) {
		for (func = 0; !rc && func < 8; ++func)
			rc = pcistub_device_id_add(domain, bus, slot, func);
		return rc;
	}

	/*
	 * Range-check the address. When the preprocessor condition below
	 * holds and pci_domains_supported is zero, any non-zero domain is
	 * rejected; otherwise the domain must be within 0..0xffff. The
	 * PCI_SLOT/PCI_FUNC comparisons reject slot/func values that do not
	 * survive the round trip through devfn.
	 */
	if ((
#if !defined(MODULE) /* pci_domains_supported is not being exported */ \
    || !defined(CONFIG_PCI_DOMAINS)
	     !pci_domains_supported ? domain :
#endif
	     domain < 0 || domain > 0xffff)
	    || bus < 0 || bus > 0xff
	    || PCI_SLOT(devfn) != slot
	    || PCI_FUNC(devfn) != func)
		return -EINVAL;

	pci_dev_id = kmalloc(sizeof(*pci_dev_id), GFP_KERNEL);
	if (!pci_dev_id)
		return -ENOMEM;

	pr_debug("wants to seize %04x:%02x:%02x.%d\n",
		 domain, bus, slot, func);

	/* Ownership of pci_dev_id passes to the list helper (linked or freed). */
	pcistub_device_id_add_list(pci_dev_id, domain, bus, devfn);

	return 0;
}
1102 
1103 static int pcistub_device_id_remove(int domain, int bus, int slot, int func)
1104 {
1105 	struct pcistub_device_id *pci_dev_id, *t;
1106 	int err = -ENOENT;
1107 	unsigned long flags;
1108 
1109 	spin_lock_irqsave(&device_ids_lock, flags);
1110 	list_for_each_entry_safe(pci_dev_id, t, &pcistub_device_ids,
1111 				 slot_list) {
1112 		if (pci_dev_id->domain == domain && pci_dev_id->bus == bus
1113 		    && (slot < 0 || PCI_SLOT(pci_dev_id->devfn) == slot)
1114 		    && (func < 0 || PCI_FUNC(pci_dev_id->devfn) == func)) {
1115 			/* Don't break; here because it's possible the same
1116 			 * slot could be in the list more than once
1117 			 */
1118 			list_del(&pci_dev_id->slot_list);
1119 			kfree(pci_dev_id);
1120 
1121 			err = 0;
1122 
1123 			pr_debug("removed %04x:%02x:%02x.%d from seize list\n",
1124 				 domain, bus, slot, func);
1125 		}
1126 	}
1127 	spin_unlock_irqrestore(&device_ids_lock, flags);
1128 
1129 	return err;
1130 }
1131 
1132 static int pcistub_reg_add(int domain, int bus, int slot, int func,
1133 			   unsigned int reg, unsigned int size,
1134 			   unsigned int mask)
1135 {
1136 	int err = 0;
1137 	struct pcistub_device *psdev;
1138 	struct pci_dev *dev;
1139 	struct config_field *field;
1140 
1141 	if (reg > 0xfff || (size < 4 && (mask >> (size * 8))))
1142 		return -EINVAL;
1143 
1144 	psdev = pcistub_device_find(domain, bus, slot, func);
1145 	if (!psdev) {
1146 		err = -ENODEV;
1147 		goto out;
1148 	}
1149 	dev = psdev->dev;
1150 
1151 	field = kzalloc(sizeof(*field), GFP_KERNEL);
1152 	if (!field) {
1153 		err = -ENOMEM;
1154 		goto out;
1155 	}
1156 
1157 	field->offset = reg;
1158 	field->size = size;
1159 	field->mask = mask;
1160 	field->init = NULL;
1161 	field->reset = NULL;
1162 	field->release = NULL;
1163 	field->clean = xen_pcibk_config_field_free;
1164 
1165 	err = xen_pcibk_config_quirks_add_field(dev, field);
1166 	if (err)
1167 		kfree(field);
1168 out:
1169 	if (psdev)
1170 		pcistub_device_put(psdev);
1171 	return err;
1172 }
1173 
1174 static ssize_t new_slot_store(struct device_driver *drv, const char *buf,
1175 			      size_t count)
1176 {
1177 	int domain, bus, slot, func;
1178 	int err;
1179 
1180 	err = str_to_slot(buf, &domain, &bus, &slot, &func);
1181 	if (err)
1182 		goto out;
1183 
1184 	err = pcistub_device_id_add(domain, bus, slot, func);
1185 
1186 out:
1187 	if (!err)
1188 		err = count;
1189 	return err;
1190 }
1191 static DRIVER_ATTR_WO(new_slot);
1192 
1193 static ssize_t remove_slot_store(struct device_driver *drv, const char *buf,
1194 				 size_t count)
1195 {
1196 	int domain, bus, slot, func;
1197 	int err;
1198 
1199 	err = str_to_slot(buf, &domain, &bus, &slot, &func);
1200 	if (err)
1201 		goto out;
1202 
1203 	err = pcistub_device_id_remove(domain, bus, slot, func);
1204 
1205 out:
1206 	if (!err)
1207 		err = count;
1208 	return err;
1209 }
1210 static DRIVER_ATTR_WO(remove_slot);
1211 
1212 static ssize_t slots_show(struct device_driver *drv, char *buf)
1213 {
1214 	struct pcistub_device_id *pci_dev_id;
1215 	size_t count = 0;
1216 	unsigned long flags;
1217 
1218 	spin_lock_irqsave(&device_ids_lock, flags);
1219 	list_for_each_entry(pci_dev_id, &pcistub_device_ids, slot_list) {
1220 		if (count >= PAGE_SIZE)
1221 			break;
1222 
1223 		count += scnprintf(buf + count, PAGE_SIZE - count,
1224 				   "%04x:%02x:%02x.%d\n",
1225 				   pci_dev_id->domain, pci_dev_id->bus,
1226 				   PCI_SLOT(pci_dev_id->devfn),
1227 				   PCI_FUNC(pci_dev_id->devfn));
1228 	}
1229 	spin_unlock_irqrestore(&device_ids_lock, flags);
1230 
1231 	return count;
1232 }
1233 static DRIVER_ATTR_RO(slots);
1234 
1235 static ssize_t irq_handlers_show(struct device_driver *drv, char *buf)
1236 {
1237 	struct pcistub_device *psdev;
1238 	struct xen_pcibk_dev_data *dev_data;
1239 	size_t count = 0;
1240 	unsigned long flags;
1241 
1242 	spin_lock_irqsave(&pcistub_devices_lock, flags);
1243 	list_for_each_entry(psdev, &pcistub_devices, dev_list) {
1244 		if (count >= PAGE_SIZE)
1245 			break;
1246 		if (!psdev->dev)
1247 			continue;
1248 		dev_data = pci_get_drvdata(psdev->dev);
1249 		if (!dev_data)
1250 			continue;
1251 		count +=
1252 		    scnprintf(buf + count, PAGE_SIZE - count,
1253 			      "%s:%s:%sing:%ld\n",
1254 			      pci_name(psdev->dev),
1255 			      dev_data->isr_on ? "on" : "off",
1256 			      dev_data->ack_intr ? "ack" : "not ack",
1257 			      dev_data->handled);
1258 	}
1259 	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
1260 	return count;
1261 }
1262 static DRIVER_ATTR_RO(irq_handlers);
1263 
1264 static ssize_t irq_handler_state_store(struct device_driver *drv,
1265 				       const char *buf, size_t count)
1266 {
1267 	struct pcistub_device *psdev;
1268 	struct xen_pcibk_dev_data *dev_data;
1269 	int domain, bus, slot, func;
1270 	int err;
1271 
1272 	err = str_to_slot(buf, &domain, &bus, &slot, &func);
1273 	if (err)
1274 		return err;
1275 
1276 	psdev = pcistub_device_find(domain, bus, slot, func);
1277 	if (!psdev) {
1278 		err = -ENOENT;
1279 		goto out;
1280 	}
1281 
1282 	dev_data = pci_get_drvdata(psdev->dev);
1283 	if (!dev_data) {
1284 		err = -ENOENT;
1285 		goto out;
1286 	}
1287 
1288 	dev_dbg(&psdev->dev->dev, "%s fake irq handler: %d->%d\n",
1289 		dev_data->irq_name, dev_data->isr_on,
1290 		!dev_data->isr_on);
1291 
1292 	dev_data->isr_on = !(dev_data->isr_on);
1293 	if (dev_data->isr_on)
1294 		dev_data->ack_intr = 1;
1295 out:
1296 	if (psdev)
1297 		pcistub_device_put(psdev);
1298 	if (!err)
1299 		err = count;
1300 	return err;
1301 }
1302 static DRIVER_ATTR_WO(irq_handler_state);
1303 
1304 static ssize_t quirks_store(struct device_driver *drv, const char *buf,
1305 			    size_t count)
1306 {
1307 	int domain, bus, slot, func, reg, size, mask;
1308 	int err;
1309 
1310 	err = str_to_quirk(buf, &domain, &bus, &slot, &func, &reg, &size,
1311 			   &mask);
1312 	if (err)
1313 		goto out;
1314 
1315 	err = pcistub_reg_add(domain, bus, slot, func, reg, size, mask);
1316 
1317 out:
1318 	if (!err)
1319 		err = count;
1320 	return err;
1321 }
1322 
1323 static ssize_t quirks_show(struct device_driver *drv, char *buf)
1324 {
1325 	int count = 0;
1326 	unsigned long flags;
1327 	struct xen_pcibk_config_quirk *quirk;
1328 	struct xen_pcibk_dev_data *dev_data;
1329 	const struct config_field *field;
1330 	const struct config_field_entry *cfg_entry;
1331 
1332 	spin_lock_irqsave(&device_ids_lock, flags);
1333 	list_for_each_entry(quirk, &xen_pcibk_quirks, quirks_list) {
1334 		if (count >= PAGE_SIZE)
1335 			goto out;
1336 
1337 		count += scnprintf(buf + count, PAGE_SIZE - count,
1338 				   "%02x:%02x.%01x\n\t%04x:%04x:%04x:%04x\n",
1339 				   quirk->pdev->bus->number,
1340 				   PCI_SLOT(quirk->pdev->devfn),
1341 				   PCI_FUNC(quirk->pdev->devfn),
1342 				   quirk->devid.vendor, quirk->devid.device,
1343 				   quirk->devid.subvendor,
1344 				   quirk->devid.subdevice);
1345 
1346 		dev_data = pci_get_drvdata(quirk->pdev);
1347 
1348 		list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
1349 			field = cfg_entry->field;
1350 			if (count >= PAGE_SIZE)
1351 				goto out;
1352 
1353 			count += scnprintf(buf + count, PAGE_SIZE - count,
1354 					   "\t\t%08x:%01x:%08x\n",
1355 					   cfg_entry->base_offset +
1356 					   field->offset, field->size,
1357 					   field->mask);
1358 		}
1359 	}
1360 
1361 out:
1362 	spin_unlock_irqrestore(&device_ids_lock, flags);
1363 
1364 	return count;
1365 }
1366 static DRIVER_ATTR_RW(quirks);
1367 
1368 static ssize_t permissive_store(struct device_driver *drv, const char *buf,
1369 				size_t count)
1370 {
1371 	int domain, bus, slot, func;
1372 	int err;
1373 	struct pcistub_device *psdev;
1374 	struct xen_pcibk_dev_data *dev_data;
1375 
1376 	err = str_to_slot(buf, &domain, &bus, &slot, &func);
1377 	if (err)
1378 		goto out;
1379 
1380 	psdev = pcistub_device_find(domain, bus, slot, func);
1381 	if (!psdev) {
1382 		err = -ENODEV;
1383 		goto out;
1384 	}
1385 
1386 	dev_data = pci_get_drvdata(psdev->dev);
1387 	/* the driver data for a device should never be null at this point */
1388 	if (!dev_data) {
1389 		err = -ENXIO;
1390 		goto release;
1391 	}
1392 	if (!dev_data->permissive) {
1393 		dev_data->permissive = 1;
1394 		/* Let user know that what they're doing could be unsafe */
1395 		dev_warn(&psdev->dev->dev, "enabling permissive mode "
1396 			 "configuration space accesses!\n");
1397 		dev_warn(&psdev->dev->dev,
1398 			 "permissive mode is potentially unsafe!\n");
1399 	}
1400 release:
1401 	pcistub_device_put(psdev);
1402 out:
1403 	if (!err)
1404 		err = count;
1405 	return err;
1406 }
1407 
1408 static ssize_t permissive_show(struct device_driver *drv, char *buf)
1409 {
1410 	struct pcistub_device *psdev;
1411 	struct xen_pcibk_dev_data *dev_data;
1412 	size_t count = 0;
1413 	unsigned long flags;
1414 	spin_lock_irqsave(&pcistub_devices_lock, flags);
1415 	list_for_each_entry(psdev, &pcistub_devices, dev_list) {
1416 		if (count >= PAGE_SIZE)
1417 			break;
1418 		if (!psdev->dev)
1419 			continue;
1420 		dev_data = pci_get_drvdata(psdev->dev);
1421 		if (!dev_data || !dev_data->permissive)
1422 			continue;
1423 		count +=
1424 		    scnprintf(buf + count, PAGE_SIZE - count, "%s\n",
1425 			      pci_name(psdev->dev));
1426 	}
1427 	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
1428 	return count;
1429 }
1430 static DRIVER_ATTR_RW(permissive);
1431 
1432 static ssize_t allow_interrupt_control_store(struct device_driver *drv,
1433 					     const char *buf, size_t count)
1434 {
1435 	int domain, bus, slot, func;
1436 	int err;
1437 	struct pcistub_device *psdev;
1438 	struct xen_pcibk_dev_data *dev_data;
1439 
1440 	err = str_to_slot(buf, &domain, &bus, &slot, &func);
1441 	if (err)
1442 		goto out;
1443 
1444 	psdev = pcistub_device_find(domain, bus, slot, func);
1445 	if (!psdev) {
1446 		err = -ENODEV;
1447 		goto out;
1448 	}
1449 
1450 	dev_data = pci_get_drvdata(psdev->dev);
1451 	/* the driver data for a device should never be null at this point */
1452 	if (!dev_data) {
1453 		err = -ENXIO;
1454 		goto release;
1455 	}
1456 	dev_data->allow_interrupt_control = 1;
1457 release:
1458 	pcistub_device_put(psdev);
1459 out:
1460 	if (!err)
1461 		err = count;
1462 	return err;
1463 }
1464 
1465 static ssize_t allow_interrupt_control_show(struct device_driver *drv,
1466 					    char *buf)
1467 {
1468 	struct pcistub_device *psdev;
1469 	struct xen_pcibk_dev_data *dev_data;
1470 	size_t count = 0;
1471 	unsigned long flags;
1472 
1473 	spin_lock_irqsave(&pcistub_devices_lock, flags);
1474 	list_for_each_entry(psdev, &pcistub_devices, dev_list) {
1475 		if (count >= PAGE_SIZE)
1476 			break;
1477 		if (!psdev->dev)
1478 			continue;
1479 		dev_data = pci_get_drvdata(psdev->dev);
1480 		if (!dev_data || !dev_data->allow_interrupt_control)
1481 			continue;
1482 		count +=
1483 		    scnprintf(buf + count, PAGE_SIZE - count, "%s\n",
1484 			      pci_name(psdev->dev));
1485 	}
1486 	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
1487 	return count;
1488 }
1489 static DRIVER_ATTR_RW(allow_interrupt_control);
1490 
1491 static void pcistub_exit(void)
1492 {
1493 	driver_remove_file(&xen_pcibk_pci_driver.driver, &driver_attr_new_slot);
1494 	driver_remove_file(&xen_pcibk_pci_driver.driver,
1495 			   &driver_attr_remove_slot);
1496 	driver_remove_file(&xen_pcibk_pci_driver.driver, &driver_attr_slots);
1497 	driver_remove_file(&xen_pcibk_pci_driver.driver, &driver_attr_quirks);
1498 	driver_remove_file(&xen_pcibk_pci_driver.driver,
1499 			   &driver_attr_permissive);
1500 	driver_remove_file(&xen_pcibk_pci_driver.driver,
1501 			   &driver_attr_allow_interrupt_control);
1502 	driver_remove_file(&xen_pcibk_pci_driver.driver,
1503 			   &driver_attr_irq_handlers);
1504 	driver_remove_file(&xen_pcibk_pci_driver.driver,
1505 			   &driver_attr_irq_handler_state);
1506 	pci_unregister_driver(&xen_pcibk_pci_driver);
1507 }
1508 
/*
 * Set up the stub driver.
 *
 * First the "hide" module parameter (pci_devs_to_hide) is parsed.  It is a
 * sequence of parenthesised slot specifications in hex, e.g. "(0000:01:02.3)",
 * where the domain may be omitted and the function, or slot and function, may
 * be '*'.  Each one is added to the seize list.  Then the PCI driver is
 * registered and its sysfs attribute files are created.
 *
 * Returns 0 on success, -EINVAL if the parameter cannot be parsed, or the
 * error from pcistub_device_id_add(), pci_register_driver() or
 * driver_create_file().
 */
static int __init pcistub_init(void)
{
	int pos = 0;
	int err = 0;
	int domain, bus, slot, func;
	int parsed;

	if (pci_devs_to_hide && *pci_devs_to_hide) {
		do {
			/*
			 * parsed stays 0 unless one of the sscanf() calls
			 * below reaches its trailing %n, i.e. a complete
			 * "(...)" group was matched.
			 */
			parsed = 0;

			err = sscanf(pci_devs_to_hide + pos,
				     " (%x:%x:%x.%x) %n",
				     &domain, &bus, &slot, &func, &parsed);
			switch (err) {
			case 3:
				/* function did not convert: try '*' for it */
				func = -1;
				sscanf(pci_devs_to_hide + pos,
				       " (%x:%x:%x.*) %n",
				       &domain, &bus, &slot, &parsed);
				break;
			case 2:
				/* slot did not convert: try '*' for slot and function */
				slot = func = -1;
				sscanf(pci_devs_to_hide + pos,
				       " (%x:%x:*.*) %n",
				       &domain, &bus, &parsed);
				break;
			}

			/* No domain-qualified form matched: retry without a
			 * domain, which then defaults to 0.
			 */
			if (!parsed) {
				domain = 0;
				err = sscanf(pci_devs_to_hide + pos,
					     " (%x:%x.%x) %n",
					     &bus, &slot, &func, &parsed);
				switch (err) {
				case 2:
					func = -1;
					sscanf(pci_devs_to_hide + pos,
					       " (%x:%x.*) %n",
					       &bus, &slot, &parsed);
					break;
				case 1:
					slot = func = -1;
					sscanf(pci_devs_to_hide + pos,
					       " (%x:*.*) %n",
					       &bus, &parsed);
					break;
				}
			}

			if (parsed <= 0)
				goto parse_error;

			err = pcistub_device_id_add(domain, bus, slot, func);
			if (err)
				goto out;

			/* advance past the group that was just consumed */
			pos += parsed;
		} while (pci_devs_to_hide[pos]);
	}

	/* If we're the first PCI Device Driver to register, we're the
	 * first one to get offered PCI devices as they become
	 * available (and thus we can be the first to grab them)
	 */
	err = pci_register_driver(&xen_pcibk_pci_driver);
	if (err < 0)
		goto out;

	/* Create the attribute files one by one, stopping at the first
	 * failure.
	 */
	err = driver_create_file(&xen_pcibk_pci_driver.driver,
				 &driver_attr_new_slot);
	if (!err)
		err = driver_create_file(&xen_pcibk_pci_driver.driver,
					 &driver_attr_remove_slot);
	if (!err)
		err = driver_create_file(&xen_pcibk_pci_driver.driver,
					 &driver_attr_slots);
	if (!err)
		err = driver_create_file(&xen_pcibk_pci_driver.driver,
					 &driver_attr_quirks);
	if (!err)
		err = driver_create_file(&xen_pcibk_pci_driver.driver,
					 &driver_attr_permissive);
	if (!err)
		err = driver_create_file(&xen_pcibk_pci_driver.driver,
					 &driver_attr_allow_interrupt_control);

	if (!err)
		err = driver_create_file(&xen_pcibk_pci_driver.driver,
					 &driver_attr_irq_handlers);
	if (!err)
		err = driver_create_file(&xen_pcibk_pci_driver.driver,
					&driver_attr_irq_handler_state);
	/* On failure pcistub_exit() removes the attribute files and
	 * unregisters the driver again.
	 */
	if (err)
		pcistub_exit();

out:
	return err;

parse_error:
	pr_err("Error parsing pci_devs_to_hide at \"%s\"\n",
	       pci_devs_to_hide + pos);
	return -EINVAL;
}
1613 
#ifndef MODULE
/*
 * When not built as a module, run pcistub_init() as an fs_initcall.
 * fs_initcall happens before device_initcall, so xen_pcibk *should* be the
 * first PCI device driver to register and can therefore claim devices
 * before other drivers get a chance to.
 */
fs_initcall(pcistub_init);
#endif
1624 
1625 #ifdef CONFIG_PCI_IOV
1626 static struct pcistub_device *find_vfs(const struct pci_dev *pdev)
1627 {
1628 	struct pcistub_device *psdev = NULL;
1629 	unsigned long flags;
1630 	bool found = false;
1631 
1632 	spin_lock_irqsave(&pcistub_devices_lock, flags);
1633 	list_for_each_entry(psdev, &pcistub_devices, dev_list) {
1634 		if (!psdev->pdev && psdev->dev != pdev
1635 		    && pci_physfn(psdev->dev) == pdev) {
1636 			found = true;
1637 			break;
1638 		}
1639 	}
1640 	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
1641 	if (found)
1642 		return psdev;
1643 	return NULL;
1644 }
1645 
1646 static int pci_stub_notifier(struct notifier_block *nb,
1647 			     unsigned long action, void *data)
1648 {
1649 	struct device *dev = data;
1650 	const struct pci_dev *pdev = to_pci_dev(dev);
1651 
1652 	if (action != BUS_NOTIFY_UNBIND_DRIVER)
1653 		return NOTIFY_DONE;
1654 
1655 	if (!pdev->is_physfn)
1656 		return NOTIFY_DONE;
1657 
1658 	for (;;) {
1659 		struct pcistub_device *psdev = find_vfs(pdev);
1660 		if (!psdev)
1661 			break;
1662 		device_release_driver(&psdev->dev->dev);
1663 	}
1664 	return NOTIFY_DONE;
1665 }
1666 
/* Notifier block that routes bus events to pci_stub_notifier(). */
static struct notifier_block pci_stub_nb = {
	.notifier_call = pci_stub_notifier,
};
1670 #endif
1671 
1672 static int __init xen_pcibk_init(void)
1673 {
1674 	int err;
1675 
1676 	if (!xen_initial_domain())
1677 		return -ENODEV;
1678 
1679 	err = xen_pcibk_config_init();
1680 	if (err)
1681 		return err;
1682 
1683 #ifdef MODULE
1684 	err = pcistub_init();
1685 	if (err < 0)
1686 		return err;
1687 #endif
1688 
1689 	pcistub_init_devices_late();
1690 	err = xen_pcibk_xenbus_register();
1691 	if (err)
1692 		pcistub_exit();
1693 #ifdef CONFIG_PCI_IOV
1694 	else
1695 		bus_register_notifier(&pci_bus_type, &pci_stub_nb);
1696 #endif
1697 
1698 	return err;
1699 }
1700 
/*
 * Module exit point: undo xen_pcibk_init() in reverse order - drop the bus
 * notifier (CONFIG_PCI_IOV only), unregister from xenbus, then remove the
 * sysfs files and unregister the PCI driver via pcistub_exit().
 */
static void __exit xen_pcibk_cleanup(void)
{
#ifdef CONFIG_PCI_IOV
	bus_unregister_notifier(&pci_bus_type, &pci_stub_nb);
#endif
	xen_pcibk_xenbus_unregister();
	pcistub_exit();
}
1709 
/* Module entry and exit points. */
module_init(xen_pcibk_init);
module_exit(xen_pcibk_cleanup);

/* Module metadata: license and the alias the module is also known by. */
MODULE_LICENSE("Dual BSD/GPL");
MODULE_ALIAS("xen-backend:pci");
1715