xref: /openbmc/linux/drivers/xen/xen-pciback/pci_stub.c (revision f8a11425075ff11b4b5784f077cb84f3d2dfb3f0)
1 /*
2  * PCI Stub Driver - Grabs devices in backend to be exported later
3  *
4  * Ryan Wilson <hap9@epoch.ncsc.mil>
5  * Chris Bookholt <hap10@epoch.ncsc.mil>
6  */
7 
8 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
9 #define dev_fmt pr_fmt
10 
11 #include <linux/module.h>
12 #include <linux/init.h>
13 #include <linux/rwsem.h>
14 #include <linux/list.h>
15 #include <linux/spinlock.h>
16 #include <linux/kref.h>
17 #include <linux/pci.h>
18 #include <linux/wait.h>
19 #include <linux/sched.h>
20 #include <linux/atomic.h>
21 #include <xen/events.h>
22 #include <asm/xen/pci.h>
23 #include <asm/xen/hypervisor.h>
24 #include <xen/interface/physdev.h>
25 #include "pciback.h"
26 #include "conf_space.h"
27 #include "conf_space_quirks.h"
28 
29 #define PCISTUB_DRIVER_NAME "pciback"
30 
/* Value of the "hide" module parameter (registered read-only, mode 0444). */
static char *pci_devs_to_hide;
/* Wait queue the AER path sleeps on while a request to pcifront is pending. */
wait_queue_head_t xen_pcibk_aer_wait_queue;
/*
 * Semaphore that synchronises AER handling with the xen_pcibk
 * remove/reconfigure operations: a xen_pcibk device must not be removed
 * while an AER operation on it is in progress.
 */
static DECLARE_RWSEM(pcistub_sem);
module_param_named(hide, pci_devs_to_hide, charp, 0444);
38 
/*
 * One domain/bus/devfn address that probed devices are compared against
 * (see pcistub_match_one). Entries are linked into pcistub_device_ids.
 */
struct pcistub_device_id {
	struct list_head slot_list;	/* link in pcistub_device_ids */
	int domain;			/* PCI domain (segment) number */
	unsigned char bus;		/* bus number */
	unsigned int devfn;		/* encoded slot/function */
};
/* All registered IDs; walked and modified under device_ids_lock. */
static LIST_HEAD(pcistub_device_ids);
static DEFINE_SPINLOCK(device_ids_lock);
47 
/*
 * Reference-counted wrapper around a PCI device seized by this driver.
 * The last pcistub_device_put() runs pcistub_device_release().
 */
struct pcistub_device {
	struct kref kref;		/* lifetime of this wrapper */
	struct list_head dev_list;	/* link in pcistub_devices or seized_devices */
	spinlock_t lock;		/* taken around updates of ->pdev */

	struct pci_dev *dev;		/* reference obtained with pci_dev_get() */
	struct xen_pcibk_device *pdev;/* non-NULL if struct pci_dev is in use */
};
56 
/* Access to the pcistub_devices & seized_devices lists and to the
 * initialize_devices flag must be done with pcistub_devices_lock held.
 */
static DEFINE_SPINLOCK(pcistub_devices_lock);
/* Devices that have been fully initialised by pcistub_init_device(). */
static LIST_HEAD(pcistub_devices);

/* Wait for device_initcall before initializing our devices
 * (see pcistub_init_devices_late). Until the flag is set, newly seized
 * devices are parked on seized_devices instead of being initialised.
 */
static int initialize_devices;
static LIST_HEAD(seized_devices);
68 
69 static struct pcistub_device *pcistub_device_alloc(struct pci_dev *dev)
70 {
71 	struct pcistub_device *psdev;
72 
73 	dev_dbg(&dev->dev, "pcistub_device_alloc\n");
74 
75 	psdev = kzalloc(sizeof(*psdev), GFP_KERNEL);
76 	if (!psdev)
77 		return NULL;
78 
79 	psdev->dev = pci_dev_get(dev);
80 	if (!psdev->dev) {
81 		kfree(psdev);
82 		return NULL;
83 	}
84 
85 	kref_init(&psdev->kref);
86 	spin_lock_init(&psdev->lock);
87 
88 	return psdev;
89 }
90 
/*
 * kref release callback: tears the stub down once the last reference is
 * dropped. Don't call this directly as it's called by pcistub_device_put.
 *
 * The sequence is: unregister the domain owner, reset the function,
 * restore the PCI state, release MSI-X in the hypervisor (if the device
 * has the capability), disable the device, free the driver data and the
 * emulated config-space data, and finally drop the pci_dev reference and
 * free the stub itself.
 */
static void pcistub_device_release(struct kref *kref)
{
	struct pcistub_device *psdev;
	struct pci_dev *dev;
	struct xen_pcibk_dev_data *dev_data;

	psdev = container_of(kref, struct pcistub_device, kref);
	dev = psdev->dev;
	dev_data = pci_get_drvdata(dev);

	dev_dbg(&dev->dev, "pcistub_device_release\n");

	xen_unregister_device_domain_owner(dev);

	/* Call the reset function which does not take lock as this
	 * is called from "unbind" which takes a device_lock mutex.
	 */
	__pci_reset_function_locked(dev);
	/*
	 * With driver data present, reload the state saved at init time and
	 * only complain if that fails; in every other case (no driver data,
	 * or a successful reload) write the state back to the device.
	 */
	if (dev_data &&
	    pci_load_and_free_saved_state(dev, &dev_data->pci_saved_state))
		dev_info(&dev->dev, "Could not reload PCI state\n");
	else
		pci_restore_state(dev);

	if (dev->msix_cap) {
		struct physdev_pci_device ppdev = {
			.seg = pci_domain_nr(dev->bus),
			.bus = dev->bus->number,
			.devfn = dev->devfn
		};
		int err = HYPERVISOR_physdev_op(PHYSDEVOP_release_msix,
						&ppdev);

		/* -ENOSYS is not reported as a failure here. */
		if (err && err != -ENOSYS)
			dev_warn(&dev->dev, "MSI-X release failed (%d)\n",
				 err);
	}

	/* Disable the device */
	xen_pcibk_reset_device(dev);

	kfree(dev_data);
	pci_set_drvdata(dev, NULL);

	/* Clean-up the device */
	xen_pcibk_config_free_dyn_fields(dev);
	xen_pcibk_config_free_dev(dev);

	pci_clear_dev_assigned(dev);
	/* Drops the reference taken in pcistub_device_alloc(). */
	pci_dev_put(dev);

	kfree(psdev);
}
145 
/* Take an additional reference on @psdev. */
static inline void pcistub_device_get(struct pcistub_device *psdev)
{
	kref_get(&psdev->kref);
}
150 
/*
 * Drop one reference on @psdev; when it was the last one,
 * pcistub_device_release() runs and frees the stub.
 */
static inline void pcistub_device_put(struct pcistub_device *psdev)
{
	kref_put(&psdev->kref, pcistub_device_release);
}
155 
156 static struct pcistub_device *pcistub_device_find_locked(int domain, int bus,
157 							 int slot, int func)
158 {
159 	struct pcistub_device *psdev;
160 
161 	list_for_each_entry(psdev, &pcistub_devices, dev_list) {
162 		if (psdev->dev != NULL
163 		    && domain == pci_domain_nr(psdev->dev->bus)
164 		    && bus == psdev->dev->bus->number
165 		    && slot == PCI_SLOT(psdev->dev->devfn)
166 		    && func == PCI_FUNC(psdev->dev->devfn)) {
167 			return psdev;
168 		}
169 	}
170 
171 	return NULL;
172 }
173 
174 static struct pcistub_device *pcistub_device_find(int domain, int bus,
175 						  int slot, int func)
176 {
177 	struct pcistub_device *psdev;
178 	unsigned long flags;
179 
180 	spin_lock_irqsave(&pcistub_devices_lock, flags);
181 
182 	psdev = pcistub_device_find_locked(domain, bus, slot, func);
183 	if (psdev)
184 		pcistub_device_get(psdev);
185 
186 	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
187 	return psdev;
188 }
189 
190 static struct pci_dev *pcistub_device_get_pci_dev(struct xen_pcibk_device *pdev,
191 						  struct pcistub_device *psdev)
192 {
193 	struct pci_dev *pci_dev = NULL;
194 	unsigned long flags;
195 
196 	pcistub_device_get(psdev);
197 
198 	spin_lock_irqsave(&psdev->lock, flags);
199 	if (!psdev->pdev) {
200 		psdev->pdev = pdev;
201 		pci_dev = psdev->dev;
202 	}
203 	spin_unlock_irqrestore(&psdev->lock, flags);
204 
205 	if (!pci_dev)
206 		pcistub_device_put(psdev);
207 
208 	return pci_dev;
209 }
210 
211 struct pci_dev *pcistub_get_pci_dev_by_slot(struct xen_pcibk_device *pdev,
212 					    int domain, int bus,
213 					    int slot, int func)
214 {
215 	struct pcistub_device *psdev;
216 	struct pci_dev *found_dev = NULL;
217 	unsigned long flags;
218 
219 	spin_lock_irqsave(&pcistub_devices_lock, flags);
220 
221 	psdev = pcistub_device_find_locked(domain, bus, slot, func);
222 	if (psdev)
223 		found_dev = pcistub_device_get_pci_dev(pdev, psdev);
224 
225 	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
226 	return found_dev;
227 }
228 
229 struct pci_dev *pcistub_get_pci_dev(struct xen_pcibk_device *pdev,
230 				    struct pci_dev *dev)
231 {
232 	struct pcistub_device *psdev;
233 	struct pci_dev *found_dev = NULL;
234 	unsigned long flags;
235 
236 	spin_lock_irqsave(&pcistub_devices_lock, flags);
237 
238 	list_for_each_entry(psdev, &pcistub_devices, dev_list) {
239 		if (psdev->dev == dev) {
240 			found_dev = pcistub_device_get_pci_dev(pdev, psdev);
241 			break;
242 		}
243 	}
244 
245 	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
246 	return found_dev;
247 }
248 
/*
 * Give a device back to the stub so it is ready for the next domain.
 *
 * Called when:
 *  - XenBus state has been reconfigured (pci unplug). See xen_pcibk_remove_device
 *  - XenBus state has been disconnected (guest shutdown). See xen_pcibk_xenbus_remove
 *  - 'echo BDF > unbind' on pciback module with no guest attached. See pcistub_remove
 *  - 'echo BDF > unbind' with a guest still using it. See pcistub_remove
 *
 *  As such we have to be careful.
 *
 *  To make this easier, the caller has to hold the device lock.
 *
 *  If @dev is not on the pcistub_devices list a warning is raised and
 *  nothing else happens.
 */
void pcistub_put_pci_dev(struct pci_dev *dev)
{
	struct pcistub_device *psdev, *found_psdev = NULL;
	unsigned long flags;
	struct xen_pcibk_dev_data *dev_data;
	int ret;

	spin_lock_irqsave(&pcistub_devices_lock, flags);

	list_for_each_entry(psdev, &pcistub_devices, dev_list) {
		if (psdev->dev == dev) {
			found_psdev = psdev;
			break;
		}
	}

	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
	if (WARN_ON(!found_psdev))
		return;

	/* Hold this lock to avoid breaking the link between
	 * pcistub and xen_pcibk while AER is being processed.
	 */
	down_write(&pcistub_sem);
	/* Cleanup our device
	 * (so it's ready for the next domain)
	 */
	device_lock_assert(&dev->dev);
	__pci_reset_function_locked(dev);

	dev_data = pci_get_drvdata(dev);
	ret = pci_load_saved_state(dev, dev_data->pci_saved_state);
	if (!ret) {
		/*
		 * The usual sequence is pci_save_state & pci_restore_state
		 * but the guest might have messed the configuration space up.
		 * Use the initial version (when device was bound to us).
		 */
		pci_restore_state(dev);
	} else
		dev_info(&dev->dev, "Could not reload PCI state\n");
	/* This disables the device. */
	xen_pcibk_reset_device(dev);

	/* And clean up our emulated fields. */
	xen_pcibk_config_reset_dev(dev);
	xen_pcibk_config_free_dyn_fields(dev);

	dev_data->allow_interrupt_control = 0;

	xen_unregister_device_domain_owner(dev);

	/* Mark the stub as no longer in use by any backend. */
	spin_lock_irqsave(&found_psdev->lock, flags);
	found_psdev->pdev = NULL;
	spin_unlock_irqrestore(&found_psdev->lock, flags);

	/* Pairs with the reference kept by pcistub_device_get_pci_dev(). */
	pcistub_device_put(found_psdev);
	up_write(&pcistub_sem);
}
319 
320 static int pcistub_match_one(struct pci_dev *dev,
321 			     struct pcistub_device_id *pdev_id)
322 {
323 	/* Match the specified device by domain, bus, slot, func and also if
324 	 * any of the device's parent bridges match.
325 	 */
326 	for (; dev != NULL; dev = dev->bus->self) {
327 		if (pci_domain_nr(dev->bus) == pdev_id->domain
328 		    && dev->bus->number == pdev_id->bus
329 		    && dev->devfn == pdev_id->devfn)
330 			return 1;
331 
332 		/* Sometimes topmost bridge links to itself. */
333 		if (dev == dev->bus->self)
334 			break;
335 	}
336 
337 	return 0;
338 }
339 
340 static int pcistub_match(struct pci_dev *dev)
341 {
342 	struct pcistub_device_id *pdev_id;
343 	unsigned long flags;
344 	int found = 0;
345 
346 	spin_lock_irqsave(&device_ids_lock, flags);
347 	list_for_each_entry(pdev_id, &pcistub_device_ids, slot_list) {
348 		if (pcistub_match_one(dev, pdev_id)) {
349 			found = 1;
350 			break;
351 		}
352 	}
353 	spin_unlock_irqrestore(&device_ids_lock, flags);
354 
355 	return found;
356 }
357 
/*
 * Prepare a seized device for export: allocate its driver data, set up
 * the emulated config space, enable the device, ask the hypervisor to
 * prepare MSI-X (if present), snapshot the PCI state, reset the function
 * and finally leave the device disabled and marked as assigned.
 *
 * Returns 0 on success or a negative errno; on failure the driver data
 * is freed and the drvdata pointer cleared again.
 */
static int pcistub_init_device(struct pci_dev *dev)
{
	struct xen_pcibk_dev_data *dev_data;
	int err = 0;

	dev_dbg(&dev->dev, "initializing...\n");

	/* The PCI backend is not intended to be a module (or to work with
	 * removable PCI devices (yet). If it were, xen_pcibk_config_free()
	 * would need to be called somewhere to free the memory allocated
	 * here and then to call kfree(pci_get_drvdata(psdev->dev)).
	 */
	/* The extra bytes hold the "DRV_NAME[<pci name>]" string written below. */
	dev_data = kzalloc(sizeof(*dev_data) +  strlen(DRV_NAME "[]")
				+ strlen(pci_name(dev)) + 1, GFP_KERNEL);
	if (!dev_data) {
		err = -ENOMEM;
		goto out;
	}
	pci_set_drvdata(dev, dev_data);

	/*
	 * Setup name for fake IRQ handler. It will only be enabled
	 * once the device is turned on by the guest.
	 */
	sprintf(dev_data->irq_name, DRV_NAME "[%s]", pci_name(dev));

	dev_dbg(&dev->dev, "initializing config\n");

	/* NOTE(review): this re-initialises the file-global AER wait queue on
	 * every device initialisation - confirm no waiter can be queued here.
	 */
	init_waitqueue_head(&xen_pcibk_aer_wait_queue);
	err = xen_pcibk_config_init_dev(dev);
	if (err)
		goto out;

	/* HACK: Force device (& ACPI) to determine what IRQ it's on - we
	 * must do this here because pcibios_enable_device may specify
	 * the pci device's true irq (and possibly its other resources)
	 * if they differ from what's in the configuration space.
	 * This makes the assumption that the device's resources won't
	 * change after this point (otherwise this code may break!)
	 */
	dev_dbg(&dev->dev, "enabling device\n");
	err = pci_enable_device(dev);
	if (err)
		goto config_release;

	if (dev->msix_cap) {
		struct physdev_pci_device ppdev = {
			.seg = pci_domain_nr(dev->bus),
			.bus = dev->bus->number,
			.devfn = dev->devfn
		};

		/* A failure here is only logged; initialisation carries on. */
		err = HYPERVISOR_physdev_op(PHYSDEVOP_prepare_msix, &ppdev);
		if (err && err != -ENOSYS)
			dev_err(&dev->dev, "MSI-X preparation failed (%d)\n",
				err);
	}

	/* We need the device active to save the state. */
	dev_dbg(&dev->dev, "save state of device\n");
	pci_save_state(dev);
	dev_data->pci_saved_state = pci_store_saved_state(dev);
	if (!dev_data->pci_saved_state)
		dev_err(&dev->dev, "Could not store PCI conf saved state!\n");
	else {
		/* Only reset when there is a stored copy to restore from. */
		dev_dbg(&dev->dev, "resetting (FLR, D3, etc) the device\n");
		__pci_reset_function_locked(dev);
		pci_restore_state(dev);
	}
	/* Now disable the device (this also ensures some private device
	 * data is setup before we export)
	 */
	dev_dbg(&dev->dev, "reset device\n");
	xen_pcibk_reset_device(dev);

	pci_set_dev_assigned(dev);
	return 0;

config_release:
	xen_pcibk_config_free_dev(dev);

out:
	pci_set_drvdata(dev, NULL);
	kfree(dev_data);
	return err;
}
444 
445 /*
446  * Because some initialization still happens on
447  * devices during fs_initcall, we need to defer
448  * full initialization of our devices until
449  * device_initcall.
450  */
451 static int __init pcistub_init_devices_late(void)
452 {
453 	struct pcistub_device *psdev;
454 	unsigned long flags;
455 	int err = 0;
456 
457 	spin_lock_irqsave(&pcistub_devices_lock, flags);
458 
459 	while (!list_empty(&seized_devices)) {
460 		psdev = container_of(seized_devices.next,
461 				     struct pcistub_device, dev_list);
462 		list_del(&psdev->dev_list);
463 
464 		spin_unlock_irqrestore(&pcistub_devices_lock, flags);
465 
466 		err = pcistub_init_device(psdev->dev);
467 		if (err) {
468 			dev_err(&psdev->dev->dev,
469 				"error %d initializing device\n", err);
470 			kfree(psdev);
471 			psdev = NULL;
472 		}
473 
474 		spin_lock_irqsave(&pcistub_devices_lock, flags);
475 
476 		if (psdev)
477 			list_add_tail(&psdev->dev_list, &pcistub_devices);
478 	}
479 
480 	initialize_devices = 1;
481 
482 	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
483 
484 	return 0;
485 }
486 
487 static void pcistub_device_id_add_list(struct pcistub_device_id *new,
488 				       int domain, int bus, unsigned int devfn)
489 {
490 	struct pcistub_device_id *pci_dev_id;
491 	unsigned long flags;
492 	int found = 0;
493 
494 	spin_lock_irqsave(&device_ids_lock, flags);
495 
496 	list_for_each_entry(pci_dev_id, &pcistub_device_ids, slot_list) {
497 		if (pci_dev_id->domain == domain && pci_dev_id->bus == bus &&
498 		    pci_dev_id->devfn == devfn) {
499 			found = 1;
500 			break;
501 		}
502 	}
503 
504 	if (!found) {
505 		new->domain = domain;
506 		new->bus = bus;
507 		new->devfn = devfn;
508 		list_add_tail(&new->slot_list, &pcistub_device_ids);
509 	}
510 
511 	spin_unlock_irqrestore(&device_ids_lock, flags);
512 
513 	if (found)
514 		kfree(new);
515 }
516 
517 static int pcistub_seize(struct pci_dev *dev,
518 			 struct pcistub_device_id *pci_dev_id)
519 {
520 	struct pcistub_device *psdev;
521 	unsigned long flags;
522 	int err = 0;
523 
524 	psdev = pcistub_device_alloc(dev);
525 	if (!psdev) {
526 		kfree(pci_dev_id);
527 		return -ENOMEM;
528 	}
529 
530 	spin_lock_irqsave(&pcistub_devices_lock, flags);
531 
532 	if (initialize_devices) {
533 		spin_unlock_irqrestore(&pcistub_devices_lock, flags);
534 
535 		/* don't want irqs disabled when calling pcistub_init_device */
536 		err = pcistub_init_device(psdev->dev);
537 
538 		spin_lock_irqsave(&pcistub_devices_lock, flags);
539 
540 		if (!err)
541 			list_add(&psdev->dev_list, &pcistub_devices);
542 	} else {
543 		dev_dbg(&dev->dev, "deferring initialization\n");
544 		list_add(&psdev->dev_list, &seized_devices);
545 	}
546 
547 	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
548 
549 	if (err) {
550 		kfree(pci_dev_id);
551 		pcistub_device_put(psdev);
552 	} else if (pci_dev_id)
553 		pcistub_device_id_add_list(pci_dev_id, pci_domain_nr(dev->bus),
554 					   dev->bus->number, dev->devfn);
555 
556 	return err;
557 }
558 
559 /* Called when 'bind'. This means we must _NOT_ call pci_reset_function or
560  * other functions that take the sysfs lock. */
561 static int pcistub_probe(struct pci_dev *dev, const struct pci_device_id *id)
562 {
563 	int err = 0, match;
564 	struct pcistub_device_id *pci_dev_id = NULL;
565 
566 	dev_dbg(&dev->dev, "probing...\n");
567 
568 	match = pcistub_match(dev);
569 
570 	if ((dev->driver_override &&
571 	     !strcmp(dev->driver_override, PCISTUB_DRIVER_NAME)) ||
572 	    match) {
573 
574 		if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL
575 		    && dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) {
576 			dev_err(&dev->dev, "can't export pci devices that "
577 				"don't have a normal (0) or bridge (1) "
578 				"header type!\n");
579 			err = -ENODEV;
580 			goto out;
581 		}
582 
583 		if (!match) {
584 			pci_dev_id = kmalloc(sizeof(*pci_dev_id), GFP_KERNEL);
585 			if (!pci_dev_id) {
586 				err = -ENOMEM;
587 				goto out;
588 			}
589 		}
590 
591 		dev_info(&dev->dev, "seizing device\n");
592 		err = pcistub_seize(dev, pci_dev_id);
593 	} else
594 		/* Didn't find the device */
595 		err = -ENODEV;
596 
597 out:
598 	return err;
599 }
600 
601 /* Called when 'unbind'. This means we must _NOT_ call pci_reset_function or
602  * other functions that take the sysfs lock. */
603 static void pcistub_remove(struct pci_dev *dev)
604 {
605 	struct pcistub_device *psdev, *found_psdev = NULL;
606 	unsigned long flags;
607 
608 	dev_dbg(&dev->dev, "removing\n");
609 
610 	spin_lock_irqsave(&pcistub_devices_lock, flags);
611 
612 	xen_pcibk_config_quirk_release(dev);
613 
614 	list_for_each_entry(psdev, &pcistub_devices, dev_list) {
615 		if (psdev->dev == dev) {
616 			found_psdev = psdev;
617 			break;
618 		}
619 	}
620 
621 	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
622 
623 	if (found_psdev) {
624 		dev_dbg(&dev->dev, "found device to remove %s\n",
625 			found_psdev->pdev ? "- in-use" : "");
626 
627 		if (found_psdev->pdev) {
628 			int domid = xen_find_device_domain_owner(dev);
629 
630 			dev_warn(&dev->dev, "****** removing device %s while still in-use by domain %d! ******\n",
631 			       pci_name(found_psdev->dev), domid);
632 			dev_warn(&dev->dev, "****** driver domain may still access this device's i/o resources!\n");
633 			dev_warn(&dev->dev, "****** shutdown driver domain before binding device\n");
634 			dev_warn(&dev->dev, "****** to other drivers or domains\n");
635 
636 			/* N.B. This ends up calling pcistub_put_pci_dev which ends up
637 			 * doing the FLR. */
638 			xen_pcibk_release_pci_dev(found_psdev->pdev,
639 						found_psdev->dev,
640 						false /* caller holds the lock. */);
641 		}
642 
643 		spin_lock_irqsave(&pcistub_devices_lock, flags);
644 		list_del(&found_psdev->dev_list);
645 		spin_unlock_irqrestore(&pcistub_devices_lock, flags);
646 
647 		/* the final put for releasing from the list */
648 		pcistub_device_put(found_psdev);
649 	}
650 }
651 
/*
 * ID table with a single wildcard entry (PCI_ANY_ID in every field),
 * followed by the zero terminator. The decision whether a device is
 * actually taken is made in pcistub_probe().
 */
static const struct pci_device_id pcistub_ids[] = {
	{
	 .vendor = PCI_ANY_ID,
	 .device = PCI_ANY_ID,
	 .subvendor = PCI_ANY_ID,
	 .subdevice = PCI_ANY_ID,
	 },
	{0,},
};
661 
662 #define PCI_NODENAME_MAX 40
663 static void kill_domain_by_device(struct pcistub_device *psdev)
664 {
665 	struct xenbus_transaction xbt;
666 	int err;
667 	char nodename[PCI_NODENAME_MAX];
668 
669 	BUG_ON(!psdev);
670 	snprintf(nodename, PCI_NODENAME_MAX, "/local/domain/0/backend/pci/%d/0",
671 		psdev->pdev->xdev->otherend_id);
672 
673 again:
674 	err = xenbus_transaction_start(&xbt);
675 	if (err) {
676 		dev_err(&psdev->dev->dev,
677 			"error %d when start xenbus transaction\n", err);
678 		return;
679 	}
680 	/*PV AER handlers will set this flag*/
681 	xenbus_printf(xbt, nodename, "aerState" , "aerfail");
682 	err = xenbus_transaction_end(xbt, 0);
683 	if (err) {
684 		if (err == -EAGAIN)
685 			goto again;
686 		dev_err(&psdev->dev->dev,
687 			"error %d when end xenbus transaction\n", err);
688 		return;
689 	}
690 }
691 
/* For each AER recovery step (error_detected, mmio_enabled, etc.) the
 * frontend and the backend have to cooperate. In xen_pcibk all of those
 * steps do a similar job: send a service request and wait for the
 * frontend's response.
 *
 * @psdev:   stub of the affected device; psdev->pdev and its sh_info are
 *           dereferenced without checks, so callers must validate them
 * @state:   channel state, passed to the frontend through aer_op->err
 * @aer_cmd: request code stored in aer_op->cmd
 * @result:  value returned when the frontend does not answer in time
 *
 * Returns PCI_ERS_RESULT_NONE when the frontend device address cannot be
 * determined, @result when the frontend has not cleared _XEN_PCIB_active
 * after 300*HZ jiffies, and otherwise whatever the frontend left in
 * aer_op->err.
 */
static pci_ers_result_t common_process(struct pcistub_device *psdev,
				       pci_channel_state_t state, int aer_cmd,
				       pci_ers_result_t result)
{
	pci_ers_result_t res = result;
	struct xen_pcie_aer_op *aer_op;
	struct xen_pcibk_device *pdev = psdev->pdev;
	struct xen_pci_sharedinfo *sh_info = pdev->sh_info;
	int ret;

	/* Fill in the request for the PV AER driver in the frontend. */
	aer_op = &(sh_info->aer_op);
	aer_op->cmd = aer_cmd ;
	/* Useful for the error_detected callback. */
	aer_op->err = state;
	/* BDF of the device as seen by pcifront. */
	ret = xen_pcibk_get_pcifront_dev(psdev->dev, psdev->pdev,
		&aer_op->domain, &aer_op->bus, &aer_op->devfn);
	if (!ret) {
		dev_err(&psdev->dev->dev, "failed to get pcifront device\n");
		return PCI_ERS_RESULT_NONE;
	}
	/* Write barrier between filling in the request and raising the flags. */
	wmb();

	dev_dbg(&psdev->dev->dev, "aer_op %x dom %x bus %x devfn %x\n",
			aer_cmd, aer_op->domain, aer_op->bus, aer_op->devfn);
	/* Local flag marking that an AER request is outstanding; the
	 * xen_pcibk callback uses it to decide whether it has to check for
	 * pcifront's AER service acknowledgement.
	 */
	set_bit(_PCIB_op_pending, (unsigned long *)&pdev->flags);

	/* It is possible that a pcifront conf_read_write ops request invokes
	 * the callback, which causes a spurious execution of wake_up.
	 * Yet it is harmless and better than a spinlock here.
	 */
	set_bit(_XEN_PCIB_active,
		(unsigned long *)&sh_info->flags);
	wmb();
	notify_remote_via_irq(pdev->evtchn_irq);

	/* Enable IRQ to signal "request done". */
	xen_pcibk_lateeoi(pdev, 0);

	/* Sleep until the active flag is cleared again or the timeout hits. */
	ret = wait_event_timeout(xen_pcibk_aer_wait_queue,
				 !(test_bit(_XEN_PCIB_active, (unsigned long *)
				 &sh_info->flags)), 300*HZ);

	/* Enable IRQ for pcifront request if not already active. */
	if (!test_bit(_PDEVF_op_active, &pdev->flags))
		xen_pcibk_lateeoi(pdev, 0);

	if (!ret) {
		if (test_bit(_XEN_PCIB_active,
			(unsigned long *)&sh_info->flags)) {
			dev_err(&psdev->dev->dev,
				"pcifront aer process not responding!\n");
			clear_bit(_XEN_PCIB_active,
			  (unsigned long *)&sh_info->flags);
			aer_op->err = PCI_ERS_RESULT_NONE;
			/* NOTE(review): _PCIB_op_pending stays set on this
			 * path - confirm that this is intended.
			 */
			return res;
		}
	}
	clear_bit(_PCIB_op_pending, (unsigned long *)&pdev->flags);

	res = (pci_ers_result_t)aer_op->err;
	return res;
}
764 
765 /*
766 * xen_pcibk_slot_reset: it will send the slot_reset request to  pcifront in case
767 * of the device driver could provide this service, and then wait for pcifront
768 * ack.
769 * @dev: pointer to PCI devices
770 * return value is used by aer_core do_recovery policy
771 */
772 static pci_ers_result_t xen_pcibk_slot_reset(struct pci_dev *dev)
773 {
774 	struct pcistub_device *psdev;
775 	pci_ers_result_t result;
776 
777 	result = PCI_ERS_RESULT_RECOVERED;
778 	dev_dbg(&dev->dev, "xen_pcibk_slot_reset(bus:%x,devfn:%x)\n",
779 		dev->bus->number, dev->devfn);
780 
781 	down_write(&pcistub_sem);
782 	psdev = pcistub_device_find(pci_domain_nr(dev->bus),
783 				dev->bus->number,
784 				PCI_SLOT(dev->devfn),
785 				PCI_FUNC(dev->devfn));
786 
787 	if (!psdev || !psdev->pdev) {
788 		dev_err(&dev->dev, "device is not found/assigned\n");
789 		goto end;
790 	}
791 
792 	if (!psdev->pdev->sh_info) {
793 		dev_err(&dev->dev, "device is not connected or owned"
794 			" by HVM, kill it\n");
795 		kill_domain_by_device(psdev);
796 		goto end;
797 	}
798 
799 	if (!test_bit(_XEN_PCIB_AERHANDLER,
800 		(unsigned long *)&psdev->pdev->sh_info->flags)) {
801 		dev_err(&dev->dev,
802 			"guest with no AER driver should have been killed\n");
803 		goto end;
804 	}
805 	result = common_process(psdev, pci_channel_io_normal, XEN_PCI_OP_aer_slotreset, result);
806 
807 	if (result == PCI_ERS_RESULT_NONE ||
808 		result == PCI_ERS_RESULT_DISCONNECT) {
809 		dev_dbg(&dev->dev,
810 			"No AER slot_reset service or disconnected!\n");
811 		kill_domain_by_device(psdev);
812 	}
813 end:
814 	if (psdev)
815 		pcistub_device_put(psdev);
816 	up_write(&pcistub_sem);
817 	return result;
818 
819 }
820 
821 
822 /*xen_pcibk_mmio_enabled: it will send the mmio_enabled request to  pcifront
823 * in case of the device driver could provide this service, and then wait
824 * for pcifront ack
825 * @dev: pointer to PCI devices
826 * return value is used by aer_core do_recovery policy
827 */
828 
829 static pci_ers_result_t xen_pcibk_mmio_enabled(struct pci_dev *dev)
830 {
831 	struct pcistub_device *psdev;
832 	pci_ers_result_t result;
833 
834 	result = PCI_ERS_RESULT_RECOVERED;
835 	dev_dbg(&dev->dev, "xen_pcibk_mmio_enabled(bus:%x,devfn:%x)\n",
836 		dev->bus->number, dev->devfn);
837 
838 	down_write(&pcistub_sem);
839 	psdev = pcistub_device_find(pci_domain_nr(dev->bus),
840 				dev->bus->number,
841 				PCI_SLOT(dev->devfn),
842 				PCI_FUNC(dev->devfn));
843 
844 	if (!psdev || !psdev->pdev) {
845 		dev_err(&dev->dev, "device is not found/assigned\n");
846 		goto end;
847 	}
848 
849 	if (!psdev->pdev->sh_info) {
850 		dev_err(&dev->dev, "device is not connected or owned"
851 			" by HVM, kill it\n");
852 		kill_domain_by_device(psdev);
853 		goto end;
854 	}
855 
856 	if (!test_bit(_XEN_PCIB_AERHANDLER,
857 		(unsigned long *)&psdev->pdev->sh_info->flags)) {
858 		dev_err(&dev->dev,
859 			"guest with no AER driver should have been killed\n");
860 		goto end;
861 	}
862 	result = common_process(psdev, pci_channel_io_normal, XEN_PCI_OP_aer_mmio, result);
863 
864 	if (result == PCI_ERS_RESULT_NONE ||
865 		result == PCI_ERS_RESULT_DISCONNECT) {
866 		dev_dbg(&dev->dev,
867 			"No AER mmio_enabled service or disconnected!\n");
868 		kill_domain_by_device(psdev);
869 	}
870 end:
871 	if (psdev)
872 		pcistub_device_put(psdev);
873 	up_write(&pcistub_sem);
874 	return result;
875 }
876 
877 /*xen_pcibk_error_detected: it will send the error_detected request to  pcifront
878 * in case of the device driver could provide this service, and then wait
879 * for pcifront ack.
880 * @dev: pointer to PCI devices
881 * @error: the current PCI connection state
882 * return value is used by aer_core do_recovery policy
883 */
884 
885 static pci_ers_result_t xen_pcibk_error_detected(struct pci_dev *dev,
886 	pci_channel_state_t error)
887 {
888 	struct pcistub_device *psdev;
889 	pci_ers_result_t result;
890 
891 	result = PCI_ERS_RESULT_CAN_RECOVER;
892 	dev_dbg(&dev->dev, "xen_pcibk_error_detected(bus:%x,devfn:%x)\n",
893 		dev->bus->number, dev->devfn);
894 
895 	down_write(&pcistub_sem);
896 	psdev = pcistub_device_find(pci_domain_nr(dev->bus),
897 				dev->bus->number,
898 				PCI_SLOT(dev->devfn),
899 				PCI_FUNC(dev->devfn));
900 
901 	if (!psdev || !psdev->pdev) {
902 		dev_err(&dev->dev, "device is not found/assigned\n");
903 		goto end;
904 	}
905 
906 	if (!psdev->pdev->sh_info) {
907 		dev_err(&dev->dev, "device is not connected or owned"
908 			" by HVM, kill it\n");
909 		kill_domain_by_device(psdev);
910 		goto end;
911 	}
912 
913 	/*Guest owns the device yet no aer handler regiested, kill guest*/
914 	if (!test_bit(_XEN_PCIB_AERHANDLER,
915 		(unsigned long *)&psdev->pdev->sh_info->flags)) {
916 		dev_dbg(&dev->dev, "guest may have no aer driver, kill it\n");
917 		kill_domain_by_device(psdev);
918 		goto end;
919 	}
920 	result = common_process(psdev, error, XEN_PCI_OP_aer_detected, result);
921 
922 	if (result == PCI_ERS_RESULT_NONE ||
923 		result == PCI_ERS_RESULT_DISCONNECT) {
924 		dev_dbg(&dev->dev,
925 			"No AER error_detected service or disconnected!\n");
926 		kill_domain_by_device(psdev);
927 	}
928 end:
929 	if (psdev)
930 		pcistub_device_put(psdev);
931 	up_write(&pcistub_sem);
932 	return result;
933 }
934 
935 /*xen_pcibk_error_resume: it will send the error_resume request to  pcifront
936 * in case of the device driver could provide this service, and then wait
937 * for pcifront ack.
938 * @dev: pointer to PCI devices
939 */
940 
941 static void xen_pcibk_error_resume(struct pci_dev *dev)
942 {
943 	struct pcistub_device *psdev;
944 
945 	dev_dbg(&dev->dev, "xen_pcibk_error_resume(bus:%x,devfn:%x)\n",
946 		dev->bus->number, dev->devfn);
947 
948 	down_write(&pcistub_sem);
949 	psdev = pcistub_device_find(pci_domain_nr(dev->bus),
950 				dev->bus->number,
951 				PCI_SLOT(dev->devfn),
952 				PCI_FUNC(dev->devfn));
953 
954 	if (!psdev || !psdev->pdev) {
955 		dev_err(&dev->dev, "device is not found/assigned\n");
956 		goto end;
957 	}
958 
959 	if (!psdev->pdev->sh_info) {
960 		dev_err(&dev->dev, "device is not connected or owned"
961 			" by HVM, kill it\n");
962 		kill_domain_by_device(psdev);
963 		goto end;
964 	}
965 
966 	if (!test_bit(_XEN_PCIB_AERHANDLER,
967 		(unsigned long *)&psdev->pdev->sh_info->flags)) {
968 		dev_err(&dev->dev,
969 			"guest with no AER driver should have been killed\n");
970 		kill_domain_by_device(psdev);
971 		goto end;
972 	}
973 	common_process(psdev, pci_channel_io_normal, XEN_PCI_OP_aer_resume,
974 		       PCI_ERS_RESULT_RECOVERED);
975 end:
976 	if (psdev)
977 		pcistub_device_put(psdev);
978 	up_write(&pcistub_sem);
979 	return;
980 }
981 
/*
 * xen_pcibk AER handling: the error-recovery callbacks installed on the
 * PCI driver below. Each of them relays the step to pcifront.
 */
static const struct pci_error_handlers xen_pcibk_error_handler = {
	.error_detected = xen_pcibk_error_detected,
	.mmio_enabled = xen_pcibk_mmio_enabled,
	.slot_reset = xen_pcibk_slot_reset,
	.resume = xen_pcibk_error_resume,
};
989 
/*
 * Note: There is no MODULE_DEVICE_TABLE entry here because this isn't
 * for a normal device. I don't want it to be loaded automatically.
 */

/* The PCI driver object: wildcard ID table plus probe/remove/AER hooks. */
static struct pci_driver xen_pcibk_pci_driver = {
	/* The name should be xen_pciback, but until the tools are updated
	 * we will keep it as pciback. */
	.name = PCISTUB_DRIVER_NAME,
	.id_table = pcistub_ids,
	.probe = pcistub_probe,
	.remove = pcistub_remove,
	.err_handler = &xen_pcibk_error_handler,
};
1004 
/*
 * Parse a PCI address, given in hex, out of @buf.
 *
 * Accepted forms are "dddd:bb:ss.f" and "bb:ss.f" (domain taken as 0).
 * The function, or both slot and function, may be written as '*'; the
 * corresponding output is then set to -1. The whole string has to be
 * consumed, apart from surrounding whitespace.
 *
 * Returns 0 on success and -EINVAL otherwise. The output values may have
 * been modified even when -EINVAL is returned.
 */
static inline int str_to_slot(const char *buf, int *domain, int *bus,
			      int *slot, int *func)
{
	int consumed = 0;
	int nfields;

	nfields = sscanf(buf, " %x:%x:%x.%x %n", domain, bus, slot, func,
			 &consumed);
	if (nfields == 3) {
		/* function missing: retry with a literal wildcard */
		*func = -1;
		sscanf(buf, " %x:%x:%x.* %n", domain, bus, slot, &consumed);
	} else if (nfields == 2) {
		/* slot missing: retry with wildcard slot and function */
		*slot = *func = -1;
		sscanf(buf, " %x:%x:*.* %n", domain, bus, &consumed);
	}
	if (consumed && buf[consumed] == '\0')
		return 0;

	/* try again without domain */
	*domain = 0;
	nfields = sscanf(buf, " %x:%x.%x %n", bus, slot, func, &consumed);
	if (nfields == 2) {
		*func = -1;
		sscanf(buf, " %x:%x.* %n", bus, slot, &consumed);
	} else if (nfields == 1) {
		*slot = *func = -1;
		sscanf(buf, " %x:*.* %n", bus, &consumed);
	}
	if (consumed && buf[consumed] == '\0')
		return 0;

	return -EINVAL;
}
1041 
/*
 * Parse a quirk specification, given in hex, out of @buf.
 *
 * Accepted forms are "dddd:bb:ss.f-reg:size:mask" and the same without the
 * leading domain (domain is then taken as 0). The whole string has to be
 * consumed, apart from surrounding whitespace.
 *
 * Returns 0 on success and -EINVAL otherwise. The output values may have
 * been modified even when -EINVAL is returned.
 */
static inline int str_to_quirk(const char *buf, int *domain, int *bus, int
			       *slot, int *func, int *reg, int *size, int *mask)
{
	int consumed = 0;

	sscanf(buf, " %x:%x:%x.%x-%x:%x:%x %n", domain, bus, slot, func,
	       reg, size, mask, &consumed);
	if (!consumed || buf[consumed] != '\0') {
		/* try again without domain */
		*domain = 0;
		sscanf(buf, " %x:%x.%x-%x:%x:%x %n", bus, slot, func, reg,
		       size, mask, &consumed);
		if (!consumed || buf[consumed] != '\0')
			return -EINVAL;
	}

	return 0;
}
1061 
1062 static int pcistub_device_id_add(int domain, int bus, int slot, int func)
1063 {
1064 	struct pcistub_device_id *pci_dev_id;
1065 	int rc = 0, devfn = PCI_DEVFN(slot, func);
1066 
1067 	if (slot < 0) {
1068 		for (slot = 0; !rc && slot < 32; ++slot)
1069 			rc = pcistub_device_id_add(domain, bus, slot, func);
1070 		return rc;
1071 	}
1072 
1073 	if (func < 0) {
1074 		for (func = 0; !rc && func < 8; ++func)
1075 			rc = pcistub_device_id_add(domain, bus, slot, func);
1076 		return rc;
1077 	}
1078 
1079 	if ((
1080 #if !defined(MODULE) /* pci_domains_supported is not being exported */ \
1081     || !defined(CONFIG_PCI_DOMAINS)
1082 	     !pci_domains_supported ? domain :
1083 #endif
1084 	     domain < 0 || domain > 0xffff)
1085 	    || bus < 0 || bus > 0xff
1086 	    || PCI_SLOT(devfn) != slot
1087 	    || PCI_FUNC(devfn) != func)
1088 		return -EINVAL;
1089 
1090 	pci_dev_id = kmalloc(sizeof(*pci_dev_id), GFP_KERNEL);
1091 	if (!pci_dev_id)
1092 		return -ENOMEM;
1093 
1094 	pr_debug("wants to seize %04x:%02x:%02x.%d\n",
1095 		 domain, bus, slot, func);
1096 
1097 	pcistub_device_id_add_list(pci_dev_id, domain, bus, devfn);
1098 
1099 	return 0;
1100 }
1101 
1102 static int pcistub_device_id_remove(int domain, int bus, int slot, int func)
1103 {
1104 	struct pcistub_device_id *pci_dev_id, *t;
1105 	int err = -ENOENT;
1106 	unsigned long flags;
1107 
1108 	spin_lock_irqsave(&device_ids_lock, flags);
1109 	list_for_each_entry_safe(pci_dev_id, t, &pcistub_device_ids,
1110 				 slot_list) {
1111 		if (pci_dev_id->domain == domain && pci_dev_id->bus == bus
1112 		    && (slot < 0 || PCI_SLOT(pci_dev_id->devfn) == slot)
1113 		    && (func < 0 || PCI_FUNC(pci_dev_id->devfn) == func)) {
1114 			/* Don't break; here because it's possible the same
1115 			 * slot could be in the list more than once
1116 			 */
1117 			list_del(&pci_dev_id->slot_list);
1118 			kfree(pci_dev_id);
1119 
1120 			err = 0;
1121 
1122 			pr_debug("removed %04x:%02x:%02x.%d from seize list\n",
1123 				 domain, bus, slot, func);
1124 		}
1125 	}
1126 	spin_unlock_irqrestore(&device_ids_lock, flags);
1127 
1128 	return err;
1129 }
1130 
1131 static int pcistub_reg_add(int domain, int bus, int slot, int func,
1132 			   unsigned int reg, unsigned int size,
1133 			   unsigned int mask)
1134 {
1135 	int err = 0;
1136 	struct pcistub_device *psdev;
1137 	struct pci_dev *dev;
1138 	struct config_field *field;
1139 
1140 	if (reg > 0xfff || (size < 4 && (mask >> (size * 8))))
1141 		return -EINVAL;
1142 
1143 	psdev = pcistub_device_find(domain, bus, slot, func);
1144 	if (!psdev) {
1145 		err = -ENODEV;
1146 		goto out;
1147 	}
1148 	dev = psdev->dev;
1149 
1150 	field = kzalloc(sizeof(*field), GFP_KERNEL);
1151 	if (!field) {
1152 		err = -ENOMEM;
1153 		goto out;
1154 	}
1155 
1156 	field->offset = reg;
1157 	field->size = size;
1158 	field->mask = mask;
1159 	field->init = NULL;
1160 	field->reset = NULL;
1161 	field->release = NULL;
1162 	field->clean = xen_pcibk_config_field_free;
1163 
1164 	err = xen_pcibk_config_quirks_add_field(dev, field);
1165 	if (err)
1166 		kfree(field);
1167 out:
1168 	if (psdev)
1169 		pcistub_device_put(psdev);
1170 	return err;
1171 }
1172 
1173 static ssize_t new_slot_store(struct device_driver *drv, const char *buf,
1174 			      size_t count)
1175 {
1176 	int domain, bus, slot, func;
1177 	int err;
1178 
1179 	err = str_to_slot(buf, &domain, &bus, &slot, &func);
1180 	if (err)
1181 		goto out;
1182 
1183 	err = pcistub_device_id_add(domain, bus, slot, func);
1184 
1185 out:
1186 	if (!err)
1187 		err = count;
1188 	return err;
1189 }
1190 static DRIVER_ATTR_WO(new_slot);
1191 
1192 static ssize_t remove_slot_store(struct device_driver *drv, const char *buf,
1193 				 size_t count)
1194 {
1195 	int domain, bus, slot, func;
1196 	int err;
1197 
1198 	err = str_to_slot(buf, &domain, &bus, &slot, &func);
1199 	if (err)
1200 		goto out;
1201 
1202 	err = pcistub_device_id_remove(domain, bus, slot, func);
1203 
1204 out:
1205 	if (!err)
1206 		err = count;
1207 	return err;
1208 }
1209 static DRIVER_ATTR_WO(remove_slot);
1210 
1211 static ssize_t slots_show(struct device_driver *drv, char *buf)
1212 {
1213 	struct pcistub_device_id *pci_dev_id;
1214 	size_t count = 0;
1215 	unsigned long flags;
1216 
1217 	spin_lock_irqsave(&device_ids_lock, flags);
1218 	list_for_each_entry(pci_dev_id, &pcistub_device_ids, slot_list) {
1219 		if (count >= PAGE_SIZE)
1220 			break;
1221 
1222 		count += scnprintf(buf + count, PAGE_SIZE - count,
1223 				   "%04x:%02x:%02x.%d\n",
1224 				   pci_dev_id->domain, pci_dev_id->bus,
1225 				   PCI_SLOT(pci_dev_id->devfn),
1226 				   PCI_FUNC(pci_dev_id->devfn));
1227 	}
1228 	spin_unlock_irqrestore(&device_ids_lock, flags);
1229 
1230 	return count;
1231 }
1232 static DRIVER_ATTR_RO(slots);
1233 
1234 static ssize_t irq_handlers_show(struct device_driver *drv, char *buf)
1235 {
1236 	struct pcistub_device *psdev;
1237 	struct xen_pcibk_dev_data *dev_data;
1238 	size_t count = 0;
1239 	unsigned long flags;
1240 
1241 	spin_lock_irqsave(&pcistub_devices_lock, flags);
1242 	list_for_each_entry(psdev, &pcistub_devices, dev_list) {
1243 		if (count >= PAGE_SIZE)
1244 			break;
1245 		if (!psdev->dev)
1246 			continue;
1247 		dev_data = pci_get_drvdata(psdev->dev);
1248 		if (!dev_data)
1249 			continue;
1250 		count +=
1251 		    scnprintf(buf + count, PAGE_SIZE - count,
1252 			      "%s:%s:%sing:%ld\n",
1253 			      pci_name(psdev->dev),
1254 			      dev_data->isr_on ? "on" : "off",
1255 			      dev_data->ack_intr ? "ack" : "not ack",
1256 			      dev_data->handled);
1257 	}
1258 	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
1259 	return count;
1260 }
1261 static DRIVER_ATTR_RO(irq_handlers);
1262 
1263 static ssize_t irq_handler_state_store(struct device_driver *drv,
1264 				       const char *buf, size_t count)
1265 {
1266 	struct pcistub_device *psdev;
1267 	struct xen_pcibk_dev_data *dev_data;
1268 	int domain, bus, slot, func;
1269 	int err;
1270 
1271 	err = str_to_slot(buf, &domain, &bus, &slot, &func);
1272 	if (err)
1273 		return err;
1274 
1275 	psdev = pcistub_device_find(domain, bus, slot, func);
1276 	if (!psdev) {
1277 		err = -ENOENT;
1278 		goto out;
1279 	}
1280 
1281 	dev_data = pci_get_drvdata(psdev->dev);
1282 	if (!dev_data) {
1283 		err = -ENOENT;
1284 		goto out;
1285 	}
1286 
1287 	dev_dbg(&psdev->dev->dev, "%s fake irq handler: %d->%d\n",
1288 		dev_data->irq_name, dev_data->isr_on,
1289 		!dev_data->isr_on);
1290 
1291 	dev_data->isr_on = !(dev_data->isr_on);
1292 	if (dev_data->isr_on)
1293 		dev_data->ack_intr = 1;
1294 out:
1295 	if (psdev)
1296 		pcistub_device_put(psdev);
1297 	if (!err)
1298 		err = count;
1299 	return err;
1300 }
1301 static DRIVER_ATTR_WO(irq_handler_state);
1302 
1303 static ssize_t quirks_store(struct device_driver *drv, const char *buf,
1304 			    size_t count)
1305 {
1306 	int domain, bus, slot, func, reg, size, mask;
1307 	int err;
1308 
1309 	err = str_to_quirk(buf, &domain, &bus, &slot, &func, &reg, &size,
1310 			   &mask);
1311 	if (err)
1312 		goto out;
1313 
1314 	err = pcistub_reg_add(domain, bus, slot, func, reg, size, mask);
1315 
1316 out:
1317 	if (!err)
1318 		err = count;
1319 	return err;
1320 }
1321 
1322 static ssize_t quirks_show(struct device_driver *drv, char *buf)
1323 {
1324 	int count = 0;
1325 	unsigned long flags;
1326 	struct xen_pcibk_config_quirk *quirk;
1327 	struct xen_pcibk_dev_data *dev_data;
1328 	const struct config_field *field;
1329 	const struct config_field_entry *cfg_entry;
1330 
1331 	spin_lock_irqsave(&device_ids_lock, flags);
1332 	list_for_each_entry(quirk, &xen_pcibk_quirks, quirks_list) {
1333 		if (count >= PAGE_SIZE)
1334 			goto out;
1335 
1336 		count += scnprintf(buf + count, PAGE_SIZE - count,
1337 				   "%02x:%02x.%01x\n\t%04x:%04x:%04x:%04x\n",
1338 				   quirk->pdev->bus->number,
1339 				   PCI_SLOT(quirk->pdev->devfn),
1340 				   PCI_FUNC(quirk->pdev->devfn),
1341 				   quirk->devid.vendor, quirk->devid.device,
1342 				   quirk->devid.subvendor,
1343 				   quirk->devid.subdevice);
1344 
1345 		dev_data = pci_get_drvdata(quirk->pdev);
1346 
1347 		list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
1348 			field = cfg_entry->field;
1349 			if (count >= PAGE_SIZE)
1350 				goto out;
1351 
1352 			count += scnprintf(buf + count, PAGE_SIZE - count,
1353 					   "\t\t%08x:%01x:%08x\n",
1354 					   cfg_entry->base_offset +
1355 					   field->offset, field->size,
1356 					   field->mask);
1357 		}
1358 	}
1359 
1360 out:
1361 	spin_unlock_irqrestore(&device_ids_lock, flags);
1362 
1363 	return count;
1364 }
1365 static DRIVER_ATTR_RW(quirks);
1366 
1367 static ssize_t permissive_store(struct device_driver *drv, const char *buf,
1368 				size_t count)
1369 {
1370 	int domain, bus, slot, func;
1371 	int err;
1372 	struct pcistub_device *psdev;
1373 	struct xen_pcibk_dev_data *dev_data;
1374 
1375 	err = str_to_slot(buf, &domain, &bus, &slot, &func);
1376 	if (err)
1377 		goto out;
1378 
1379 	psdev = pcistub_device_find(domain, bus, slot, func);
1380 	if (!psdev) {
1381 		err = -ENODEV;
1382 		goto out;
1383 	}
1384 
1385 	dev_data = pci_get_drvdata(psdev->dev);
1386 	/* the driver data for a device should never be null at this point */
1387 	if (!dev_data) {
1388 		err = -ENXIO;
1389 		goto release;
1390 	}
1391 	if (!dev_data->permissive) {
1392 		dev_data->permissive = 1;
1393 		/* Let user know that what they're doing could be unsafe */
1394 		dev_warn(&psdev->dev->dev, "enabling permissive mode "
1395 			 "configuration space accesses!\n");
1396 		dev_warn(&psdev->dev->dev,
1397 			 "permissive mode is potentially unsafe!\n");
1398 	}
1399 release:
1400 	pcistub_device_put(psdev);
1401 out:
1402 	if (!err)
1403 		err = count;
1404 	return err;
1405 }
1406 
1407 static ssize_t permissive_show(struct device_driver *drv, char *buf)
1408 {
1409 	struct pcistub_device *psdev;
1410 	struct xen_pcibk_dev_data *dev_data;
1411 	size_t count = 0;
1412 	unsigned long flags;
1413 	spin_lock_irqsave(&pcistub_devices_lock, flags);
1414 	list_for_each_entry(psdev, &pcistub_devices, dev_list) {
1415 		if (count >= PAGE_SIZE)
1416 			break;
1417 		if (!psdev->dev)
1418 			continue;
1419 		dev_data = pci_get_drvdata(psdev->dev);
1420 		if (!dev_data || !dev_data->permissive)
1421 			continue;
1422 		count +=
1423 		    scnprintf(buf + count, PAGE_SIZE - count, "%s\n",
1424 			      pci_name(psdev->dev));
1425 	}
1426 	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
1427 	return count;
1428 }
1429 static DRIVER_ATTR_RW(permissive);
1430 
1431 static ssize_t allow_interrupt_control_store(struct device_driver *drv,
1432 					     const char *buf, size_t count)
1433 {
1434 	int domain, bus, slot, func;
1435 	int err;
1436 	struct pcistub_device *psdev;
1437 	struct xen_pcibk_dev_data *dev_data;
1438 
1439 	err = str_to_slot(buf, &domain, &bus, &slot, &func);
1440 	if (err)
1441 		goto out;
1442 
1443 	psdev = pcistub_device_find(domain, bus, slot, func);
1444 	if (!psdev) {
1445 		err = -ENODEV;
1446 		goto out;
1447 	}
1448 
1449 	dev_data = pci_get_drvdata(psdev->dev);
1450 	/* the driver data for a device should never be null at this point */
1451 	if (!dev_data) {
1452 		err = -ENXIO;
1453 		goto release;
1454 	}
1455 	dev_data->allow_interrupt_control = 1;
1456 release:
1457 	pcistub_device_put(psdev);
1458 out:
1459 	if (!err)
1460 		err = count;
1461 	return err;
1462 }
1463 
1464 static ssize_t allow_interrupt_control_show(struct device_driver *drv,
1465 					    char *buf)
1466 {
1467 	struct pcistub_device *psdev;
1468 	struct xen_pcibk_dev_data *dev_data;
1469 	size_t count = 0;
1470 	unsigned long flags;
1471 
1472 	spin_lock_irqsave(&pcistub_devices_lock, flags);
1473 	list_for_each_entry(psdev, &pcistub_devices, dev_list) {
1474 		if (count >= PAGE_SIZE)
1475 			break;
1476 		if (!psdev->dev)
1477 			continue;
1478 		dev_data = pci_get_drvdata(psdev->dev);
1479 		if (!dev_data || !dev_data->allow_interrupt_control)
1480 			continue;
1481 		count +=
1482 		    scnprintf(buf + count, PAGE_SIZE - count, "%s\n",
1483 			      pci_name(psdev->dev));
1484 	}
1485 	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
1486 	return count;
1487 }
1488 static DRIVER_ATTR_RW(allow_interrupt_control);
1489 
1490 static void pcistub_exit(void)
1491 {
1492 	driver_remove_file(&xen_pcibk_pci_driver.driver, &driver_attr_new_slot);
1493 	driver_remove_file(&xen_pcibk_pci_driver.driver,
1494 			   &driver_attr_remove_slot);
1495 	driver_remove_file(&xen_pcibk_pci_driver.driver, &driver_attr_slots);
1496 	driver_remove_file(&xen_pcibk_pci_driver.driver, &driver_attr_quirks);
1497 	driver_remove_file(&xen_pcibk_pci_driver.driver,
1498 			   &driver_attr_permissive);
1499 	driver_remove_file(&xen_pcibk_pci_driver.driver,
1500 			   &driver_attr_allow_interrupt_control);
1501 	driver_remove_file(&xen_pcibk_pci_driver.driver,
1502 			   &driver_attr_irq_handlers);
1503 	driver_remove_file(&xen_pcibk_pci_driver.driver,
1504 			   &driver_attr_irq_handler_state);
1505 	pci_unregister_driver(&xen_pcibk_pci_driver);
1506 }
1507 
1508 static int __init pcistub_init(void)
1509 {
1510 	int pos = 0;
1511 	int err = 0;
1512 	int domain, bus, slot, func;
1513 	int parsed;
1514 
1515 	if (pci_devs_to_hide && *pci_devs_to_hide) {
1516 		do {
1517 			parsed = 0;
1518 
1519 			err = sscanf(pci_devs_to_hide + pos,
1520 				     " (%x:%x:%x.%x) %n",
1521 				     &domain, &bus, &slot, &func, &parsed);
1522 			switch (err) {
1523 			case 3:
1524 				func = -1;
1525 				sscanf(pci_devs_to_hide + pos,
1526 				       " (%x:%x:%x.*) %n",
1527 				       &domain, &bus, &slot, &parsed);
1528 				break;
1529 			case 2:
1530 				slot = func = -1;
1531 				sscanf(pci_devs_to_hide + pos,
1532 				       " (%x:%x:*.*) %n",
1533 				       &domain, &bus, &parsed);
1534 				break;
1535 			}
1536 
1537 			if (!parsed) {
1538 				domain = 0;
1539 				err = sscanf(pci_devs_to_hide + pos,
1540 					     " (%x:%x.%x) %n",
1541 					     &bus, &slot, &func, &parsed);
1542 				switch (err) {
1543 				case 2:
1544 					func = -1;
1545 					sscanf(pci_devs_to_hide + pos,
1546 					       " (%x:%x.*) %n",
1547 					       &bus, &slot, &parsed);
1548 					break;
1549 				case 1:
1550 					slot = func = -1;
1551 					sscanf(pci_devs_to_hide + pos,
1552 					       " (%x:*.*) %n",
1553 					       &bus, &parsed);
1554 					break;
1555 				}
1556 			}
1557 
1558 			if (parsed <= 0)
1559 				goto parse_error;
1560 
1561 			err = pcistub_device_id_add(domain, bus, slot, func);
1562 			if (err)
1563 				goto out;
1564 
1565 			pos += parsed;
1566 		} while (pci_devs_to_hide[pos]);
1567 	}
1568 
1569 	/* If we're the first PCI Device Driver to register, we're the
1570 	 * first one to get offered PCI devices as they become
1571 	 * available (and thus we can be the first to grab them)
1572 	 */
1573 	err = pci_register_driver(&xen_pcibk_pci_driver);
1574 	if (err < 0)
1575 		goto out;
1576 
1577 	err = driver_create_file(&xen_pcibk_pci_driver.driver,
1578 				 &driver_attr_new_slot);
1579 	if (!err)
1580 		err = driver_create_file(&xen_pcibk_pci_driver.driver,
1581 					 &driver_attr_remove_slot);
1582 	if (!err)
1583 		err = driver_create_file(&xen_pcibk_pci_driver.driver,
1584 					 &driver_attr_slots);
1585 	if (!err)
1586 		err = driver_create_file(&xen_pcibk_pci_driver.driver,
1587 					 &driver_attr_quirks);
1588 	if (!err)
1589 		err = driver_create_file(&xen_pcibk_pci_driver.driver,
1590 					 &driver_attr_permissive);
1591 	if (!err)
1592 		err = driver_create_file(&xen_pcibk_pci_driver.driver,
1593 					 &driver_attr_allow_interrupt_control);
1594 
1595 	if (!err)
1596 		err = driver_create_file(&xen_pcibk_pci_driver.driver,
1597 					 &driver_attr_irq_handlers);
1598 	if (!err)
1599 		err = driver_create_file(&xen_pcibk_pci_driver.driver,
1600 					&driver_attr_irq_handler_state);
1601 	if (err)
1602 		pcistub_exit();
1603 
1604 out:
1605 	return err;
1606 
1607 parse_error:
1608 	pr_err("Error parsing pci_devs_to_hide at \"%s\"\n",
1609 	       pci_devs_to_hide + pos);
1610 	return -EINVAL;
1611 }
1612 
1613 #ifndef MODULE
/*
 * fs_initcall happens before device_initcall, so xen_pcibk *should* get
 * called first: we want to claim any device before other drivers get a
 * chance, by being the first PCI device driver to register.
 */
1621 fs_initcall(pcistub_init);
1622 #endif
1623 
1624 #ifdef CONFIG_PCI_IOV
1625 static struct pcistub_device *find_vfs(const struct pci_dev *pdev)
1626 {
1627 	struct pcistub_device *psdev = NULL;
1628 	unsigned long flags;
1629 	bool found = false;
1630 
1631 	spin_lock_irqsave(&pcistub_devices_lock, flags);
1632 	list_for_each_entry(psdev, &pcistub_devices, dev_list) {
1633 		if (!psdev->pdev && psdev->dev != pdev
1634 		    && pci_physfn(psdev->dev) == pdev) {
1635 			found = true;
1636 			break;
1637 		}
1638 	}
1639 	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
1640 	if (found)
1641 		return psdev;
1642 	return NULL;
1643 }
1644 
1645 static int pci_stub_notifier(struct notifier_block *nb,
1646 			     unsigned long action, void *data)
1647 {
1648 	struct device *dev = data;
1649 	const struct pci_dev *pdev = to_pci_dev(dev);
1650 
1651 	if (action != BUS_NOTIFY_UNBIND_DRIVER)
1652 		return NOTIFY_DONE;
1653 
1654 	if (!pdev->is_physfn)
1655 		return NOTIFY_DONE;
1656 
1657 	for (;;) {
1658 		struct pcistub_device *psdev = find_vfs(pdev);
1659 		if (!psdev)
1660 			break;
1661 		device_release_driver(&psdev->dev->dev);
1662 	}
1663 	return NOTIFY_DONE;
1664 }
1665 
1666 static struct notifier_block pci_stub_nb = {
1667 	.notifier_call = pci_stub_notifier,
1668 };
1669 #endif
1670 
1671 static int __init xen_pcibk_init(void)
1672 {
1673 	int err;
1674 
1675 	if (!xen_initial_domain())
1676 		return -ENODEV;
1677 
1678 	err = xen_pcibk_config_init();
1679 	if (err)
1680 		return err;
1681 
1682 #ifdef MODULE
1683 	err = pcistub_init();
1684 	if (err < 0)
1685 		return err;
1686 #endif
1687 
1688 	pcistub_init_devices_late();
1689 	err = xen_pcibk_xenbus_register();
1690 	if (err)
1691 		pcistub_exit();
1692 #ifdef CONFIG_PCI_IOV
1693 	else
1694 		bus_register_notifier(&pci_bus_type, &pci_stub_nb);
1695 #endif
1696 
1697 	return err;
1698 }
1699 
1700 static void __exit xen_pcibk_cleanup(void)
1701 {
1702 #ifdef CONFIG_PCI_IOV
1703 	bus_unregister_notifier(&pci_bus_type, &pci_stub_nb);
1704 #endif
1705 	xen_pcibk_xenbus_unregister();
1706 	pcistub_exit();
1707 }
1708 
1709 module_init(xen_pcibk_init);
1710 module_exit(xen_pcibk_cleanup);
1711 
1712 MODULE_LICENSE("Dual BSD/GPL");
1713 MODULE_ALIAS("xen-backend:pci");
1714