xref: /openbmc/linux/drivers/pci/hotplug/pnv_php.c (revision 99fee508)
1 /*
2  * PCI Hotplug Driver for PowerPC PowerNV platform.
3  *
4  * Copyright Gavin Shan, IBM Corporation 2016.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  */
11 
12 #include <linux/libfdt.h>
13 #include <linux/module.h>
14 #include <linux/pci.h>
15 #include <linux/pci_hotplug.h>
16 
17 #include <asm/opal.h>
18 #include <asm/pnv-pci.h>
19 #include <asm/ppc-pci.h>
20 
/* Identification strings used by the init banner and the MODULE_*() tags. */
#define DRIVER_VERSION	"0.1"
#define DRIVER_AUTHOR	"Gavin Shan, IBM Corporation"
#define DRIVER_DESC	"PowerPC PowerNV PCI Hotplug Driver"
24 
25 struct pnv_php_event {
26 	bool			added;
27 	struct pnv_php_slot	*php_slot;
28 	struct work_struct	work;
29 };
30 
/*
 * Slots without a registered parent slot are linked on the global list;
 * the others are linked on their parent's ->children list. The spinlock
 * is taken around every lookup, insertion and removal on those lists.
 */
static LIST_HEAD(pnv_php_slot_list);
static DEFINE_SPINLOCK(pnv_php_lock);

/* Defined further down, but needed by the enable/disable paths. */
static void pnv_php_register(struct device_node *dn);
static void pnv_php_unregister_one(struct device_node *dn);
static void pnv_php_unregister(struct device_node *dn);
37 
38 static void pnv_php_disable_irq(struct pnv_php_slot *php_slot,
39 				bool disable_device)
40 {
41 	struct pci_dev *pdev = php_slot->pdev;
42 	int irq = php_slot->irq;
43 	u16 ctrl;
44 
45 	if (php_slot->irq > 0) {
46 		pcie_capability_read_word(pdev, PCI_EXP_SLTCTL, &ctrl);
47 		ctrl &= ~(PCI_EXP_SLTCTL_HPIE |
48 			  PCI_EXP_SLTCTL_PDCE |
49 			  PCI_EXP_SLTCTL_DLLSCE);
50 		pcie_capability_write_word(pdev, PCI_EXP_SLTCTL, ctrl);
51 
52 		free_irq(php_slot->irq, php_slot);
53 		php_slot->irq = 0;
54 	}
55 
56 	if (php_slot->wq) {
57 		destroy_workqueue(php_slot->wq);
58 		php_slot->wq = NULL;
59 	}
60 
61 	if (disable_device || irq > 0) {
62 		if (pdev->msix_enabled)
63 			pci_disable_msix(pdev);
64 		else if (pdev->msi_enabled)
65 			pci_disable_msi(pdev);
66 
67 		pci_disable_device(pdev);
68 	}
69 }
70 
71 static void pnv_php_free_slot(struct kref *kref)
72 {
73 	struct pnv_php_slot *php_slot = container_of(kref,
74 					struct pnv_php_slot, kref);
75 
76 	WARN_ON(!list_empty(&php_slot->children));
77 	pnv_php_disable_irq(php_slot, false);
78 	kfree(php_slot->name);
79 	kfree(php_slot);
80 }
81 
82 static inline void pnv_php_put_slot(struct pnv_php_slot *php_slot)
83 {
84 
85 	if (!php_slot)
86 		return;
87 
88 	kref_put(&php_slot->kref, pnv_php_free_slot);
89 }
90 
91 static struct pnv_php_slot *pnv_php_match(struct device_node *dn,
92 					  struct pnv_php_slot *php_slot)
93 {
94 	struct pnv_php_slot *target, *tmp;
95 
96 	if (php_slot->dn == dn) {
97 		kref_get(&php_slot->kref);
98 		return php_slot;
99 	}
100 
101 	list_for_each_entry(tmp, &php_slot->children, link) {
102 		target = pnv_php_match(dn, tmp);
103 		if (target)
104 			return target;
105 	}
106 
107 	return NULL;
108 }
109 
110 struct pnv_php_slot *pnv_php_find_slot(struct device_node *dn)
111 {
112 	struct pnv_php_slot *php_slot, *tmp;
113 	unsigned long flags;
114 
115 	spin_lock_irqsave(&pnv_php_lock, flags);
116 	list_for_each_entry(tmp, &pnv_php_slot_list, link) {
117 		php_slot = pnv_php_match(dn, tmp);
118 		if (php_slot) {
119 			spin_unlock_irqrestore(&pnv_php_lock, flags);
120 			return php_slot;
121 		}
122 	}
123 	spin_unlock_irqrestore(&pnv_php_lock, flags);
124 
125 	return NULL;
126 }
127 EXPORT_SYMBOL_GPL(pnv_php_find_slot);
128 
129 /*
130  * Remove pdn for all children of the indicated device node.
131  * The function should remove pdn in a depth-first manner.
132  */
133 static void pnv_php_rmv_pdns(struct device_node *dn)
134 {
135 	struct device_node *child;
136 
137 	for_each_child_of_node(dn, child) {
138 		pnv_php_rmv_pdns(child);
139 
140 		pci_remove_device_node_info(child);
141 	}
142 }
143 
144 /*
145  * Detach all child nodes of the indicated device nodes. The
146  * function should handle device nodes in depth-first manner.
147  *
148  * We should not invoke of_node_release() as the memory for
149  * individual device node is part of large memory block. The
150  * large block is allocated from memblock (system bootup) or
151  * kmalloc() when unflattening the device tree by OF changeset.
152  * We can not free the large block allocated from memblock. For
153  * later case, it should be released at once.
154  */
155 static void pnv_php_detach_device_nodes(struct device_node *parent)
156 {
157 	struct device_node *dn;
158 	int refcount;
159 
160 	for_each_child_of_node(parent, dn) {
161 		pnv_php_detach_device_nodes(dn);
162 
163 		of_node_put(dn);
164 		refcount = kref_read(&dn->kobj.kref);
165 		if (refcount != 1)
166 			pr_warn("Invalid refcount %d on <%pOF>\n",
167 				refcount, dn);
168 
169 		of_detach_node(dn);
170 	}
171 }
172 
173 static void pnv_php_rmv_devtree(struct pnv_php_slot *php_slot)
174 {
175 	pnv_php_rmv_pdns(php_slot->dn);
176 
177 	/*
178 	 * Decrease the refcount if the device nodes were created
179 	 * through OF changeset before detaching them.
180 	 */
181 	if (php_slot->fdt)
182 		of_changeset_destroy(&php_slot->ocs);
183 	pnv_php_detach_device_nodes(php_slot->dn);
184 
185 	if (php_slot->fdt) {
186 		kfree(php_slot->dt);
187 		kfree(php_slot->fdt);
188 		php_slot->dt        = NULL;
189 		php_slot->dn->child = NULL;
190 		php_slot->fdt       = NULL;
191 	}
192 }
193 
194 /*
195  * As the nodes in OF changeset are applied in reverse order, we
196  * need revert the nodes in advance so that we have correct node
197  * order after the changeset is applied.
198  */
199 static void pnv_php_reverse_nodes(struct device_node *parent)
200 {
201 	struct device_node *child, *next;
202 
203 	/* In-depth first */
204 	for_each_child_of_node(parent, child)
205 		pnv_php_reverse_nodes(child);
206 
207 	/* Reverse the nodes in the child list */
208 	child = parent->child;
209 	parent->child = NULL;
210 	while (child) {
211 		next = child->sibling;
212 
213 		child->sibling = parent->child;
214 		parent->child = child;
215 		child = next;
216 	}
217 }
218 
219 static int pnv_php_populate_changeset(struct of_changeset *ocs,
220 				      struct device_node *dn)
221 {
222 	struct device_node *child;
223 	int ret = 0;
224 
225 	for_each_child_of_node(dn, child) {
226 		ret = of_changeset_attach_node(ocs, child);
227 		if (ret)
228 			break;
229 
230 		ret = pnv_php_populate_changeset(ocs, child);
231 		if (ret)
232 			break;
233 	}
234 
235 	return ret;
236 }
237 
238 static void *pnv_php_add_one_pdn(struct device_node *dn, void *data)
239 {
240 	struct pci_controller *hose = (struct pci_controller *)data;
241 	struct pci_dn *pdn;
242 
243 	pdn = pci_add_device_node_info(hose, dn);
244 	if (!pdn)
245 		return ERR_PTR(-ENOMEM);
246 
247 	return NULL;
248 }
249 
250 static void pnv_php_add_pdns(struct pnv_php_slot *slot)
251 {
252 	struct pci_controller *hose = pci_bus_to_host(slot->bus);
253 
254 	pci_traverse_device_nodes(slot->dn, pnv_php_add_one_pdn, hose);
255 }
256 
257 static int pnv_php_add_devtree(struct pnv_php_slot *php_slot)
258 {
259 	void *fdt, *fdt1, *dt;
260 	int ret;
261 
262 	/* We don't know the FDT blob size. We try to get it through
263 	 * maximal memory chunk and then copy it to another chunk that
264 	 * fits the real size.
265 	 */
266 	fdt1 = kzalloc(0x10000, GFP_KERNEL);
267 	if (!fdt1) {
268 		ret = -ENOMEM;
269 		dev_warn(&php_slot->pdev->dev, "Cannot alloc FDT blob\n");
270 		goto out;
271 	}
272 
273 	ret = pnv_pci_get_device_tree(php_slot->dn->phandle, fdt1, 0x10000);
274 	if (ret) {
275 		dev_warn(&php_slot->pdev->dev, "Error %d getting FDT blob\n",
276 			 ret);
277 		goto free_fdt1;
278 	}
279 
280 	fdt = kzalloc(fdt_totalsize(fdt1), GFP_KERNEL);
281 	if (!fdt) {
282 		ret = -ENOMEM;
283 		dev_warn(&php_slot->pdev->dev, "Cannot %d bytes memory\n",
284 			 fdt_totalsize(fdt1));
285 		goto free_fdt1;
286 	}
287 
288 	/* Unflatten device tree blob */
289 	memcpy(fdt, fdt1, fdt_totalsize(fdt1));
290 	dt = of_fdt_unflatten_tree(fdt, php_slot->dn, NULL);
291 	if (!dt) {
292 		ret = -EINVAL;
293 		dev_warn(&php_slot->pdev->dev, "Cannot unflatten FDT\n");
294 		goto free_fdt;
295 	}
296 
297 	/* Initialize and apply the changeset */
298 	of_changeset_init(&php_slot->ocs);
299 	pnv_php_reverse_nodes(php_slot->dn);
300 	ret = pnv_php_populate_changeset(&php_slot->ocs, php_slot->dn);
301 	if (ret) {
302 		pnv_php_reverse_nodes(php_slot->dn);
303 		dev_warn(&php_slot->pdev->dev, "Error %d populating changeset\n",
304 			 ret);
305 		goto free_dt;
306 	}
307 
308 	php_slot->dn->child = NULL;
309 	ret = of_changeset_apply(&php_slot->ocs);
310 	if (ret) {
311 		dev_warn(&php_slot->pdev->dev, "Error %d applying changeset\n",
312 			 ret);
313 		goto destroy_changeset;
314 	}
315 
316 	/* Add device node firmware data */
317 	pnv_php_add_pdns(php_slot);
318 	php_slot->fdt = fdt;
319 	php_slot->dt  = dt;
320 	kfree(fdt1);
321 	goto out;
322 
323 destroy_changeset:
324 	of_changeset_destroy(&php_slot->ocs);
325 free_dt:
326 	kfree(dt);
327 	php_slot->dn->child = NULL;
328 free_fdt:
329 	kfree(fdt);
330 free_fdt1:
331 	kfree(fdt1);
332 out:
333 	return ret;
334 }
335 
336 int pnv_php_set_slot_power_state(struct hotplug_slot *slot,
337 				 uint8_t state)
338 {
339 	struct pnv_php_slot *php_slot = slot->private;
340 	struct opal_msg msg;
341 	int ret;
342 
343 	ret = pnv_pci_set_power_state(php_slot->id, state, &msg);
344 	if (ret > 0) {
345 		if (be64_to_cpu(msg.params[1]) != php_slot->dn->phandle	||
346 		    be64_to_cpu(msg.params[2]) != state			||
347 		    be64_to_cpu(msg.params[3]) != OPAL_SUCCESS) {
348 			dev_warn(&php_slot->pdev->dev, "Wrong msg (%lld, %lld, %lld)\n",
349 				 be64_to_cpu(msg.params[1]),
350 				 be64_to_cpu(msg.params[2]),
351 				 be64_to_cpu(msg.params[3]));
352 			return -ENOMSG;
353 		}
354 	} else if (ret < 0) {
355 		dev_warn(&php_slot->pdev->dev, "Error %d powering %s\n",
356 			 ret, (state == OPAL_PCI_SLOT_POWER_ON) ? "on" : "off");
357 		return ret;
358 	}
359 
360 	if (state == OPAL_PCI_SLOT_POWER_OFF || state == OPAL_PCI_SLOT_OFFLINE)
361 		pnv_php_rmv_devtree(php_slot);
362 	else
363 		ret = pnv_php_add_devtree(php_slot);
364 
365 	return ret;
366 }
367 EXPORT_SYMBOL_GPL(pnv_php_set_slot_power_state);
368 
369 static int pnv_php_get_power_state(struct hotplug_slot *slot, u8 *state)
370 {
371 	struct pnv_php_slot *php_slot = slot->private;
372 	uint8_t power_state = OPAL_PCI_SLOT_POWER_ON;
373 	int ret;
374 
375 	/*
376 	 * Retrieve power status from firmware. If we fail
377 	 * getting that, the power status fails back to
378 	 * be on.
379 	 */
380 	ret = pnv_pci_get_power_state(php_slot->id, &power_state);
381 	if (ret) {
382 		dev_warn(&php_slot->pdev->dev, "Error %d getting power status\n",
383 			 ret);
384 	} else {
385 		*state = power_state;
386 		slot->info->power_status = power_state;
387 	}
388 
389 	return 0;
390 }
391 
392 static int pnv_php_get_adapter_state(struct hotplug_slot *slot, u8 *state)
393 {
394 	struct pnv_php_slot *php_slot = slot->private;
395 	uint8_t presence = OPAL_PCI_SLOT_EMPTY;
396 	int ret;
397 
398 	/*
399 	 * Retrieve presence status from firmware. If we can't
400 	 * get that, it will fail back to be empty.
401 	 */
402 	ret = pnv_pci_get_presence_state(php_slot->id, &presence);
403 	if (ret >= 0) {
404 		*state = presence;
405 		slot->info->adapter_status = presence;
406 		ret = 0;
407 	} else {
408 		dev_warn(&php_slot->pdev->dev, "Error %d getting presence\n",
409 			 ret);
410 	}
411 
412 	return ret;
413 }
414 
415 static int pnv_php_set_attention_state(struct hotplug_slot *slot, u8 state)
416 {
417 	/* FIXME: Make it real once firmware supports it */
418 	slot->info->attention_status = state;
419 
420 	return 0;
421 }
422 
423 static int pnv_php_enable(struct pnv_php_slot *php_slot, bool rescan)
424 {
425 	struct hotplug_slot *slot = &php_slot->slot;
426 	uint8_t presence = OPAL_PCI_SLOT_EMPTY;
427 	uint8_t power_status = OPAL_PCI_SLOT_POWER_ON;
428 	int ret;
429 
430 	/* Check if the slot has been configured */
431 	if (php_slot->state != PNV_PHP_STATE_REGISTERED)
432 		return 0;
433 
434 	/* Retrieve slot presence status */
435 	ret = pnv_php_get_adapter_state(slot, &presence);
436 	if (ret)
437 		return ret;
438 
439 	/*
440 	 * Proceed if there have nothing behind the slot. However,
441 	 * we should leave the slot in registered state at the
442 	 * beginning. Otherwise, the PCI devices inserted afterwards
443 	 * won't be probed and populated.
444 	 */
445 	if (presence == OPAL_PCI_SLOT_EMPTY) {
446 		if (!php_slot->power_state_check) {
447 			php_slot->power_state_check = true;
448 
449 			return 0;
450 		}
451 
452 		goto scan;
453 	}
454 
455 	/*
456 	 * If the power supply to the slot is off, we can't detect
457 	 * adapter presence state. That means we have to turn the
458 	 * slot on before going to probe slot's presence state.
459 	 *
460 	 * On the first time, we don't change the power status to
461 	 * boost system boot with assumption that the firmware
462 	 * supplies consistent slot power status: empty slot always
463 	 * has its power off and non-empty slot has its power on.
464 	 */
465 	if (!php_slot->power_state_check) {
466 		php_slot->power_state_check = true;
467 
468 		ret = pnv_php_get_power_state(slot, &power_status);
469 		if (ret)
470 			return ret;
471 
472 		if (power_status != OPAL_PCI_SLOT_POWER_ON)
473 			return 0;
474 	}
475 
476 	/* Check the power status. Scan the slot if it is already on */
477 	ret = pnv_php_get_power_state(slot, &power_status);
478 	if (ret)
479 		return ret;
480 
481 	if (power_status == OPAL_PCI_SLOT_POWER_ON)
482 		goto scan;
483 
484 	/* Power is off, turn it on and then scan the slot */
485 	ret = pnv_php_set_slot_power_state(slot, OPAL_PCI_SLOT_POWER_ON);
486 	if (ret)
487 		return ret;
488 
489 scan:
490 	if (presence == OPAL_PCI_SLOT_PRESENT) {
491 		if (rescan) {
492 			pci_lock_rescan_remove();
493 			pci_hp_add_devices(php_slot->bus);
494 			pci_unlock_rescan_remove();
495 		}
496 
497 		/* Rescan for child hotpluggable slots */
498 		php_slot->state = PNV_PHP_STATE_POPULATED;
499 		if (rescan)
500 			pnv_php_register(php_slot->dn);
501 	} else {
502 		php_slot->state = PNV_PHP_STATE_POPULATED;
503 	}
504 
505 	return 0;
506 }
507 
508 static int pnv_php_enable_slot(struct hotplug_slot *slot)
509 {
510 	struct pnv_php_slot *php_slot = container_of(slot,
511 						     struct pnv_php_slot, slot);
512 
513 	return pnv_php_enable(php_slot, true);
514 }
515 
516 static int pnv_php_disable_slot(struct hotplug_slot *slot)
517 {
518 	struct pnv_php_slot *php_slot = slot->private;
519 	int ret;
520 
521 	if (php_slot->state != PNV_PHP_STATE_POPULATED)
522 		return 0;
523 
524 	/* Remove all devices behind the slot */
525 	pci_lock_rescan_remove();
526 	pci_hp_remove_devices(php_slot->bus);
527 	pci_unlock_rescan_remove();
528 
529 	/* Detach the child hotpluggable slots */
530 	pnv_php_unregister(php_slot->dn);
531 
532 	/* Notify firmware and remove device nodes */
533 	ret = pnv_php_set_slot_power_state(slot, OPAL_PCI_SLOT_POWER_OFF);
534 
535 	php_slot->state = PNV_PHP_STATE_REGISTERED;
536 	return ret;
537 }
538 
539 static struct hotplug_slot_ops php_slot_ops = {
540 	.get_power_status	= pnv_php_get_power_state,
541 	.get_adapter_status	= pnv_php_get_adapter_state,
542 	.set_attention_status	= pnv_php_set_attention_state,
543 	.enable_slot		= pnv_php_enable_slot,
544 	.disable_slot		= pnv_php_disable_slot,
545 };
546 
547 static void pnv_php_release(struct hotplug_slot *slot)
548 {
549 	struct pnv_php_slot *php_slot = slot->private;
550 	unsigned long flags;
551 
552 	/* Remove from global or child list */
553 	spin_lock_irqsave(&pnv_php_lock, flags);
554 	list_del(&php_slot->link);
555 	spin_unlock_irqrestore(&pnv_php_lock, flags);
556 
557 	/* Detach from parent */
558 	pnv_php_put_slot(php_slot);
559 	pnv_php_put_slot(php_slot->parent);
560 }
561 
562 static struct pnv_php_slot *pnv_php_alloc_slot(struct device_node *dn)
563 {
564 	struct pnv_php_slot *php_slot;
565 	struct pci_bus *bus;
566 	const char *label;
567 	uint64_t id;
568 	int ret;
569 
570 	ret = of_property_read_string(dn, "ibm,slot-label", &label);
571 	if (ret)
572 		return NULL;
573 
574 	if (pnv_pci_get_slot_id(dn, &id))
575 		return NULL;
576 
577 	bus = pci_find_bus_by_node(dn);
578 	if (!bus)
579 		return NULL;
580 
581 	php_slot = kzalloc(sizeof(*php_slot), GFP_KERNEL);
582 	if (!php_slot)
583 		return NULL;
584 
585 	php_slot->name = kstrdup(label, GFP_KERNEL);
586 	if (!php_slot->name) {
587 		kfree(php_slot);
588 		return NULL;
589 	}
590 
591 	if (dn->child && PCI_DN(dn->child))
592 		php_slot->slot_no = PCI_SLOT(PCI_DN(dn->child)->devfn);
593 	else
594 		php_slot->slot_no = -1;   /* Placeholder slot */
595 
596 	kref_init(&php_slot->kref);
597 	php_slot->state	                = PNV_PHP_STATE_INITIALIZED;
598 	php_slot->dn	                = dn;
599 	php_slot->pdev	                = bus->self;
600 	php_slot->bus	                = bus;
601 	php_slot->id	                = id;
602 	php_slot->power_state_check     = false;
603 	php_slot->slot.ops              = &php_slot_ops;
604 	php_slot->slot.info             = &php_slot->slot_info;
605 	php_slot->slot.release          = pnv_php_release;
606 	php_slot->slot.private          = php_slot;
607 
608 	INIT_LIST_HEAD(&php_slot->children);
609 	INIT_LIST_HEAD(&php_slot->link);
610 
611 	return php_slot;
612 }
613 
614 static int pnv_php_register_slot(struct pnv_php_slot *php_slot)
615 {
616 	struct pnv_php_slot *parent;
617 	struct device_node *dn = php_slot->dn;
618 	unsigned long flags;
619 	int ret;
620 
621 	/* Check if the slot is registered or not */
622 	parent = pnv_php_find_slot(php_slot->dn);
623 	if (parent) {
624 		pnv_php_put_slot(parent);
625 		return -EEXIST;
626 	}
627 
628 	/* Register PCI slot */
629 	ret = pci_hp_register(&php_slot->slot, php_slot->bus,
630 			      php_slot->slot_no, php_slot->name);
631 	if (ret) {
632 		dev_warn(&php_slot->pdev->dev, "Error %d registering slot\n",
633 			 ret);
634 		return ret;
635 	}
636 
637 	/* Attach to the parent's child list or global list */
638 	while ((dn = of_get_parent(dn))) {
639 		if (!PCI_DN(dn)) {
640 			of_node_put(dn);
641 			break;
642 		}
643 
644 		parent = pnv_php_find_slot(dn);
645 		if (parent) {
646 			of_node_put(dn);
647 			break;
648 		}
649 
650 		of_node_put(dn);
651 	}
652 
653 	spin_lock_irqsave(&pnv_php_lock, flags);
654 	php_slot->parent = parent;
655 	if (parent)
656 		list_add_tail(&php_slot->link, &parent->children);
657 	else
658 		list_add_tail(&php_slot->link, &pnv_php_slot_list);
659 	spin_unlock_irqrestore(&pnv_php_lock, flags);
660 
661 	php_slot->state = PNV_PHP_STATE_REGISTERED;
662 	return 0;
663 }
664 
665 static int pnv_php_enable_msix(struct pnv_php_slot *php_slot)
666 {
667 	struct pci_dev *pdev = php_slot->pdev;
668 	struct msix_entry entry;
669 	int nr_entries, ret;
670 	u16 pcie_flag;
671 
672 	/* Get total number of MSIx entries */
673 	nr_entries = pci_msix_vec_count(pdev);
674 	if (nr_entries < 0)
675 		return nr_entries;
676 
677 	/* Check hotplug MSIx entry is in range */
678 	pcie_capability_read_word(pdev, PCI_EXP_FLAGS, &pcie_flag);
679 	entry.entry = (pcie_flag & PCI_EXP_FLAGS_IRQ) >> 9;
680 	if (entry.entry >= nr_entries)
681 		return -ERANGE;
682 
683 	/* Enable MSIx */
684 	ret = pci_enable_msix_exact(pdev, &entry, 1);
685 	if (ret) {
686 		dev_warn(&pdev->dev, "Error %d enabling MSIx\n", ret);
687 		return ret;
688 	}
689 
690 	return entry.vector;
691 }
692 
693 static void pnv_php_event_handler(struct work_struct *work)
694 {
695 	struct pnv_php_event *event =
696 		container_of(work, struct pnv_php_event, work);
697 	struct pnv_php_slot *php_slot = event->php_slot;
698 
699 	if (event->added)
700 		pnv_php_enable_slot(&php_slot->slot);
701 	else
702 		pnv_php_disable_slot(&php_slot->slot);
703 
704 	kfree(event);
705 }
706 
707 static irqreturn_t pnv_php_interrupt(int irq, void *data)
708 {
709 	struct pnv_php_slot *php_slot = data;
710 	struct pci_dev *pchild, *pdev = php_slot->pdev;
711 	struct eeh_dev *edev;
712 	struct eeh_pe *pe;
713 	struct pnv_php_event *event;
714 	u16 sts, lsts;
715 	u8 presence;
716 	bool added;
717 	unsigned long flags;
718 	int ret;
719 
720 	pcie_capability_read_word(pdev, PCI_EXP_SLTSTA, &sts);
721 	sts &= (PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_DLLSC);
722 	pcie_capability_write_word(pdev, PCI_EXP_SLTSTA, sts);
723 	if (sts & PCI_EXP_SLTSTA_DLLSC) {
724 		pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lsts);
725 		added = !!(lsts & PCI_EXP_LNKSTA_DLLLA);
726 	} else if (!(php_slot->flags & PNV_PHP_FLAG_BROKEN_PDC) &&
727 		   (sts & PCI_EXP_SLTSTA_PDC)) {
728 		ret = pnv_pci_get_presence_state(php_slot->id, &presence);
729 		if (ret) {
730 			dev_warn(&pdev->dev, "PCI slot [%s] error %d getting presence (0x%04x), to retry the operation.\n",
731 				 php_slot->name, ret, sts);
732 			return IRQ_HANDLED;
733 		}
734 
735 		added = !!(presence == OPAL_PCI_SLOT_PRESENT);
736 	} else {
737 		return IRQ_NONE;
738 	}
739 
740 	/* Freeze the removed PE to avoid unexpected error reporting */
741 	if (!added) {
742 		pchild = list_first_entry_or_null(&php_slot->bus->devices,
743 						  struct pci_dev, bus_list);
744 		edev = pchild ? pci_dev_to_eeh_dev(pchild) : NULL;
745 		pe = edev ? edev->pe : NULL;
746 		if (pe) {
747 			eeh_serialize_lock(&flags);
748 			eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
749 			eeh_serialize_unlock(flags);
750 			eeh_pe_set_option(pe, EEH_OPT_FREEZE_PE);
751 		}
752 	}
753 
754 	/*
755 	 * The PE is left in frozen state if the event is missed. It's
756 	 * fine as the PCI devices (PE) aren't functional any more.
757 	 */
758 	event = kzalloc(sizeof(*event), GFP_ATOMIC);
759 	if (!event) {
760 		dev_warn(&pdev->dev, "PCI slot [%s] missed hotplug event 0x%04x\n",
761 			 php_slot->name, sts);
762 		return IRQ_HANDLED;
763 	}
764 
765 	dev_info(&pdev->dev, "PCI slot [%s] %s (IRQ: %d)\n",
766 		 php_slot->name, added ? "added" : "removed", irq);
767 	INIT_WORK(&event->work, pnv_php_event_handler);
768 	event->added = added;
769 	event->php_slot = php_slot;
770 	queue_work(php_slot->wq, &event->work);
771 
772 	return IRQ_HANDLED;
773 }
774 
775 static void pnv_php_init_irq(struct pnv_php_slot *php_slot, int irq)
776 {
777 	struct pci_dev *pdev = php_slot->pdev;
778 	u32 broken_pdc = 0;
779 	u16 sts, ctrl;
780 	int ret;
781 
782 	/* Allocate workqueue */
783 	php_slot->wq = alloc_workqueue("pciehp-%s", 0, 0, php_slot->name);
784 	if (!php_slot->wq) {
785 		dev_warn(&pdev->dev, "Cannot alloc workqueue\n");
786 		pnv_php_disable_irq(php_slot, true);
787 		return;
788 	}
789 
790 	/* Check PDC (Presence Detection Change) is broken or not */
791 	ret = of_property_read_u32(php_slot->dn, "ibm,slot-broken-pdc",
792 				   &broken_pdc);
793 	if (!ret && broken_pdc)
794 		php_slot->flags |= PNV_PHP_FLAG_BROKEN_PDC;
795 
796 	/* Clear pending interrupts */
797 	pcie_capability_read_word(pdev, PCI_EXP_SLTSTA, &sts);
798 	if (php_slot->flags & PNV_PHP_FLAG_BROKEN_PDC)
799 		sts |= PCI_EXP_SLTSTA_DLLSC;
800 	else
801 		sts |= (PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_DLLSC);
802 	pcie_capability_write_word(pdev, PCI_EXP_SLTSTA, sts);
803 
804 	/* Request the interrupt */
805 	ret = request_irq(irq, pnv_php_interrupt, IRQF_SHARED,
806 			  php_slot->name, php_slot);
807 	if (ret) {
808 		pnv_php_disable_irq(php_slot, true);
809 		dev_warn(&pdev->dev, "Error %d enabling IRQ %d\n", ret, irq);
810 		return;
811 	}
812 
813 	/* Enable the interrupts */
814 	pcie_capability_read_word(pdev, PCI_EXP_SLTCTL, &ctrl);
815 	if (php_slot->flags & PNV_PHP_FLAG_BROKEN_PDC) {
816 		ctrl &= ~PCI_EXP_SLTCTL_PDCE;
817 		ctrl |= (PCI_EXP_SLTCTL_HPIE |
818 			 PCI_EXP_SLTCTL_DLLSCE);
819 	} else {
820 		ctrl |= (PCI_EXP_SLTCTL_HPIE |
821 			 PCI_EXP_SLTCTL_PDCE |
822 			 PCI_EXP_SLTCTL_DLLSCE);
823 	}
824 	pcie_capability_write_word(pdev, PCI_EXP_SLTCTL, ctrl);
825 
826 	/* The interrupt is initialized successfully when @irq is valid */
827 	php_slot->irq = irq;
828 }
829 
830 static void pnv_php_enable_irq(struct pnv_php_slot *php_slot)
831 {
832 	struct pci_dev *pdev = php_slot->pdev;
833 	int irq, ret;
834 
835 	/*
836 	 * The MSI/MSIx interrupt might have been occupied by other
837 	 * drivers. Don't populate the surprise hotplug capability
838 	 * in that case.
839 	 */
840 	if (pci_dev_msi_enabled(pdev))
841 		return;
842 
843 	ret = pci_enable_device(pdev);
844 	if (ret) {
845 		dev_warn(&pdev->dev, "Error %d enabling device\n", ret);
846 		return;
847 	}
848 
849 	pci_set_master(pdev);
850 
851 	/* Enable MSIx interrupt */
852 	irq = pnv_php_enable_msix(php_slot);
853 	if (irq > 0) {
854 		pnv_php_init_irq(php_slot, irq);
855 		return;
856 	}
857 
858 	/*
859 	 * Use MSI if MSIx doesn't work. Fail back to legacy INTx
860 	 * if MSI doesn't work either
861 	 */
862 	ret = pci_enable_msi(pdev);
863 	if (!ret || pdev->irq) {
864 		irq = pdev->irq;
865 		pnv_php_init_irq(php_slot, irq);
866 	}
867 }
868 
869 static int pnv_php_register_one(struct device_node *dn)
870 {
871 	struct pnv_php_slot *php_slot;
872 	u32 prop32;
873 	int ret;
874 
875 	/* Check if it's hotpluggable slot */
876 	ret = of_property_read_u32(dn, "ibm,slot-pluggable", &prop32);
877 	if (ret || !prop32)
878 		return -ENXIO;
879 
880 	ret = of_property_read_u32(dn, "ibm,reset-by-firmware", &prop32);
881 	if (ret || !prop32)
882 		return -ENXIO;
883 
884 	php_slot = pnv_php_alloc_slot(dn);
885 	if (!php_slot)
886 		return -ENODEV;
887 
888 	ret = pnv_php_register_slot(php_slot);
889 	if (ret)
890 		goto free_slot;
891 
892 	ret = pnv_php_enable(php_slot, false);
893 	if (ret)
894 		goto unregister_slot;
895 
896 	/* Enable interrupt if the slot supports surprise hotplug */
897 	ret = of_property_read_u32(dn, "ibm,slot-surprise-pluggable", &prop32);
898 	if (!ret && prop32)
899 		pnv_php_enable_irq(php_slot);
900 
901 	return 0;
902 
903 unregister_slot:
904 	pnv_php_unregister_one(php_slot->dn);
905 free_slot:
906 	pnv_php_put_slot(php_slot);
907 	return ret;
908 }
909 
910 static void pnv_php_register(struct device_node *dn)
911 {
912 	struct device_node *child;
913 
914 	/*
915 	 * The parent slots should be registered before their
916 	 * child slots.
917 	 */
918 	for_each_child_of_node(dn, child) {
919 		pnv_php_register_one(child);
920 		pnv_php_register(child);
921 	}
922 }
923 
924 static void pnv_php_unregister_one(struct device_node *dn)
925 {
926 	struct pnv_php_slot *php_slot;
927 
928 	php_slot = pnv_php_find_slot(dn);
929 	if (!php_slot)
930 		return;
931 
932 	php_slot->state = PNV_PHP_STATE_OFFLINE;
933 	pnv_php_put_slot(php_slot);
934 	pci_hp_deregister(&php_slot->slot);
935 }
936 
937 static void pnv_php_unregister(struct device_node *dn)
938 {
939 	struct device_node *child;
940 
941 	/* The child slots should go before their parent slots */
942 	for_each_child_of_node(dn, child) {
943 		pnv_php_unregister(child);
944 		pnv_php_unregister_one(child);
945 	}
946 }
947 
948 static int __init pnv_php_init(void)
949 {
950 	struct device_node *dn;
951 
952 	pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n");
953 	for_each_compatible_node(dn, NULL, "ibm,ioda2-phb")
954 		pnv_php_register(dn);
955 
956 	return 0;
957 }
958 
959 static void __exit pnv_php_exit(void)
960 {
961 	struct device_node *dn;
962 
963 	for_each_compatible_node(dn, NULL, "ibm,ioda2-phb")
964 		pnv_php_unregister(dn);
965 }
966 
967 module_init(pnv_php_init);
968 module_exit(pnv_php_exit);
969 
970 MODULE_VERSION(DRIVER_VERSION);
971 MODULE_LICENSE("GPL v2");
972 MODULE_AUTHOR(DRIVER_AUTHOR);
973 MODULE_DESCRIPTION(DRIVER_DESC);
974