xref: /openbmc/linux/arch/s390/pci/pci.c (revision 56b5b1c7)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright IBM Corp. 2012
4  *
5  * Author(s):
6  *   Jan Glauber <jang@linux.vnet.ibm.com>
7  *
8  * The System z PCI code is a rewrite from a prototype by
9  * the following people (Kudoz!):
10  *   Alexander Schmidt
11  *   Christoph Raisch
12  *   Hannes Hering
13  *   Hoang-Nam Nguyen
14  *   Jan-Bernd Themann
15  *   Stefan Roscher
16  *   Thomas Klein
17  */
18 
19 #define KMSG_COMPONENT "zpci"
20 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
21 
22 #include <linux/kernel.h>
23 #include <linux/slab.h>
24 #include <linux/err.h>
25 #include <linux/export.h>
26 #include <linux/delay.h>
27 #include <linux/seq_file.h>
28 #include <linux/jump_label.h>
29 #include <linux/pci.h>
30 #include <linux/printk.h>
31 
32 #include <asm/isc.h>
33 #include <asm/airq.h>
34 #include <asm/facility.h>
35 #include <asm/pci_insn.h>
36 #include <asm/pci_clp.h>
37 #include <asm/pci_dma.h>
38 
39 #include "pci_bus.h"
40 #include "pci_iov.h"
41 
/* All detected zpci devices; the list is protected by zpci_list_lock */
static DEFINE_SPINLOCK(zpci_list_lock);
static LIST_HEAD(zpci_list);

/* PCI domain numbers currently in use; protected by zpci_domain_lock */
static DEFINE_SPINLOCK(zpci_domain_lock);
static DECLARE_BITMAP(zpci_domain, ZPCI_DOMAIN_BITMAP_SIZE);

/* Number of slots in the iomap table, capped at ZPCI_IOMAP_MAX_ENTRIES */
#define ZPCI_IOMAP_ENTRIES						\
	min(((unsigned long) ZPCI_NR_DEVICES * PCI_STD_NUM_BARS / 2),	\
	    ZPCI_IOMAP_MAX_ENTRIES)

/* Set to 1 by the "norid" option, see pcibios_setup() */
unsigned int s390_pci_no_rid;

/*
 * iomap table and its allocation bitmap, both allocated in zpci_mem_init().
 * Updates to either are done under zpci_iomap_lock.
 */
static DEFINE_SPINLOCK(zpci_iomap_lock);
struct zpci_iomap_entry *zpci_iomap_start;
EXPORT_SYMBOL_GPL(zpci_iomap_start);
static unsigned long *zpci_iomap_bitmap;

/* Enabled in pci_base_init() when MACHINE_HAS_PCI_MIO is true */
DEFINE_STATIC_KEY_FALSE(have_mio);

/* Slab cache for function measurement blocks, see zpci_mem_init() */
static struct kmem_cache *zdev_fmb_cache;
63 
64 struct zpci_dev *get_zdev_by_fid(u32 fid)
65 {
66 	struct zpci_dev *tmp, *zdev = NULL;
67 
68 	spin_lock(&zpci_list_lock);
69 	list_for_each_entry(tmp, &zpci_list, entry) {
70 		if (tmp->fid == fid) {
71 			zdev = tmp;
72 			break;
73 		}
74 	}
75 	spin_unlock(&zpci_list_lock);
76 	return zdev;
77 }
78 
79 void zpci_remove_reserved_devices(void)
80 {
81 	struct zpci_dev *tmp, *zdev;
82 	enum zpci_state state;
83 	LIST_HEAD(remove);
84 
85 	spin_lock(&zpci_list_lock);
86 	list_for_each_entry_safe(zdev, tmp, &zpci_list, entry) {
87 		if (zdev->state == ZPCI_FN_STATE_STANDBY &&
88 		    !clp_get_state(zdev->fid, &state) &&
89 		    state == ZPCI_FN_STATE_RESERVED)
90 			list_move_tail(&zdev->entry, &remove);
91 	}
92 	spin_unlock(&zpci_list_lock);
93 
94 	list_for_each_entry_safe(zdev, tmp, &remove, entry)
95 		zpci_device_reserved(zdev);
96 }
97 
98 int pci_domain_nr(struct pci_bus *bus)
99 {
100 	return ((struct zpci_bus *) bus->sysdata)->domain_nr;
101 }
102 EXPORT_SYMBOL_GPL(pci_domain_nr);
103 
/* Same value as pci_domain_nr() for the given bus */
int pci_proc_domain(struct pci_bus *bus)
{
	int domain = pci_domain_nr(bus);

	return domain;
}
EXPORT_SYMBOL_GPL(pci_proc_domain);
109 
110 /* Modify PCI: Register I/O address translation parameters */
111 int zpci_register_ioat(struct zpci_dev *zdev, u8 dmaas,
112 		       u64 base, u64 limit, u64 iota)
113 {
114 	u64 req = ZPCI_CREATE_REQ(zdev->fh, dmaas, ZPCI_MOD_FC_REG_IOAT);
115 	struct zpci_fib fib = {0};
116 	u8 cc, status;
117 
118 	WARN_ON_ONCE(iota & 0x3fff);
119 	fib.pba = base;
120 	fib.pal = limit;
121 	fib.iota = iota | ZPCI_IOTA_RTTO_FLAG;
122 	cc = zpci_mod_fc(req, &fib, &status);
123 	if (cc)
124 		zpci_dbg(3, "reg ioat fid:%x, cc:%d, status:%d\n", zdev->fid, cc, status);
125 	return cc;
126 }
127 
128 /* Modify PCI: Unregister I/O address translation parameters */
129 int zpci_unregister_ioat(struct zpci_dev *zdev, u8 dmaas)
130 {
131 	u64 req = ZPCI_CREATE_REQ(zdev->fh, dmaas, ZPCI_MOD_FC_DEREG_IOAT);
132 	struct zpci_fib fib = {0};
133 	u8 cc, status;
134 
135 	cc = zpci_mod_fc(req, &fib, &status);
136 	if (cc)
137 		zpci_dbg(3, "unreg ioat fid:%x, cc:%d, status:%d\n", zdev->fid, cc, status);
138 	return cc;
139 }
140 
141 /* Modify PCI: Set PCI function measurement parameters */
142 int zpci_fmb_enable_device(struct zpci_dev *zdev)
143 {
144 	u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_SET_MEASURE);
145 	struct zpci_fib fib = {0};
146 	u8 cc, status;
147 
148 	if (zdev->fmb || sizeof(*zdev->fmb) < zdev->fmb_length)
149 		return -EINVAL;
150 
151 	zdev->fmb = kmem_cache_zalloc(zdev_fmb_cache, GFP_KERNEL);
152 	if (!zdev->fmb)
153 		return -ENOMEM;
154 	WARN_ON((u64) zdev->fmb & 0xf);
155 
156 	/* reset software counters */
157 	atomic64_set(&zdev->allocated_pages, 0);
158 	atomic64_set(&zdev->mapped_pages, 0);
159 	atomic64_set(&zdev->unmapped_pages, 0);
160 
161 	fib.fmb_addr = virt_to_phys(zdev->fmb);
162 	cc = zpci_mod_fc(req, &fib, &status);
163 	if (cc) {
164 		kmem_cache_free(zdev_fmb_cache, zdev->fmb);
165 		zdev->fmb = NULL;
166 	}
167 	return cc ? -EIO : 0;
168 }
169 
170 /* Modify PCI: Disable PCI function measurement */
171 int zpci_fmb_disable_device(struct zpci_dev *zdev)
172 {
173 	u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_SET_MEASURE);
174 	struct zpci_fib fib = {0};
175 	u8 cc, status;
176 
177 	if (!zdev->fmb)
178 		return -EINVAL;
179 
180 	/* Function measurement is disabled if fmb address is zero */
181 	cc = zpci_mod_fc(req, &fib, &status);
182 	if (cc == 3) /* Function already gone. */
183 		cc = 0;
184 
185 	if (!cc) {
186 		kmem_cache_free(zdev_fmb_cache, zdev->fmb);
187 		zdev->fmb = NULL;
188 	}
189 	return cc ? -EIO : 0;
190 }
191 
192 static int zpci_cfg_load(struct zpci_dev *zdev, int offset, u32 *val, u8 len)
193 {
194 	u64 req = ZPCI_CREATE_REQ(zdev->fh, ZPCI_PCIAS_CFGSPC, len);
195 	u64 data;
196 	int rc;
197 
198 	rc = __zpci_load(&data, req, offset);
199 	if (!rc) {
200 		data = le64_to_cpu((__force __le64) data);
201 		data >>= (8 - len) * 8;
202 		*val = (u32) data;
203 	} else
204 		*val = 0xffffffff;
205 	return rc;
206 }
207 
208 static int zpci_cfg_store(struct zpci_dev *zdev, int offset, u32 val, u8 len)
209 {
210 	u64 req = ZPCI_CREATE_REQ(zdev->fh, ZPCI_PCIAS_CFGSPC, len);
211 	u64 data = val;
212 	int rc;
213 
214 	data <<= (8 - len) * 8;
215 	data = (__force u64) cpu_to_le64(data);
216 	rc = __zpci_store(data, req, offset);
217 	return rc;
218 }
219 
220 resource_size_t pcibios_align_resource(void *data, const struct resource *res,
221 				       resource_size_t size,
222 				       resource_size_t align)
223 {
224 	return 0;
225 }
226 
227 /* combine single writes by using store-block insn */
228 void __iowrite64_copy(void __iomem *to, const void *from, size_t count)
229 {
230        zpci_memcpy_toio(to, from, count);
231 }
232 
233 static void __iomem *__ioremap(phys_addr_t addr, size_t size, pgprot_t prot)
234 {
235 	unsigned long offset, vaddr;
236 	struct vm_struct *area;
237 	phys_addr_t last_addr;
238 
239 	last_addr = addr + size - 1;
240 	if (!size || last_addr < addr)
241 		return NULL;
242 
243 	if (!static_branch_unlikely(&have_mio))
244 		return (void __iomem *) addr;
245 
246 	offset = addr & ~PAGE_MASK;
247 	addr &= PAGE_MASK;
248 	size = PAGE_ALIGN(size + offset);
249 	area = get_vm_area(size, VM_IOREMAP);
250 	if (!area)
251 		return NULL;
252 
253 	vaddr = (unsigned long) area->addr;
254 	if (ioremap_page_range(vaddr, vaddr + size, addr, prot)) {
255 		free_vm_area(area);
256 		return NULL;
257 	}
258 	return (void __iomem *) ((unsigned long) area->addr + offset);
259 }
260 
261 void __iomem *ioremap_prot(phys_addr_t addr, size_t size, unsigned long prot)
262 {
263 	return __ioremap(addr, size, __pgprot(prot));
264 }
265 EXPORT_SYMBOL(ioremap_prot);
266 
267 void __iomem *ioremap(phys_addr_t addr, size_t size)
268 {
269 	return __ioremap(addr, size, PAGE_KERNEL);
270 }
271 EXPORT_SYMBOL(ioremap);
272 
273 void __iomem *ioremap_wc(phys_addr_t addr, size_t size)
274 {
275 	return __ioremap(addr, size, pgprot_writecombine(PAGE_KERNEL));
276 }
277 EXPORT_SYMBOL(ioremap_wc);
278 
279 void __iomem *ioremap_wt(phys_addr_t addr, size_t size)
280 {
281 	return __ioremap(addr, size, pgprot_writethrough(PAGE_KERNEL));
282 }
283 EXPORT_SYMBOL(ioremap_wt);
284 
285 void iounmap(volatile void __iomem *addr)
286 {
287 	if (static_branch_likely(&have_mio))
288 		vunmap((__force void *) ((unsigned long) addr & PAGE_MASK));
289 }
290 EXPORT_SYMBOL(iounmap);
291 
292 /* Create a virtual mapping cookie for a PCI BAR */
293 static void __iomem *pci_iomap_range_fh(struct pci_dev *pdev, int bar,
294 					unsigned long offset, unsigned long max)
295 {
296 	struct zpci_dev *zdev =	to_zpci(pdev);
297 	int idx;
298 
299 	idx = zdev->bars[bar].map_idx;
300 	spin_lock(&zpci_iomap_lock);
301 	/* Detect overrun */
302 	WARN_ON(!++zpci_iomap_start[idx].count);
303 	zpci_iomap_start[idx].fh = zdev->fh;
304 	zpci_iomap_start[idx].bar = bar;
305 	spin_unlock(&zpci_iomap_lock);
306 
307 	return (void __iomem *) ZPCI_ADDR(idx) + offset;
308 }
309 
310 static void __iomem *pci_iomap_range_mio(struct pci_dev *pdev, int bar,
311 					 unsigned long offset,
312 					 unsigned long max)
313 {
314 	unsigned long barsize = pci_resource_len(pdev, bar);
315 	struct zpci_dev *zdev = to_zpci(pdev);
316 	void __iomem *iova;
317 
318 	iova = ioremap((unsigned long) zdev->bars[bar].mio_wt, barsize);
319 	return iova ? iova + offset : iova;
320 }
321 
322 void __iomem *pci_iomap_range(struct pci_dev *pdev, int bar,
323 			      unsigned long offset, unsigned long max)
324 {
325 	if (bar >= PCI_STD_NUM_BARS || !pci_resource_len(pdev, bar))
326 		return NULL;
327 
328 	if (static_branch_likely(&have_mio))
329 		return pci_iomap_range_mio(pdev, bar, offset, max);
330 	else
331 		return pci_iomap_range_fh(pdev, bar, offset, max);
332 }
333 EXPORT_SYMBOL(pci_iomap_range);
334 
335 void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen)
336 {
337 	return pci_iomap_range(dev, bar, 0, maxlen);
338 }
339 EXPORT_SYMBOL(pci_iomap);
340 
341 static void __iomem *pci_iomap_wc_range_mio(struct pci_dev *pdev, int bar,
342 					    unsigned long offset, unsigned long max)
343 {
344 	unsigned long barsize = pci_resource_len(pdev, bar);
345 	struct zpci_dev *zdev = to_zpci(pdev);
346 	void __iomem *iova;
347 
348 	iova = ioremap((unsigned long) zdev->bars[bar].mio_wb, barsize);
349 	return iova ? iova + offset : iova;
350 }
351 
352 void __iomem *pci_iomap_wc_range(struct pci_dev *pdev, int bar,
353 				 unsigned long offset, unsigned long max)
354 {
355 	if (bar >= PCI_STD_NUM_BARS || !pci_resource_len(pdev, bar))
356 		return NULL;
357 
358 	if (static_branch_likely(&have_mio))
359 		return pci_iomap_wc_range_mio(pdev, bar, offset, max);
360 	else
361 		return pci_iomap_range_fh(pdev, bar, offset, max);
362 }
363 EXPORT_SYMBOL(pci_iomap_wc_range);
364 
365 void __iomem *pci_iomap_wc(struct pci_dev *dev, int bar, unsigned long maxlen)
366 {
367 	return pci_iomap_wc_range(dev, bar, 0, maxlen);
368 }
369 EXPORT_SYMBOL(pci_iomap_wc);
370 
371 static void pci_iounmap_fh(struct pci_dev *pdev, void __iomem *addr)
372 {
373 	unsigned int idx = ZPCI_IDX(addr);
374 
375 	spin_lock(&zpci_iomap_lock);
376 	/* Detect underrun */
377 	WARN_ON(!zpci_iomap_start[idx].count);
378 	if (!--zpci_iomap_start[idx].count) {
379 		zpci_iomap_start[idx].fh = 0;
380 		zpci_iomap_start[idx].bar = 0;
381 	}
382 	spin_unlock(&zpci_iomap_lock);
383 }
384 
385 static void pci_iounmap_mio(struct pci_dev *pdev, void __iomem *addr)
386 {
387 	iounmap(addr);
388 }
389 
390 void pci_iounmap(struct pci_dev *pdev, void __iomem *addr)
391 {
392 	if (static_branch_likely(&have_mio))
393 		pci_iounmap_mio(pdev, addr);
394 	else
395 		pci_iounmap_fh(pdev, addr);
396 }
397 EXPORT_SYMBOL(pci_iounmap);
398 
399 static int pci_read(struct pci_bus *bus, unsigned int devfn, int where,
400 		    int size, u32 *val)
401 {
402 	struct zpci_dev *zdev = get_zdev_by_bus(bus, devfn);
403 
404 	return (zdev) ? zpci_cfg_load(zdev, where, val, size) : -ENODEV;
405 }
406 
407 static int pci_write(struct pci_bus *bus, unsigned int devfn, int where,
408 		     int size, u32 val)
409 {
410 	struct zpci_dev *zdev = get_zdev_by_bus(bus, devfn);
411 
412 	return (zdev) ? zpci_cfg_store(zdev, where, val, size) : -ENODEV;
413 }
414 
415 static struct pci_ops pci_root_ops = {
416 	.read = pci_read,
417 	.write = pci_write,
418 };
419 
420 static void zpci_map_resources(struct pci_dev *pdev)
421 {
422 	struct zpci_dev *zdev = to_zpci(pdev);
423 	resource_size_t len;
424 	int i;
425 
426 	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
427 		len = pci_resource_len(pdev, i);
428 		if (!len)
429 			continue;
430 
431 		if (zpci_use_mio(zdev))
432 			pdev->resource[i].start =
433 				(resource_size_t __force) zdev->bars[i].mio_wt;
434 		else
435 			pdev->resource[i].start = (resource_size_t __force)
436 				pci_iomap_range_fh(pdev, i, 0, 0);
437 		pdev->resource[i].end = pdev->resource[i].start + len - 1;
438 	}
439 
440 	zpci_iov_map_resources(pdev);
441 }
442 
443 static void zpci_unmap_resources(struct pci_dev *pdev)
444 {
445 	struct zpci_dev *zdev = to_zpci(pdev);
446 	resource_size_t len;
447 	int i;
448 
449 	if (zpci_use_mio(zdev))
450 		return;
451 
452 	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
453 		len = pci_resource_len(pdev, i);
454 		if (!len)
455 			continue;
456 		pci_iounmap_fh(pdev, (void __iomem __force *)
457 			       pdev->resource[i].start);
458 	}
459 }
460 
461 static int zpci_alloc_iomap(struct zpci_dev *zdev)
462 {
463 	unsigned long entry;
464 
465 	spin_lock(&zpci_iomap_lock);
466 	entry = find_first_zero_bit(zpci_iomap_bitmap, ZPCI_IOMAP_ENTRIES);
467 	if (entry == ZPCI_IOMAP_ENTRIES) {
468 		spin_unlock(&zpci_iomap_lock);
469 		return -ENOSPC;
470 	}
471 	set_bit(entry, zpci_iomap_bitmap);
472 	spin_unlock(&zpci_iomap_lock);
473 	return entry;
474 }
475 
476 static void zpci_free_iomap(struct zpci_dev *zdev, int entry)
477 {
478 	spin_lock(&zpci_iomap_lock);
479 	memset(&zpci_iomap_start[entry], 0, sizeof(struct zpci_iomap_entry));
480 	clear_bit(entry, zpci_iomap_bitmap);
481 	spin_unlock(&zpci_iomap_lock);
482 }
483 
484 static void zpci_do_update_iomap_fh(struct zpci_dev *zdev, u32 fh)
485 {
486 	int bar, idx;
487 
488 	spin_lock(&zpci_iomap_lock);
489 	for (bar = 0; bar < PCI_STD_NUM_BARS; bar++) {
490 		if (!zdev->bars[bar].size)
491 			continue;
492 		idx = zdev->bars[bar].map_idx;
493 		if (!zpci_iomap_start[idx].count)
494 			continue;
495 		WRITE_ONCE(zpci_iomap_start[idx].fh, zdev->fh);
496 	}
497 	spin_unlock(&zpci_iomap_lock);
498 }
499 
500 void zpci_update_fh(struct zpci_dev *zdev, u32 fh)
501 {
502 	if (!fh || zdev->fh == fh)
503 		return;
504 
505 	zdev->fh = fh;
506 	if (zpci_use_mio(zdev))
507 		return;
508 	if (zdev->has_resources && zdev_enabled(zdev))
509 		zpci_do_update_iomap_fh(zdev, fh);
510 }
511 
512 static struct resource *__alloc_res(struct zpci_dev *zdev, unsigned long start,
513 				    unsigned long size, unsigned long flags)
514 {
515 	struct resource *r;
516 
517 	r = kzalloc(sizeof(*r), GFP_KERNEL);
518 	if (!r)
519 		return NULL;
520 
521 	r->start = start;
522 	r->end = r->start + size - 1;
523 	r->flags = flags;
524 	r->name = zdev->res_name;
525 
526 	if (request_resource(&iomem_resource, r)) {
527 		kfree(r);
528 		return NULL;
529 	}
530 	return r;
531 }
532 
533 int zpci_setup_bus_resources(struct zpci_dev *zdev,
534 			     struct list_head *resources)
535 {
536 	unsigned long addr, size, flags;
537 	struct resource *res;
538 	int i, entry;
539 
540 	snprintf(zdev->res_name, sizeof(zdev->res_name),
541 		 "PCI Bus %04x:%02x", zdev->uid, ZPCI_BUS_NR);
542 
543 	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
544 		if (!zdev->bars[i].size)
545 			continue;
546 		entry = zpci_alloc_iomap(zdev);
547 		if (entry < 0)
548 			return entry;
549 		zdev->bars[i].map_idx = entry;
550 
551 		/* only MMIO is supported */
552 		flags = IORESOURCE_MEM;
553 		if (zdev->bars[i].val & 8)
554 			flags |= IORESOURCE_PREFETCH;
555 		if (zdev->bars[i].val & 4)
556 			flags |= IORESOURCE_MEM_64;
557 
558 		if (zpci_use_mio(zdev))
559 			addr = (unsigned long) zdev->bars[i].mio_wt;
560 		else
561 			addr = ZPCI_ADDR(entry);
562 		size = 1UL << zdev->bars[i].size;
563 
564 		res = __alloc_res(zdev, addr, size, flags);
565 		if (!res) {
566 			zpci_free_iomap(zdev, entry);
567 			return -ENOMEM;
568 		}
569 		zdev->bars[i].res = res;
570 		pci_add_resource(resources, res);
571 	}
572 	zdev->has_resources = 1;
573 
574 	return 0;
575 }
576 
577 static void zpci_cleanup_bus_resources(struct zpci_dev *zdev)
578 {
579 	int i;
580 
581 	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
582 		if (!zdev->bars[i].size || !zdev->bars[i].res)
583 			continue;
584 
585 		zpci_free_iomap(zdev, zdev->bars[i].map_idx);
586 		release_resource(zdev->bars[i].res);
587 		kfree(zdev->bars[i].res);
588 	}
589 	zdev->has_resources = 0;
590 }
591 
592 int pcibios_device_add(struct pci_dev *pdev)
593 {
594 	struct zpci_dev *zdev = to_zpci(pdev);
595 	struct resource *res;
596 	int i;
597 
598 	/* The pdev has a reference to the zdev via its bus */
599 	zpci_zdev_get(zdev);
600 	if (pdev->is_physfn)
601 		pdev->no_vf_scan = 1;
602 
603 	pdev->dev.groups = zpci_attr_groups;
604 	pdev->dev.dma_ops = &s390_pci_dma_ops;
605 	zpci_map_resources(pdev);
606 
607 	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
608 		res = &pdev->resource[i];
609 		if (res->parent || !res->flags)
610 			continue;
611 		pci_claim_resource(pdev, i);
612 	}
613 
614 	return 0;
615 }
616 
/*
 * Architecture hook run when a PCI device is released: unmap its BAR
 * resources and drop the zpci device reference taken in
 * pcibios_device_add().
 */
void pcibios_release_device(struct pci_dev *pdev)
{
	struct zpci_dev *zdev;

	zdev = to_zpci(pdev);
	zpci_unmap_resources(pdev);
	zpci_zdev_put(zdev);
}
624 
625 int pcibios_enable_device(struct pci_dev *pdev, int mask)
626 {
627 	struct zpci_dev *zdev = to_zpci(pdev);
628 
629 	zpci_debug_init_device(zdev, dev_name(&pdev->dev));
630 	zpci_fmb_enable_device(zdev);
631 
632 	return pci_enable_resources(pdev, mask);
633 }
634 
/*
 * Architecture hook for disabling a PCI device: turn off function
 * measurement (its result is ignored) and tear down the per-device debug
 * facilities.
 */
void pcibios_disable_device(struct pci_dev *pdev)
{
	struct zpci_dev *zdev;

	zdev = to_zpci(pdev);
	zpci_fmb_disable_device(zdev);
	zpci_debug_exit_device(zdev);
}
642 
643 static int __zpci_register_domain(int domain)
644 {
645 	spin_lock(&zpci_domain_lock);
646 	if (test_bit(domain, zpci_domain)) {
647 		spin_unlock(&zpci_domain_lock);
648 		pr_err("Domain %04x is already assigned\n", domain);
649 		return -EEXIST;
650 	}
651 	set_bit(domain, zpci_domain);
652 	spin_unlock(&zpci_domain_lock);
653 	return domain;
654 }
655 
656 static int __zpci_alloc_domain(void)
657 {
658 	int domain;
659 
660 	spin_lock(&zpci_domain_lock);
661 	/*
662 	 * We can always auto allocate domains below ZPCI_NR_DEVICES.
663 	 * There is either a free domain or we have reached the maximum in
664 	 * which case we would have bailed earlier.
665 	 */
666 	domain = find_first_zero_bit(zpci_domain, ZPCI_NR_DEVICES);
667 	set_bit(domain, zpci_domain);
668 	spin_unlock(&zpci_domain_lock);
669 	return domain;
670 }
671 
672 int zpci_alloc_domain(int domain)
673 {
674 	if (zpci_unique_uid) {
675 		if (domain)
676 			return __zpci_register_domain(domain);
677 		pr_warn("UID checking was active but no UID is provided: switching to automatic domain allocation\n");
678 		update_uid_checking(false);
679 	}
680 	return __zpci_alloc_domain();
681 }
682 
683 void zpci_free_domain(int domain)
684 {
685 	spin_lock(&zpci_domain_lock);
686 	clear_bit(domain, zpci_domain);
687 	spin_unlock(&zpci_domain_lock);
688 }
689 
690 
691 int zpci_enable_device(struct zpci_dev *zdev)
692 {
693 	u32 fh = zdev->fh;
694 	int rc = 0;
695 
696 	if (clp_enable_fh(zdev, &fh, ZPCI_NR_DMA_SPACES))
697 		rc = -EIO;
698 	else
699 		zpci_update_fh(zdev, fh);
700 	return rc;
701 }
702 
703 int zpci_disable_device(struct zpci_dev *zdev)
704 {
705 	u32 fh = zdev->fh;
706 	int cc, rc = 0;
707 
708 	cc = clp_disable_fh(zdev, &fh);
709 	if (!cc) {
710 		zpci_update_fh(zdev, fh);
711 	} else if (cc == CLP_RC_SETPCIFN_ALRDY) {
712 		pr_info("Disabling PCI function %08x had no effect as it was already disabled\n",
713 			zdev->fid);
714 		/* Function is already disabled - update handle */
715 		rc = clp_refresh_fh(zdev->fid, &fh);
716 		if (!rc) {
717 			zpci_update_fh(zdev, fh);
718 			rc = -EINVAL;
719 		}
720 	} else {
721 		rc = -EIO;
722 	}
723 	return rc;
724 }
725 
726 /**
727  * zpci_hot_reset_device - perform a reset of the given zPCI function
728  * @zdev: the slot which should be reset
729  *
730  * Performs a low level reset of the zPCI function. The reset is low level in
731  * the sense that the zPCI function can be reset without detaching it from the
732  * common PCI subsystem. The reset may be performed while under control of
733  * either DMA or IOMMU APIs in which case the existing DMA/IOMMU translation
734  * table is reinstated at the end of the reset.
735  *
736  * After the reset the functions internal state is reset to an initial state
737  * equivalent to its state during boot when first probing a driver.
738  * Consequently after reset the PCI function requires re-initialization via the
739  * common PCI code including re-enabling IRQs via pci_alloc_irq_vectors()
740  * and enabling the function via e.g.pci_enablde_device_flags().The caller
741  * must guard against concurrent reset attempts.
742  *
743  * In most cases this function should not be called directly but through
744  * pci_reset_function() or pci_reset_bus() which handle the save/restore and
745  * locking.
746  *
747  * Return: 0 on success and an error value otherwise
748  */
749 int zpci_hot_reset_device(struct zpci_dev *zdev)
750 {
751 	int rc;
752 
753 	zpci_dbg(3, "rst fid:%x, fh:%x\n", zdev->fid, zdev->fh);
754 	if (zdev_enabled(zdev)) {
755 		/* Disables device access, DMAs and IRQs (reset state) */
756 		rc = zpci_disable_device(zdev);
757 		/*
758 		 * Due to a z/VM vs LPAR inconsistency in the error state the
759 		 * FH may indicate an enabled device but disable says the
760 		 * device is already disabled don't treat it as an error here.
761 		 */
762 		if (rc == -EINVAL)
763 			rc = 0;
764 		if (rc)
765 			return rc;
766 	}
767 
768 	rc = zpci_enable_device(zdev);
769 	if (rc)
770 		return rc;
771 
772 	if (zdev->dma_table)
773 		rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
774 					virt_to_phys(zdev->dma_table));
775 	else
776 		rc = zpci_dma_init_device(zdev);
777 	if (rc) {
778 		zpci_disable_device(zdev);
779 		return rc;
780 	}
781 
782 	return 0;
783 }
784 
785 /**
786  * zpci_create_device() - Create a new zpci_dev and add it to the zbus
787  * @fid: Function ID of the device to be created
788  * @fh: Current Function Handle of the device to be created
789  * @state: Initial state after creation either Standby or Configured
790  *
791  * Creates a new zpci device and adds it to its, possibly newly created, zbus
792  * as well as zpci_list.
793  *
794  * Returns: the zdev on success or an error pointer otherwise
795  */
796 struct zpci_dev *zpci_create_device(u32 fid, u32 fh, enum zpci_state state)
797 {
798 	struct zpci_dev *zdev;
799 	int rc;
800 
801 	zpci_dbg(3, "add fid:%x, fh:%x, c:%d\n", fid, fh, state);
802 	zdev = kzalloc(sizeof(*zdev), GFP_KERNEL);
803 	if (!zdev)
804 		return ERR_PTR(-ENOMEM);
805 
806 	/* FID and Function Handle are the static/dynamic identifiers */
807 	zdev->fid = fid;
808 	zdev->fh = fh;
809 
810 	/* Query function properties and update zdev */
811 	rc = clp_query_pci_fn(zdev);
812 	if (rc)
813 		goto error;
814 	zdev->state =  state;
815 
816 	kref_init(&zdev->kref);
817 	mutex_init(&zdev->lock);
818 
819 	rc = zpci_init_iommu(zdev);
820 	if (rc)
821 		goto error;
822 
823 	rc = zpci_bus_device_register(zdev, &pci_root_ops);
824 	if (rc)
825 		goto error_destroy_iommu;
826 
827 	spin_lock(&zpci_list_lock);
828 	list_add_tail(&zdev->entry, &zpci_list);
829 	spin_unlock(&zpci_list_lock);
830 
831 	return zdev;
832 
833 error_destroy_iommu:
834 	zpci_destroy_iommu(zdev);
835 error:
836 	zpci_dbg(0, "add fid:%x, rc:%d\n", fid, rc);
837 	kfree(zdev);
838 	return ERR_PTR(rc);
839 }
840 
841 bool zpci_is_device_configured(struct zpci_dev *zdev)
842 {
843 	enum zpci_state state = zdev->state;
844 
845 	return state != ZPCI_FN_STATE_RESERVED &&
846 		state != ZPCI_FN_STATE_STANDBY;
847 }
848 
849 /**
850  * zpci_scan_configured_device() - Scan a freshly configured zpci_dev
851  * @zdev: The zpci_dev to be configured
852  * @fh: The general function handle supplied by the platform
853  *
854  * Given a device in the configuration state Configured, enables, scans and
855  * adds it to the common code PCI subsystem if possible. If the PCI device is
856  * parked because we can not yet create a PCI bus because we have not seen
857  * function 0, it is ignored but will be scanned once function 0 appears.
858  * If any failure occurs, the zpci_dev is left disabled.
859  *
860  * Return: 0 on success, or an error code otherwise
861  */
862 int zpci_scan_configured_device(struct zpci_dev *zdev, u32 fh)
863 {
864 	int rc;
865 
866 	zpci_update_fh(zdev, fh);
867 	/* the PCI function will be scanned once function 0 appears */
868 	if (!zdev->zbus->bus)
869 		return 0;
870 
871 	/* For function 0 on a multi-function bus scan whole bus as we might
872 	 * have to pick up existing functions waiting for it to allow creating
873 	 * the PCI bus
874 	 */
875 	if (zdev->devfn == 0 && zdev->zbus->multifunction)
876 		rc = zpci_bus_scan_bus(zdev->zbus);
877 	else
878 		rc = zpci_bus_scan_device(zdev);
879 
880 	return rc;
881 }
882 
883 /**
884  * zpci_deconfigure_device() - Deconfigure a zpci_dev
885  * @zdev: The zpci_dev to configure
886  *
887  * Deconfigure a zPCI function that is currently configured and possibly known
888  * to the common code PCI subsystem.
889  * If any failure occurs the device is left as is.
890  *
891  * Return: 0 on success, or an error code otherwise
892  */
893 int zpci_deconfigure_device(struct zpci_dev *zdev)
894 {
895 	int rc;
896 
897 	if (zdev->zbus->bus)
898 		zpci_bus_remove_device(zdev, false);
899 
900 	if (zdev->dma_table) {
901 		rc = zpci_dma_exit_device(zdev);
902 		if (rc)
903 			return rc;
904 	}
905 	if (zdev_enabled(zdev)) {
906 		rc = zpci_disable_device(zdev);
907 		if (rc)
908 			return rc;
909 	}
910 
911 	rc = sclp_pci_deconfigure(zdev->fid);
912 	zpci_dbg(3, "deconf fid:%x, rc:%d\n", zdev->fid, rc);
913 	if (rc)
914 		return rc;
915 	zdev->state = ZPCI_FN_STATE_STANDBY;
916 
917 	return 0;
918 }
919 
920 /**
921  * zpci_device_reserved() - Mark device as resverved
922  * @zdev: the zpci_dev that was reserved
923  *
924  * Handle the case that a given zPCI function was reserved by another system.
925  * After a call to this function the zpci_dev can not be found via
926  * get_zdev_by_fid() anymore but may still be accessible via existing
927  * references though it will not be functional anymore.
928  */
929 void zpci_device_reserved(struct zpci_dev *zdev)
930 {
931 	if (zdev->has_hp_slot)
932 		zpci_exit_slot(zdev);
933 	/*
934 	 * Remove device from zpci_list as it is going away. This also
935 	 * makes sure we ignore subsequent zPCI events for this device.
936 	 */
937 	spin_lock(&zpci_list_lock);
938 	list_del(&zdev->entry);
939 	spin_unlock(&zpci_list_lock);
940 	zdev->state = ZPCI_FN_STATE_RESERVED;
941 	zpci_dbg(3, "rsv fid:%x\n", zdev->fid);
942 	zpci_zdev_put(zdev);
943 }
944 
945 void zpci_release_device(struct kref *kref)
946 {
947 	struct zpci_dev *zdev = container_of(kref, struct zpci_dev, kref);
948 	int ret;
949 
950 	if (zdev->zbus->bus)
951 		zpci_bus_remove_device(zdev, false);
952 
953 	if (zdev->dma_table)
954 		zpci_dma_exit_device(zdev);
955 	if (zdev_enabled(zdev))
956 		zpci_disable_device(zdev);
957 
958 	switch (zdev->state) {
959 	case ZPCI_FN_STATE_CONFIGURED:
960 		ret = sclp_pci_deconfigure(zdev->fid);
961 		zpci_dbg(3, "deconf fid:%x, rc:%d\n", zdev->fid, ret);
962 		fallthrough;
963 	case ZPCI_FN_STATE_STANDBY:
964 		if (zdev->has_hp_slot)
965 			zpci_exit_slot(zdev);
966 		spin_lock(&zpci_list_lock);
967 		list_del(&zdev->entry);
968 		spin_unlock(&zpci_list_lock);
969 		zpci_dbg(3, "rsv fid:%x\n", zdev->fid);
970 		fallthrough;
971 	case ZPCI_FN_STATE_RESERVED:
972 		if (zdev->has_resources)
973 			zpci_cleanup_bus_resources(zdev);
974 		zpci_bus_device_unregister(zdev);
975 		zpci_destroy_iommu(zdev);
976 		fallthrough;
977 	default:
978 		break;
979 	}
980 	zpci_dbg(3, "rem fid:%x\n", zdev->fid);
981 	kfree(zdev);
982 }
983 
984 int zpci_report_error(struct pci_dev *pdev,
985 		      struct zpci_report_error_header *report)
986 {
987 	struct zpci_dev *zdev = to_zpci(pdev);
988 
989 	return sclp_pci_report(report, zdev->fh, zdev->fid);
990 }
991 EXPORT_SYMBOL(zpci_report_error);
992 
993 /**
994  * zpci_clear_error_state() - Clears the zPCI error state of the device
995  * @zdev: The zdev for which the zPCI error state should be reset
996  *
997  * Clear the zPCI error state of the device. If clearing the zPCI error state
998  * fails the device is left in the error state. In this case it may make sense
999  * to call zpci_io_perm_failure() on the associated pdev if it exists.
1000  *
1001  * Returns: 0 on success, -EIO otherwise
1002  */
1003 int zpci_clear_error_state(struct zpci_dev *zdev)
1004 {
1005 	u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_RESET_ERROR);
1006 	struct zpci_fib fib = {0};
1007 	u8 status;
1008 	int cc;
1009 
1010 	cc = zpci_mod_fc(req, &fib, &status);
1011 	if (cc) {
1012 		zpci_dbg(3, "ces fid:%x, cc:%d, status:%x\n", zdev->fid, cc, status);
1013 		return -EIO;
1014 	}
1015 
1016 	return 0;
1017 }
1018 
1019 /**
1020  * zpci_reset_load_store_blocked() - Re-enables L/S from error state
1021  * @zdev: The zdev for which to unblock load/store access
1022  *
1023  * Re-enables load/store access for a PCI function in the error state while
1024  * keeping DMA blocked. In this state drivers can poke MMIO space to determine
1025  * if error recovery is possible while catching any rogue DMA access from the
1026  * device.
1027  *
1028  * Returns: 0 on success, -EIO otherwise
1029  */
1030 int zpci_reset_load_store_blocked(struct zpci_dev *zdev)
1031 {
1032 	u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_RESET_BLOCK);
1033 	struct zpci_fib fib = {0};
1034 	u8 status;
1035 	int cc;
1036 
1037 	cc = zpci_mod_fc(req, &fib, &status);
1038 	if (cc) {
1039 		zpci_dbg(3, "rls fid:%x, cc:%d, status:%x\n", zdev->fid, cc, status);
1040 		return -EIO;
1041 	}
1042 
1043 	return 0;
1044 }
1045 
1046 static int zpci_mem_init(void)
1047 {
1048 	BUILD_BUG_ON(!is_power_of_2(__alignof__(struct zpci_fmb)) ||
1049 		     __alignof__(struct zpci_fmb) < sizeof(struct zpci_fmb));
1050 
1051 	zdev_fmb_cache = kmem_cache_create("PCI_FMB_cache", sizeof(struct zpci_fmb),
1052 					   __alignof__(struct zpci_fmb), 0, NULL);
1053 	if (!zdev_fmb_cache)
1054 		goto error_fmb;
1055 
1056 	zpci_iomap_start = kcalloc(ZPCI_IOMAP_ENTRIES,
1057 				   sizeof(*zpci_iomap_start), GFP_KERNEL);
1058 	if (!zpci_iomap_start)
1059 		goto error_iomap;
1060 
1061 	zpci_iomap_bitmap = kcalloc(BITS_TO_LONGS(ZPCI_IOMAP_ENTRIES),
1062 				    sizeof(*zpci_iomap_bitmap), GFP_KERNEL);
1063 	if (!zpci_iomap_bitmap)
1064 		goto error_iomap_bitmap;
1065 
1066 	if (static_branch_likely(&have_mio))
1067 		clp_setup_writeback_mio();
1068 
1069 	return 0;
1070 error_iomap_bitmap:
1071 	kfree(zpci_iomap_start);
1072 error_iomap:
1073 	kmem_cache_destroy(zdev_fmb_cache);
1074 error_fmb:
1075 	return -ENOMEM;
1076 }
1077 
1078 static void zpci_mem_exit(void)
1079 {
1080 	kfree(zpci_iomap_bitmap);
1081 	kfree(zpci_iomap_start);
1082 	kmem_cache_destroy(zdev_fmb_cache);
1083 }
1084 
1085 static unsigned int s390_pci_probe __initdata = 1;
1086 unsigned int s390_pci_force_floating __initdata;
1087 static unsigned int s390_pci_initialized;
1088 
1089 char * __init pcibios_setup(char *str)
1090 {
1091 	if (!strcmp(str, "off")) {
1092 		s390_pci_probe = 0;
1093 		return NULL;
1094 	}
1095 	if (!strcmp(str, "nomio")) {
1096 		S390_lowcore.machine_flags &= ~MACHINE_FLAG_PCI_MIO;
1097 		return NULL;
1098 	}
1099 	if (!strcmp(str, "force_floating")) {
1100 		s390_pci_force_floating = 1;
1101 		return NULL;
1102 	}
1103 	if (!strcmp(str, "norid")) {
1104 		s390_pci_no_rid = 1;
1105 		return NULL;
1106 	}
1107 	return str;
1108 }
1109 
1110 bool zpci_is_enabled(void)
1111 {
1112 	return s390_pci_initialized;
1113 }
1114 
1115 static int __init pci_base_init(void)
1116 {
1117 	int rc;
1118 
1119 	if (!s390_pci_probe)
1120 		return 0;
1121 
1122 	if (!test_facility(69) || !test_facility(71)) {
1123 		pr_info("PCI is not supported because CPU facilities 69 or 71 are not available\n");
1124 		return 0;
1125 	}
1126 
1127 	if (MACHINE_HAS_PCI_MIO) {
1128 		static_branch_enable(&have_mio);
1129 		ctl_set_bit(2, 5);
1130 	}
1131 
1132 	rc = zpci_debug_init();
1133 	if (rc)
1134 		goto out;
1135 
1136 	rc = zpci_mem_init();
1137 	if (rc)
1138 		goto out_mem;
1139 
1140 	rc = zpci_irq_init();
1141 	if (rc)
1142 		goto out_irq;
1143 
1144 	rc = zpci_dma_init();
1145 	if (rc)
1146 		goto out_dma;
1147 
1148 	rc = clp_scan_pci_devices();
1149 	if (rc)
1150 		goto out_find;
1151 	zpci_bus_scan_busses();
1152 
1153 	s390_pci_initialized = 1;
1154 	return 0;
1155 
1156 out_find:
1157 	zpci_dma_exit();
1158 out_dma:
1159 	zpci_irq_exit();
1160 out_irq:
1161 	zpci_mem_exit();
1162 out_mem:
1163 	zpci_debug_exit();
1164 out:
1165 	return rc;
1166 }
1167 subsys_initcall_sync(pci_base_init);
1168