xref: /openbmc/linux/drivers/misc/ocxl/link.c (revision 5ef3166e8a32d78dfa985a323aa45ed485ff663a)
1*5ef3166eSFrederic Barrat // SPDX-License-Identifier: GPL-2.0+
2*5ef3166eSFrederic Barrat // Copyright 2017 IBM Corp.
3*5ef3166eSFrederic Barrat #include <linux/sched/mm.h>
4*5ef3166eSFrederic Barrat #include <linux/mutex.h>
5*5ef3166eSFrederic Barrat #include <linux/mmu_context.h>
6*5ef3166eSFrederic Barrat #include <asm/copro.h>
7*5ef3166eSFrederic Barrat #include <asm/pnv-ocxl.h>
8*5ef3166eSFrederic Barrat #include "ocxl_internal.h"
9*5ef3166eSFrederic Barrat 
10*5ef3166eSFrederic Barrat 
/* A PASID (Process Address Space ID) is 15 bits wide */
#define SPA_PASID_BITS		15
#define SPA_PASID_MAX		((1 << SPA_PASID_BITS) - 1)
/* The PE handle is the PASID masked to the SPA index range */
#define SPA_PE_MASK		SPA_PASID_MAX
#define SPA_SPA_SIZE_LOG	22 /* Each SPA is 4 Mb */

/* Bits of the per-PE config_state dword (IBM bit numbering: bit 0 is MSB) */
#define SPA_CFG_SF		(1ull << (63-0))
#define SPA_CFG_TA		(1ull << (63-1))
#define SPA_CFG_HV		(1ull << (63-3))
#define SPA_CFG_UV		(1ull << (63-4))
#define SPA_CFG_XLAT_hpt	(0ull << (63-6)) /* Hashed page table (HPT) mode */
#define SPA_CFG_XLAT_roh	(2ull << (63-6)) /* Radix on HPT mode */
#define SPA_CFG_XLAT_ror	(3ull << (63-6)) /* Radix on Radix mode */
#define SPA_CFG_PR		(1ull << (63-49))
#define SPA_CFG_TC		(1ull << (63-54))
#define SPA_CFG_DR		(1ull << (63-59))

/* Bits of the XSL DSISR fault register */
#define SPA_XSL_TF		(1ull << (63-3))  /* Translation fault */
#define SPA_XSL_S		(1ull << (63-38)) /* Store operation */

/* software_state flag marking a PE entry as in use */
#define SPA_PE_VALID		0x80000000
31*5ef3166eSFrederic Barrat 
32*5ef3166eSFrederic Barrat 
/*
 * Per-PE bookkeeping kept on the kernel side (the hardware only sees
 * the ocxl_process_element in the SPA). Looked up from the fault
 * handler through spa->pe_tree, hence the rcu head for deferred free.
 */
struct pe_data {
	/* mm of the process owning this PE; a reference is held via mmgrab() */
	struct mm_struct *mm;
	/* callback to trigger when a translation fault occurs */
	void (*xsl_err_cb)(void *data, u64 addr, u64 dsisr);
	/* opaque pointer to be passed to the above callback */
	void *xsl_err_data;
	/* used by kfree_rcu() when the PE is removed */
	struct rcu_head rcu;
};
41*5ef3166eSFrederic Barrat 
/*
 * Shared Process Area: the table of process elements shared with the
 * device, plus everything needed to service its translation faults.
 */
struct spa {
	struct ocxl_process_element *spa_mem; /* the SPA itself, page-allocated */
	int spa_order;			/* allocation order of spa_mem */
	struct mutex spa_lock;		/* serializes PE add/remove */
	struct radix_tree_root pe_tree; /* Maps PE handles to pe_data */
	char *irq_name;			/* name shown for the XSL interrupt */
	int virq;			/* virtual irq of the XSL interrupt */
	void __iomem *reg_dsisr;	/* XSL fault status register */
	void __iomem *reg_dar;		/* XSL faulting address register */
	void __iomem *reg_tfc;		/* fault completion (ack) register */
	void __iomem *reg_pe_handle;	/* handle of the faulting PE */
	/*
	 * The following fields are used by the memory fault
	 * interrupt handler. We can only have one interrupt at a
	 * time. The NPU won't raise another interrupt until the
	 * previous one has been ack'd by writing to the TFC register
	 */
	struct xsl_fault {
		struct work_struct fault_work; /* bottom half doing the fault resolution */
		u64 pe;		/* handle of the faulting PE */
		u64 dsisr;	/* snapshot of the fault status */
		u64 dar;	/* snapshot of the faulting address */
		struct pe_data pe_data;	/* copy of the PE context at fault time */
	} xsl_fault;
};
67*5ef3166eSFrederic Barrat 
/*
 * An opencapi link can be used by several PCI functions. We have
 * one link per device slot.
 *
 * A linked list of opencapi links should suffice, as there's a
 * limited number of opencapi slots on a system and lookup is only
 * done when the device is probed
 */
/*
 * One opencapi link per device slot, identified by domain/bus/device.
 * Reference-counted: functions of the same device share the link.
 */
struct link {
	struct list_head list;		/* entry in links_list */
	struct kref ref;		/* released by release_xsl() */
	int domain;			/* PCI domain of the slot */
	int bus;			/* PCI bus number of the slot */
	int dev;			/* PCI device (slot) number */
	atomic_t irq_available;		/* remaining AFU irq budget for this link */
	struct spa *spa;		/* shared process area for the link */
	void *platform_data;		/* opaque handle from pnv_ocxl_spa_setup() */
};
/* All known links; both list and entries are protected by links_list_lock */
static struct list_head links_list = LIST_HEAD_INIT(links_list);
static DEFINE_MUTEX(links_list_lock);
88*5ef3166eSFrederic Barrat 
/* Possible outcomes when acking a translation fault (see ack_irq()) */
enum xsl_response {
	CONTINUE,	/* not supported by the hardware */
	ADDRESS_ERROR,	/* address could not be resolved */
	RESTART,	/* fault handled, device may retry */
};
94*5ef3166eSFrederic Barrat 
95*5ef3166eSFrederic Barrat 
96*5ef3166eSFrederic Barrat static void read_irq(struct spa *spa, u64 *dsisr, u64 *dar, u64 *pe)
97*5ef3166eSFrederic Barrat {
98*5ef3166eSFrederic Barrat 	u64 reg;
99*5ef3166eSFrederic Barrat 
100*5ef3166eSFrederic Barrat 	*dsisr = in_be64(spa->reg_dsisr);
101*5ef3166eSFrederic Barrat 	*dar = in_be64(spa->reg_dar);
102*5ef3166eSFrederic Barrat 	reg = in_be64(spa->reg_pe_handle);
103*5ef3166eSFrederic Barrat 	*pe = reg & SPA_PE_MASK;
104*5ef3166eSFrederic Barrat }
105*5ef3166eSFrederic Barrat 
106*5ef3166eSFrederic Barrat static void ack_irq(struct spa *spa, enum xsl_response r)
107*5ef3166eSFrederic Barrat {
108*5ef3166eSFrederic Barrat 	u64 reg = 0;
109*5ef3166eSFrederic Barrat 
110*5ef3166eSFrederic Barrat 	/* continue is not supported */
111*5ef3166eSFrederic Barrat 	if (r == RESTART)
112*5ef3166eSFrederic Barrat 		reg = PPC_BIT(31);
113*5ef3166eSFrederic Barrat 	else if (r == ADDRESS_ERROR)
114*5ef3166eSFrederic Barrat 		reg = PPC_BIT(30);
115*5ef3166eSFrederic Barrat 	else
116*5ef3166eSFrederic Barrat 		WARN(1, "Invalid irq response %d\n", r);
117*5ef3166eSFrederic Barrat 
118*5ef3166eSFrederic Barrat 	if (reg)
119*5ef3166eSFrederic Barrat 		out_be64(spa->reg_tfc, reg);
120*5ef3166eSFrederic Barrat }
121*5ef3166eSFrederic Barrat 
/*
 * Bottom half of the translation fault interrupt: resolve the fault
 * on the mm captured by xsl_fault_handler(), then ack the NPU.
 * Only one fault can be in flight at a time (see the comment on
 * struct spa), so spa->xsl_fault can be read without locking.
 */
static void xsl_fault_handler_bh(struct work_struct *fault_work)
{
	unsigned int flt = 0;
	unsigned long access, flags, inv_flags = 0;
	enum xsl_response r;
	struct xsl_fault *fault = container_of(fault_work, struct xsl_fault,
					fault_work);
	struct spa *spa = container_of(fault, struct spa, xsl_fault);

	int rc;

	/*
	 * We need to release a reference on the mm whenever exiting this
	 * function (taken in the memory fault interrupt handler)
	 */
	rc = copro_handle_mm_fault(fault->pe_data.mm, fault->dar, fault->dsisr,
				&flt);
	if (rc) {
		pr_debug("copro_handle_mm_fault failed: %d\n", rc);
		/* let the AFU driver know the address couldn't be resolved */
		if (fault->pe_data.xsl_err_cb) {
			fault->pe_data.xsl_err_cb(
				fault->pe_data.xsl_err_data,
				fault->dar, fault->dsisr);
		}
		r = ADDRESS_ERROR;
		goto ack;
	}

	if (!radix_enabled()) {
		/*
		 * update_mmu_cache() will not have loaded the hash
		 * since current->trap is not a 0x400 or 0x300, so
		 * just call hash_page_mm() here.
		 */
		access = _PAGE_PRESENT | _PAGE_READ;
		if (fault->dsisr & SPA_XSL_S)
			access |= _PAGE_WRITE;

		/* faults on non-user addresses need the privileged flag */
		if (REGION_ID(fault->dar) != USER_REGION_ID)
			access |= _PAGE_PRIVILEGED;

		local_irq_save(flags);
		hash_page_mm(fault->pe_data.mm, fault->dar, access, 0x300,
			inv_flags);
		local_irq_restore(flags);
	}
	r = RESTART;
ack:
	mmdrop(fault->pe_data.mm);
	ack_irq(spa, r);
}
173*5ef3166eSFrederic Barrat 
/*
 * Top half of the translation fault interrupt. Reads the fault state
 * from the XSL registers, finds the matching pe_data under RCU, takes
 * a reference on the mm and hands the actual fault resolution to the
 * workqueue (xsl_fault_handler_bh). Error cases are acked directly.
 */
static irqreturn_t xsl_fault_handler(int irq, void *data)
{
	struct link *link = (struct link *) data;
	struct spa *spa = link->spa;
	u64 dsisr, dar, pe_handle;
	struct pe_data *pe_data;
	struct ocxl_process_element *pe;
	int lpid, pid, tid;

	read_irq(spa, &dsisr, &dar, &pe_handle);

	WARN_ON(pe_handle > SPA_PE_MASK);
	pe = spa->spa_mem + pe_handle;
	/* snapshot the PE identifiers; only pid is consistency-checked below */
	lpid = be32_to_cpu(pe->lpid);
	pid = be32_to_cpu(pe->pid);
	tid = be32_to_cpu(pe->tid);
	/* We could be reading all null values here if the PE is being
	 * removed while an interrupt kicks in. It's not supposed to
	 * happen if the driver notified the AFU to terminate the
	 * PASID, and the AFU waited for pending operations before
	 * acknowledging. But even if it happens, we won't find a
	 * memory context below and fail silently, so it should be ok.
	 */
	if (!(dsisr & SPA_XSL_TF)) {
		WARN(1, "Invalid xsl interrupt fault register %#llx\n", dsisr);
		ack_irq(spa, ADDRESS_ERROR);
		return IRQ_HANDLED;
	}

	rcu_read_lock();
	pe_data = radix_tree_lookup(&spa->pe_tree, pe_handle);
	if (!pe_data) {
		/*
		 * Could only happen if the driver didn't notify the
		 * AFU about PASID termination before removing the PE,
		 * or the AFU didn't wait for all memory access to
		 * have completed.
		 *
		 * Either way, we fail early, but we shouldn't log an
		 * error message, as it is a valid (if unexpected)
		 * scenario
		 */
		rcu_read_unlock();
		pr_debug("Unknown mm context for xsl interrupt\n");
		ack_irq(spa, ADDRESS_ERROR);
		return IRQ_HANDLED;
	}
	WARN_ON(pe_data->mm->context.id != pid);

	/* copy the fault state while pe_data is still RCU-protected */
	spa->xsl_fault.pe = pe_handle;
	spa->xsl_fault.dar = dar;
	spa->xsl_fault.dsisr = dsisr;
	spa->xsl_fault.pe_data = *pe_data;
	mmgrab(pe_data->mm); /* mm count is released by bottom half */

	rcu_read_unlock();
	schedule_work(&spa->xsl_fault.fault_work);
	return IRQ_HANDLED;
}
233*5ef3166eSFrederic Barrat 
234*5ef3166eSFrederic Barrat static void unmap_irq_registers(struct spa *spa)
235*5ef3166eSFrederic Barrat {
236*5ef3166eSFrederic Barrat 	pnv_ocxl_unmap_xsl_regs(spa->reg_dsisr, spa->reg_dar, spa->reg_tfc,
237*5ef3166eSFrederic Barrat 				spa->reg_pe_handle);
238*5ef3166eSFrederic Barrat }
239*5ef3166eSFrederic Barrat 
240*5ef3166eSFrederic Barrat static int map_irq_registers(struct pci_dev *dev, struct spa *spa)
241*5ef3166eSFrederic Barrat {
242*5ef3166eSFrederic Barrat 	return pnv_ocxl_map_xsl_regs(dev, &spa->reg_dsisr, &spa->reg_dar,
243*5ef3166eSFrederic Barrat 				&spa->reg_tfc, &spa->reg_pe_handle);
244*5ef3166eSFrederic Barrat }
245*5ef3166eSFrederic Barrat 
246*5ef3166eSFrederic Barrat static int setup_xsl_irq(struct pci_dev *dev, struct link *link)
247*5ef3166eSFrederic Barrat {
248*5ef3166eSFrederic Barrat 	struct spa *spa = link->spa;
249*5ef3166eSFrederic Barrat 	int rc;
250*5ef3166eSFrederic Barrat 	int hwirq;
251*5ef3166eSFrederic Barrat 
252*5ef3166eSFrederic Barrat 	rc = pnv_ocxl_get_xsl_irq(dev, &hwirq);
253*5ef3166eSFrederic Barrat 	if (rc)
254*5ef3166eSFrederic Barrat 		return rc;
255*5ef3166eSFrederic Barrat 
256*5ef3166eSFrederic Barrat 	rc = map_irq_registers(dev, spa);
257*5ef3166eSFrederic Barrat 	if (rc)
258*5ef3166eSFrederic Barrat 		return rc;
259*5ef3166eSFrederic Barrat 
260*5ef3166eSFrederic Barrat 	spa->irq_name = kasprintf(GFP_KERNEL, "ocxl-xsl-%x-%x-%x",
261*5ef3166eSFrederic Barrat 				link->domain, link->bus, link->dev);
262*5ef3166eSFrederic Barrat 	if (!spa->irq_name) {
263*5ef3166eSFrederic Barrat 		unmap_irq_registers(spa);
264*5ef3166eSFrederic Barrat 		dev_err(&dev->dev, "Can't allocate name for xsl interrupt\n");
265*5ef3166eSFrederic Barrat 		return -ENOMEM;
266*5ef3166eSFrederic Barrat 	}
267*5ef3166eSFrederic Barrat 	/*
268*5ef3166eSFrederic Barrat 	 * At some point, we'll need to look into allowing a higher
269*5ef3166eSFrederic Barrat 	 * number of interrupts. Could we have an IRQ domain per link?
270*5ef3166eSFrederic Barrat 	 */
271*5ef3166eSFrederic Barrat 	spa->virq = irq_create_mapping(NULL, hwirq);
272*5ef3166eSFrederic Barrat 	if (!spa->virq) {
273*5ef3166eSFrederic Barrat 		kfree(spa->irq_name);
274*5ef3166eSFrederic Barrat 		unmap_irq_registers(spa);
275*5ef3166eSFrederic Barrat 		dev_err(&dev->dev,
276*5ef3166eSFrederic Barrat 			"irq_create_mapping failed for translation interrupt\n");
277*5ef3166eSFrederic Barrat 		return -EINVAL;
278*5ef3166eSFrederic Barrat 	}
279*5ef3166eSFrederic Barrat 
280*5ef3166eSFrederic Barrat 	dev_dbg(&dev->dev, "hwirq %d mapped to virq %d\n", hwirq, spa->virq);
281*5ef3166eSFrederic Barrat 
282*5ef3166eSFrederic Barrat 	rc = request_irq(spa->virq, xsl_fault_handler, 0, spa->irq_name,
283*5ef3166eSFrederic Barrat 			link);
284*5ef3166eSFrederic Barrat 	if (rc) {
285*5ef3166eSFrederic Barrat 		irq_dispose_mapping(spa->virq);
286*5ef3166eSFrederic Barrat 		kfree(spa->irq_name);
287*5ef3166eSFrederic Barrat 		unmap_irq_registers(spa);
288*5ef3166eSFrederic Barrat 		dev_err(&dev->dev,
289*5ef3166eSFrederic Barrat 			"request_irq failed for translation interrupt: %d\n",
290*5ef3166eSFrederic Barrat 			rc);
291*5ef3166eSFrederic Barrat 		return -EINVAL;
292*5ef3166eSFrederic Barrat 	}
293*5ef3166eSFrederic Barrat 	return 0;
294*5ef3166eSFrederic Barrat }
295*5ef3166eSFrederic Barrat 
296*5ef3166eSFrederic Barrat static void release_xsl_irq(struct link *link)
297*5ef3166eSFrederic Barrat {
298*5ef3166eSFrederic Barrat 	struct spa *spa = link->spa;
299*5ef3166eSFrederic Barrat 
300*5ef3166eSFrederic Barrat 	if (spa->virq) {
301*5ef3166eSFrederic Barrat 		free_irq(spa->virq, link);
302*5ef3166eSFrederic Barrat 		irq_dispose_mapping(spa->virq);
303*5ef3166eSFrederic Barrat 	}
304*5ef3166eSFrederic Barrat 	kfree(spa->irq_name);
305*5ef3166eSFrederic Barrat 	unmap_irq_registers(spa);
306*5ef3166eSFrederic Barrat }
307*5ef3166eSFrederic Barrat 
308*5ef3166eSFrederic Barrat static int alloc_spa(struct pci_dev *dev, struct link *link)
309*5ef3166eSFrederic Barrat {
310*5ef3166eSFrederic Barrat 	struct spa *spa;
311*5ef3166eSFrederic Barrat 
312*5ef3166eSFrederic Barrat 	spa = kzalloc(sizeof(struct spa), GFP_KERNEL);
313*5ef3166eSFrederic Barrat 	if (!spa)
314*5ef3166eSFrederic Barrat 		return -ENOMEM;
315*5ef3166eSFrederic Barrat 
316*5ef3166eSFrederic Barrat 	mutex_init(&spa->spa_lock);
317*5ef3166eSFrederic Barrat 	INIT_RADIX_TREE(&spa->pe_tree, GFP_KERNEL);
318*5ef3166eSFrederic Barrat 	INIT_WORK(&spa->xsl_fault.fault_work, xsl_fault_handler_bh);
319*5ef3166eSFrederic Barrat 
320*5ef3166eSFrederic Barrat 	spa->spa_order = SPA_SPA_SIZE_LOG - PAGE_SHIFT;
321*5ef3166eSFrederic Barrat 	spa->spa_mem = (struct ocxl_process_element *)
322*5ef3166eSFrederic Barrat 		__get_free_pages(GFP_KERNEL | __GFP_ZERO, spa->spa_order);
323*5ef3166eSFrederic Barrat 	if (!spa->spa_mem) {
324*5ef3166eSFrederic Barrat 		dev_err(&dev->dev, "Can't allocate Shared Process Area\n");
325*5ef3166eSFrederic Barrat 		kfree(spa);
326*5ef3166eSFrederic Barrat 		return -ENOMEM;
327*5ef3166eSFrederic Barrat 	}
328*5ef3166eSFrederic Barrat 	pr_debug("Allocated SPA for %x:%x:%x at %p\n", link->domain, link->bus,
329*5ef3166eSFrederic Barrat 		link->dev, spa->spa_mem);
330*5ef3166eSFrederic Barrat 
331*5ef3166eSFrederic Barrat 	link->spa = spa;
332*5ef3166eSFrederic Barrat 	return 0;
333*5ef3166eSFrederic Barrat }
334*5ef3166eSFrederic Barrat 
335*5ef3166eSFrederic Barrat static void free_spa(struct link *link)
336*5ef3166eSFrederic Barrat {
337*5ef3166eSFrederic Barrat 	struct spa *spa = link->spa;
338*5ef3166eSFrederic Barrat 
339*5ef3166eSFrederic Barrat 	pr_debug("Freeing SPA for %x:%x:%x\n", link->domain, link->bus,
340*5ef3166eSFrederic Barrat 		link->dev);
341*5ef3166eSFrederic Barrat 
342*5ef3166eSFrederic Barrat 	if (spa && spa->spa_mem) {
343*5ef3166eSFrederic Barrat 		free_pages((unsigned long) spa->spa_mem, spa->spa_order);
344*5ef3166eSFrederic Barrat 		kfree(spa);
345*5ef3166eSFrederic Barrat 		link->spa = NULL;
346*5ef3166eSFrederic Barrat 	}
347*5ef3166eSFrederic Barrat }
348*5ef3166eSFrederic Barrat 
349*5ef3166eSFrederic Barrat static int alloc_link(struct pci_dev *dev, int PE_mask, struct link **out_link)
350*5ef3166eSFrederic Barrat {
351*5ef3166eSFrederic Barrat 	struct link *link;
352*5ef3166eSFrederic Barrat 	int rc;
353*5ef3166eSFrederic Barrat 
354*5ef3166eSFrederic Barrat 	link = kzalloc(sizeof(struct link), GFP_KERNEL);
355*5ef3166eSFrederic Barrat 	if (!link)
356*5ef3166eSFrederic Barrat 		return -ENOMEM;
357*5ef3166eSFrederic Barrat 
358*5ef3166eSFrederic Barrat 	kref_init(&link->ref);
359*5ef3166eSFrederic Barrat 	link->domain = pci_domain_nr(dev->bus);
360*5ef3166eSFrederic Barrat 	link->bus = dev->bus->number;
361*5ef3166eSFrederic Barrat 	link->dev = PCI_SLOT(dev->devfn);
362*5ef3166eSFrederic Barrat 	atomic_set(&link->irq_available, MAX_IRQ_PER_LINK);
363*5ef3166eSFrederic Barrat 
364*5ef3166eSFrederic Barrat 	rc = alloc_spa(dev, link);
365*5ef3166eSFrederic Barrat 	if (rc)
366*5ef3166eSFrederic Barrat 		goto err_free;
367*5ef3166eSFrederic Barrat 
368*5ef3166eSFrederic Barrat 	rc = setup_xsl_irq(dev, link);
369*5ef3166eSFrederic Barrat 	if (rc)
370*5ef3166eSFrederic Barrat 		goto err_spa;
371*5ef3166eSFrederic Barrat 
372*5ef3166eSFrederic Barrat 	/* platform specific hook */
373*5ef3166eSFrederic Barrat 	rc = pnv_ocxl_spa_setup(dev, link->spa->spa_mem, PE_mask,
374*5ef3166eSFrederic Barrat 				&link->platform_data);
375*5ef3166eSFrederic Barrat 	if (rc)
376*5ef3166eSFrederic Barrat 		goto err_xsl_irq;
377*5ef3166eSFrederic Barrat 
378*5ef3166eSFrederic Barrat 	*out_link = link;
379*5ef3166eSFrederic Barrat 	return 0;
380*5ef3166eSFrederic Barrat 
381*5ef3166eSFrederic Barrat err_xsl_irq:
382*5ef3166eSFrederic Barrat 	release_xsl_irq(link);
383*5ef3166eSFrederic Barrat err_spa:
384*5ef3166eSFrederic Barrat 	free_spa(link);
385*5ef3166eSFrederic Barrat err_free:
386*5ef3166eSFrederic Barrat 	kfree(link);
387*5ef3166eSFrederic Barrat 	return rc;
388*5ef3166eSFrederic Barrat }
389*5ef3166eSFrederic Barrat 
/* Release all resources held by a link: irq first, then its SPA */
static void free_link(struct link *link)
{
	release_xsl_irq(link);
	free_spa(link);
	kfree(link);
}
396*5ef3166eSFrederic Barrat 
397*5ef3166eSFrederic Barrat int ocxl_link_setup(struct pci_dev *dev, int PE_mask, void **link_handle)
398*5ef3166eSFrederic Barrat {
399*5ef3166eSFrederic Barrat 	int rc = 0;
400*5ef3166eSFrederic Barrat 	struct link *link;
401*5ef3166eSFrederic Barrat 
402*5ef3166eSFrederic Barrat 	mutex_lock(&links_list_lock);
403*5ef3166eSFrederic Barrat 	list_for_each_entry(link, &links_list, list) {
404*5ef3166eSFrederic Barrat 		/* The functions of a device all share the same link */
405*5ef3166eSFrederic Barrat 		if (link->domain == pci_domain_nr(dev->bus) &&
406*5ef3166eSFrederic Barrat 			link->bus == dev->bus->number &&
407*5ef3166eSFrederic Barrat 			link->dev == PCI_SLOT(dev->devfn)) {
408*5ef3166eSFrederic Barrat 			kref_get(&link->ref);
409*5ef3166eSFrederic Barrat 			*link_handle = link;
410*5ef3166eSFrederic Barrat 			goto unlock;
411*5ef3166eSFrederic Barrat 		}
412*5ef3166eSFrederic Barrat 	}
413*5ef3166eSFrederic Barrat 	rc = alloc_link(dev, PE_mask, &link);
414*5ef3166eSFrederic Barrat 	if (rc)
415*5ef3166eSFrederic Barrat 		goto unlock;
416*5ef3166eSFrederic Barrat 
417*5ef3166eSFrederic Barrat 	list_add(&link->list, &links_list);
418*5ef3166eSFrederic Barrat 	*link_handle = link;
419*5ef3166eSFrederic Barrat unlock:
420*5ef3166eSFrederic Barrat 	mutex_unlock(&links_list_lock);
421*5ef3166eSFrederic Barrat 	return rc;
422*5ef3166eSFrederic Barrat }
423*5ef3166eSFrederic Barrat 
/*
 * kref release callback for a link. Called with links_list_lock held
 * (see ocxl_link_release()), which protects the list_del().
 */
static void release_xsl(struct kref *ref)
{
	struct link *link = container_of(ref, struct link, ref);

	list_del(&link->list);
	/* call platform code before releasing data */
	pnv_ocxl_spa_release(link->platform_data);
	free_link(link);
}
433*5ef3166eSFrederic Barrat 
434*5ef3166eSFrederic Barrat void ocxl_link_release(struct pci_dev *dev, void *link_handle)
435*5ef3166eSFrederic Barrat {
436*5ef3166eSFrederic Barrat 	struct link *link = (struct link *) link_handle;
437*5ef3166eSFrederic Barrat 
438*5ef3166eSFrederic Barrat 	mutex_lock(&links_list_lock);
439*5ef3166eSFrederic Barrat 	kref_put(&link->ref, release_xsl);
440*5ef3166eSFrederic Barrat 	mutex_unlock(&links_list_lock);
441*5ef3166eSFrederic Barrat }
442*5ef3166eSFrederic Barrat 
443*5ef3166eSFrederic Barrat static u64 calculate_cfg_state(bool kernel)
444*5ef3166eSFrederic Barrat {
445*5ef3166eSFrederic Barrat 	u64 state;
446*5ef3166eSFrederic Barrat 
447*5ef3166eSFrederic Barrat 	state = SPA_CFG_DR;
448*5ef3166eSFrederic Barrat 	if (mfspr(SPRN_LPCR) & LPCR_TC)
449*5ef3166eSFrederic Barrat 		state |= SPA_CFG_TC;
450*5ef3166eSFrederic Barrat 	if (radix_enabled())
451*5ef3166eSFrederic Barrat 		state |= SPA_CFG_XLAT_ror;
452*5ef3166eSFrederic Barrat 	else
453*5ef3166eSFrederic Barrat 		state |= SPA_CFG_XLAT_hpt;
454*5ef3166eSFrederic Barrat 	state |= SPA_CFG_HV;
455*5ef3166eSFrederic Barrat 	if (kernel) {
456*5ef3166eSFrederic Barrat 		if (mfmsr() & MSR_SF)
457*5ef3166eSFrederic Barrat 			state |= SPA_CFG_SF;
458*5ef3166eSFrederic Barrat 	} else {
459*5ef3166eSFrederic Barrat 		state |= SPA_CFG_PR;
460*5ef3166eSFrederic Barrat 		if (!test_tsk_thread_flag(current, TIF_32BIT))
461*5ef3166eSFrederic Barrat 			state |= SPA_CFG_SF;
462*5ef3166eSFrederic Barrat 	}
463*5ef3166eSFrederic Barrat 	return state;
464*5ef3166eSFrederic Barrat }
465*5ef3166eSFrederic Barrat 
466*5ef3166eSFrederic Barrat int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr,
467*5ef3166eSFrederic Barrat 		u64 amr, struct mm_struct *mm,
468*5ef3166eSFrederic Barrat 		void (*xsl_err_cb)(void *data, u64 addr, u64 dsisr),
469*5ef3166eSFrederic Barrat 		void *xsl_err_data)
470*5ef3166eSFrederic Barrat {
471*5ef3166eSFrederic Barrat 	struct link *link = (struct link *) link_handle;
472*5ef3166eSFrederic Barrat 	struct spa *spa = link->spa;
473*5ef3166eSFrederic Barrat 	struct ocxl_process_element *pe;
474*5ef3166eSFrederic Barrat 	int pe_handle, rc = 0;
475*5ef3166eSFrederic Barrat 	struct pe_data *pe_data;
476*5ef3166eSFrederic Barrat 
477*5ef3166eSFrederic Barrat 	BUILD_BUG_ON(sizeof(struct ocxl_process_element) != 128);
478*5ef3166eSFrederic Barrat 	if (pasid > SPA_PASID_MAX)
479*5ef3166eSFrederic Barrat 		return -EINVAL;
480*5ef3166eSFrederic Barrat 
481*5ef3166eSFrederic Barrat 	mutex_lock(&spa->spa_lock);
482*5ef3166eSFrederic Barrat 	pe_handle = pasid & SPA_PE_MASK;
483*5ef3166eSFrederic Barrat 	pe = spa->spa_mem + pe_handle;
484*5ef3166eSFrederic Barrat 
485*5ef3166eSFrederic Barrat 	if (pe->software_state) {
486*5ef3166eSFrederic Barrat 		rc = -EBUSY;
487*5ef3166eSFrederic Barrat 		goto unlock;
488*5ef3166eSFrederic Barrat 	}
489*5ef3166eSFrederic Barrat 
490*5ef3166eSFrederic Barrat 	pe_data = kmalloc(sizeof(*pe_data), GFP_KERNEL);
491*5ef3166eSFrederic Barrat 	if (!pe_data) {
492*5ef3166eSFrederic Barrat 		rc = -ENOMEM;
493*5ef3166eSFrederic Barrat 		goto unlock;
494*5ef3166eSFrederic Barrat 	}
495*5ef3166eSFrederic Barrat 
496*5ef3166eSFrederic Barrat 	pe_data->mm = mm;
497*5ef3166eSFrederic Barrat 	pe_data->xsl_err_cb = xsl_err_cb;
498*5ef3166eSFrederic Barrat 	pe_data->xsl_err_data = xsl_err_data;
499*5ef3166eSFrederic Barrat 
500*5ef3166eSFrederic Barrat 	memset(pe, 0, sizeof(struct ocxl_process_element));
501*5ef3166eSFrederic Barrat 	pe->config_state = cpu_to_be64(calculate_cfg_state(pidr == 0));
502*5ef3166eSFrederic Barrat 	pe->lpid = cpu_to_be32(mfspr(SPRN_LPID));
503*5ef3166eSFrederic Barrat 	pe->pid = cpu_to_be32(pidr);
504*5ef3166eSFrederic Barrat 	pe->tid = cpu_to_be32(tidr);
505*5ef3166eSFrederic Barrat 	pe->amr = cpu_to_be64(amr);
506*5ef3166eSFrederic Barrat 	pe->software_state = cpu_to_be32(SPA_PE_VALID);
507*5ef3166eSFrederic Barrat 
508*5ef3166eSFrederic Barrat 	mm_context_add_copro(mm);
509*5ef3166eSFrederic Barrat 	/*
510*5ef3166eSFrederic Barrat 	 * Barrier is to make sure PE is visible in the SPA before it
511*5ef3166eSFrederic Barrat 	 * is used by the device. It also helps with the global TLBI
512*5ef3166eSFrederic Barrat 	 * invalidation
513*5ef3166eSFrederic Barrat 	 */
514*5ef3166eSFrederic Barrat 	mb();
515*5ef3166eSFrederic Barrat 	radix_tree_insert(&spa->pe_tree, pe_handle, pe_data);
516*5ef3166eSFrederic Barrat 
517*5ef3166eSFrederic Barrat 	/*
518*5ef3166eSFrederic Barrat 	 * The mm must stay valid for as long as the device uses it. We
519*5ef3166eSFrederic Barrat 	 * lower the count when the context is removed from the SPA.
520*5ef3166eSFrederic Barrat 	 *
521*5ef3166eSFrederic Barrat 	 * We grab mm_count (and not mm_users), as we don't want to
522*5ef3166eSFrederic Barrat 	 * end up in a circular dependency if a process mmaps its
523*5ef3166eSFrederic Barrat 	 * mmio, therefore incrementing the file ref count when
524*5ef3166eSFrederic Barrat 	 * calling mmap(), and forgets to unmap before exiting. In
525*5ef3166eSFrederic Barrat 	 * that scenario, when the kernel handles the death of the
526*5ef3166eSFrederic Barrat 	 * process, the file is not cleaned because unmap was not
527*5ef3166eSFrederic Barrat 	 * called, and the mm wouldn't be freed because we would still
528*5ef3166eSFrederic Barrat 	 * have a reference on mm_users. Incrementing mm_count solves
529*5ef3166eSFrederic Barrat 	 * the problem.
530*5ef3166eSFrederic Barrat 	 */
531*5ef3166eSFrederic Barrat 	mmgrab(mm);
532*5ef3166eSFrederic Barrat unlock:
533*5ef3166eSFrederic Barrat 	mutex_unlock(&spa->spa_lock);
534*5ef3166eSFrederic Barrat 	return rc;
535*5ef3166eSFrederic Barrat }
536*5ef3166eSFrederic Barrat 
/*
 * Remove the process element for 'pasid' from the link's SPA, clear
 * it from the NPU context cache and release the mm reference taken in
 * ocxl_link_add_pe().
 *
 * Returns -EINVAL if the pasid is out of range or the PE isn't valid;
 * otherwise returns the result of the platform remove hook.
 */
int ocxl_link_remove_pe(void *link_handle, int pasid)
{
	struct link *link = (struct link *) link_handle;
	struct spa *spa = link->spa;
	struct ocxl_process_element *pe;
	struct pe_data *pe_data;
	int pe_handle, rc;

	if (pasid > SPA_PASID_MAX)
		return -EINVAL;

	/*
	 * About synchronization with our memory fault handler:
	 *
	 * Before removing the PE, the driver is supposed to have
	 * notified the AFU, which should have cleaned up and make
	 * sure the PASID is no longer in use, including pending
	 * interrupts. However, there's no way to be sure...
	 *
	 * We clear the PE and remove the context from our radix
	 * tree. From that point on, any new interrupt for that
	 * context will fail silently, which is ok. As mentioned
	 * above, that's not expected, but it could happen if the
	 * driver or AFU didn't do the right thing.
	 *
	 * There could still be a bottom half running, but we don't
	 * need to wait/flush, as it is managing a reference count on
	 * the mm it reads from the radix tree.
	 */
	pe_handle = pasid & SPA_PE_MASK;
	pe = spa->spa_mem + pe_handle;

	mutex_lock(&spa->spa_lock);

	if (!(be32_to_cpu(pe->software_state) & SPA_PE_VALID)) {
		rc = -EINVAL;
		goto unlock;
	}

	memset(pe, 0, sizeof(struct ocxl_process_element));
	/*
	 * The barrier makes sure the PE is removed from the SPA
	 * before we clear the NPU context cache below, so that the
	 * old PE cannot be reloaded erroneously.
	 */
	mb();

	/*
	 * hook to platform code
	 * On powerpc, the entry needs to be cleared from the context
	 * cache of the NPU.
	 */
	rc = pnv_ocxl_spa_remove_pe(link->platform_data, pe_handle);
	WARN_ON(rc);

	pe_data = radix_tree_delete(&spa->pe_tree, pe_handle);
	if (!pe_data) {
		WARN(1, "Couldn't find pe data when removing PE\n");
	} else {
		mm_context_remove_copro(pe_data->mm);
		mmdrop(pe_data->mm);
		/* defer the free: the fault handler may still hold it under RCU */
		kfree_rcu(pe_data, rcu);
	}
unlock:
	mutex_unlock(&spa->spa_lock);
	return rc;
}
604