xref: /openbmc/linux/arch/powerpc/platforms/powernv/ocxl.c (revision 023e41632e065d49bcbe31b3c4b336217f96a271)
1 // SPDX-License-Identifier: GPL-2.0+
2 // Copyright 2017 IBM Corp.
3 #include <asm/pnv-ocxl.h>
4 #include <asm/opal.h>
5 #include <asm/xive.h>
6 #include <misc/ocxl-config.h>
7 #include "pci.h"
8 
/* Value reported to callers of pnv_ocxl_get_tl_cap() as the P9 receive capability */
#define PNV_OCXL_TL_P9_RECV_CAP		0x000000000000000Full
/* Number of actags available per link on POWER9 */
#define PNV_OCXL_ACTAG_MAX		64
/* PASIDs are 20-bit, but on P9, NPU can only handle 15 bits */
#define PNV_OCXL_PASID_BITS		15
#define PNV_OCXL_PASID_MAX		((1 << PNV_OCXL_PASID_BITS) - 1)

/*
 * Fields of the dword read at OCXL_DVSEC_FUNC_OFF_INDEX in the function
 * DVSEC. Bit 31 is built from an unsigned constant: shifting a signed 1
 * into the sign bit is undefined behaviour in C.
 */
#define AFU_PRESENT (1U << 31)
#define AFU_INDEX_MASK 0x3F000000
#define AFU_INDEX_SHIFT 24
/* Mask applied to the word read at OCXL_DVSEC_AFU_CTRL_ACTAG_SUP */
#define ACTAG_MASK 0xFFF
19 
20 
/* A contiguous range of actags handed to one PCI function */
struct actag_range {
	u16 start;	/* first actag of the range */
	u16 count;	/* number of actags in the range */
};
25 
/*
 * Per-device actag bookkeeping, shared by all the PCI functions that
 * have the same domain, bus and slot. The per-function arrays are
 * indexed by PCI function number.
 */
struct npu_link {
	struct list_head list;		/* entry in links_list */
	int domain;			/* PCI domain of the device */
	int bus;			/* PCI bus number of the device */
	int dev;			/* PCI slot of the device */
	u16 fn_desired_actags[8];	/* sum of the actags desired by each function's AFUs */
	struct actag_range fn_actags[8];	/* range assigned to each function */
	bool assignment_done;		/* set once assign_actags() has filled fn_actags */
};
/* List of the known links; users in this file take links_list_lock around it */
static struct list_head links_list = LIST_HEAD_INIT(links_list);
static DEFINE_MUTEX(links_list_lock);
37 
38 
39 /*
40  * opencapi actags handling:
41  *
42  * When sending commands, the opencapi device references the memory
43  * context it's targeting with an 'actag', which is really an alias
44  * for a (BDF, pasid) combination. When it receives a command, the NPU
45  * must do a lookup of the actag to identify the memory context. The
46  * hardware supports a finite number of actags per link (64 for
47  * POWER9).
48  *
49  * The device can carry multiple functions, and each function can have
50  * multiple AFUs. Each AFU advertises in its config space the number
51  * of desired actags. The host must configure in the config space of
52  * the AFU how many actags the AFU is really allowed to use (which can
53  * be less than what the AFU desires).
54  *
55  * When a PCI function is probed by the driver, it has no visibility
56  * about the other PCI functions and how many actags they'd like,
57  * which makes it impossible to distribute actags fairly among AFUs.
58  *
59  * Unfortunately, the only way to know how many actags a function
60  * desires is by looking at the data for each AFU in the config space
 * and adding them up. Similarly, the only way to know how many actags
62  * all the functions of the physical device desire is by adding the
63  * previously computed function counts. Then we can match that against
64  * what the hardware supports.
65  *
66  * To get a comprehensive view, we use a 'pci fixup': at the end of
67  * PCI enumeration, each function counts how many actags its AFUs
68  * desire and we save it in a 'npu_link' structure, shared between all
 * the PCI functions of the same device. Therefore, when the first
70  * function is probed by the driver, we can get an idea of the total
71  * count of desired actags for the device, and assign the actags to
72  * the AFUs, by pro-rating if needed.
73  */
74 
75 static int find_dvsec_from_pos(struct pci_dev *dev, int dvsec_id, int pos)
76 {
77 	int vsec = pos;
78 	u16 vendor, id;
79 
80 	while ((vsec = pci_find_next_ext_capability(dev, vsec,
81 						    OCXL_EXT_CAP_ID_DVSEC))) {
82 		pci_read_config_word(dev, vsec + OCXL_DVSEC_VENDOR_OFFSET,
83 				&vendor);
84 		pci_read_config_word(dev, vsec + OCXL_DVSEC_ID_OFFSET, &id);
85 		if (vendor == PCI_VENDOR_ID_IBM && id == dvsec_id)
86 			return vsec;
87 	}
88 	return 0;
89 }
90 
91 static int find_dvsec_afu_ctrl(struct pci_dev *dev, u8 afu_idx)
92 {
93 	int vsec = 0;
94 	u8 idx;
95 
96 	while ((vsec = find_dvsec_from_pos(dev, OCXL_DVSEC_AFU_CTRL_ID,
97 					   vsec))) {
98 		pci_read_config_byte(dev, vsec + OCXL_DVSEC_AFU_CTRL_AFU_IDX,
99 				&idx);
100 		if (idx == afu_idx)
101 			return vsec;
102 	}
103 	return 0;
104 }
105 
106 static int get_max_afu_index(struct pci_dev *dev, int *afu_idx)
107 {
108 	int pos;
109 	u32 val;
110 
111 	pos = find_dvsec_from_pos(dev, OCXL_DVSEC_FUNC_ID, 0);
112 	if (!pos)
113 		return -ESRCH;
114 
115 	pci_read_config_dword(dev, pos + OCXL_DVSEC_FUNC_OFF_INDEX, &val);
116 	if (val & AFU_PRESENT)
117 		*afu_idx = (val & AFU_INDEX_MASK) >> AFU_INDEX_SHIFT;
118 	else
119 		*afu_idx = -1;
120 	return 0;
121 }
122 
123 static int get_actag_count(struct pci_dev *dev, int afu_idx, int *actag)
124 {
125 	int pos;
126 	u16 actag_sup;
127 
128 	pos = find_dvsec_afu_ctrl(dev, afu_idx);
129 	if (!pos)
130 		return -ESRCH;
131 
132 	pci_read_config_word(dev, pos + OCXL_DVSEC_AFU_CTRL_ACTAG_SUP,
133 			&actag_sup);
134 	*actag = actag_sup & ACTAG_MASK;
135 	return 0;
136 }
137 
138 static struct npu_link *find_link(struct pci_dev *dev)
139 {
140 	struct npu_link *link;
141 
142 	list_for_each_entry(link, &links_list, list) {
143 		/* The functions of a device all share the same link */
144 		if (link->domain == pci_domain_nr(dev->bus) &&
145 			link->bus == dev->bus->number &&
146 			link->dev == PCI_SLOT(dev->devfn)) {
147 			return link;
148 		}
149 	}
150 
151 	/* link doesn't exist yet. Allocate one */
152 	link = kzalloc(sizeof(struct npu_link), GFP_KERNEL);
153 	if (!link)
154 		return NULL;
155 	link->domain = pci_domain_nr(dev->bus);
156 	link->bus = dev->bus->number;
157 	link->dev = PCI_SLOT(dev->devfn);
158 	list_add(&link->list, &links_list);
159 	return link;
160 }
161 
162 static void pnv_ocxl_fixup_actag(struct pci_dev *dev)
163 {
164 	struct pci_controller *hose = pci_bus_to_host(dev->bus);
165 	struct pnv_phb *phb = hose->private_data;
166 	struct npu_link *link;
167 	int rc, afu_idx = -1, i, actag;
168 
169 	if (!machine_is(powernv))
170 		return;
171 
172 	if (phb->type != PNV_PHB_NPU_OCAPI)
173 		return;
174 
175 	mutex_lock(&links_list_lock);
176 
177 	link = find_link(dev);
178 	if (!link) {
179 		dev_warn(&dev->dev, "couldn't update actag information\n");
180 		mutex_unlock(&links_list_lock);
181 		return;
182 	}
183 
184 	/*
185 	 * Check how many actags are desired for the AFUs under that
186 	 * function and add it to the count for the link
187 	 */
188 	rc = get_max_afu_index(dev, &afu_idx);
189 	if (rc) {
190 		/* Most likely an invalid config space */
191 		dev_dbg(&dev->dev, "couldn't find AFU information\n");
192 		afu_idx = -1;
193 	}
194 
195 	link->fn_desired_actags[PCI_FUNC(dev->devfn)] = 0;
196 	for (i = 0; i <= afu_idx; i++) {
197 		/*
198 		 * AFU index 'holes' are allowed. So don't fail if we
199 		 * can't read the actag info for an index
200 		 */
201 		rc = get_actag_count(dev, i, &actag);
202 		if (rc)
203 			continue;
204 		link->fn_desired_actags[PCI_FUNC(dev->devfn)] += actag;
205 	}
206 	dev_dbg(&dev->dev, "total actags for function: %d\n",
207 		link->fn_desired_actags[PCI_FUNC(dev->devfn)]);
208 
209 	mutex_unlock(&links_list_lock);
210 }
211 DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pnv_ocxl_fixup_actag);
212 
213 static u16 assign_fn_actags(u16 desired, u16 total)
214 {
215 	u16 count;
216 
217 	if (total <= PNV_OCXL_ACTAG_MAX)
218 		count = desired;
219 	else
220 		count = PNV_OCXL_ACTAG_MAX * desired / total;
221 
222 	return count;
223 }
224 
225 static void assign_actags(struct npu_link *link)
226 {
227 	u16 actag_count, range_start = 0, total_desired = 0;
228 	int i;
229 
230 	for (i = 0; i < 8; i++)
231 		total_desired += link->fn_desired_actags[i];
232 
233 	for (i = 0; i < 8; i++) {
234 		if (link->fn_desired_actags[i]) {
235 			actag_count = assign_fn_actags(
236 				link->fn_desired_actags[i],
237 				total_desired);
238 			link->fn_actags[i].start = range_start;
239 			link->fn_actags[i].count = actag_count;
240 			range_start += actag_count;
241 			WARN_ON(range_start >= PNV_OCXL_ACTAG_MAX);
242 		}
243 		pr_debug("link %x:%x:%x fct %d actags: start=%d count=%d (desired=%d)\n",
244 			link->domain, link->bus, link->dev, i,
245 			link->fn_actags[i].start, link->fn_actags[i].count,
246 			link->fn_desired_actags[i]);
247 	}
248 	link->assignment_done = true;
249 }
250 
251 int pnv_ocxl_get_actag(struct pci_dev *dev, u16 *base, u16 *enabled,
252 		u16 *supported)
253 {
254 	struct npu_link *link;
255 
256 	mutex_lock(&links_list_lock);
257 
258 	link = find_link(dev);
259 	if (!link) {
260 		dev_err(&dev->dev, "actag information not found\n");
261 		mutex_unlock(&links_list_lock);
262 		return -ENODEV;
263 	}
264 	/*
265 	 * On p9, we only have 64 actags per link, so they must be
266 	 * shared by all the functions of the same adapter. We counted
267 	 * the desired actag counts during PCI enumeration, so that we
268 	 * can allocate a pro-rated number of actags to each function.
269 	 */
270 	if (!link->assignment_done)
271 		assign_actags(link);
272 
273 	*base      = link->fn_actags[PCI_FUNC(dev->devfn)].start;
274 	*enabled   = link->fn_actags[PCI_FUNC(dev->devfn)].count;
275 	*supported = link->fn_desired_actags[PCI_FUNC(dev->devfn)];
276 
277 	mutex_unlock(&links_list_lock);
278 	return 0;
279 }
280 EXPORT_SYMBOL_GPL(pnv_ocxl_get_actag);
281 
282 int pnv_ocxl_get_pasid_count(struct pci_dev *dev, int *count)
283 {
284 	struct npu_link *link;
285 	int i, rc = -EINVAL;
286 
287 	/*
288 	 * The number of PASIDs (process address space ID) which can
289 	 * be used by a function depends on how many functions exist
290 	 * on the device. The NPU needs to be configured to know how
291 	 * many bits are available to PASIDs and how many are to be
292 	 * used by the function BDF indentifier.
293 	 *
294 	 * We only support one AFU-carrying function for now.
295 	 */
296 	mutex_lock(&links_list_lock);
297 
298 	link = find_link(dev);
299 	if (!link) {
300 		dev_err(&dev->dev, "actag information not found\n");
301 		mutex_unlock(&links_list_lock);
302 		return -ENODEV;
303 	}
304 
305 	for (i = 0; i < 8; i++)
306 		if (link->fn_desired_actags[i] && (i == PCI_FUNC(dev->devfn))) {
307 			*count = PNV_OCXL_PASID_MAX;
308 			rc = 0;
309 			break;
310 		}
311 
312 	mutex_unlock(&links_list_lock);
313 	dev_dbg(&dev->dev, "%d PASIDs available for function\n",
314 		rc ? 0 : *count);
315 	return rc;
316 }
317 EXPORT_SYMBOL_GPL(pnv_ocxl_get_pasid_count);
318 
319 static void set_templ_rate(unsigned int templ, unsigned int rate, char *buf)
320 {
321 	int shift, idx;
322 
323 	WARN_ON(templ > PNV_OCXL_TL_MAX_TEMPLATE);
324 	idx = (PNV_OCXL_TL_MAX_TEMPLATE - templ) / 2;
325 	shift = 4 * (1 - ((PNV_OCXL_TL_MAX_TEMPLATE - templ) % 2));
326 	buf[idx] |= rate << shift;
327 }
328 
329 int pnv_ocxl_get_tl_cap(struct pci_dev *dev, long *cap,
330 			char *rate_buf, int rate_buf_size)
331 {
332 	if (rate_buf_size != PNV_OCXL_TL_RATE_BUF_SIZE)
333 		return -EINVAL;
334 	/*
335 	 * The TL capabilities are a characteristic of the NPU, so
336 	 * we go with hard-coded values.
337 	 *
338 	 * The receiving rate of each template is encoded on 4 bits.
339 	 *
340 	 * On P9:
341 	 * - templates 0 -> 3 are supported
342 	 * - templates 0, 1 and 3 have a 0 receiving rate
343 	 * - template 2 has receiving rate of 1 (extra cycle)
344 	 */
345 	memset(rate_buf, 0, rate_buf_size);
346 	set_templ_rate(2, 1, rate_buf);
347 	*cap = PNV_OCXL_TL_P9_RECV_CAP;
348 	return 0;
349 }
350 EXPORT_SYMBOL_GPL(pnv_ocxl_get_tl_cap);
351 
352 int pnv_ocxl_set_tl_conf(struct pci_dev *dev, long cap,
353 			uint64_t rate_buf_phys, int rate_buf_size)
354 {
355 	struct pci_controller *hose = pci_bus_to_host(dev->bus);
356 	struct pnv_phb *phb = hose->private_data;
357 	int rc;
358 
359 	if (rate_buf_size != PNV_OCXL_TL_RATE_BUF_SIZE)
360 		return -EINVAL;
361 
362 	rc = opal_npu_tl_set(phb->opal_id, dev->devfn, cap,
363 			rate_buf_phys, rate_buf_size);
364 	if (rc) {
365 		dev_err(&dev->dev, "Can't configure host TL: %d\n", rc);
366 		return -EINVAL;
367 	}
368 	return 0;
369 }
370 EXPORT_SYMBOL_GPL(pnv_ocxl_set_tl_conf);
371 
372 int pnv_ocxl_get_xsl_irq(struct pci_dev *dev, int *hwirq)
373 {
374 	int rc;
375 
376 	rc = of_property_read_u32(dev->dev.of_node, "ibm,opal-xsl-irq", hwirq);
377 	if (rc) {
378 		dev_err(&dev->dev,
379 			"Can't get translation interrupt for device\n");
380 		return rc;
381 	}
382 	return 0;
383 }
384 EXPORT_SYMBOL_GPL(pnv_ocxl_get_xsl_irq);
385 
386 void pnv_ocxl_unmap_xsl_regs(void __iomem *dsisr, void __iomem *dar,
387 			void __iomem *tfc, void __iomem *pe_handle)
388 {
389 	iounmap(dsisr);
390 	iounmap(dar);
391 	iounmap(tfc);
392 	iounmap(pe_handle);
393 }
394 EXPORT_SYMBOL_GPL(pnv_ocxl_unmap_xsl_regs);
395 
396 int pnv_ocxl_map_xsl_regs(struct pci_dev *dev, void __iomem **dsisr,
397 			void __iomem **dar, void __iomem **tfc,
398 			void __iomem **pe_handle)
399 {
400 	u64 reg;
401 	int i, j, rc = 0;
402 	void __iomem *regs[4];
403 
404 	/*
405 	 * opal stores the mmio addresses of the DSISR, DAR, TFC and
406 	 * PE_HANDLE registers in a device tree property, in that
407 	 * order
408 	 */
409 	for (i = 0; i < 4; i++) {
410 		rc = of_property_read_u64_index(dev->dev.of_node,
411 						"ibm,opal-xsl-mmio", i, &reg);
412 		if (rc)
413 			break;
414 		regs[i] = ioremap(reg, 8);
415 		if (!regs[i]) {
416 			rc = -EINVAL;
417 			break;
418 		}
419 	}
420 	if (rc) {
421 		dev_err(&dev->dev, "Can't map translation mmio registers\n");
422 		for (j = i - 1; j >= 0; j--)
423 			iounmap(regs[j]);
424 	} else {
425 		*dsisr = regs[0];
426 		*dar = regs[1];
427 		*tfc = regs[2];
428 		*pe_handle = regs[3];
429 	}
430 	return rc;
431 }
432 EXPORT_SYMBOL_GPL(pnv_ocxl_map_xsl_regs);
433 
/*
 * Opaque platform data handed out by pnv_ocxl_spa_setup(): what is
 * needed to address the same function in later OPAL calls.
 */
struct spa_data {
	u64 phb_opal_id;	/* opal_id of the PHB the function sits behind */
	u32 bdfn;		/* (bus number << 8) | devfn of the function */
};
438 
439 int pnv_ocxl_spa_setup(struct pci_dev *dev, void *spa_mem, int PE_mask,
440 		void **platform_data)
441 {
442 	struct pci_controller *hose = pci_bus_to_host(dev->bus);
443 	struct pnv_phb *phb = hose->private_data;
444 	struct spa_data *data;
445 	u32 bdfn;
446 	int rc;
447 
448 	data = kzalloc(sizeof(*data), GFP_KERNEL);
449 	if (!data)
450 		return -ENOMEM;
451 
452 	bdfn = (dev->bus->number << 8) | dev->devfn;
453 	rc = opal_npu_spa_setup(phb->opal_id, bdfn, virt_to_phys(spa_mem),
454 				PE_mask);
455 	if (rc) {
456 		dev_err(&dev->dev, "Can't setup Shared Process Area: %d\n", rc);
457 		kfree(data);
458 		return rc;
459 	}
460 	data->phb_opal_id = phb->opal_id;
461 	data->bdfn = bdfn;
462 	*platform_data = (void *) data;
463 	return 0;
464 }
465 EXPORT_SYMBOL_GPL(pnv_ocxl_spa_setup);
466 
467 void pnv_ocxl_spa_release(void *platform_data)
468 {
469 	struct spa_data *data = (struct spa_data *) platform_data;
470 	int rc;
471 
472 	rc = opal_npu_spa_setup(data->phb_opal_id, data->bdfn, 0, 0);
473 	WARN_ON(rc);
474 	kfree(data);
475 }
476 EXPORT_SYMBOL_GPL(pnv_ocxl_spa_release);
477 
478 int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data, int pe_handle)
479 {
480 	struct spa_data *data = (struct spa_data *) platform_data;
481 	int rc;
482 
483 	rc = opal_npu_spa_clear_cache(data->phb_opal_id, data->bdfn, pe_handle);
484 	return rc;
485 }
486 EXPORT_SYMBOL_GPL(pnv_ocxl_spa_remove_pe_from_cache);
487 
488 int pnv_ocxl_alloc_xive_irq(u32 *irq, u64 *trigger_addr)
489 {
490 	__be64 flags, trigger_page;
491 	s64 rc;
492 	u32 hwirq;
493 
494 	hwirq = xive_native_alloc_irq();
495 	if (!hwirq)
496 		return -ENOENT;
497 
498 	rc = opal_xive_get_irq_info(hwirq, &flags, NULL, &trigger_page, NULL,
499 				NULL);
500 	if (rc || !trigger_page) {
501 		xive_native_free_irq(hwirq);
502 		return -ENOENT;
503 	}
504 	*irq = hwirq;
505 	*trigger_addr = be64_to_cpu(trigger_page);
506 	return 0;
507 
508 }
509 EXPORT_SYMBOL_GPL(pnv_ocxl_alloc_xive_irq);
510 
/*
 * Release the interrupt @irq by handing it to xive_native_free_irq();
 * counterpart of pnv_ocxl_alloc_xive_irq().
 */
void pnv_ocxl_free_xive_irq(u32 irq)
{
	xive_native_free_irq(irq);
}
EXPORT_SYMBOL_GPL(pnv_ocxl_free_xive_irq);
516