1 // SPDX-License-Identifier: GPL-2.0+
2 /*
3  * TCE helpers for IODA PCI/PCIe on PowerNV platforms
4  *
5  * Copyright 2018 IBM Corp.
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License
9  * as published by the Free Software Foundation; either version
10  * 2 of the License, or (at your option) any later version.
11  */
12 
13 #include <linux/kernel.h>
14 #include <linux/iommu.h>
15 
16 #include <asm/iommu.h>
17 #include <asm/tce.h>
18 #include "pci.h"
19 
20 unsigned long pnv_ioda_parse_tce_sizes(struct pnv_phb *phb)
21 {
22 	struct pci_controller *hose = phb->hose;
23 	struct device_node *dn = hose->dn;
24 	unsigned long mask = 0;
25 	int i, rc, count;
26 	u32 val;
27 
28 	count = of_property_count_u32_elems(dn, "ibm,supported-tce-sizes");
29 	if (count <= 0) {
30 		mask = SZ_4K | SZ_64K;
31 		/* Add 16M for POWER8 by default */
32 		if (cpu_has_feature(CPU_FTR_ARCH_207S) &&
33 				!cpu_has_feature(CPU_FTR_ARCH_300))
34 			mask |= SZ_16M | SZ_256M;
35 		return mask;
36 	}
37 
38 	for (i = 0; i < count; i++) {
39 		rc = of_property_read_u32_index(dn, "ibm,supported-tce-sizes",
40 						i, &val);
41 		if (rc == 0)
42 			mask |= 1ULL << val;
43 	}
44 
45 	return mask;
46 }
47 
48 void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
49 		void *tce_mem, u64 tce_size,
50 		u64 dma_offset, unsigned int page_shift)
51 {
52 	tbl->it_blocksize = 16;
53 	tbl->it_base = (unsigned long)tce_mem;
54 	tbl->it_page_shift = page_shift;
55 	tbl->it_offset = dma_offset >> tbl->it_page_shift;
56 	tbl->it_index = 0;
57 	tbl->it_size = tce_size >> 3;
58 	tbl->it_busno = 0;
59 	tbl->it_type = TCE_PCI;
60 }
61 
62 static __be64 *pnv_alloc_tce_level(int nid, unsigned int shift)
63 {
64 	struct page *tce_mem = NULL;
65 	__be64 *addr;
66 
67 	tce_mem = alloc_pages_node(nid, GFP_ATOMIC | __GFP_NOWARN,
68 			shift - PAGE_SHIFT);
69 	if (!tce_mem) {
70 		pr_err("Failed to allocate a TCE memory, level shift=%d\n",
71 				shift);
72 		return NULL;
73 	}
74 	addr = page_address(tce_mem);
75 	memset(addr, 0, 1UL << shift);
76 
77 	return addr;
78 }
79 
80 static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr,
81 		unsigned long size, unsigned int levels);
82 
83 static __be64 *pnv_tce(struct iommu_table *tbl, bool user, long idx, bool alloc)
84 {
85 	__be64 *tmp = user ? tbl->it_userspace : (__be64 *) tbl->it_base;
86 	int  level = tbl->it_indirect_levels;
87 	const long shift = ilog2(tbl->it_level_size);
88 	unsigned long mask = (tbl->it_level_size - 1) << (level * shift);
89 
90 	while (level) {
91 		int n = (idx & mask) >> (level * shift);
92 		unsigned long oldtce, tce = be64_to_cpu(READ_ONCE(tmp[n]));
93 
94 		if (!tce) {
95 			__be64 *tmp2;
96 
97 			if (!alloc)
98 				return NULL;
99 
100 			tmp2 = pnv_alloc_tce_level(tbl->it_nid,
101 					ilog2(tbl->it_level_size) + 3);
102 			if (!tmp2)
103 				return NULL;
104 
105 			tce = __pa(tmp2) | TCE_PCI_READ | TCE_PCI_WRITE;
106 			oldtce = be64_to_cpu(cmpxchg(&tmp[n], 0,
107 					cpu_to_be64(tce)));
108 			if (oldtce) {
109 				pnv_pci_ioda2_table_do_free_pages(tmp2,
110 					ilog2(tbl->it_level_size) + 3, 1);
111 				tce = oldtce;
112 			}
113 		}
114 
115 		tmp = __va(tce & ~(TCE_PCI_READ | TCE_PCI_WRITE));
116 		idx &= ~mask;
117 		mask >>= shift;
118 		--level;
119 	}
120 
121 	return tmp + idx;
122 }
123 
124 int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
125 		unsigned long uaddr, enum dma_data_direction direction,
126 		unsigned long attrs)
127 {
128 	u64 proto_tce = iommu_direction_to_tce_perm(direction);
129 	u64 rpn = __pa(uaddr) >> tbl->it_page_shift;
130 	long i;
131 
132 	if (proto_tce & TCE_PCI_WRITE)
133 		proto_tce |= TCE_PCI_READ;
134 
135 	for (i = 0; i < npages; i++) {
136 		unsigned long newtce = proto_tce |
137 			((rpn + i) << tbl->it_page_shift);
138 		unsigned long idx = index - tbl->it_offset + i;
139 
140 		*(pnv_tce(tbl, false, idx, true)) = cpu_to_be64(newtce);
141 	}
142 
143 	return 0;
144 }
145 
146 #ifdef CONFIG_IOMMU_API
147 int pnv_tce_xchg(struct iommu_table *tbl, long index,
148 		unsigned long *hpa, enum dma_data_direction *direction,
149 		bool alloc)
150 {
151 	u64 proto_tce = iommu_direction_to_tce_perm(*direction);
152 	unsigned long newtce = *hpa | proto_tce, oldtce;
153 	unsigned long idx = index - tbl->it_offset;
154 	__be64 *ptce = NULL;
155 
156 	BUG_ON(*hpa & ~IOMMU_PAGE_MASK(tbl));
157 
158 	if (*direction == DMA_NONE) {
159 		ptce = pnv_tce(tbl, false, idx, false);
160 		if (!ptce) {
161 			*hpa = 0;
162 			return 0;
163 		}
164 	}
165 
166 	if (!ptce) {
167 		ptce = pnv_tce(tbl, false, idx, alloc);
168 		if (!ptce)
169 			return alloc ? H_HARDWARE : H_TOO_HARD;
170 	}
171 
172 	if (newtce & TCE_PCI_WRITE)
173 		newtce |= TCE_PCI_READ;
174 
175 	oldtce = be64_to_cpu(xchg(ptce, cpu_to_be64(newtce)));
176 	*hpa = oldtce & ~(TCE_PCI_READ | TCE_PCI_WRITE);
177 	*direction = iommu_tce_direction(oldtce);
178 
179 	return 0;
180 }
181 
182 __be64 *pnv_tce_useraddrptr(struct iommu_table *tbl, long index, bool alloc)
183 {
184 	if (WARN_ON_ONCE(!tbl->it_userspace))
185 		return NULL;
186 
187 	return pnv_tce(tbl, true, index - tbl->it_offset, alloc);
188 }
189 #endif
190 
191 void pnv_tce_free(struct iommu_table *tbl, long index, long npages)
192 {
193 	long i;
194 
195 	for (i = 0; i < npages; i++) {
196 		unsigned long idx = index - tbl->it_offset + i;
197 		__be64 *ptce = pnv_tce(tbl, false, idx,	false);
198 
199 		if (ptce)
200 			*ptce = cpu_to_be64(0);
201 		else
202 			/* Skip the rest of the level */
203 			i |= tbl->it_level_size - 1;
204 	}
205 }
206 
207 unsigned long pnv_tce_get(struct iommu_table *tbl, long index)
208 {
209 	__be64 *ptce = pnv_tce(tbl, false, index - tbl->it_offset, false);
210 
211 	if (!ptce)
212 		return 0;
213 
214 	return be64_to_cpu(*ptce);
215 }
216 
217 static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr,
218 		unsigned long size, unsigned int levels)
219 {
220 	const unsigned long addr_ul = (unsigned long) addr &
221 			~(TCE_PCI_READ | TCE_PCI_WRITE);
222 
223 	if (levels) {
224 		long i;
225 		u64 *tmp = (u64 *) addr_ul;
226 
227 		for (i = 0; i < size; ++i) {
228 			unsigned long hpa = be64_to_cpu(tmp[i]);
229 
230 			if (!(hpa & (TCE_PCI_READ | TCE_PCI_WRITE)))
231 				continue;
232 
233 			pnv_pci_ioda2_table_do_free_pages(__va(hpa), size,
234 					levels - 1);
235 		}
236 	}
237 
238 	free_pages(addr_ul, get_order(size << 3));
239 }
240 
241 void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl)
242 {
243 	const unsigned long size = tbl->it_indirect_levels ?
244 			tbl->it_level_size : tbl->it_size;
245 
246 	if (!tbl->it_size)
247 		return;
248 
249 	pnv_pci_ioda2_table_do_free_pages((__be64 *)tbl->it_base, size,
250 			tbl->it_indirect_levels);
251 	if (tbl->it_userspace) {
252 		pnv_pci_ioda2_table_do_free_pages(tbl->it_userspace, size,
253 				tbl->it_indirect_levels);
254 	}
255 }
256 
257 static __be64 *pnv_pci_ioda2_table_do_alloc_pages(int nid, unsigned int shift,
258 		unsigned int levels, unsigned long limit,
259 		unsigned long *current_offset, unsigned long *total_allocated)
260 {
261 	__be64 *addr, *tmp;
262 	unsigned long allocated = 1UL << shift;
263 	unsigned int entries = 1UL << (shift - 3);
264 	long i;
265 
266 	addr = pnv_alloc_tce_level(nid, shift);
267 	*total_allocated += allocated;
268 
269 	--levels;
270 	if (!levels) {
271 		*current_offset += allocated;
272 		return addr;
273 	}
274 
275 	for (i = 0; i < entries; ++i) {
276 		tmp = pnv_pci_ioda2_table_do_alloc_pages(nid, shift,
277 				levels, limit, current_offset, total_allocated);
278 		if (!tmp)
279 			break;
280 
281 		addr[i] = cpu_to_be64(__pa(tmp) |
282 				TCE_PCI_READ | TCE_PCI_WRITE);
283 
284 		if (*current_offset >= limit)
285 			break;
286 	}
287 
288 	return addr;
289 }
290 
291 long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
292 		__u32 page_shift, __u64 window_size, __u32 levels,
293 		bool alloc_userspace_copy, struct iommu_table *tbl)
294 {
295 	void *addr, *uas = NULL;
296 	unsigned long offset = 0, level_shift, total_allocated = 0;
297 	unsigned long total_allocated_uas = 0;
298 	const unsigned int window_shift = ilog2(window_size);
299 	unsigned int entries_shift = window_shift - page_shift;
300 	unsigned int table_shift = max_t(unsigned int, entries_shift + 3,
301 			PAGE_SHIFT);
302 	const unsigned long tce_table_size = 1UL << table_shift;
303 
304 	if (!levels || (levels > POWERNV_IOMMU_MAX_LEVELS))
305 		return -EINVAL;
306 
307 	if (!is_power_of_2(window_size))
308 		return -EINVAL;
309 
310 	/* Adjust direct table size from window_size and levels */
311 	entries_shift = (entries_shift + levels - 1) / levels;
312 	level_shift = entries_shift + 3;
313 	level_shift = max_t(unsigned int, level_shift, PAGE_SHIFT);
314 
315 	if ((level_shift - 3) * levels + page_shift >= 55)
316 		return -EINVAL;
317 
318 	/* Allocate TCE table */
319 	addr = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift,
320 			1, tce_table_size, &offset, &total_allocated);
321 
322 	/* addr==NULL means that the first level allocation failed */
323 	if (!addr)
324 		return -ENOMEM;
325 
326 	/*
327 	 * First level was allocated but some lower level failed as
328 	 * we did not allocate as much as we wanted,
329 	 * release partially allocated table.
330 	 */
331 	if (levels == 1 && offset < tce_table_size)
332 		goto free_tces_exit;
333 
334 	/* Allocate userspace view of the TCE table */
335 	if (alloc_userspace_copy) {
336 		offset = 0;
337 		uas = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift,
338 				1, tce_table_size, &offset,
339 				&total_allocated_uas);
340 		if (!uas)
341 			goto free_tces_exit;
342 		if (levels == 1 && (offset < tce_table_size ||
343 				total_allocated_uas != total_allocated))
344 			goto free_uas_exit;
345 	}
346 
347 	/* Setup linux iommu table */
348 	pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, bus_offset,
349 			page_shift);
350 	tbl->it_level_size = 1ULL << (level_shift - 3);
351 	tbl->it_indirect_levels = levels - 1;
352 	tbl->it_userspace = uas;
353 	tbl->it_nid = nid;
354 
355 	pr_debug("Created TCE table: ws=%08llx ts=%lx @%08llx base=%lx uas=%p levels=%d/%d\n",
356 			window_size, tce_table_size, bus_offset, tbl->it_base,
357 			tbl->it_userspace, 1, levels);
358 
359 	return 0;
360 
361 free_uas_exit:
362 	pnv_pci_ioda2_table_do_free_pages(uas,
363 			1ULL << (level_shift - 3), levels - 1);
364 free_tces_exit:
365 	pnv_pci_ioda2_table_do_free_pages(addr,
366 			1ULL << (level_shift - 3), levels - 1);
367 
368 	return -ENOMEM;
369 }
370 
371 void pnv_pci_unlink_table_and_group(struct iommu_table *tbl,
372 		struct iommu_table_group *table_group)
373 {
374 	long i;
375 	bool found;
376 	struct iommu_table_group_link *tgl;
377 
378 	if (!tbl || !table_group)
379 		return;
380 
381 	/* Remove link to a group from table's list of attached groups */
382 	found = false;
383 	list_for_each_entry_rcu(tgl, &tbl->it_group_list, next) {
384 		if (tgl->table_group == table_group) {
385 			list_del_rcu(&tgl->next);
386 			kfree_rcu(tgl, rcu);
387 			found = true;
388 			break;
389 		}
390 	}
391 	if (WARN_ON(!found))
392 		return;
393 
394 	/* Clean a pointer to iommu_table in iommu_table_group::tables[] */
395 	found = false;
396 	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
397 		if (table_group->tables[i] == tbl) {
398 			iommu_tce_table_put(tbl);
399 			table_group->tables[i] = NULL;
400 			found = true;
401 			break;
402 		}
403 	}
404 	WARN_ON(!found);
405 }
406 
407 long pnv_pci_link_table_and_group(int node, int num,
408 		struct iommu_table *tbl,
409 		struct iommu_table_group *table_group)
410 {
411 	struct iommu_table_group_link *tgl = NULL;
412 
413 	if (WARN_ON(!tbl || !table_group))
414 		return -EINVAL;
415 
416 	tgl = kzalloc_node(sizeof(struct iommu_table_group_link), GFP_KERNEL,
417 			node);
418 	if (!tgl)
419 		return -ENOMEM;
420 
421 	tgl->table_group = table_group;
422 	list_add_rcu(&tgl->next, &tbl->it_group_list);
423 
424 	table_group->tables[num] = iommu_tce_table_get(tbl);
425 
426 	return 0;
427 }
428