// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *  IOMMU helpers in MMU context.
 *
 *  Copyright (C) 2015 IBM Corp. <aik@ozlabs.ru>
 */

#include <linux/sched/signal.h>
#include <linux/slab.h>
#include <linux/rculist.h>
#include <linux/vmalloc.h>
#include <linux/mutex.h>
#include <linux/migrate.h>
#include <linux/hugetlb.h>
#include <linux/swap.h>
#include <linux/sizes.h>
#include <asm/mmu_context.h>
#include <asm/pte-walk.h>
#include <linux/mm_inline.h>

static DEFINE_MUTEX(mem_list_mutex);

#define MM_IOMMU_TABLE_GROUP_PAGE_DIRTY	0x1
#define MM_IOMMU_TABLE_GROUP_PAGE_MASK	~(SZ_4K - 1)

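/*
 * Describes one preregistered chunk of userspace memory. Entries live on
 * mm->context.iommu_group_mem_list (an RCU list; additions and removals
 * are serialized by mem_list_mutex). @used counts mm_iommu_new()/
 * mm_iommu_get() references, @mapped counts active IOMMU mappings. For
 * regular memory each hpas[] element holds the page's physical address;
 * bit 0 (MM_IOMMU_TABLE_GROUP_PAGE_DIRTY) marks the page dirty and is
 * masked off with MM_IOMMU_TABLE_GROUP_PAGE_MASK when translating.
 */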
struct mm_iommu_table_group_mem_t {
	struct list_head next;
	struct rcu_head rcu;
	unsigned long used;
	atomic64_t mapped;
	unsigned int pageshift;
	u64 ua;			/* userspace address */
	u64 entries;		/* number of entries in hpas/hpages[] */
	/*
	 * In mm_iommu_do_alloc() we temporarily use this to store
	 * the struct page addresses.
	 *
	 * We need to convert ua to hpa in real mode. Make it
	 * simpler by storing the physical address.
	 */
	union {
		struct page **hpages;	/* vmalloc'ed */
		phys_addr_t *hpas;
	};
#define MM_IOMMU_TABLE_INVALID_HPA	((uint64_t)-1)
	u64 dev_hpa;		/* Device memory base address */
};

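/*
 * Charge (@incr == true) or uncharge @npages to the mm's RLIMIT_MEMLOCK
 * accounting in mm->locked_vm. Takes mmap_sem for writing, so callers
 * must not already hold it.
 */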
static long mm_iommu_adjust_locked_vm(struct mm_struct *mm,
		unsigned long npages, bool incr)
{
	long ret = 0, locked, lock_limit;

	if (!npages)
		return 0;

	down_write(&mm->mmap_sem);

	if (incr) {
		locked = mm->locked_vm + npages;
		lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
		if (locked > lock_limit && !capable(CAP_IPC_LOCK))
			ret = -ENOMEM;
		else
			mm->locked_vm += npages;
	} else {
		if (WARN_ON_ONCE(npages > mm->locked_vm))
			npages = mm->locked_vm;
		mm->locked_vm -= npages;
	}

	pr_debug("[%d] RLIMIT_MEMLOCK HASH64 %c%ld %ld/%ld\n",
			current ? current->pid : 0,
			incr ? '+' : '-',
			npages << PAGE_SHIFT,
			mm->locked_vm << PAGE_SHIFT,
			rlimit(RLIMIT_MEMLOCK));
	up_write(&mm->mmap_sem);

	return ret;
}

bool mm_iommu_preregistered(struct mm_struct *mm)
{
	return !list_empty(&mm->context.iommu_group_mem_list);
}
EXPORT_SYMBOL_GPL(mm_iommu_preregistered);

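/*
 * Common worker for mm_iommu_new() and mm_iommu_newdev(): charges the
 * pages to RLIMIT_MEMLOCK, long-term pins them with get_user_pages()
 * (skipped for device memory), derives the largest usable IOMMU page
 * shift from the region's alignment and hugetlb backing, rejects ranges
 * overlapping an existing registration and publishes the new descriptor
 * on the per-mm RCU list.
 */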
static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua,
			      unsigned long entries, unsigned long dev_hpa,
			      struct mm_iommu_table_group_mem_t **pmem)
{
	struct mm_iommu_table_group_mem_t *mem, *mem2;
	long i, ret, locked_entries = 0, pinned = 0;
	unsigned int pageshift;
	unsigned long entry, chunk;

	if (dev_hpa == MM_IOMMU_TABLE_INVALID_HPA) {
		ret = mm_iommu_adjust_locked_vm(mm, entries, true);
		if (ret)
			return ret;

		locked_entries = entries;
	}

	mem = kzalloc(sizeof(*mem), GFP_KERNEL);
	if (!mem) {
		ret = -ENOMEM;
		goto unlock_exit;
	}

	if (dev_hpa != MM_IOMMU_TABLE_INVALID_HPA) {
		mem->pageshift = __ffs(dev_hpa | (entries << PAGE_SHIFT));
		mem->dev_hpa = dev_hpa;
		goto good_exit;
	}
	mem->dev_hpa = MM_IOMMU_TABLE_INVALID_HPA;

	/*
	 * As a starting point for the maximum page size calculation,
	 * use the natural alignment of @ua and @entries to allow IOMMU
	 * pages smaller than huge pages but still bigger than PAGE_SIZE.
	 */
	mem->pageshift = __ffs(ua | (entries << PAGE_SHIFT));
	mem->hpas = vzalloc(array_size(entries, sizeof(mem->hpas[0])));
	if (!mem->hpas) {
		kfree(mem);
		ret = -ENOMEM;
		goto unlock_exit;
	}

	down_read(&mm->mmap_sem);
	chunk = (1UL << (PAGE_SHIFT + MAX_ORDER - 1)) /
			sizeof(struct vm_area_struct *);
	chunk = min(chunk, entries);
	for (entry = 0; entry < entries; entry += chunk) {
		unsigned long n = min(entries - entry, chunk);

		ret = get_user_pages(ua + (entry << PAGE_SHIFT), n,
				FOLL_WRITE | FOLL_LONGTERM,
				mem->hpages + entry, NULL);
		if (ret == n) {
			pinned += n;
			continue;
		}
		if (ret > 0)
			pinned += ret;
		break;
	}
	up_read(&mm->mmap_sem);
	if (pinned != entries) {
		if (!ret)
			ret = -EFAULT;
		goto free_exit;
	}

	pageshift = PAGE_SHIFT;
	for (i = 0; i < entries; ++i) {
		struct page *page = mem->hpages[i];

		/*
		 * Allow IOMMU pages larger than 64k, but only if we are
		 * backed by hugetlb.
		 */
		if ((mem->pageshift > PAGE_SHIFT) && PageHuge(page)) {
			struct page *head = compound_head(page);

			pageshift = compound_order(head) + PAGE_SHIFT;
		}
		mem->pageshift = min(mem->pageshift, pageshift);
		/*
		 * We don't need the struct page reference any more;
		 * switch to the physical address.
		 */
		mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT;
	}

good_exit:
	atomic64_set(&mem->mapped, 1);
	mem->used = 1;
	mem->ua = ua;
	mem->entries = entries;

	mutex_lock(&mem_list_mutex);

	list_for_each_entry_rcu(mem2, &mm->context.iommu_group_mem_list, next) {
		/* Overlap? */
		if ((mem2->ua < (ua + (entries << PAGE_SHIFT))) &&
				(ua < (mem2->ua +
				       (mem2->entries << PAGE_SHIFT)))) {
			ret = -EINVAL;
			mutex_unlock(&mem_list_mutex);
			goto free_exit;
		}
	}

	list_add_rcu(&mem->next, &mm->context.iommu_group_mem_list);

	mutex_unlock(&mem_list_mutex);

	*pmem = mem;

	return 0;

free_exit:
	/* free the references taken */
	for (i = 0; i < pinned; i++)
		put_page(mem->hpages[i]);

	vfree(mem->hpas);
	kfree(mem);

unlock_exit:
	mm_iommu_adjust_locked_vm(mm, locked_entries, false);

	return ret;
}

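/*
 * Preregistration entry points. A caller such as the VFIO SPAPR TCE
 * driver pins a region once, translates userspace addresses while it
 * programs the IOMMU, then drops the registration. A minimal sketch
 * (illustrative names, error handling trimmed; program_tce()/tbl are
 * made up for the example):
 *
 *	struct mm_iommu_table_group_mem_t *mem;
 *	unsigned long hpa;
 *	long ret;
 *
 *	ret = mm_iommu_new(current->mm, ua, entries, &mem);
 *	if (ret)
 *		return ret;
 *
 *	if (!mm_iommu_ua_to_hpa(mem, ua, tbl_pageshift, &hpa))
 *		program_tce(tbl, hpa);
 *
 *	ret = mm_iommu_put(current->mm, mem);
 */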
long mm_iommu_new(struct mm_struct *mm, unsigned long ua, unsigned long entries,
		struct mm_iommu_table_group_mem_t **pmem)
{
	return mm_iommu_do_alloc(mm, ua, entries, MM_IOMMU_TABLE_INVALID_HPA,
			pmem);
}
EXPORT_SYMBOL_GPL(mm_iommu_new);

long mm_iommu_newdev(struct mm_struct *mm, unsigned long ua,
		unsigned long entries, unsigned long dev_hpa,
		struct mm_iommu_table_group_mem_t **pmem)
{
	return mm_iommu_do_alloc(mm, ua, entries, dev_hpa, pmem);
}
EXPORT_SYMBOL_GPL(mm_iommu_newdev);

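/*
 * Drop the long-term pins taken in mm_iommu_do_alloc(). If an entry has
 * the dirty bit set (see mm_iommu_ua_mark_dirty_rm()), transfer it to the
 * struct page before releasing the reference.
 */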
static void mm_iommu_unpin(struct mm_iommu_table_group_mem_t *mem)
{
	long i;
	struct page *page = NULL;

	if (!mem->hpas)
		return;

	for (i = 0; i < mem->entries; ++i) {
		if (!mem->hpas[i])
			continue;

		page = pfn_to_page(mem->hpas[i] >> PAGE_SHIFT);
		if (!page)
			continue;

		if (mem->hpas[i] & MM_IOMMU_TABLE_GROUP_PAGE_DIRTY)
			SetPageDirty(page);

		put_page(page);
		mem->hpas[i] = 0;
	}
}

static void mm_iommu_do_free(struct mm_iommu_table_group_mem_t *mem)
{
	mm_iommu_unpin(mem);
	vfree(mem->hpas);
	kfree(mem);
}

static void mm_iommu_free(struct rcu_head *head)
{
	struct mm_iommu_table_group_mem_t *mem = container_of(head,
			struct mm_iommu_table_group_mem_t, rcu);

	mm_iommu_do_free(mem);
}

static void mm_iommu_release(struct mm_iommu_table_group_mem_t *mem)
{
	list_del_rcu(&mem->next);
	call_rcu(&mem->rcu, mm_iommu_free);
}

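/*
 * Drop one usage reference. The region is torn down only when this was
 * the last reference and no IOMMU mappings remain: if @mapped cannot be
 * flipped from 1 to 0, the reference is restored and -EBUSY is returned.
 * Freeing goes through call_rcu() so concurrent lockless lookups stay
 * safe.
 */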
long mm_iommu_put(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mem)
{
	long ret = 0;
	unsigned long unlock_entries = 0;

	mutex_lock(&mem_list_mutex);

	if (mem->used == 0) {
		ret = -ENOENT;
		goto unlock_exit;
	}

	--mem->used;
	/* There are still users, exit */
	if (mem->used)
		goto unlock_exit;

	/* Are there still mappings? */
	if (atomic64_cmpxchg(&mem->mapped, 1, 0) != 1) {
		++mem->used;
		ret = -EBUSY;
		goto unlock_exit;
	}

	if (mem->dev_hpa == MM_IOMMU_TABLE_INVALID_HPA)
		unlock_entries = mem->entries;

	/* @mapped became 0 so now mappings are disabled, release the region */
	mm_iommu_release(mem);

unlock_exit:
	mutex_unlock(&mem_list_mutex);

	mm_iommu_adjust_locked_vm(mm, unlock_entries, false);

	return ret;
}
EXPORT_SYMBOL_GPL(mm_iommu_put);

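/*
 * Find the registration fully covering [ua, ua + size). mm_iommu_lookup()
 * walks the RCU list; mm_iommu_lookup_rm() below is the real mode variant
 * and uses the lockless iterator, which avoids RCU read-side machinery
 * that is not usable in real mode.
 */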
struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct *mm,
		unsigned long ua, unsigned long size)
{
	struct mm_iommu_table_group_mem_t *mem, *ret = NULL;

	list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) {
		if ((mem->ua <= ua) &&
				(ua + size <= mem->ua +
				 (mem->entries << PAGE_SHIFT))) {
			ret = mem;
			break;
		}
	}

	return ret;
}
EXPORT_SYMBOL_GPL(mm_iommu_lookup);

struct mm_iommu_table_group_mem_t *mm_iommu_lookup_rm(struct mm_struct *mm,
		unsigned long ua, unsigned long size)
{
	struct mm_iommu_table_group_mem_t *mem, *ret = NULL;

	list_for_each_entry_lockless(mem, &mm->context.iommu_group_mem_list,
			next) {
		if ((mem->ua <= ua) &&
				(ua + size <= mem->ua +
				 (mem->entries << PAGE_SHIFT))) {
			ret = mem;
			break;
		}
	}

	return ret;
}

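/*
 * Look up a registration by its exact @ua/@entries pair and take an extra
 * usage reference; the caller releases it with mm_iommu_put().
 */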
struct mm_iommu_table_group_mem_t *mm_iommu_get(struct mm_struct *mm,
		unsigned long ua, unsigned long entries)
{
	struct mm_iommu_table_group_mem_t *mem, *ret = NULL;

	mutex_lock(&mem_list_mutex);

	list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) {
		if ((mem->ua == ua) && (mem->entries == entries)) {
			ret = mem;
			++mem->used;
			break;
		}
	}

	mutex_unlock(&mem_list_mutex);

	return ret;
}
EXPORT_SYMBOL_GPL(mm_iommu_get);

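/*
 * Translate a preregistered userspace address into a host physical
 * address. Fails if @ua is outside the region or the requested IOMMU page
 * shift exceeds what the backing memory supports; for device memory the
 * result is simply dev_hpa plus the offset into the region.
 */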
long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
		unsigned long ua, unsigned int pageshift, unsigned long *hpa)
{
	const long entry = (ua - mem->ua) >> PAGE_SHIFT;
	u64 *va;

	if (entry >= mem->entries)
		return -EFAULT;

	if (pageshift > mem->pageshift)
		return -EFAULT;

	if (!mem->hpas) {
		*hpa = mem->dev_hpa + (ua - mem->ua);
		return 0;
	}

	va = &mem->hpas[entry];
	*hpa = (*va & MM_IOMMU_TABLE_GROUP_PAGE_MASK) | (ua & ~PAGE_MASK);

	return 0;
}
EXPORT_SYMBOL_GPL(mm_iommu_ua_to_hpa);

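/*
 * Real mode counterpart of mm_iommu_ua_to_hpa(). hpas[] is vmalloc'ed and
 * cannot be dereferenced through its virtual address with the MMU off, so
 * the entry is reached via vmalloc_to_phys() instead.
 */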
long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
		unsigned long ua, unsigned int pageshift, unsigned long *hpa)
{
	const long entry = (ua - mem->ua) >> PAGE_SHIFT;
	unsigned long *pa;

	if (entry >= mem->entries)
		return -EFAULT;

	if (pageshift > mem->pageshift)
		return -EFAULT;

	if (!mem->hpas) {
		*hpa = mem->dev_hpa + (ua - mem->ua);
		return 0;
	}

	pa = (void *) vmalloc_to_phys(&mem->hpas[entry]);
	if (!pa)
		return -EFAULT;

	*hpa = (*pa & MM_IOMMU_TABLE_GROUP_PAGE_MASK) | (ua & ~PAGE_MASK);

	return 0;
}

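/*
 * Called from real mode to mark the page backing @ua as dirty (e.g. when
 * a writable IOMMU mapping of it is torn down). The bit is kept in the
 * hpas[] entry and folded into the struct page at unpin time.
 */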
void mm_iommu_ua_mark_dirty_rm(struct mm_struct *mm, unsigned long ua)
{
	struct mm_iommu_table_group_mem_t *mem;
	long entry;
	void *va;
	unsigned long *pa;

	mem = mm_iommu_lookup_rm(mm, ua, PAGE_SIZE);
	if (!mem)
		return;

	if (mem->dev_hpa != MM_IOMMU_TABLE_INVALID_HPA)
		return;

	entry = (ua - mem->ua) >> PAGE_SHIFT;
	va = &mem->hpas[entry];

	pa = (void *) vmalloc_to_phys(va);
	if (!pa)
		return;

	*pa |= MM_IOMMU_TABLE_GROUP_PAGE_DIRTY;
}

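/*
 * Check whether @hpa falls within a preregistered chunk of device memory
 * and, if so, report how much of that chunk (capped at 1 << pageshift) is
 * available starting at @hpa.
 */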
bool mm_iommu_is_devmem(struct mm_struct *mm, unsigned long hpa,
		unsigned int pageshift, unsigned long *size)
{
	struct mm_iommu_table_group_mem_t *mem;
	unsigned long end;

	list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) {
		if (mem->dev_hpa == MM_IOMMU_TABLE_INVALID_HPA)
			continue;

		end = mem->dev_hpa + (mem->entries << PAGE_SHIFT);
		if ((mem->dev_hpa <= hpa) && (hpa < end)) {
			/*
			 * Since the IOMMU page size might be bigger than
			 * PAGE_SIZE, the amount of preregistered memory
			 * starting from @hpa might be smaller than 1<<pageshift
			 * and the caller needs to distinguish this situation.
			 */
			*size = min(1UL << pageshift, end - hpa);
			return true;
		}
	}

	return false;
}
EXPORT_SYMBOL_GPL(mm_iommu_is_devmem);

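/*
 * mm_iommu_mapped_inc()/mm_iommu_mapped_dec() bracket every active IOMMU
 * mapping of the region. The counter starts at 1, can only drop to 0 in
 * mm_iommu_put(), and once it is 0 new mappings are refused.
 */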
long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem)
{
	if (atomic64_inc_not_zero(&mem->mapped))
		return 0;

	/* Last mm_iommu_put() has been called, no more mappings allowed */
	return -ENXIO;
}
EXPORT_SYMBOL_GPL(mm_iommu_mapped_inc);

void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem)
{
	atomic64_add_unless(&mem->mapped, -1, 1);
}
EXPORT_SYMBOL_GPL(mm_iommu_mapped_dec);

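/*
 * Called at mm context initialisation so that the preregistration list is
 * always in a valid (empty) state.
 */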
void mm_iommu_init(struct mm_struct *mm)
{
	INIT_LIST_HEAD_RCU(&mm->context.iommu_group_mem_list);
}