xref: /openbmc/linux/arch/powerpc/kvm/book3s_64_vio.c (revision 457c8996)
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 *
 * Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
 * Copyright 2011 David Gibson, IBM Corporation <dwg@au1.ibm.com>
 * Copyright 2016 Alexey Kardashevskiy, IBM Corporation <aik@au1.ibm.com>
 */

#include <linux/types.h>
#include <linux/string.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/highmem.h>
#include <linux/gfp.h>
#include <linux/slab.h>
#include <linux/sched/signal.h>
#include <linux/hugetlb.h>
#include <linux/list.h>
#include <linux/anon_inodes.h>
#include <linux/iommu.h>
#include <linux/file.h>

#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
#include <asm/book3s/64/mmu-hash.h>
#include <asm/hvcall.h>
#include <asm/synch.h>
#include <asm/ppc-opcode.h>
#include <asm/kvm_host.h>
#include <asm/udbg.h>
#include <asm/iommu.h>
#include <asm/tce.h>
#include <asm/mmu_context.h>

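/*
 * Number of host pages needed to back the shadow copy of a TCE table
 * with @iommu_pages entries: one u64 per TCE, rounded up to whole
 * pages (e.g. a 512-entry list needs 4096 bytes of shadow storage).
 */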
static unsigned long kvmppc_tce_pages(unsigned long iommu_pages)
{
	return ALIGN(iommu_pages * sizeof(u64), PAGE_SIZE) / PAGE_SIZE;
}

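/*
 * Total number of pages charged against RLIMIT_MEMLOCK for one TCE
 * table: the shadow TCE pages plus the pages holding the
 * kvmppc_spapr_tce_table descriptor and its array of page pointers.
 */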
static unsigned long kvmppc_stt_pages(unsigned long tce_pages)
{
	unsigned long stt_bytes = sizeof(struct kvmppc_spapr_tce_table) +
			(tce_pages * sizeof(struct page *));

	return tce_pages + ALIGN(stt_bytes, PAGE_SIZE) / PAGE_SIZE;
}

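/*
 * Charges (@inc == true) or uncharges @stt_pages against the locked
 * memory accounting of the current process; fails with -ENOMEM if the
 * RLIMIT_MEMLOCK limit would be exceeded and the caller does not have
 * CAP_IPC_LOCK.
 */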
static long kvmppc_account_memlimit(unsigned long stt_pages, bool inc)
{
	long ret = 0;

	if (!current || !current->mm)
		return ret; /* process exited */

	down_write(&current->mm->mmap_sem);

	if (inc) {
		unsigned long locked, lock_limit;

		locked = current->mm->locked_vm + stt_pages;
		lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
		if (locked > lock_limit && !capable(CAP_IPC_LOCK))
			ret = -ENOMEM;
		else
			current->mm->locked_vm += stt_pages;
	} else {
		if (WARN_ON_ONCE(stt_pages > current->mm->locked_vm))
			stt_pages = current->mm->locked_vm;

		current->mm->locked_vm -= stt_pages;
	}

	pr_debug("[%d] RLIMIT_MEMLOCK KVM %c%ld %ld/%ld%s\n", current->pid,
			inc ? '+' : '-',
			stt_pages << PAGE_SHIFT,
			current->mm->locked_vm << PAGE_SHIFT,
			rlimit(RLIMIT_MEMLOCK),
			ret ? " - exceeded" : "");

	up_write(&current->mm->mmap_sem);

	return ret;
}

static void kvm_spapr_tce_iommu_table_free(struct rcu_head *head)
{
	struct kvmppc_spapr_tce_iommu_table *stit = container_of(head,
			struct kvmppc_spapr_tce_iommu_table, rcu);

	iommu_tce_table_put(stit->tbl);

	kfree(stit);
}

static void kvm_spapr_tce_liobn_put(struct kref *kref)
{
	struct kvmppc_spapr_tce_iommu_table *stit = container_of(kref,
			struct kvmppc_spapr_tce_iommu_table, kref);

	list_del_rcu(&stit->next);

	call_rcu(&stit->rcu, kvm_spapr_tce_iommu_table_free);
}

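/*
 * Releases the references which every registered TCE table holds on the
 * hardware iommu_tables of @grp; called when the group is detached from
 * KVM (e.g. via the KVM-VFIO device).
 */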
extern void kvm_spapr_tce_release_iommu_group(struct kvm *kvm,
		struct iommu_group *grp)
{
	int i;
	struct kvmppc_spapr_tce_table *stt;
	struct kvmppc_spapr_tce_iommu_table *stit, *tmp;
	struct iommu_table_group *table_group = NULL;

	list_for_each_entry_rcu(stt, &kvm->arch.spapr_tce_tables, list) {

		table_group = iommu_group_get_iommudata(grp);
		if (WARN_ON(!table_group))
			continue;

		list_for_each_entry_safe(stit, tmp, &stt->iommu_tables, next) {
			for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
				if (table_group->tables[i] != stit->tbl)
					continue;

				kref_put(&stit->kref, kvm_spapr_tce_liobn_put);
			}
		}
	}
}

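/*
 * Associates a hardware DMA window of @grp with the KVM TCE table
 * identified by @tablefd, so that H_PUT_TCE and friends update the real
 * IOMMU table as well as the shadow copy. The window must be compatible
 * with the guest view: same offset, equal or smaller page size, equal
 * or larger window size.
 */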
extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd,
		struct iommu_group *grp)
{
	struct kvmppc_spapr_tce_table *stt = NULL;
	bool found = false;
	struct iommu_table *tbl = NULL;
	struct iommu_table_group *table_group;
	long i;
	struct kvmppc_spapr_tce_iommu_table *stit;
	struct fd f;

	f = fdget(tablefd);
	if (!f.file)
		return -EBADF;

	list_for_each_entry_rcu(stt, &kvm->arch.spapr_tce_tables, list) {
		if (stt == f.file->private_data) {
			found = true;
			break;
		}
	}

	fdput(f);

	if (!found)
		return -EINVAL;

	table_group = iommu_group_get_iommudata(grp);
	if (WARN_ON(!table_group))
		return -EFAULT;

	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
		struct iommu_table *tbltmp = table_group->tables[i];

		if (!tbltmp)
			continue;
		/* Make sure hardware table parameters are compatible */
		if ((tbltmp->it_page_shift <= stt->page_shift) &&
				(tbltmp->it_offset << tbltmp->it_page_shift ==
				 stt->offset << stt->page_shift) &&
				(tbltmp->it_size << tbltmp->it_page_shift >=
				 stt->size << stt->page_shift)) {
			/*
			 * Reference the table to avoid races with
			 * add/remove DMA windows.
			 */
			tbl = iommu_tce_table_get(tbltmp);
			break;
		}
	}
	if (!tbl)
		return -EINVAL;

	list_for_each_entry_rcu(stit, &stt->iommu_tables, next) {
		if (tbl != stit->tbl)
			continue;

		if (!kref_get_unless_zero(&stit->kref)) {
			/* stit is being destroyed */
			iommu_tce_table_put(tbl);
			return -ENOTTY;
		}
		/*
		 * The table is already known to this KVM; we just increased
		 * its KVM reference counter and can return.
		 */
		return 0;
	}

	stit = kzalloc(sizeof(*stit), GFP_KERNEL);
	if (!stit) {
		iommu_tce_table_put(tbl);
		return -ENOMEM;
	}

	stit->tbl = tbl;
	kref_init(&stit->kref);

	list_add_rcu(&stit->next, &stt->iommu_tables);

	return 0;
}

static void release_spapr_tce_table(struct rcu_head *head)
{
	struct kvmppc_spapr_tce_table *stt = container_of(head,
			struct kvmppc_spapr_tce_table, rcu);
	unsigned long i, npages = kvmppc_tce_pages(stt->size);

	for (i = 0; i < npages; i++)
		if (stt->pages[i])
			__free_page(stt->pages[i]);

	kfree(stt);
}

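/*
 * Returns the shadow TCE page at index @sttpage, allocating and zeroing
 * it on first use; allocation is serialized by stt->alloc_lock.
 */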
static struct page *kvm_spapr_get_tce_page(struct kvmppc_spapr_tce_table *stt,
		unsigned long sttpage)
{
	struct page *page = stt->pages[sttpage];

	if (page)
		return page;

	mutex_lock(&stt->alloc_lock);
	page = stt->pages[sttpage];
	if (!page) {
		page = alloc_page(GFP_KERNEL | __GFP_ZERO);
		WARN_ON_ONCE(!page);
		if (page)
			stt->pages[sttpage] = page;
	}
	mutex_unlock(&stt->alloc_lock);

	return page;
}

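/*
 * Fault handler for userspace mmap() of the TCE table fd: hands out the
 * shadow TCE pages so that user space can read the guest-written TCEs.
 */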
static vm_fault_t kvm_spapr_tce_fault(struct vm_fault *vmf)
{
	struct kvmppc_spapr_tce_table *stt = vmf->vma->vm_file->private_data;
	struct page *page;

	if (vmf->pgoff >= kvmppc_tce_pages(stt->size))
		return VM_FAULT_SIGBUS;

	page = kvm_spapr_get_tce_page(stt, vmf->pgoff);
	if (!page)
		return VM_FAULT_OOM;

	get_page(page);
	vmf->page = page;
	return 0;
}

static const struct vm_operations_struct kvm_spapr_tce_vm_ops = {
	.fault = kvm_spapr_tce_fault,
};

static int kvm_spapr_tce_mmap(struct file *file, struct vm_area_struct *vma)
{
	vma->vm_ops = &kvm_spapr_tce_vm_ops;
	return 0;
}

static int kvm_spapr_tce_release(struct inode *inode, struct file *filp)
{
	struct kvmppc_spapr_tce_table *stt = filp->private_data;
	struct kvmppc_spapr_tce_iommu_table *stit, *tmp;
	struct kvm *kvm = stt->kvm;

	mutex_lock(&kvm->lock);
	list_del_rcu(&stt->list);
	mutex_unlock(&kvm->lock);

	list_for_each_entry_safe(stit, tmp, &stt->iommu_tables, next) {
		WARN_ON(!kref_read(&stit->kref));
		while (1) {
			if (kref_put(&stit->kref, kvm_spapr_tce_liobn_put))
				break;
		}
	}

	kvm_put_kvm(stt->kvm);

	kvmppc_account_memlimit(
		kvmppc_stt_pages(kvmppc_tce_pages(stt->size)), false);
	call_rcu(&stt->rcu, release_spapr_tce_table);

	return 0;
}

static const struct file_operations kvm_spapr_tce_fops = {
	.mmap           = kvm_spapr_tce_mmap,
	.release	= kvm_spapr_tce_release,
};

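/*
 * Handles the KVM_CREATE_SPAPR_TCE_64 ioctl: validates the window
 * parameters, accounts the backing memory against RLIMIT_MEMLOCK,
 * allocates the table and returns an anonymous fd which user space can
 * mmap() to access the guest TCEs.
 */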
long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
				   struct kvm_create_spapr_tce_64 *args)
{
	struct kvmppc_spapr_tce_table *stt = NULL;
	struct kvmppc_spapr_tce_table *siter;
	unsigned long npages, size = args->size;
	int ret = -ENOMEM;

	if (!args->size || args->page_shift < 12 || args->page_shift > 34 ||
		(args->offset + args->size > (ULLONG_MAX >> args->page_shift)))
		return -EINVAL;

	npages = kvmppc_tce_pages(size);
	ret = kvmppc_account_memlimit(kvmppc_stt_pages(npages), true);
	if (ret)
		return ret;

	ret = -ENOMEM;
	stt = kzalloc(sizeof(*stt) + npages * sizeof(struct page *),
		      GFP_KERNEL);
	if (!stt)
		goto fail_acct;

	stt->liobn = args->liobn;
	stt->page_shift = args->page_shift;
	stt->offset = args->offset;
	stt->size = size;
	stt->kvm = kvm;
	mutex_init(&stt->alloc_lock);
	INIT_LIST_HEAD_RCU(&stt->iommu_tables);

	mutex_lock(&kvm->lock);

	/* Check this LIOBN hasn't been previously allocated */
	ret = 0;
	list_for_each_entry(siter, &kvm->arch.spapr_tce_tables, list) {
		if (siter->liobn == args->liobn) {
			ret = -EBUSY;
			break;
		}
	}

	kvm_get_kvm(kvm);
	if (!ret)
		ret = anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
				       stt, O_RDWR | O_CLOEXEC);

	if (ret >= 0)
		list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables);
	else
		kvm_put_kvm(kvm);

	mutex_unlock(&kvm->lock);

	if (ret >= 0)
		return ret;

	kfree(stt);
 fail_acct:
	kvmppc_account_memlimit(kvmppc_stt_pages(npages), false);
	return ret;
}

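/*
 * Translates the guest physical address carried in @tce into the
 * corresponding userspace address via the memslot covering it; returns
 * -EINVAL if no memslot maps that guest page.
 */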
static long kvmppc_tce_to_ua(struct kvm *kvm, unsigned long tce,
		unsigned long *ua)
{
	unsigned long gfn = tce >> PAGE_SHIFT;
	struct kvm_memory_slot *memslot;

	memslot = search_memslots(kvm_memslots(kvm), gfn);
	if (!memslot)
		return -EINVAL;

	*ua = __gfn_to_hva_memslot(memslot, gfn) |
		(tce & ~(PAGE_MASK | TCE_PCI_READ | TCE_PCI_WRITE));

	return 0;
}

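/*
 * Checks that a guest-supplied TCE can be used: the guest physical
 * address must be valid for the table's page size and covered by a
 * memslot, and, for tables attached to hardware IOMMUs, the target
 * memory must already be preregistered (mm_iommu_*).
 */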
static long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt,
		unsigned long tce)
{
	unsigned long gpa = tce & ~(TCE_PCI_READ | TCE_PCI_WRITE);
	enum dma_data_direction dir = iommu_tce_direction(tce);
	struct kvmppc_spapr_tce_iommu_table *stit;
	unsigned long ua = 0;

	/* Allow userspace to poison TCE table */
	if (dir == DMA_NONE)
		return H_SUCCESS;

	if (iommu_tce_check_gpa(stt->page_shift, gpa))
		return H_TOO_HARD;

	if (kvmppc_tce_to_ua(stt->kvm, tce, &ua))
		return H_TOO_HARD;

	list_for_each_entry_rcu(stit, &stt->iommu_tables, next) {
		unsigned long hpa = 0;
		struct mm_iommu_table_group_mem_t *mem;
		long shift = stit->tbl->it_page_shift;

		mem = mm_iommu_lookup(stt->kvm->mm, ua, 1ULL << shift);
		if (!mem)
			return H_TOO_HARD;

		if (mm_iommu_ua_to_hpa(mem, ua, shift, &hpa))
			return H_TOO_HARD;
	}

	return H_SUCCESS;
}

/*
 * Handles TCE requests for emulated devices.
 * Puts guest TCE values into the table and expects user space to convert them.
 * Cannot fail, so kvmppc_tce_validate must be called before it.
 */
static void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt,
		unsigned long idx, unsigned long tce)
{
	struct page *page;
	u64 *tbl;
	unsigned long sttpage;

	idx -= stt->offset;
	sttpage = idx / TCES_PER_PAGE;
	page = stt->pages[sttpage];

	if (!page) {
		/* We allow any TCE, not just ones with read|write permissions */
		if (!tce)
			return;

		page = kvm_spapr_get_tce_page(stt, sttpage);
		if (!page)
			return;
	}
	tbl = page_to_virt(page);

	tbl[idx % TCES_PER_PAGE] = tce;
}

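/*
 * Resets a hardware TCE entry to the cleared (DMA_NONE) state; used to
 * back out partially applied updates when mapping fails.
 */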
static void kvmppc_clear_tce(struct mm_struct *mm, struct iommu_table *tbl,
		unsigned long entry)
{
	unsigned long hpa = 0;
	enum dma_data_direction dir = DMA_NONE;

	iommu_tce_xchg(mm, tbl, entry, &hpa, &dir);
}

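/*
 * Drops the "mapped" reference which kvmppc_tce_iommu_do_map() took on
 * the preregistered memory region backing @entry and clears the cached
 * userspace address for that entry.
 */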
static long kvmppc_tce_iommu_mapped_dec(struct kvm *kvm,
		struct iommu_table *tbl, unsigned long entry)
{
	struct mm_iommu_table_group_mem_t *mem = NULL;
	const unsigned long pgsize = 1ULL << tbl->it_page_shift;
	__be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RO(tbl, entry);

	if (!pua)
		return H_SUCCESS;

	mem = mm_iommu_lookup(kvm->mm, be64_to_cpu(*pua), pgsize);
	if (!mem)
		return H_TOO_HARD;

	mm_iommu_mapped_dec(mem);

	*pua = cpu_to_be64(0);

	return H_SUCCESS;
}

static long kvmppc_tce_iommu_do_unmap(struct kvm *kvm,
		struct iommu_table *tbl, unsigned long entry)
{
	enum dma_data_direction dir = DMA_NONE;
	unsigned long hpa = 0;
	long ret;

	if (WARN_ON_ONCE(iommu_tce_xchg(kvm->mm, tbl, entry, &hpa, &dir)))
		return H_TOO_HARD;

	if (dir == DMA_NONE)
		return H_SUCCESS;

	ret = kvmppc_tce_iommu_mapped_dec(kvm, tbl, entry);
	if (ret != H_SUCCESS)
		iommu_tce_xchg(kvm->mm, tbl, entry, &hpa, &dir);

	return ret;
}

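/*
 * Unmaps one guest TCE from a hardware table. A guest page may be
 * larger than an IOMMU page, so a single guest entry can span several
 * hardware entries ("subpages"), all of which are cleared here.
 */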
static long kvmppc_tce_iommu_unmap(struct kvm *kvm,
		struct kvmppc_spapr_tce_table *stt, struct iommu_table *tbl,
		unsigned long entry)
{
	unsigned long i, ret = H_SUCCESS;
	unsigned long subpages = 1ULL << (stt->page_shift - tbl->it_page_shift);
	unsigned long io_entry = entry * subpages;

	for (i = 0; i < subpages; ++i) {
		ret = kvmppc_tce_iommu_do_unmap(kvm, tbl, io_entry + i);
		if (ret != H_SUCCESS)
			break;
	}

	return ret;
}

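/*
 * Maps one hardware TCE entry: translates @ua to a host physical
 * address through the preregistered memory list, takes a "mapped"
 * reference on that region, programs the hardware table and caches @ua
 * so the mapping can be torn down later.
 */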
long kvmppc_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
		unsigned long entry, unsigned long ua,
		enum dma_data_direction dir)
{
	long ret;
	unsigned long hpa;
	__be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);
	struct mm_iommu_table_group_mem_t *mem;

	if (!pua)
		/* it_userspace allocation might be delayed */
		return H_TOO_HARD;

	mem = mm_iommu_lookup(kvm->mm, ua, 1ULL << tbl->it_page_shift);
	if (!mem)
		/* This only handles the v2 IOMMU type; v1 is handled via ioctl() */
		return H_TOO_HARD;

	if (WARN_ON_ONCE(mm_iommu_ua_to_hpa(mem, ua, tbl->it_page_shift, &hpa)))
		return H_TOO_HARD;

	if (mm_iommu_mapped_inc(mem))
		return H_TOO_HARD;

	ret = iommu_tce_xchg(kvm->mm, tbl, entry, &hpa, &dir);
	if (WARN_ON_ONCE(ret)) {
		mm_iommu_mapped_dec(mem);
		return H_TOO_HARD;
	}

	if (dir != DMA_NONE)
		kvmppc_tce_iommu_mapped_dec(kvm, tbl, entry);

	*pua = cpu_to_be64(ua);

	return 0;
}

static long kvmppc_tce_iommu_map(struct kvm *kvm,
		struct kvmppc_spapr_tce_table *stt, struct iommu_table *tbl,
		unsigned long entry, unsigned long ua,
		enum dma_data_direction dir)
{
	unsigned long i, pgoff, ret = H_SUCCESS;
	unsigned long subpages = 1ULL << (stt->page_shift - tbl->it_page_shift);
	unsigned long io_entry = entry * subpages;

	for (i = 0, pgoff = 0; i < subpages;
			++i, pgoff += IOMMU_PAGE_SIZE(tbl)) {

		ret = kvmppc_tce_iommu_do_map(kvm, tbl,
				io_entry + i, ua + pgoff, dir);
		if (ret != H_SUCCESS)
			break;
	}

	return ret;
}

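/*
 * Handles the H_PUT_TCE hypercall: validates the single TCE, updates
 * any hardware IOMMU tables attached to the LIOBN and stores the value
 * into the in-kernel shadow table.
 */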
long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
		      unsigned long ioba, unsigned long tce)
{
	struct kvmppc_spapr_tce_table *stt;
	long ret, idx;
	struct kvmppc_spapr_tce_iommu_table *stit;
	unsigned long entry, ua = 0;
	enum dma_data_direction dir;

	/* udbg_printf("H_PUT_TCE(): liobn=0x%lx ioba=0x%lx, tce=0x%lx\n", */
	/* 	    liobn, ioba, tce); */

	stt = kvmppc_find_table(vcpu->kvm, liobn);
	if (!stt)
		return H_TOO_HARD;

	ret = kvmppc_ioba_validate(stt, ioba, 1);
	if (ret != H_SUCCESS)
		return ret;

	idx = srcu_read_lock(&vcpu->kvm->srcu);

	ret = kvmppc_tce_validate(stt, tce);
	if (ret != H_SUCCESS)
		goto unlock_exit;

	dir = iommu_tce_direction(tce);

	if ((dir != DMA_NONE) && kvmppc_tce_to_ua(vcpu->kvm, tce, &ua)) {
		ret = H_PARAMETER;
		goto unlock_exit;
	}

	entry = ioba >> stt->page_shift;

	list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
		if (dir == DMA_NONE)
			ret = kvmppc_tce_iommu_unmap(vcpu->kvm, stt,
					stit->tbl, entry);
		else
			ret = kvmppc_tce_iommu_map(vcpu->kvm, stt, stit->tbl,
					entry, ua, dir);

		if (ret != H_SUCCESS) {
			kvmppc_clear_tce(vcpu->kvm->mm, stit->tbl, entry);
			goto unlock_exit;
		}
	}

	kvmppc_tce_put(stt, entry, tce);

unlock_exit:
	srcu_read_unlock(&vcpu->kvm->srcu, idx);

	return ret;
}
EXPORT_SYMBOL_GPL(kvmppc_h_put_tce);

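/*
 * Handles the H_PUT_TCE_INDIRECT hypercall: reads a list of up to 512
 * TCEs from a 4K-aligned guest page at @tce_list, validates them all
 * first and then applies them to consecutive entries starting at @ioba.
 */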
long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
		unsigned long liobn, unsigned long ioba,
		unsigned long tce_list, unsigned long npages)
{
	struct kvmppc_spapr_tce_table *stt;
	long i, ret = H_SUCCESS, idx;
	unsigned long entry, ua = 0;
	u64 __user *tces;
	u64 tce;
	struct kvmppc_spapr_tce_iommu_table *stit;

	stt = kvmppc_find_table(vcpu->kvm, liobn);
	if (!stt)
		return H_TOO_HARD;

	entry = ioba >> stt->page_shift;
	/*
	 * SPAPR spec says that the maximum size of the list is 512 TCEs,
	 * so the whole table fits in a 4K page.
	 */
	if (npages > 512)
		return H_PARAMETER;

	if (tce_list & (SZ_4K - 1))
		return H_PARAMETER;

	ret = kvmppc_ioba_validate(stt, ioba, npages);
	if (ret != H_SUCCESS)
		return ret;

	idx = srcu_read_lock(&vcpu->kvm->srcu);
	if (kvmppc_tce_to_ua(vcpu->kvm, tce_list, &ua)) {
		ret = H_TOO_HARD;
		goto unlock_exit;
	}
	tces = (u64 __user *) ua;

	for (i = 0; i < npages; ++i) {
		if (get_user(tce, tces + i)) {
			ret = H_TOO_HARD;
			goto unlock_exit;
		}
		tce = be64_to_cpu(tce);

		ret = kvmppc_tce_validate(stt, tce);
		if (ret != H_SUCCESS)
			goto unlock_exit;
	}

	for (i = 0; i < npages; ++i) {
		/*
		 * This looks unsafe, because we validate, then regrab
		 * the TCE from userspace which could have been changed by
		 * another thread.
		 *
		 * But it actually is safe, because the relevant checks will be
		 * re-executed in the following code.  If userspace tries to
		 * change this dodgily it will result in a messier failure mode
		 * but won't threaten the host.
		 */
		if (get_user(tce, tces + i)) {
			ret = H_TOO_HARD;
			goto unlock_exit;
		}
		tce = be64_to_cpu(tce);

		if (kvmppc_tce_to_ua(vcpu->kvm, tce, &ua)) {
			/* Exit via the label so the SRCU read lock is dropped */
			ret = H_PARAMETER;
			goto unlock_exit;
		}

		list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
			ret = kvmppc_tce_iommu_map(vcpu->kvm, stt,
					stit->tbl, entry + i, ua,
					iommu_tce_direction(tce));

			if (ret != H_SUCCESS) {
				kvmppc_clear_tce(vcpu->kvm->mm, stit->tbl,
						entry);
				goto unlock_exit;
			}
		}

		kvmppc_tce_put(stt, entry + i, tce);
	}

unlock_exit:
	srcu_read_unlock(&vcpu->kvm->srcu, idx);

	return ret;
}
EXPORT_SYMBOL_GPL(kvmppc_h_put_tce_indirect);

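/*
 * Handles the H_STUFF_TCE hypercall: writes the same @tce_value (which
 * must not carry read/write permission bits) into @npages consecutive
 * entries starting at @ioba, clearing any attached hardware tables as
 * well.
 */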
long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu,
		unsigned long liobn, unsigned long ioba,
		unsigned long tce_value, unsigned long npages)
{
	struct kvmppc_spapr_tce_table *stt;
	long i, ret;
	struct kvmppc_spapr_tce_iommu_table *stit;

	stt = kvmppc_find_table(vcpu->kvm, liobn);
	if (!stt)
		return H_TOO_HARD;

	ret = kvmppc_ioba_validate(stt, ioba, npages);
	if (ret != H_SUCCESS)
		return ret;

	/* Check permission bits only, to allow userspace to poison a TCE for debug */
	if (tce_value & (TCE_PCI_WRITE | TCE_PCI_READ))
		return H_PARAMETER;

	list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
		unsigned long entry = ioba >> stt->page_shift;

		for (i = 0; i < npages; ++i) {
			ret = kvmppc_tce_iommu_unmap(vcpu->kvm, stt,
					stit->tbl, entry + i);

			if (ret == H_SUCCESS)
				continue;

			if (ret == H_TOO_HARD)
				return ret;

			WARN_ON_ONCE(1);
			kvmppc_clear_tce(vcpu->kvm->mm, stit->tbl, entry);
		}
	}

	for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift))
		kvmppc_tce_put(stt, ioba >> stt->page_shift, tce_value);

	return H_SUCCESS;
}
EXPORT_SYMBOL_GPL(kvmppc_h_stuff_tce);
779