xref: /openbmc/linux/arch/powerpc/kvm/book3s_64_vio.c (revision 8d81cd1a)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  *
4  * Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
5  * Copyright 2011 David Gibson, IBM Corporation <dwg@au1.ibm.com>
6  * Copyright 2016 Alexey Kardashevskiy, IBM Corporation <aik@au1.ibm.com>
7  */
8 
9 #include <linux/types.h>
10 #include <linux/string.h>
11 #include <linux/kvm.h>
12 #include <linux/kvm_host.h>
13 #include <linux/highmem.h>
14 #include <linux/gfp.h>
15 #include <linux/slab.h>
16 #include <linux/sched/signal.h>
17 #include <linux/hugetlb.h>
18 #include <linux/list.h>
19 #include <linux/anon_inodes.h>
20 #include <linux/iommu.h>
21 #include <linux/file.h>
22 #include <linux/mm.h>
23 
24 #include <asm/kvm_ppc.h>
25 #include <asm/kvm_book3s.h>
26 #include <asm/book3s/64/mmu-hash.h>
27 #include <asm/hvcall.h>
28 #include <asm/synch.h>
29 #include <asm/ppc-opcode.h>
30 #include <asm/udbg.h>
31 #include <asm/iommu.h>
32 #include <asm/tce.h>
33 #include <asm/mmu_context.h>
34 
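/*
 * Look up the guest TCE table for @liobn. Used from the hcall paths,
 * which only traverse the list locklessly, hence
 * list_for_each_entry_lockless().
 */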
35 static struct kvmppc_spapr_tce_table *kvmppc_find_table(struct kvm *kvm,
36 	unsigned long liobn)
37 {
38 	struct kvmppc_spapr_tce_table *stt;
39 
40 	list_for_each_entry_lockless(stt, &kvm->arch.spapr_tce_tables, list)
41 		if (stt->liobn == liobn)
42 			return stt;
43 
44 	return NULL;
45 }
46 
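/*
 * Number of host pages needed to hold @iommu_pages TCEs (one u64 each).
 * For example, a 2GB window of 64K IOMMU pages has 32768 TCEs, i.e.
 * 256KB of table, which is 64 pages with a 4K PAGE_SIZE.
 */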
47 static unsigned long kvmppc_tce_pages(unsigned long iommu_pages)
48 {
49 	return ALIGN(iommu_pages * sizeof(u64), PAGE_SIZE) / PAGE_SIZE;
50 }
51 
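/*
 * Pages to charge against locked_vm for a table backed by @tce_pages:
 * the TCE pages themselves plus the descriptor and its page pointer
 * array.
 */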
52 static unsigned long kvmppc_stt_pages(unsigned long tce_pages)
53 {
54 	unsigned long stt_bytes = sizeof(struct kvmppc_spapr_tce_table) +
55 			(tce_pages * sizeof(struct page *));
56 
57 	return tce_pages + ALIGN(stt_bytes, PAGE_SIZE) / PAGE_SIZE;
58 }
59 
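/*
 * RCU callback: drop the iommu_table reference and free the binding
 * structure once all lockless readers are done with it.
 */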
60 static void kvm_spapr_tce_iommu_table_free(struct rcu_head *head)
61 {
62 	struct kvmppc_spapr_tce_iommu_table *stit = container_of(head,
63 			struct kvmppc_spapr_tce_iommu_table, rcu);
64 
65 	iommu_tce_table_put(stit->tbl);
66 
67 	kfree(stit);
68 }
69 
70 static void kvm_spapr_tce_liobn_put(struct kref *kref)
71 {
72 	struct kvmppc_spapr_tce_iommu_table *stit = container_of(kref,
73 			struct kvmppc_spapr_tce_iommu_table, kref);
74 
75 	list_del_rcu(&stit->next);
76 
77 	call_rcu(&stit->rcu, kvm_spapr_tce_iommu_table_free);
78 }
79 
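/*
 * Unbind every hardware table of @grp from all registered guest TCE
 * tables, dropping one KVM reference per binding. Called from the
 * KVM-VFIO device when the group goes away.
 */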
80 extern void kvm_spapr_tce_release_iommu_group(struct kvm *kvm,
81 		struct iommu_group *grp)
82 {
83 	int i;
84 	struct kvmppc_spapr_tce_table *stt;
85 	struct kvmppc_spapr_tce_iommu_table *stit, *tmp;
86 	struct iommu_table_group *table_group = NULL;
87 
88 	rcu_read_lock();
89 	list_for_each_entry_rcu(stt, &kvm->arch.spapr_tce_tables, list) {
90 
91 		table_group = iommu_group_get_iommudata(grp);
92 		if (WARN_ON(!table_group))
93 			continue;
94 
95 		list_for_each_entry_safe(stit, tmp, &stt->iommu_tables, next) {
96 			for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
97 				if (table_group->tables[i] != stit->tbl)
98 					continue;
99 
100 				kref_put(&stit->kref, kvm_spapr_tce_liobn_put);
101 			}
102 		}
103 		cond_resched_rcu();
104 	}
105 	rcu_read_unlock();
106 }
107 
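/*
 * Bind a hardware table of @grp to the guest TCE table referenced by
 * @tablefd, so that TCE hcalls are mirrored into the real IOMMU.
 * Reached via the KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE attribute of the
 * KVM-VFIO device.
 */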
108 extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd,
109 		struct iommu_group *grp)
110 {
111 	struct kvmppc_spapr_tce_table *stt = NULL;
112 	bool found = false;
113 	struct iommu_table *tbl = NULL;
114 	struct iommu_table_group *table_group;
115 	long i;
116 	struct kvmppc_spapr_tce_iommu_table *stit;
117 	struct fd f;
118 
119 	f = fdget(tablefd);
120 	if (!f.file)
121 		return -EBADF;
122 
123 	rcu_read_lock();
124 	list_for_each_entry_rcu(stt, &kvm->arch.spapr_tce_tables, list) {
125 		if (stt == f.file->private_data) {
126 			found = true;
127 			break;
128 		}
129 	}
130 	rcu_read_unlock();
131 
132 	if (!found) {
133 		fdput(f);
134 		return -EINVAL;
135 	}
136 
137 	table_group = iommu_group_get_iommudata(grp);
138 	if (WARN_ON(!table_group)) {
139 		fdput(f);
140 		return -EFAULT;
141 	}
142 
143 	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
144 		struct iommu_table *tbltmp = table_group->tables[i];
145 
146 		if (!tbltmp)
147 			continue;
148 		/* Make sure hardware table parameters are compatible */
149 		if ((tbltmp->it_page_shift <= stt->page_shift) &&
150 				(tbltmp->it_offset << tbltmp->it_page_shift ==
151 				 stt->offset << stt->page_shift) &&
152 				(tbltmp->it_size << tbltmp->it_page_shift >=
153 				 stt->size << stt->page_shift)) {
154 			/*
155 			 * Reference the table to avoid races with
156 			 * add/remove DMA windows.
157 			 */
158 			tbl = iommu_tce_table_get(tbltmp);
159 			break;
160 		}
161 	}
162 	if (!tbl) {
163 		fdput(f);
164 		return -EINVAL;
165 	}
166 
167 	rcu_read_lock();
168 	list_for_each_entry_rcu(stit, &stt->iommu_tables, next) {
169 		if (tbl != stit->tbl)
170 			continue;
171 
172 		if (!kref_get_unless_zero(&stit->kref)) {
173 			/* stit is being destroyed */
174 			iommu_tce_table_put(tbl);
175 			rcu_read_unlock();
176 			fdput(f);
177 			return -ENOTTY;
178 		}
179 		/*
180 		 * The table is already known to this KVM; we have just increased
181 		 * its KVM reference counter and can return.
182 		 */
183 		rcu_read_unlock();
184 		fdput(f);
185 		return 0;
186 	}
187 	rcu_read_unlock();
188 
189 	stit = kzalloc(sizeof(*stit), GFP_KERNEL);
190 	if (!stit) {
191 		iommu_tce_table_put(tbl);
192 		fdput(f);
193 		return -ENOMEM;
194 	}
195 
196 	stit->tbl = tbl;
197 	kref_init(&stit->kref);
198 
199 	list_add_rcu(&stit->next, &stt->iommu_tables);
200 
201 	fdput(f);
202 	return 0;
203 }
204 
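/*
 * RCU callback: free the guest-visible TCE pages and the descriptor.
 */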
205 static void release_spapr_tce_table(struct rcu_head *head)
206 {
207 	struct kvmppc_spapr_tce_table *stt = container_of(head,
208 			struct kvmppc_spapr_tce_table, rcu);
209 	unsigned long i, npages = kvmppc_tce_pages(stt->size);
210 
211 	for (i = 0; i < npages; i++)
212 		if (stt->pages[i])
213 			__free_page(stt->pages[i]);
214 
215 	kfree(stt);
216 }
217 
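/*
 * Return the page backing guest TCE page @sttpage, allocating it on
 * first use. Allocation is serialized by stt->alloc_lock; readers may
 * look at stt->pages[] locklessly.
 */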
218 static struct page *kvm_spapr_get_tce_page(struct kvmppc_spapr_tce_table *stt,
219 		unsigned long sttpage)
220 {
221 	struct page *page = stt->pages[sttpage];
222 
223 	if (page)
224 		return page;
225 
226 	mutex_lock(&stt->alloc_lock);
227 	page = stt->pages[sttpage];
228 	if (!page) {
229 		page = alloc_page(GFP_KERNEL | __GFP_ZERO);
230 		WARN_ON_ONCE(!page);
231 		if (page)
232 			stt->pages[sttpage] = page;
233 	}
234 	mutex_unlock(&stt->alloc_lock);
235 
236 	return page;
237 }
238 
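/*
 * Fault handler for mmap() of the TCE table fd: hand out the backing
 * page for the faulting offset so userspace can read the guest view.
 */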
239 static vm_fault_t kvm_spapr_tce_fault(struct vm_fault *vmf)
240 {
241 	struct kvmppc_spapr_tce_table *stt = vmf->vma->vm_file->private_data;
242 	struct page *page;
243 
244 	if (vmf->pgoff >= kvmppc_tce_pages(stt->size))
245 		return VM_FAULT_SIGBUS;
246 
247 	page = kvm_spapr_get_tce_page(stt, vmf->pgoff);
248 	if (!page)
249 		return VM_FAULT_OOM;
250 
251 	get_page(page);
252 	vmf->page = page;
253 	return 0;
254 }
255 
256 static const struct vm_operations_struct kvm_spapr_tce_vm_ops = {
257 	.fault = kvm_spapr_tce_fault,
258 };
259 
260 static int kvm_spapr_tce_mmap(struct file *file, struct vm_area_struct *vma)
261 {
262 	vma->vm_ops = &kvm_spapr_tce_vm_ops;
263 	return 0;
264 }
265 
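/*
 * release() of the TCE table fd: unlink the table, drop all hardware
 * table bindings, undo the locked_vm accounting and free via RCU.
 */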
266 static int kvm_spapr_tce_release(struct inode *inode, struct file *filp)
267 {
268 	struct kvmppc_spapr_tce_table *stt = filp->private_data;
269 	struct kvmppc_spapr_tce_iommu_table *stit, *tmp;
270 	struct kvm *kvm = stt->kvm;
271 
272 	mutex_lock(&kvm->lock);
273 	list_del_rcu(&stt->list);
274 	mutex_unlock(&kvm->lock);
275 
276 	list_for_each_entry_safe(stit, tmp, &stt->iommu_tables, next) {
277 		WARN_ON(!kref_read(&stit->kref));
278 		while (1) {
279 			if (kref_put(&stit->kref, kvm_spapr_tce_liobn_put))
280 				break;
281 		}
282 	}
283 
284 	account_locked_vm(kvm->mm,
285 		kvmppc_stt_pages(kvmppc_tce_pages(stt->size)), false);
286 
287 	kvm_put_kvm(stt->kvm);
288 
289 	call_rcu(&stt->rcu, release_spapr_tce_table);
290 
291 	return 0;
292 }
293 
294 static const struct file_operations kvm_spapr_tce_fops = {
295 	.mmap           = kvm_spapr_tce_mmap,
296 	.release	= kvm_spapr_tce_release,
297 };
298 
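/*
 * Handler for the KVM_CREATE_SPAPR_TCE_64 vm ioctl: validate the window
 * geometry, charge it against locked_vm and return an anonymous fd that
 * represents (and can be mmap-ed to read) the guest TCE table.
 *
 * Roughly how userspace drives this (an illustrative sketch only, not
 * part of this file; the field values are made up):
 *
 *	struct kvm_create_spapr_tce_64 args = {
 *		.liobn      = liobn,
 *		.page_shift = 16,                // 64K IOMMU pages
 *		.offset     = bus_offset >> 16,  // in IOMMU pages
 *		.size       = window_size >> 16, // in IOMMU pages
 *	};
 *	int tablefd = ioctl(vm_fd, KVM_CREATE_SPAPR_TCE_64, &args);
 */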
299 int kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
300 				  struct kvm_create_spapr_tce_64 *args)
301 {
302 	struct kvmppc_spapr_tce_table *stt = NULL;
303 	struct kvmppc_spapr_tce_table *siter;
304 	struct mm_struct *mm = kvm->mm;
305 	unsigned long npages;
306 	int ret;
307 
308 	if (!args->size || args->page_shift < 12 || args->page_shift > 34 ||
309 		(args->offset + args->size > (ULLONG_MAX >> args->page_shift)))
310 		return -EINVAL;
311 
312 	npages = kvmppc_tce_pages(args->size);
313 	ret = account_locked_vm(mm, kvmppc_stt_pages(npages), true);
314 	if (ret)
315 		return ret;
316 
317 	ret = -ENOMEM;
318 	stt = kzalloc(struct_size(stt, pages, npages), GFP_KERNEL | __GFP_NOWARN);
319 	if (!stt)
320 		goto fail_acct;
321 
322 	stt->liobn = args->liobn;
323 	stt->page_shift = args->page_shift;
324 	stt->offset = args->offset;
325 	stt->size = args->size;
326 	stt->kvm = kvm;
327 	mutex_init(&stt->alloc_lock);
328 	INIT_LIST_HEAD_RCU(&stt->iommu_tables);
329 
330 	mutex_lock(&kvm->lock);
331 
332 	/* Check this LIOBN hasn't been previously allocated */
333 	ret = 0;
334 	list_for_each_entry(siter, &kvm->arch.spapr_tce_tables, list) {
335 		if (siter->liobn == args->liobn) {
336 			ret = -EBUSY;
337 			break;
338 		}
339 	}
340 
341 	kvm_get_kvm(kvm);
342 	if (!ret)
343 		ret = anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
344 				       stt, O_RDWR | O_CLOEXEC);
345 
346 	if (ret >= 0)
347 		list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables);
348 	else
349 		kvm_put_kvm_no_destroy(kvm);
350 
351 	mutex_unlock(&kvm->lock);
352 
353 	if (ret >= 0)
354 		return ret;
355 
356 	kfree(stt);
357  fail_acct:
358 	account_locked_vm(mm, kvmppc_stt_pages(npages), false);
359 	return ret;
360 }
361 
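/*
 * Translate the guest real address carried in @tce into a host
 * userspace address, preserving the in-page offset but dropping the
 * TCE permission bits.
 */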
362 static long kvmppc_tce_to_ua(struct kvm *kvm, unsigned long tce,
363 		unsigned long *ua)
364 {
365 	unsigned long gfn = tce >> PAGE_SHIFT;
366 	struct kvm_memory_slot *memslot;
367 
368 	memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn);
369 	if (!memslot)
370 		return -EINVAL;
371 
372 	*ua = __gfn_to_hva_memslot(memslot, gfn) |
373 		(tce & ~(PAGE_MASK | TCE_PCI_READ | TCE_PCI_WRITE));
374 
375 	return 0;
376 }
377 
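/*
 * Check that a guest-supplied TCE is usable: correctly aligned for the
 * table's page size, translatable to a userspace address and, for each
 * attached hardware table, backed by preregistered memory.
 */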
378 static long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt,
379 		unsigned long tce)
380 {
381 	unsigned long gpa = tce & ~(TCE_PCI_READ | TCE_PCI_WRITE);
382 	enum dma_data_direction dir = iommu_tce_direction(tce);
383 	struct kvmppc_spapr_tce_iommu_table *stit;
384 	unsigned long ua = 0;
385 
386 	/* Allow userspace to poison TCE table */
387 	if (dir == DMA_NONE)
388 		return H_SUCCESS;
389 
390 	if (iommu_tce_check_gpa(stt->page_shift, gpa))
391 		return H_TOO_HARD;
392 
393 	if (kvmppc_tce_to_ua(stt->kvm, tce, &ua))
394 		return H_TOO_HARD;
395 
396 	rcu_read_lock();
397 	list_for_each_entry_rcu(stit, &stt->iommu_tables, next) {
398 		unsigned long hpa = 0;
399 		struct mm_iommu_table_group_mem_t *mem;
400 		long shift = stit->tbl->it_page_shift;
401 
402 		mem = mm_iommu_lookup(stt->kvm->mm, ua, 1ULL << shift);
403 		if (!mem || mm_iommu_ua_to_hpa(mem, ua, shift, &hpa)) {
404 			rcu_read_unlock();
405 			return H_TOO_HARD;
406 		}
407 	}
408 	rcu_read_unlock();
409 
410 	return H_SUCCESS;
411 }
412 
413 /*
414  * Handles TCE requests for emulated devices.
415  * Puts guest TCE values into the table and expects user space to convert them.
416  * Cannot fail, so kvmppc_tce_validate() must be called before it.
417  */
418 static void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt,
419 		unsigned long idx, unsigned long tce)
420 {
421 	struct page *page;
422 	u64 *tbl;
423 	unsigned long sttpage;
424 
425 	idx -= stt->offset;
426 	sttpage = idx / TCES_PER_PAGE;
427 	page = stt->pages[sttpage];
428 
429 	if (!page) {
430 		/* We allow any TCE, not just with read|write permissions */
431 		if (!tce)
432 			return;
433 
434 		page = kvm_spapr_get_tce_page(stt, sttpage);
435 		if (!page)
436 			return;
437 	}
438 	tbl = page_to_virt(page);
439 
440 	tbl[idx % TCES_PER_PAGE] = tce;
441 }
442 
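/*
 * Best-effort cleanup after a failed update: clear every hardware TCE
 * covered by guest entry @entry in @tbl.
 */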
443 static void kvmppc_clear_tce(struct mm_struct *mm, struct kvmppc_spapr_tce_table *stt,
444 		struct iommu_table *tbl, unsigned long entry)
445 {
446 	unsigned long i;
447 	unsigned long subpages = 1ULL << (stt->page_shift - tbl->it_page_shift);
448 	unsigned long io_entry = entry << (stt->page_shift - tbl->it_page_shift);
449 
450 	for (i = 0; i < subpages; ++i) {
451 		unsigned long hpa = 0;
452 		enum dma_data_direction dir = DMA_NONE;
453 
454 		iommu_tce_xchg_no_kill(mm, tbl, io_entry + i, &hpa, &dir);
455 	}
456 }
457 
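/*
 * Drop the "mapped" reference on the preregistered memory region that
 * backed this hardware TCE and clear the stored userspace address.
 */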
458 static long kvmppc_tce_iommu_mapped_dec(struct kvm *kvm,
459 		struct iommu_table *tbl, unsigned long entry)
460 {
461 	struct mm_iommu_table_group_mem_t *mem = NULL;
462 	const unsigned long pgsize = 1ULL << tbl->it_page_shift;
463 	__be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RO(tbl, entry);
464 
465 	if (!pua)
466 		return H_SUCCESS;
467 
468 	mem = mm_iommu_lookup(kvm->mm, be64_to_cpu(*pua), pgsize);
469 	if (!mem)
470 		return H_TOO_HARD;
471 
472 	mm_iommu_mapped_dec(mem);
473 
474 	*pua = cpu_to_be64(0);
475 
476 	return H_SUCCESS;
477 }
478 
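/*
 * Clear one hardware TCE and release its pinned backing. The *_unmap()
 * wrapper below walks all IOMMU subpages of a guest entry and issues a
 * single TCE kill (flush) at the end.
 */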
479 static long kvmppc_tce_iommu_do_unmap(struct kvm *kvm,
480 		struct iommu_table *tbl, unsigned long entry)
481 {
482 	enum dma_data_direction dir = DMA_NONE;
483 	unsigned long hpa = 0;
484 	long ret;
485 
486 	if (WARN_ON_ONCE(iommu_tce_xchg_no_kill(kvm->mm, tbl, entry, &hpa,
487 					&dir)))
488 		return H_TOO_HARD;
489 
490 	if (dir == DMA_NONE)
491 		return H_SUCCESS;
492 
493 	ret = kvmppc_tce_iommu_mapped_dec(kvm, tbl, entry);
494 	if (ret != H_SUCCESS)
495 		iommu_tce_xchg_no_kill(kvm->mm, tbl, entry, &hpa, &dir);
496 
497 	return ret;
498 }
499 
500 static long kvmppc_tce_iommu_unmap(struct kvm *kvm,
501 		struct kvmppc_spapr_tce_table *stt, struct iommu_table *tbl,
502 		unsigned long entry)
503 {
504 	unsigned long i, ret = H_SUCCESS;
505 	unsigned long subpages = 1ULL << (stt->page_shift - tbl->it_page_shift);
506 	unsigned long io_entry = entry * subpages;
507 
508 	for (i = 0; i < subpages; ++i) {
509 		ret = kvmppc_tce_iommu_do_unmap(kvm, tbl, io_entry + i);
510 		if (ret != H_SUCCESS)
511 			break;
512 	}
513 
514 	iommu_tce_kill(tbl, io_entry, subpages);
515 
516 	return ret;
517 }
518 
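/*
 * Program one hardware TCE from a userspace address: look up the
 * preregistered region, take a mapped reference, install the
 * translation and remember the UA for later teardown. The *_map()
 * wrapper below covers all IOMMU subpages of a guest entry and
 * flushes once at the end.
 */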
519 static long kvmppc_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
520 		unsigned long entry, unsigned long ua,
521 		enum dma_data_direction dir)
522 {
523 	long ret;
524 	unsigned long hpa;
525 	__be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);
526 	struct mm_iommu_table_group_mem_t *mem;
527 
528 	if (!pua)
529 		/* it_userspace allocation might be delayed */
530 		return H_TOO_HARD;
531 
532 	mem = mm_iommu_lookup(kvm->mm, ua, 1ULL << tbl->it_page_shift);
533 	if (!mem)
534 		/* This only handles the v2 IOMMU type; v1 is handled via ioctl() */
535 		return H_TOO_HARD;
536 
537 	if (WARN_ON_ONCE(mm_iommu_ua_to_hpa(mem, ua, tbl->it_page_shift, &hpa)))
538 		return H_TOO_HARD;
539 
540 	if (mm_iommu_mapped_inc(mem))
541 		return H_TOO_HARD;
542 
543 	ret = iommu_tce_xchg_no_kill(kvm->mm, tbl, entry, &hpa, &dir);
544 	if (WARN_ON_ONCE(ret)) {
545 		mm_iommu_mapped_dec(mem);
546 		return H_TOO_HARD;
547 	}
548 
549 	if (dir != DMA_NONE)
550 		kvmppc_tce_iommu_mapped_dec(kvm, tbl, entry);
551 
552 	*pua = cpu_to_be64(ua);
553 
554 	return 0;
555 }
556 
557 static long kvmppc_tce_iommu_map(struct kvm *kvm,
558 		struct kvmppc_spapr_tce_table *stt, struct iommu_table *tbl,
559 		unsigned long entry, unsigned long ua,
560 		enum dma_data_direction dir)
561 {
562 	unsigned long i, pgoff, ret = H_SUCCESS;
563 	unsigned long subpages = 1ULL << (stt->page_shift - tbl->it_page_shift);
564 	unsigned long io_entry = entry * subpages;
565 
566 	for (i = 0, pgoff = 0; i < subpages;
567 			++i, pgoff += IOMMU_PAGE_SIZE(tbl)) {
568 
569 		ret = kvmppc_tce_iommu_do_map(kvm, tbl,
570 				io_entry + i, ua + pgoff, dir);
571 		if (ret != H_SUCCESS)
572 			break;
573 	}
574 
575 	iommu_tce_kill(tbl, io_entry, subpages);
576 
577 	return ret;
578 }
579 
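/*
 * H_PUT_TCE: update a single guest TCE, mirroring the change into every
 * attached hardware table before storing it in the guest-visible copy.
 */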
580 long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
581 		      unsigned long ioba, unsigned long tce)
582 {
583 	struct kvmppc_spapr_tce_table *stt;
584 	long ret, idx;
585 	struct kvmppc_spapr_tce_iommu_table *stit;
586 	unsigned long entry, ua = 0;
587 	enum dma_data_direction dir;
588 
589 	/* udbg_printf("H_PUT_TCE(): liobn=0x%lx ioba=0x%lx, tce=0x%lx\n", */
590 	/* 	    liobn, ioba, tce); */
591 
592 	stt = kvmppc_find_table(vcpu->kvm, liobn);
593 	if (!stt)
594 		return H_TOO_HARD;
595 
596 	ret = kvmppc_ioba_validate(stt, ioba, 1);
597 	if (ret != H_SUCCESS)
598 		return ret;
599 
600 	idx = srcu_read_lock(&vcpu->kvm->srcu);
601 
602 	ret = kvmppc_tce_validate(stt, tce);
603 	if (ret != H_SUCCESS)
604 		goto unlock_exit;
605 
606 	dir = iommu_tce_direction(tce);
607 
608 	if ((dir != DMA_NONE) && kvmppc_tce_to_ua(vcpu->kvm, tce, &ua)) {
609 		ret = H_PARAMETER;
610 		goto unlock_exit;
611 	}
612 
613 	entry = ioba >> stt->page_shift;
614 
615 	list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
616 		if (dir == DMA_NONE)
617 			ret = kvmppc_tce_iommu_unmap(vcpu->kvm, stt,
618 					stit->tbl, entry);
619 		else
620 			ret = kvmppc_tce_iommu_map(vcpu->kvm, stt, stit->tbl,
621 					entry, ua, dir);
622 
623 
624 		if (ret != H_SUCCESS) {
625 			kvmppc_clear_tce(vcpu->kvm->mm, stt, stit->tbl, entry);
626 			goto unlock_exit;
627 		}
628 	}
629 
630 	kvmppc_tce_put(stt, entry, tce);
631 
632 unlock_exit:
633 	srcu_read_unlock(&vcpu->kvm->srcu, idx);
634 
635 	return ret;
636 }
637 EXPORT_SYMBOL_GPL(kvmppc_h_put_tce);
638 
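/*
 * H_PUT_TCE_INDIRECT: like H_PUT_TCE, but takes a guest page holding up
 * to 512 TCEs; each is validated first and then applied in turn.
 */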
639 long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
640 		unsigned long liobn, unsigned long ioba,
641 		unsigned long tce_list, unsigned long npages)
642 {
643 	struct kvmppc_spapr_tce_table *stt;
644 	long i, ret = H_SUCCESS, idx;
645 	unsigned long entry, ua = 0;
646 	u64 __user *tces;
647 	u64 tce;
648 	struct kvmppc_spapr_tce_iommu_table *stit;
649 
650 	stt = kvmppc_find_table(vcpu->kvm, liobn);
651 	if (!stt)
652 		return H_TOO_HARD;
653 
654 	entry = ioba >> stt->page_shift;
655 	/*
656 	 * The SPAPR spec says that the maximum size of the list is 512 TCEs,
657 	 * so the whole list fits in a 4K page.
658 	 */
659 	if (npages > 512)
660 		return H_PARAMETER;
661 
662 	if (tce_list & (SZ_4K - 1))
663 		return H_PARAMETER;
664 
665 	ret = kvmppc_ioba_validate(stt, ioba, npages);
666 	if (ret != H_SUCCESS)
667 		return ret;
668 
669 	idx = srcu_read_lock(&vcpu->kvm->srcu);
670 	if (kvmppc_tce_to_ua(vcpu->kvm, tce_list, &ua)) {
671 		ret = H_TOO_HARD;
672 		goto unlock_exit;
673 	}
674 	tces = (u64 __user *) ua;
675 
676 	for (i = 0; i < npages; ++i) {
677 		if (get_user(tce, tces + i)) {
678 			ret = H_TOO_HARD;
679 			goto unlock_exit;
680 		}
681 		tce = be64_to_cpu(tce);
682 
683 		ret = kvmppc_tce_validate(stt, tce);
684 		if (ret != H_SUCCESS)
685 			goto unlock_exit;
686 	}
687 
688 	for (i = 0; i < npages; ++i) {
689 		/*
690 		 * This looks unsafe, because we validate, then re-read
691 		 * the TCE from userspace, which could have been changed by
692 		 * another thread.
693 		 *
694 		 * But it actually is safe, because the relevant checks will be
695 		 * re-executed in the following code.  If userspace tries to
696 		 * change this dodgily, it will result in a messier failure mode
697 		 * but won't threaten the host.
698 		 */
699 		if (get_user(tce, tces + i)) {
700 			ret = H_TOO_HARD;
701 			goto unlock_exit;
702 		}
703 		tce = be64_to_cpu(tce);
704 
705 		if (kvmppc_tce_to_ua(vcpu->kvm, tce, &ua)) {
706 			ret = H_PARAMETER;
707 			goto unlock_exit;
708 		}
709 
710 		list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
711 			ret = kvmppc_tce_iommu_map(vcpu->kvm, stt,
712 					stit->tbl, entry + i, ua,
713 					iommu_tce_direction(tce));
714 
715 			if (ret != H_SUCCESS) {
716 				kvmppc_clear_tce(vcpu->kvm->mm, stt, stit->tbl,
717 						 entry + i);
718 				goto unlock_exit;
719 			}
720 		}
721 
722 		kvmppc_tce_put(stt, entry + i, tce);
723 	}
724 
725 unlock_exit:
726 	srcu_read_unlock(&vcpu->kvm->srcu, idx);
727 
728 	return ret;
729 }
730 EXPORT_SYMBOL_GPL(kvmppc_h_put_tce_indirect);
731 
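/*
 * H_STUFF_TCE: set @npages consecutive TCEs to @tce_value, which must
 * carry no permission bits, i.e. the range is unmapped.
 */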
732 long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu,
733 		unsigned long liobn, unsigned long ioba,
734 		unsigned long tce_value, unsigned long npages)
735 {
736 	struct kvmppc_spapr_tce_table *stt;
737 	long i, ret;
738 	struct kvmppc_spapr_tce_iommu_table *stit;
739 
740 	stt = kvmppc_find_table(vcpu->kvm, liobn);
741 	if (!stt)
742 		return H_TOO_HARD;
743 
744 	ret = kvmppc_ioba_validate(stt, ioba, npages);
745 	if (ret != H_SUCCESS)
746 		return ret;
747 
748 	/* Check permission bits only, to let userspace poison the TCE table for debugging */
749 	if (tce_value & (TCE_PCI_WRITE | TCE_PCI_READ))
750 		return H_PARAMETER;
751 
752 	list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
753 		unsigned long entry = ioba >> stt->page_shift;
754 
755 		for (i = 0; i < npages; ++i) {
756 			ret = kvmppc_tce_iommu_unmap(vcpu->kvm, stt,
757 					stit->tbl, entry + i);
758 
759 			if (ret == H_SUCCESS)
760 				continue;
761 
762 			if (ret == H_TOO_HARD)
763 				return ret;
764 
765 			WARN_ON_ONCE(1);
766 			kvmppc_clear_tce(vcpu->kvm->mm, stt, stit->tbl, entry + i);
767 		}
768 	}
769 
770 	for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift))
771 		kvmppc_tce_put(stt, ioba >> stt->page_shift, tce_value);
772 
773 	return ret;
774 }
775 EXPORT_SYMBOL_GPL(kvmppc_h_stuff_tce);
776 
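/*
 * H_GET_TCE: return the current guest-visible TCE for @ioba in GPR4.
 */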
777 long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
778 		      unsigned long ioba)
779 {
780 	struct kvmppc_spapr_tce_table *stt;
781 	long ret;
782 	unsigned long idx;
783 	struct page *page;
784 	u64 *tbl;
785 
786 	stt = kvmppc_find_table(vcpu->kvm, liobn);
787 	if (!stt)
788 		return H_TOO_HARD;
789 
790 	ret = kvmppc_ioba_validate(stt, ioba, 1);
791 	if (ret != H_SUCCESS)
792 		return ret;
793 
794 	idx = (ioba >> stt->page_shift) - stt->offset;
795 	page = stt->pages[idx / TCES_PER_PAGE];
796 	if (!page) {
797 		vcpu->arch.regs.gpr[4] = 0;
798 		return H_SUCCESS;
799 	}
800 	tbl = (u64 *)page_address(page);
801 
802 	vcpu->arch.regs.gpr[4] = tbl[idx % TCES_PER_PAGE];
803 
804 	return H_SUCCESS;
805 }
806 EXPORT_SYMBOL_GPL(kvmppc_h_get_tce);
807