xref: /openbmc/linux/arch/powerpc/kvm/powerpc.c (revision e5f586c763a079349398e2b0c7c271386193ac34)
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License, version 2, as
4  * published by the Free Software Foundation.
5  *
6  * This program is distributed in the hope that it will be useful,
7  * but WITHOUT ANY WARRANTY; without even the implied warranty of
8  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
9  * GNU General Public License for more details.
10  *
11  * You should have received a copy of the GNU General Public License
12  * along with this program; if not, write to the Free Software
13  * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
14  *
15  * Copyright IBM Corp. 2007
16  *
17  * Authors: Hollis Blanchard <hollisb@us.ibm.com>
18  *          Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
19  */
20 
21 #include <linux/errno.h>
22 #include <linux/err.h>
23 #include <linux/kvm_host.h>
24 #include <linux/vmalloc.h>
25 #include <linux/hrtimer.h>
26 #include <linux/sched/signal.h>
27 #include <linux/fs.h>
28 #include <linux/slab.h>
29 #include <linux/file.h>
30 #include <linux/module.h>
31 #include <linux/irqbypass.h>
32 #include <linux/kvm_irqfd.h>
33 #include <asm/cputable.h>
34 #include <linux/uaccess.h>
35 #include <asm/kvm_ppc.h>
36 #include <asm/tlbflush.h>
37 #include <asm/cputhreads.h>
38 #include <asm/irqflags.h>
39 #include <asm/iommu.h>
40 #include "timing.h"
41 #include "irq.h"
42 #include "../mm/mmu_decl.h"
43 
44 #define CREATE_TRACE_POINTS
45 #include "trace.h"
46 
/*
 * Back-end operation tables for the HV and PR flavours of KVM.
 * kvm_arch_init_vm() treats a NULL pointer here as "flavour unavailable"
 * and prefers the HV table when the caller does not ask for a specific type.
 */
struct kvmppc_ops *kvmppc_hv_ops;
EXPORT_SYMBOL_GPL(kvmppc_hv_ops);
struct kvmppc_ops *kvmppc_pr_ops;
EXPORT_SYMBOL_GPL(kvmppc_pr_ops);
51 
52 
53 int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
54 {
55 	return !!(v->arch.pending_exceptions) ||
56 	       v->requests;
57 }
58 
/* This architecture always asks for the vcpu to be kicked; @vcpu is unused. */
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	return 1;
}
63 
/*
 * Common checks before entering the guest world.  Call with interrupts
 * enabled: the WARN_ON() below fires if they are already disabled, and the
 * function hard-disables them itself.
 *
 * returns:
 *
 * == 1 if we're ready to go into guest state (interrupts are left
 *      hard-disabled and guest_enter_irqoff() has been called)
 * <= 0 if we need to go back to the host with return value (interrupts
 *      are re-enabled before returning)
 */
int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
{
	int r;

	WARN_ON(irqs_disabled());
	hard_irq_disable();

	while (true) {
		/* Give up the CPU with interrupts on, then start over. */
		if (need_resched()) {
			local_irq_enable();
			cond_resched();
			hard_irq_disable();
			continue;
		}

		/* A pending signal sends us back to the host with -EINTR. */
		if (signal_pending(current)) {
			kvmppc_account_exit(vcpu, SIGNAL_EXITS);
			vcpu->run->exit_reason = KVM_EXIT_INTR;
			r = -EINTR;
			break;
		}

		vcpu->mode = IN_GUEST_MODE;

		/*
		 * Reading vcpu->requests must happen after setting vcpu->mode,
		 * so we don't miss a request because the requester sees
		 * OUTSIDE_GUEST_MODE and assumes we'll be checking requests
		 * before next entering the guest (and thus doesn't IPI).
		 * This also orders the write to mode from any reads
		 * to the page tables done while the VCPU is running.
		 * Please see the comment in kvm_flush_remote_tlbs.
		 */
		smp_mb();

		if (vcpu->requests) {
			/* Make sure we process requests preemptable */
			local_irq_enable();
			trace_kvm_check_requests(vcpu);
			r = kvmppc_core_check_requests(vcpu);
			hard_irq_disable();
			/* r > 0: requests handled, redo the checks; else bail out. */
			if (r > 0)
				continue;
			break;
		}

		if (kvmppc_core_prepare_to_enter(vcpu)) {
			/* interrupts got enabled in between, so we
			   are back at square 1 */
			continue;
		}

		guest_enter_irqoff();
		return 1;
	}

	/* return to host */
	local_irq_enable();
	return r;
}
EXPORT_SYMBOL_GPL(kvmppc_prepare_to_enter);
134 
#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_KVM_BOOK3S_PR_POSSIBLE)
/*
 * Byte-swap, in place, the fields of the vcpu's shared area that this
 * function knows about: the 64-bit registers, the two 32-bit words and
 * every segment register slot.
 */
static void kvmppc_swab_shared(struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu_arch_shared *sh = vcpu->arch.shared;
	int idx;

	/* 64-bit fields */
	sh->sprg0 = swab64(sh->sprg0);
	sh->sprg1 = swab64(sh->sprg1);
	sh->sprg2 = swab64(sh->sprg2);
	sh->sprg3 = swab64(sh->sprg3);
	sh->srr0 = swab64(sh->srr0);
	sh->srr1 = swab64(sh->srr1);
	sh->dar = swab64(sh->dar);
	sh->msr = swab64(sh->msr);

	/* 32-bit fields */
	sh->dsisr = swab32(sh->dsisr);
	sh->int_pending = swab32(sh->int_pending);

	for (idx = 0; idx < ARRAY_SIZE(sh->sr); idx++) {
		sh->sr[idx] = swab32(sh->sr[idx]);
	}
}
#endif
155 
/*
 * Handle a KVM paravirtual hypercall issued by the guest.
 *
 * The hypercall number is read from guest GPR 11 and up to four parameters
 * from GPRs 3-6 (truncated to 32 bits when MSR_SF is clear).  The secondary
 * result is always written back to guest GPR 4; the primary result
 * (EV_SUCCESS or EV_UNIMPLEMENTED) is returned to the caller.
 */
int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
{
	int nr = kvmppc_get_gpr(vcpu, 11);
	int r;
	unsigned long __maybe_unused param1 = kvmppc_get_gpr(vcpu, 3);
	unsigned long __maybe_unused param2 = kvmppc_get_gpr(vcpu, 4);
	unsigned long __maybe_unused param3 = kvmppc_get_gpr(vcpu, 5);
	unsigned long __maybe_unused param4 = kvmppc_get_gpr(vcpu, 6);
	unsigned long r2 = 0;

	if (!(kvmppc_get_msr(vcpu) & MSR_SF)) {
		/* 32 bit mode */
		param1 &= 0xffffffff;
		param2 &= 0xffffffff;
		param3 &= 0xffffffff;
		param4 &= 0xffffffff;
	}

	switch (nr) {
	case KVM_HCALL_TOKEN(KVM_HC_PPC_MAP_MAGIC_PAGE):
	{
#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_KVM_BOOK3S_PR_POSSIBLE)
		/* Book3S can be little endian, find it out here */
		int shared_big_endian = true;
		if (vcpu->arch.intr_msr & MSR_LE)
			shared_big_endian = false;
		/* Re-swap the shared area only when the endianness changed. */
		if (shared_big_endian != vcpu->arch.shared_big_endian)
			kvmppc_swab_shared(vcpu);
		vcpu->arch.shared_big_endian = shared_big_endian;
#endif

		if (!(param2 & MAGIC_PAGE_FLAG_NOT_MAPPED_NX)) {
			/*
			 * Older versions of the Linux magic page code had
			 * a bug where they would map their trampoline code
			 * NX. If that's the case, remove !PR NX capability.
			 */
			vcpu->arch.disable_kernel_nx = true;
			kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
		}

		/* param1/param2 carry the magic page PA/EA; keep them 4k aligned. */
		vcpu->arch.magic_page_pa = param1 & ~0xfffULL;
		vcpu->arch.magic_page_ea = param2 & ~0xfffULL;

#ifdef CONFIG_PPC_64K_PAGES
		/*
		 * Make sure our 4k magic page is in the same window of a 64k
		 * page within the guest and within the host's page.
		 */
		if ((vcpu->arch.magic_page_pa & 0xf000) !=
		    ((ulong)vcpu->arch.shared & 0xf000)) {
			void *old_shared = vcpu->arch.shared;
			ulong shared = (ulong)vcpu->arch.shared;
			void *new_shared;

			/* Same host page, but the 4k slot selected by the guest PA. */
			shared &= PAGE_MASK;
			shared |= vcpu->arch.magic_page_pa & 0xf000;
			new_shared = (void*)shared;
			memcpy(new_shared, old_shared, 0x1000);
			vcpu->arch.shared = new_shared;
		}
#endif

		r2 = KVM_MAGIC_FEAT_SR | KVM_MAGIC_FEAT_MAS0_TO_SPRG7;

		r = EV_SUCCESS;
		break;
	}
	case KVM_HCALL_TOKEN(KVM_HC_FEATURES):
		r = EV_SUCCESS;
#if defined(CONFIG_PPC_BOOK3S) || defined(CONFIG_KVM_E500V2)
		r2 |= (1 << KVM_FEATURE_MAGIC_PAGE);
#endif

		/* Second return value is in r4 */
		break;
	case EV_HCALL_TOKEN(EV_IDLE):
		r = EV_SUCCESS;
		kvm_vcpu_block(vcpu);
		clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
		break;
	default:
		r = EV_UNIMPLEMENTED;
		break;
	}

	/* r2 stays 0 for hypercalls that have no secondary result. */
	kvmppc_set_gpr(vcpu, 4, r2);

	return r;
}
EXPORT_SYMBOL_GPL(kvmppc_kvm_pv);
247 
248 int kvmppc_sanity_check(struct kvm_vcpu *vcpu)
249 {
250 	int r = false;
251 
252 	/* We have to know what CPU to virtualize */
253 	if (!vcpu->arch.pvr)
254 		goto out;
255 
256 	/* PAPR only works with book3s_64 */
257 	if ((vcpu->arch.cpu_type != KVM_CPU_3S_64) && vcpu->arch.papr_enabled)
258 		goto out;
259 
260 	/* HV KVM can only do PAPR mode for now */
261 	if (!vcpu->arch.papr_enabled && is_kvmppc_hv_enabled(vcpu->kvm))
262 		goto out;
263 
264 #ifdef CONFIG_KVM_BOOKE_HV
265 	if (!cpu_has_feature(CPU_FTR_EMB_HV))
266 		goto out;
267 #endif
268 
269 	r = true;
270 
271 out:
272 	vcpu->arch.sane = r;
273 	return r ? 0 : -EINVAL;
274 }
275 EXPORT_SYMBOL_GPL(kvmppc_sanity_check);
276 
277 int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu)
278 {
279 	enum emulation_result er;
280 	int r;
281 
282 	er = kvmppc_emulate_loadstore(vcpu);
283 	switch (er) {
284 	case EMULATE_DONE:
285 		/* Future optimization: only reload non-volatiles if they were
286 		 * actually modified. */
287 		r = RESUME_GUEST_NV;
288 		break;
289 	case EMULATE_AGAIN:
290 		r = RESUME_GUEST;
291 		break;
292 	case EMULATE_DO_MMIO:
293 		run->exit_reason = KVM_EXIT_MMIO;
294 		/* We must reload nonvolatiles because "update" load/store
295 		 * instructions modify register state. */
296 		/* Future optimization: only reload non-volatiles if they were
297 		 * actually modified. */
298 		r = RESUME_HOST_NV;
299 		break;
300 	case EMULATE_FAIL:
301 	{
302 		u32 last_inst;
303 
304 		kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst);
305 		/* XXX Deliver Program interrupt to guest. */
306 		pr_emerg("%s: emulation failed (%08x)\n", __func__, last_inst);
307 		r = RESUME_HOST;
308 		break;
309 	}
310 	default:
311 		WARN_ON(1);
312 		r = RESUME_GUEST;
313 	}
314 
315 	return r;
316 }
317 EXPORT_SYMBOL_GPL(kvmppc_emulate_mmio);
318 
319 int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr,
320 	      bool data)
321 {
322 	ulong mp_pa = vcpu->arch.magic_page_pa & KVM_PAM & PAGE_MASK;
323 	struct kvmppc_pte pte;
324 	int r;
325 
326 	vcpu->stat.st++;
327 
328 	r = kvmppc_xlate(vcpu, *eaddr, data ? XLATE_DATA : XLATE_INST,
329 			 XLATE_WRITE, &pte);
330 	if (r < 0)
331 		return r;
332 
333 	*eaddr = pte.raddr;
334 
335 	if (!pte.may_write)
336 		return -EPERM;
337 
338 	/* Magic page override */
339 	if (kvmppc_supports_magic_page(vcpu) && mp_pa &&
340 	    ((pte.raddr & KVM_PAM & PAGE_MASK) == mp_pa) &&
341 	    !(kvmppc_get_msr(vcpu) & MSR_PR)) {
342 		void *magic = vcpu->arch.shared;
343 		magic += pte.eaddr & 0xfff;
344 		memcpy(magic, ptr, size);
345 		return EMULATE_DONE;
346 	}
347 
348 	if (kvm_write_guest(vcpu->kvm, pte.raddr, ptr, size))
349 		return EMULATE_DO_MMIO;
350 
351 	return EMULATE_DONE;
352 }
353 EXPORT_SYMBOL_GPL(kvmppc_st);
354 
355 int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr,
356 		      bool data)
357 {
358 	ulong mp_pa = vcpu->arch.magic_page_pa & KVM_PAM & PAGE_MASK;
359 	struct kvmppc_pte pte;
360 	int rc;
361 
362 	vcpu->stat.ld++;
363 
364 	rc = kvmppc_xlate(vcpu, *eaddr, data ? XLATE_DATA : XLATE_INST,
365 			  XLATE_READ, &pte);
366 	if (rc)
367 		return rc;
368 
369 	*eaddr = pte.raddr;
370 
371 	if (!pte.may_read)
372 		return -EPERM;
373 
374 	if (!data && !pte.may_execute)
375 		return -ENOEXEC;
376 
377 	/* Magic page override */
378 	if (kvmppc_supports_magic_page(vcpu) && mp_pa &&
379 	    ((pte.raddr & KVM_PAM & PAGE_MASK) == mp_pa) &&
380 	    !(kvmppc_get_msr(vcpu) & MSR_PR)) {
381 		void *magic = vcpu->arch.shared;
382 		magic += pte.eaddr & 0xfff;
383 		memcpy(ptr, magic, size);
384 		return EMULATE_DONE;
385 	}
386 
387 	if (kvm_read_guest(vcpu->kvm, pte.raddr, ptr, size))
388 		return EMULATE_DO_MMIO;
389 
390 	return EMULATE_DONE;
391 }
392 EXPORT_SYMBOL_GPL(kvmppc_ld);
393 
/* Nothing to enable here; always reports success (0). */
int kvm_arch_hardware_enable(void)
{
	return 0;
}
398 
/* No setup work is done here; always reports success (0). */
int kvm_arch_hardware_setup(void)
{
	return 0;
}
403 
/*
 * Store the result of kvmppc_core_check_processor_compat() into the int
 * that @rtn points to.
 */
void kvm_arch_check_processor_compat(void *rtn)
{
	int *result = rtn;

	*result = kvmppc_core_check_processor_compat();
}
408 
409 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
410 {
411 	struct kvmppc_ops *kvm_ops = NULL;
412 	/*
413 	 * if we have both HV and PR enabled, default is HV
414 	 */
415 	if (type == 0) {
416 		if (kvmppc_hv_ops)
417 			kvm_ops = kvmppc_hv_ops;
418 		else
419 			kvm_ops = kvmppc_pr_ops;
420 		if (!kvm_ops)
421 			goto err_out;
422 	} else	if (type == KVM_VM_PPC_HV) {
423 		if (!kvmppc_hv_ops)
424 			goto err_out;
425 		kvm_ops = kvmppc_hv_ops;
426 	} else if (type == KVM_VM_PPC_PR) {
427 		if (!kvmppc_pr_ops)
428 			goto err_out;
429 		kvm_ops = kvmppc_pr_ops;
430 	} else
431 		goto err_out;
432 
433 	if (kvm_ops->owner && !try_module_get(kvm_ops->owner))
434 		return -ENOENT;
435 
436 	kvm->arch.kvm_ops = kvm_ops;
437 	return kvmppc_core_init_vm(kvm);
438 err_out:
439 	return -EINVAL;
440 }
441 
/* Always answers false to the per-vcpu debugfs query. */
bool kvm_arch_has_vcpu_debugfs(void)
{
	return false;
}
446 
/* No-op counterpart of kvm_arch_has_vcpu_debugfs(); always returns 0. */
int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
{
	return 0;
}
451 
/*
 * Tear down a VM: free every vcpu, clear the vcpu table, let the back end
 * destroy its state and finally drop the back-end module reference that
 * kvm_arch_init_vm() took.
 */
void kvm_arch_destroy_vm(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

#ifdef CONFIG_KVM_XICS
	/*
	 * We call kick_all_cpus_sync() to ensure that all
	 * CPUs have executed any pending IPIs before we
	 * continue and free VCPUs structures below.
	 */
	if (is_kvmppc_hv_enabled(kvm))
		kick_all_cpus_sync();
#endif

	kvm_for_each_vcpu(i, vcpu, kvm)
		kvm_arch_vcpu_free(vcpu);

	/* The table reset and the core teardown run under kvm->lock. */
	mutex_lock(&kvm->lock);
	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
		kvm->vcpus[i] = NULL;

	atomic_set(&kvm->online_vcpus, 0);

	kvmppc_core_destroy_vm(kvm);

	mutex_unlock(&kvm->lock);

	/* drop the module reference */
	module_put(kvm->arch.kvm_ops->owner);
}
483 
484 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
485 {
486 	int r;
487 	/* Assume we're using HV mode when the HV module is loaded */
488 	int hv_enabled = kvmppc_hv_ops ? 1 : 0;
489 
490 	if (kvm) {
491 		/*
492 		 * Hooray - we know which VM type we're running on. Depend on
493 		 * that rather than the guess above.
494 		 */
495 		hv_enabled = is_kvmppc_hv_enabled(kvm);
496 	}
497 
498 	switch (ext) {
499 #ifdef CONFIG_BOOKE
500 	case KVM_CAP_PPC_BOOKE_SREGS:
501 	case KVM_CAP_PPC_BOOKE_WATCHDOG:
502 	case KVM_CAP_PPC_EPR:
503 #else
504 	case KVM_CAP_PPC_SEGSTATE:
505 	case KVM_CAP_PPC_HIOR:
506 	case KVM_CAP_PPC_PAPR:
507 #endif
508 	case KVM_CAP_PPC_UNSET_IRQ:
509 	case KVM_CAP_PPC_IRQ_LEVEL:
510 	case KVM_CAP_ENABLE_CAP:
511 	case KVM_CAP_ENABLE_CAP_VM:
512 	case KVM_CAP_ONE_REG:
513 	case KVM_CAP_IOEVENTFD:
514 	case KVM_CAP_DEVICE_CTRL:
515 	case KVM_CAP_IMMEDIATE_EXIT:
516 		r = 1;
517 		break;
518 	case KVM_CAP_PPC_PAIRED_SINGLES:
519 	case KVM_CAP_PPC_OSI:
520 	case KVM_CAP_PPC_GET_PVINFO:
521 #if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC)
522 	case KVM_CAP_SW_TLB:
523 #endif
524 		/* We support this only for PR */
525 		r = !hv_enabled;
526 		break;
527 #ifdef CONFIG_KVM_MMIO
528 	case KVM_CAP_COALESCED_MMIO:
529 		r = KVM_COALESCED_MMIO_PAGE_OFFSET;
530 		break;
531 #endif
532 #ifdef CONFIG_KVM_MPIC
533 	case KVM_CAP_IRQ_MPIC:
534 		r = 1;
535 		break;
536 #endif
537 
538 #ifdef CONFIG_PPC_BOOK3S_64
539 	case KVM_CAP_SPAPR_TCE:
540 	case KVM_CAP_SPAPR_TCE_64:
541 	case KVM_CAP_PPC_RTAS:
542 	case KVM_CAP_PPC_FIXUP_HCALL:
543 	case KVM_CAP_PPC_ENABLE_HCALL:
544 #ifdef CONFIG_KVM_XICS
545 	case KVM_CAP_IRQ_XICS:
546 #endif
547 		r = 1;
548 		break;
549 
550 	case KVM_CAP_PPC_ALLOC_HTAB:
551 		r = hv_enabled;
552 		break;
553 #endif /* CONFIG_PPC_BOOK3S_64 */
554 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
555 	case KVM_CAP_PPC_SMT:
556 		r = 0;
557 		if (hv_enabled) {
558 			if (cpu_has_feature(CPU_FTR_ARCH_300))
559 				r = 1;
560 			else
561 				r = threads_per_subcore;
562 		}
563 		break;
564 	case KVM_CAP_PPC_RMA:
565 		r = 0;
566 		break;
567 	case KVM_CAP_PPC_HWRNG:
568 		r = kvmppc_hwrng_present();
569 		break;
570 	case KVM_CAP_PPC_MMU_RADIX:
571 		r = !!(hv_enabled && radix_enabled());
572 		break;
573 	case KVM_CAP_PPC_MMU_HASH_V3:
574 		r = !!(hv_enabled && !radix_enabled() &&
575 		       cpu_has_feature(CPU_FTR_ARCH_300));
576 		break;
577 #endif
578 	case KVM_CAP_SYNC_MMU:
579 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
580 		r = hv_enabled;
581 #elif defined(KVM_ARCH_WANT_MMU_NOTIFIER)
582 		r = 1;
583 #else
584 		r = 0;
585 #endif
586 		break;
587 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
588 	case KVM_CAP_PPC_HTAB_FD:
589 		r = hv_enabled;
590 		break;
591 #endif
592 	case KVM_CAP_NR_VCPUS:
593 		/*
594 		 * Recommending a number of CPUs is somewhat arbitrary; we
595 		 * return the number of present CPUs for -HV (since a host
596 		 * will have secondary threads "offline"), and for other KVM
597 		 * implementations just count online CPUs.
598 		 */
599 		if (hv_enabled)
600 			r = num_present_cpus();
601 		else
602 			r = num_online_cpus();
603 		break;
604 	case KVM_CAP_NR_MEMSLOTS:
605 		r = KVM_USER_MEM_SLOTS;
606 		break;
607 	case KVM_CAP_MAX_VCPUS:
608 		r = KVM_MAX_VCPUS;
609 		break;
610 #ifdef CONFIG_PPC_BOOK3S_64
611 	case KVM_CAP_PPC_GET_SMMU_INFO:
612 		r = 1;
613 		break;
614 	case KVM_CAP_SPAPR_MULTITCE:
615 		r = 1;
616 		break;
617 	case KVM_CAP_SPAPR_RESIZE_HPT:
618 		/* Disable this on POWER9 until code handles new HPTE format */
619 		r = !!hv_enabled && !cpu_has_feature(CPU_FTR_ARCH_300);
620 		break;
621 #endif
622 	case KVM_CAP_PPC_HTM:
623 		r = cpu_has_feature(CPU_FTR_TM_COMP) &&
624 		    is_kvmppc_hv_enabled(kvm);
625 		break;
626 	default:
627 		r = 0;
628 		break;
629 	}
630 	return r;
631 
632 }
633 
/* No device-level ioctl is handled here; every request gets -EINVAL. */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	return -EINVAL;
}
639 
/* Forward memslot freeing to kvmppc_core_free_memslot(). */
void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
			   struct kvm_memory_slot *dont)
{
	kvmppc_core_free_memslot(kvm, free, dont);
}
645 
/* Forward memslot creation to kvmppc_core_create_memslot() and return its result. */
int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	int ret = kvmppc_core_create_memslot(kvm, slot, npages);

	return ret;
}
651 
652 int kvm_arch_prepare_memory_region(struct kvm *kvm,
653 				   struct kvm_memory_slot *memslot,
654 				   const struct kvm_userspace_memory_region *mem,
655 				   enum kvm_mr_change change)
656 {
657 	return kvmppc_core_prepare_memory_region(kvm, memslot, mem);
658 }
659 
660 void kvm_arch_commit_memory_region(struct kvm *kvm,
661 				   const struct kvm_userspace_memory_region *mem,
662 				   const struct kvm_memory_slot *old,
663 				   const struct kvm_memory_slot *new,
664 				   enum kvm_mr_change change)
665 {
666 	kvmppc_core_commit_memory_region(kvm, mem, old, new);
667 }
668 
/* Forward the per-slot flush to kvmppc_core_flush_memslot(). */
void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
				   struct kvm_memory_slot *slot)
{
	kvmppc_core_flush_memslot(kvm, slot);
}
674 
675 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
676 {
677 	struct kvm_vcpu *vcpu;
678 	vcpu = kvmppc_core_vcpu_create(kvm, id);
679 	if (!IS_ERR(vcpu)) {
680 		vcpu->arch.wqp = &vcpu->wq;
681 		kvmppc_create_vcpu_debugfs(vcpu, id);
682 	}
683 	return vcpu;
684 }
685 
/* Nothing to do after vcpu creation. */
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
}
689 
690 void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
691 {
692 	/* Make sure we're not using the vcpu anymore */
693 	hrtimer_cancel(&vcpu->arch.dec_timer);
694 
695 	kvmppc_remove_vcpu_debugfs(vcpu);
696 
697 	switch (vcpu->arch.irq_type) {
698 	case KVMPPC_IRQ_MPIC:
699 		kvmppc_mpic_disconnect_vcpu(vcpu->arch.mpic, vcpu);
700 		break;
701 	case KVMPPC_IRQ_XICS:
702 		kvmppc_xics_free_icp(vcpu);
703 		break;
704 	}
705 
706 	kvmppc_core_vcpu_free(vcpu);
707 }
708 
/* Destroying a vcpu is the same operation as kvm_arch_vcpu_free(). */
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	kvm_arch_vcpu_free(vcpu);
}
713 
/* Returns whatever kvmppc_core_pending_dec() reports for @vcpu. */
int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
{
	int pending = kvmppc_core_pending_dec(vcpu);

	return pending;
}
718 
719 static enum hrtimer_restart kvmppc_decrementer_wakeup(struct hrtimer *timer)
720 {
721 	struct kvm_vcpu *vcpu;
722 
723 	vcpu = container_of(timer, struct kvm_vcpu, arch.dec_timer);
724 	kvmppc_decrementer_func(vcpu);
725 
726 	return HRTIMER_NORESTART;
727 }
728 
729 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
730 {
731 	int ret;
732 
733 	hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
734 	vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup;
735 	vcpu->arch.dec_expires = ~(u64)0;
736 
737 #ifdef CONFIG_KVM_EXIT_TIMING
738 	mutex_init(&vcpu->arch.exit_timing_lock);
739 #endif
740 	ret = kvmppc_subarch_vcpu_init(vcpu);
741 	return ret;
742 }
743 
/* Undo vcpu initialisation: destroy the MMU state, then the sub-arch state. */
void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
{
	kvmppc_mmu_destroy(vcpu);

	kvmppc_subarch_vcpu_uninit(vcpu);
}
749 
/*
 * Load the vcpu onto @cpu.  On BookE the saved VRSAVE value is written to
 * the SPR before the core load hook runs.
 */
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
#ifdef CONFIG_BOOKE
	/*
	 * Linux itself does not use vrsave (formerly usprg0), but the
	 * guest may.  On non-booke it belongs to Altivec and is handled
	 * by code in book3s.c.
	 */
	mtspr(SPRN_VRSAVE, vcpu->arch.vrsave);
#endif

	kvmppc_core_vcpu_load(vcpu, cpu);
}
764 
/*
 * Take the vcpu off the CPU.  On BookE the current VRSAVE SPR value is
 * saved into the vcpu after the core put hook has run.
 */
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	kvmppc_core_vcpu_put(vcpu);

#ifdef CONFIG_BOOKE
	vcpu->arch.vrsave = mfspr(SPRN_VRSAVE);
#endif
}
772 
773 /*
774  * irq_bypass_add_producer and irq_bypass_del_producer are only
775  * useful if the architecture supports PCI passthrough.
776  * irq_bypass_stop and irq_bypass_start are not needed and so
777  * kvm_ops are not defined for them.
778  */
779 bool kvm_arch_has_irq_bypass(void)
780 {
781 	return ((kvmppc_hv_ops && kvmppc_hv_ops->irq_bypass_add_producer) ||
782 		(kvmppc_pr_ops && kvmppc_pr_ops->irq_bypass_add_producer));
783 }
784 
785 int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
786 				     struct irq_bypass_producer *prod)
787 {
788 	struct kvm_kernel_irqfd *irqfd =
789 		container_of(cons, struct kvm_kernel_irqfd, consumer);
790 	struct kvm *kvm = irqfd->kvm;
791 
792 	if (kvm->arch.kvm_ops->irq_bypass_add_producer)
793 		return kvm->arch.kvm_ops->irq_bypass_add_producer(cons, prod);
794 
795 	return 0;
796 }
797 
798 void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
799 				      struct irq_bypass_producer *prod)
800 {
801 	struct kvm_kernel_irqfd *irqfd =
802 		container_of(cons, struct kvm_kernel_irqfd, consumer);
803 	struct kvm *kvm = irqfd->kvm;
804 
805 	if (kvm->arch.kvm_ops->irq_bypass_del_producer)
806 		kvm->arch.kvm_ops->irq_bypass_del_producer(cons, prod);
807 }
808 
809 static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
810                                       struct kvm_run *run)
811 {
812 	u64 uninitialized_var(gpr);
813 
814 	if (run->mmio.len > sizeof(gpr)) {
815 		printk(KERN_ERR "bad MMIO length: %d\n", run->mmio.len);
816 		return;
817 	}
818 
819 	if (!vcpu->arch.mmio_host_swabbed) {
820 		switch (run->mmio.len) {
821 		case 8: gpr = *(u64 *)run->mmio.data; break;
822 		case 4: gpr = *(u32 *)run->mmio.data; break;
823 		case 2: gpr = *(u16 *)run->mmio.data; break;
824 		case 1: gpr = *(u8 *)run->mmio.data; break;
825 		}
826 	} else {
827 		switch (run->mmio.len) {
828 		case 8: gpr = swab64(*(u64 *)run->mmio.data); break;
829 		case 4: gpr = swab32(*(u32 *)run->mmio.data); break;
830 		case 2: gpr = swab16(*(u16 *)run->mmio.data); break;
831 		case 1: gpr = *(u8 *)run->mmio.data; break;
832 		}
833 	}
834 
835 	if (vcpu->arch.mmio_sign_extend) {
836 		switch (run->mmio.len) {
837 #ifdef CONFIG_PPC64
838 		case 4:
839 			gpr = (s64)(s32)gpr;
840 			break;
841 #endif
842 		case 2:
843 			gpr = (s64)(s16)gpr;
844 			break;
845 		case 1:
846 			gpr = (s64)(s8)gpr;
847 			break;
848 		}
849 	}
850 
851 	kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr);
852 
853 	switch (vcpu->arch.io_gpr & KVM_MMIO_REG_EXT_MASK) {
854 	case KVM_MMIO_REG_GPR:
855 		kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr);
856 		break;
857 	case KVM_MMIO_REG_FPR:
858 		VCPU_FPR(vcpu, vcpu->arch.io_gpr & KVM_MMIO_REG_MASK) = gpr;
859 		break;
860 #ifdef CONFIG_PPC_BOOK3S
861 	case KVM_MMIO_REG_QPR:
862 		vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_MMIO_REG_MASK] = gpr;
863 		break;
864 	case KVM_MMIO_REG_FQPR:
865 		VCPU_FPR(vcpu, vcpu->arch.io_gpr & KVM_MMIO_REG_MASK) = gpr;
866 		vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_MMIO_REG_MASK] = gpr;
867 		break;
868 #endif
869 	default:
870 		BUG();
871 	}
872 }
873 
874 static int __kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
875 				unsigned int rt, unsigned int bytes,
876 				int is_default_endian, int sign_extend)
877 {
878 	int idx, ret;
879 	bool host_swabbed;
880 
881 	/* Pity C doesn't have a logical XOR operator */
882 	if (kvmppc_need_byteswap(vcpu)) {
883 		host_swabbed = is_default_endian;
884 	} else {
885 		host_swabbed = !is_default_endian;
886 	}
887 
888 	if (bytes > sizeof(run->mmio.data)) {
889 		printk(KERN_ERR "%s: bad MMIO length: %d\n", __func__,
890 		       run->mmio.len);
891 	}
892 
893 	run->mmio.phys_addr = vcpu->arch.paddr_accessed;
894 	run->mmio.len = bytes;
895 	run->mmio.is_write = 0;
896 
897 	vcpu->arch.io_gpr = rt;
898 	vcpu->arch.mmio_host_swabbed = host_swabbed;
899 	vcpu->mmio_needed = 1;
900 	vcpu->mmio_is_write = 0;
901 	vcpu->arch.mmio_sign_extend = sign_extend;
902 
903 	idx = srcu_read_lock(&vcpu->kvm->srcu);
904 
905 	ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, run->mmio.phys_addr,
906 			      bytes, &run->mmio.data);
907 
908 	srcu_read_unlock(&vcpu->kvm->srcu, idx);
909 
910 	if (!ret) {
911 		kvmppc_complete_mmio_load(vcpu, run);
912 		vcpu->mmio_needed = 0;
913 		return EMULATE_DONE;
914 	}
915 
916 	return EMULATE_DO_MMIO;
917 }
918 
/* MMIO load without sign extension; see __kvmppc_handle_load(). */
int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
		       unsigned int rt, unsigned int bytes,
		       int is_default_endian)
{
	const int sign_extend = 0;

	return __kvmppc_handle_load(run, vcpu, rt, bytes, is_default_endian,
				    sign_extend);
}
EXPORT_SYMBOL_GPL(kvmppc_handle_load);
926 
/* MMIO load with sign extension; see __kvmppc_handle_load(). */
int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu,
			unsigned int rt, unsigned int bytes,
			int is_default_endian)
{
	const int sign_extend = 1;

	return __kvmppc_handle_load(run, vcpu, rt, bytes, is_default_endian,
				    sign_extend);
}
934 
935 int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
936 			u64 val, unsigned int bytes, int is_default_endian)
937 {
938 	void *data = run->mmio.data;
939 	int idx, ret;
940 	bool host_swabbed;
941 
942 	/* Pity C doesn't have a logical XOR operator */
943 	if (kvmppc_need_byteswap(vcpu)) {
944 		host_swabbed = is_default_endian;
945 	} else {
946 		host_swabbed = !is_default_endian;
947 	}
948 
949 	if (bytes > sizeof(run->mmio.data)) {
950 		printk(KERN_ERR "%s: bad MMIO length: %d\n", __func__,
951 		       run->mmio.len);
952 	}
953 
954 	run->mmio.phys_addr = vcpu->arch.paddr_accessed;
955 	run->mmio.len = bytes;
956 	run->mmio.is_write = 1;
957 	vcpu->mmio_needed = 1;
958 	vcpu->mmio_is_write = 1;
959 
960 	/* Store the value at the lowest bytes in 'data'. */
961 	if (!host_swabbed) {
962 		switch (bytes) {
963 		case 8: *(u64 *)data = val; break;
964 		case 4: *(u32 *)data = val; break;
965 		case 2: *(u16 *)data = val; break;
966 		case 1: *(u8  *)data = val; break;
967 		}
968 	} else {
969 		switch (bytes) {
970 		case 8: *(u64 *)data = swab64(val); break;
971 		case 4: *(u32 *)data = swab32(val); break;
972 		case 2: *(u16 *)data = swab16(val); break;
973 		case 1: *(u8  *)data = val; break;
974 		}
975 	}
976 
977 	idx = srcu_read_lock(&vcpu->kvm->srcu);
978 
979 	ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, run->mmio.phys_addr,
980 			       bytes, &run->mmio.data);
981 
982 	srcu_read_unlock(&vcpu->kvm->srcu, idx);
983 
984 	if (!ret) {
985 		vcpu->mmio_needed = 0;
986 		return EMULATE_DONE;
987 	}
988 
989 	return EMULATE_DO_MMIO;
990 }
991 EXPORT_SYMBOL_GPL(kvmppc_handle_store);
992 
993 int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
994 {
995 	int r = 0;
996 	union kvmppc_one_reg val;
997 	int size;
998 
999 	size = one_reg_size(reg->id);
1000 	if (size > sizeof(val))
1001 		return -EINVAL;
1002 
1003 	r = kvmppc_get_one_reg(vcpu, reg->id, &val);
1004 	if (r == -EINVAL) {
1005 		r = 0;
1006 		switch (reg->id) {
1007 #ifdef CONFIG_ALTIVEC
1008 		case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31:
1009 			if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
1010 				r = -ENXIO;
1011 				break;
1012 			}
1013 			val.vval = vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0];
1014 			break;
1015 		case KVM_REG_PPC_VSCR:
1016 			if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
1017 				r = -ENXIO;
1018 				break;
1019 			}
1020 			val = get_reg_val(reg->id, vcpu->arch.vr.vscr.u[3]);
1021 			break;
1022 		case KVM_REG_PPC_VRSAVE:
1023 			val = get_reg_val(reg->id, vcpu->arch.vrsave);
1024 			break;
1025 #endif /* CONFIG_ALTIVEC */
1026 		default:
1027 			r = -EINVAL;
1028 			break;
1029 		}
1030 	}
1031 
1032 	if (r)
1033 		return r;
1034 
1035 	if (copy_to_user((char __user *)(unsigned long)reg->addr, &val, size))
1036 		r = -EFAULT;
1037 
1038 	return r;
1039 }
1040 
1041 int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
1042 {
1043 	int r;
1044 	union kvmppc_one_reg val;
1045 	int size;
1046 
1047 	size = one_reg_size(reg->id);
1048 	if (size > sizeof(val))
1049 		return -EINVAL;
1050 
1051 	if (copy_from_user(&val, (char __user *)(unsigned long)reg->addr, size))
1052 		return -EFAULT;
1053 
1054 	r = kvmppc_set_one_reg(vcpu, reg->id, &val);
1055 	if (r == -EINVAL) {
1056 		r = 0;
1057 		switch (reg->id) {
1058 #ifdef CONFIG_ALTIVEC
1059 		case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31:
1060 			if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
1061 				r = -ENXIO;
1062 				break;
1063 			}
1064 			vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0] = val.vval;
1065 			break;
1066 		case KVM_REG_PPC_VSCR:
1067 			if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
1068 				r = -ENXIO;
1069 				break;
1070 			}
1071 			vcpu->arch.vr.vscr.u[3] = set_reg_val(reg->id, val);
1072 			break;
1073 		case KVM_REG_PPC_VRSAVE:
1074 			if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
1075 				r = -ENXIO;
1076 				break;
1077 			}
1078 			vcpu->arch.vrsave = set_reg_val(reg->id, val);
1079 			break;
1080 #endif /* CONFIG_ALTIVEC */
1081 		default:
1082 			r = -EINVAL;
1083 			break;
1084 		}
1085 	}
1086 
1087 	return r;
1088 }
1089 
/*
 * Run the vcpu.
 *
 * Before entering the guest, finish whichever exit was left pending by the
 * previous run (MMIO load, OSI call, PAPR hypercall or, on BookE, an EPR
 * read) by copying the values found in @run into guest registers.  At most
 * one of these is completed per call.
 *
 * Returns -EINTR without entering the guest when run->immediate_exit is
 * set, otherwise the result of kvmppc_vcpu_run().
 */
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
	int r;
	sigset_t sigsaved;

	/* Install the vcpu's signal mask for the duration of the run. */
	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);

	if (vcpu->mmio_needed) {
		/* Only loads have a result to move into a register. */
		if (!vcpu->mmio_is_write)
			kvmppc_complete_mmio_load(vcpu, run);
		vcpu->mmio_needed = 0;
	} else if (vcpu->arch.osi_needed) {
		u64 *gprs = run->osi.gprs;
		int i;

		/* Copy all 32 GPRs from run->osi.gprs into the vcpu. */
		for (i = 0; i < 32; i++)
			kvmppc_set_gpr(vcpu, i, gprs[i]);
		vcpu->arch.osi_needed = 0;
	} else if (vcpu->arch.hcall_needed) {
		int i;

		/* Return value goes to r3, the nine argument slots to r4..r12. */
		kvmppc_set_gpr(vcpu, 3, run->papr_hcall.ret);
		for (i = 0; i < 9; ++i)
			kvmppc_set_gpr(vcpu, 4 + i, run->papr_hcall.args[i]);
		vcpu->arch.hcall_needed = 0;
#ifdef CONFIG_BOOKE
	} else if (vcpu->arch.epr_needed) {
		kvmppc_set_epr(vcpu, run->epr.epr);
		vcpu->arch.epr_needed = 0;
#endif
	}

	if (run->immediate_exit)
		r = -EINTR;
	else
		r = kvmppc_vcpu_run(run, vcpu);

	/* Restore the signal mask saved above. */
	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &sigsaved, NULL);

	return r;
}
1133 
1134 int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq)
1135 {
1136 	if (irq->irq == KVM_INTERRUPT_UNSET) {
1137 		kvmppc_core_dequeue_external(vcpu);
1138 		return 0;
1139 	}
1140 
1141 	kvmppc_core_queue_external(vcpu, irq);
1142 
1143 	kvm_vcpu_kick(vcpu);
1144 
1145 	return 0;
1146 }
1147 
/*
 * Enable a per-vcpu capability described by @cap.
 *
 * Any non-zero cap->flags is rejected with -EINVAL, as is an unknown
 * capability.  When the capability-specific step succeeds, the vcpu
 * configuration is re-validated with kvmppc_sanity_check() and that
 * result is returned.
 */
static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
				     struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_PPC_OSI:
		r = 0;
		vcpu->arch.osi_enabled = true;
		break;
	case KVM_CAP_PPC_PAPR:
		r = 0;
		vcpu->arch.papr_enabled = true;
		break;
	case KVM_CAP_PPC_EPR:
		r = 0;
		/* args[0] selects whether KVMPPC_EPR_USER is set or cleared. */
		if (cap->args[0])
			vcpu->arch.epr_flags |= KVMPPC_EPR_USER;
		else
			vcpu->arch.epr_flags &= ~KVMPPC_EPR_USER;
		break;
#ifdef CONFIG_BOOKE
	case KVM_CAP_PPC_BOOKE_WATCHDOG:
		r = 0;
		vcpu->arch.watchdog_enabled = true;
		break;
#endif
#if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC)
	case KVM_CAP_SW_TLB: {
		struct kvm_config_tlb cfg;
		/* args[0] is a user pointer to the TLB configuration. */
		void __user *user_ptr = (void __user *)(uintptr_t)cap->args[0];

		r = -EFAULT;
		if (copy_from_user(&cfg, user_ptr, sizeof(cfg)))
			break;

		r = kvm_vcpu_ioctl_config_tlb(vcpu, &cfg);
		break;
	}
#endif
#ifdef CONFIG_KVM_MPIC
	case KVM_CAP_IRQ_MPIC: {
		struct fd f;
		struct kvm_device *dev;

		/* args[0] is a file descriptor; -EBADF if it cannot be looked up. */
		r = -EBADF;
		f = fdget(cap->args[0]);
		if (!f.file)
			break;

		/* -EPERM unless the file resolves to a KVM device. */
		r = -EPERM;
		dev = kvm_device_from_filp(f.file);
		if (dev)
			r = kvmppc_mpic_connect_vcpu(dev, vcpu, cap->args[1]);

		fdput(f);
		break;
	}
#endif
#ifdef CONFIG_KVM_XICS
	case KVM_CAP_IRQ_XICS: {
		struct fd f;
		struct kvm_device *dev;

		/* args[0] is a file descriptor; -EBADF if it cannot be looked up. */
		r = -EBADF;
		f = fdget(cap->args[0]);
		if (!f.file)
			break;

		/* -EPERM unless the file resolves to a KVM device. */
		r = -EPERM;
		dev = kvm_device_from_filp(f.file);
		if (dev)
			r = kvmppc_xics_connect_vcpu(dev, vcpu, cap->args[1]);

		fdput(f);
		break;
	}
#endif /* CONFIG_KVM_XICS */
	default:
		r = -EINVAL;
		break;
	}

	/* A successful change may have made the configuration inconsistent. */
	if (!r)
		r = kvmppc_sanity_check(vcpu);

	return r;
}
1239 
1240 bool kvm_arch_intc_initialized(struct kvm *kvm)
1241 {
1242 #ifdef CONFIG_KVM_MPIC
1243 	if (kvm->arch.mpic)
1244 		return true;
1245 #endif
1246 #ifdef CONFIG_KVM_XICS
1247 	if (kvm->arch.xics)
1248 		return true;
1249 #endif
1250 	return false;
1251 }
1252 
1253 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
1254                                     struct kvm_mp_state *mp_state)
1255 {
1256 	return -EINVAL;
1257 }
1258 
1259 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
1260                                     struct kvm_mp_state *mp_state)
1261 {
1262 	return -EINVAL;
1263 }
1264 
1265 long kvm_arch_vcpu_ioctl(struct file *filp,
1266                          unsigned int ioctl, unsigned long arg)
1267 {
1268 	struct kvm_vcpu *vcpu = filp->private_data;
1269 	void __user *argp = (void __user *)arg;
1270 	long r;
1271 
1272 	switch (ioctl) {
1273 	case KVM_INTERRUPT: {
1274 		struct kvm_interrupt irq;
1275 		r = -EFAULT;
1276 		if (copy_from_user(&irq, argp, sizeof(irq)))
1277 			goto out;
1278 		r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
1279 		goto out;
1280 	}
1281 
1282 	case KVM_ENABLE_CAP:
1283 	{
1284 		struct kvm_enable_cap cap;
1285 		r = -EFAULT;
1286 		if (copy_from_user(&cap, argp, sizeof(cap)))
1287 			goto out;
1288 		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
1289 		break;
1290 	}
1291 
1292 	case KVM_SET_ONE_REG:
1293 	case KVM_GET_ONE_REG:
1294 	{
1295 		struct kvm_one_reg reg;
1296 		r = -EFAULT;
1297 		if (copy_from_user(&reg, argp, sizeof(reg)))
1298 			goto out;
1299 		if (ioctl == KVM_SET_ONE_REG)
1300 			r = kvm_vcpu_ioctl_set_one_reg(vcpu, &reg);
1301 		else
1302 			r = kvm_vcpu_ioctl_get_one_reg(vcpu, &reg);
1303 		break;
1304 	}
1305 
1306 #if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC)
1307 	case KVM_DIRTY_TLB: {
1308 		struct kvm_dirty_tlb dirty;
1309 		r = -EFAULT;
1310 		if (copy_from_user(&dirty, argp, sizeof(dirty)))
1311 			goto out;
1312 		r = kvm_vcpu_ioctl_dirty_tlb(vcpu, &dirty);
1313 		break;
1314 	}
1315 #endif
1316 	default:
1317 		r = -EINVAL;
1318 	}
1319 
1320 out:
1321 	return r;
1322 }
1323 
1324 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
1325 {
1326 	return VM_FAULT_SIGBUS;
1327 }
1328 
1329 static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo)
1330 {
1331 	u32 inst_nop = 0x60000000;
1332 #ifdef CONFIG_KVM_BOOKE_HV
1333 	u32 inst_sc1 = 0x44000022;
1334 	pvinfo->hcall[0] = cpu_to_be32(inst_sc1);
1335 	pvinfo->hcall[1] = cpu_to_be32(inst_nop);
1336 	pvinfo->hcall[2] = cpu_to_be32(inst_nop);
1337 	pvinfo->hcall[3] = cpu_to_be32(inst_nop);
1338 #else
1339 	u32 inst_lis = 0x3c000000;
1340 	u32 inst_ori = 0x60000000;
1341 	u32 inst_sc = 0x44000002;
1342 	u32 inst_imm_mask = 0xffff;
1343 
1344 	/*
1345 	 * The hypercall to get into KVM from within guest context is as
1346 	 * follows:
1347 	 *
1348 	 *    lis r0, r0, KVM_SC_MAGIC_R0@h
1349 	 *    ori r0, KVM_SC_MAGIC_R0@l
1350 	 *    sc
1351 	 *    nop
1352 	 */
1353 	pvinfo->hcall[0] = cpu_to_be32(inst_lis | ((KVM_SC_MAGIC_R0 >> 16) & inst_imm_mask));
1354 	pvinfo->hcall[1] = cpu_to_be32(inst_ori | (KVM_SC_MAGIC_R0 & inst_imm_mask));
1355 	pvinfo->hcall[2] = cpu_to_be32(inst_sc);
1356 	pvinfo->hcall[3] = cpu_to_be32(inst_nop);
1357 #endif
1358 
1359 	pvinfo->flags = KVM_PPC_PVINFO_FLAGS_EV_IDLE;
1360 
1361 	return 0;
1362 }
1363 
1364 int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
1365 			  bool line_status)
1366 {
1367 	if (!irqchip_in_kernel(kvm))
1368 		return -ENXIO;
1369 
1370 	irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
1371 					irq_event->irq, irq_event->level,
1372 					line_status);
1373 	return 0;
1374 }
1375 
1376 
1377 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
1378 				   struct kvm_enable_cap *cap)
1379 {
1380 	int r;
1381 
1382 	if (cap->flags)
1383 		return -EINVAL;
1384 
1385 	switch (cap->cap) {
1386 #ifdef CONFIG_KVM_BOOK3S_64_HANDLER
1387 	case KVM_CAP_PPC_ENABLE_HCALL: {
1388 		unsigned long hcall = cap->args[0];
1389 
1390 		r = -EINVAL;
1391 		if (hcall > MAX_HCALL_OPCODE || (hcall & 3) ||
1392 		    cap->args[1] > 1)
1393 			break;
1394 		if (!kvmppc_book3s_hcall_implemented(kvm, hcall))
1395 			break;
1396 		if (cap->args[1])
1397 			set_bit(hcall / 4, kvm->arch.enabled_hcalls);
1398 		else
1399 			clear_bit(hcall / 4, kvm->arch.enabled_hcalls);
1400 		r = 0;
1401 		break;
1402 	}
1403 #endif
1404 	default:
1405 		r = -EINVAL;
1406 		break;
1407 	}
1408 
1409 	return r;
1410 }
1411 
1412 long kvm_arch_vm_ioctl(struct file *filp,
1413                        unsigned int ioctl, unsigned long arg)
1414 {
1415 	struct kvm *kvm __maybe_unused = filp->private_data;
1416 	void __user *argp = (void __user *)arg;
1417 	long r;
1418 
1419 	switch (ioctl) {
1420 	case KVM_PPC_GET_PVINFO: {
1421 		struct kvm_ppc_pvinfo pvinfo;
1422 		memset(&pvinfo, 0, sizeof(pvinfo));
1423 		r = kvm_vm_ioctl_get_pvinfo(&pvinfo);
1424 		if (copy_to_user(argp, &pvinfo, sizeof(pvinfo))) {
1425 			r = -EFAULT;
1426 			goto out;
1427 		}
1428 
1429 		break;
1430 	}
1431 	case KVM_ENABLE_CAP:
1432 	{
1433 		struct kvm_enable_cap cap;
1434 		r = -EFAULT;
1435 		if (copy_from_user(&cap, argp, sizeof(cap)))
1436 			goto out;
1437 		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1438 		break;
1439 	}
1440 #ifdef CONFIG_PPC_BOOK3S_64
1441 	case KVM_CREATE_SPAPR_TCE_64: {
1442 		struct kvm_create_spapr_tce_64 create_tce_64;
1443 
1444 		r = -EFAULT;
1445 		if (copy_from_user(&create_tce_64, argp, sizeof(create_tce_64)))
1446 			goto out;
1447 		if (create_tce_64.flags) {
1448 			r = -EINVAL;
1449 			goto out;
1450 		}
1451 		r = kvm_vm_ioctl_create_spapr_tce(kvm, &create_tce_64);
1452 		goto out;
1453 	}
1454 	case KVM_CREATE_SPAPR_TCE: {
1455 		struct kvm_create_spapr_tce create_tce;
1456 		struct kvm_create_spapr_tce_64 create_tce_64;
1457 
1458 		r = -EFAULT;
1459 		if (copy_from_user(&create_tce, argp, sizeof(create_tce)))
1460 			goto out;
1461 
1462 		create_tce_64.liobn = create_tce.liobn;
1463 		create_tce_64.page_shift = IOMMU_PAGE_SHIFT_4K;
1464 		create_tce_64.offset = 0;
1465 		create_tce_64.size = create_tce.window_size >>
1466 				IOMMU_PAGE_SHIFT_4K;
1467 		create_tce_64.flags = 0;
1468 		r = kvm_vm_ioctl_create_spapr_tce(kvm, &create_tce_64);
1469 		goto out;
1470 	}
1471 	case KVM_PPC_GET_SMMU_INFO: {
1472 		struct kvm_ppc_smmu_info info;
1473 		struct kvm *kvm = filp->private_data;
1474 
1475 		memset(&info, 0, sizeof(info));
1476 		r = kvm->arch.kvm_ops->get_smmu_info(kvm, &info);
1477 		if (r >= 0 && copy_to_user(argp, &info, sizeof(info)))
1478 			r = -EFAULT;
1479 		break;
1480 	}
1481 	case KVM_PPC_RTAS_DEFINE_TOKEN: {
1482 		struct kvm *kvm = filp->private_data;
1483 
1484 		r = kvm_vm_ioctl_rtas_define_token(kvm, argp);
1485 		break;
1486 	}
1487 	case KVM_PPC_CONFIGURE_V3_MMU: {
1488 		struct kvm *kvm = filp->private_data;
1489 		struct kvm_ppc_mmuv3_cfg cfg;
1490 
1491 		r = -EINVAL;
1492 		if (!kvm->arch.kvm_ops->configure_mmu)
1493 			goto out;
1494 		r = -EFAULT;
1495 		if (copy_from_user(&cfg, argp, sizeof(cfg)))
1496 			goto out;
1497 		r = kvm->arch.kvm_ops->configure_mmu(kvm, &cfg);
1498 		break;
1499 	}
1500 	case KVM_PPC_GET_RMMU_INFO: {
1501 		struct kvm *kvm = filp->private_data;
1502 		struct kvm_ppc_rmmu_info info;
1503 
1504 		r = -EINVAL;
1505 		if (!kvm->arch.kvm_ops->get_rmmu_info)
1506 			goto out;
1507 		r = kvm->arch.kvm_ops->get_rmmu_info(kvm, &info);
1508 		if (r >= 0 && copy_to_user(argp, &info, sizeof(info)))
1509 			r = -EFAULT;
1510 		break;
1511 	}
1512 	default: {
1513 		struct kvm *kvm = filp->private_data;
1514 		r = kvm->arch.kvm_ops->arch_vm_ioctl(filp, ioctl, arg);
1515 	}
1516 #else /* CONFIG_PPC_BOOK3S_64 */
1517 	default:
1518 		r = -ENOTTY;
1519 #endif
1520 	}
1521 out:
1522 	return r;
1523 }
1524 
1525 static unsigned long lpid_inuse[BITS_TO_LONGS(KVMPPC_NR_LPIDS)];
1526 static unsigned long nr_lpids;
1527 
1528 long kvmppc_alloc_lpid(void)
1529 {
1530 	long lpid;
1531 
1532 	do {
1533 		lpid = find_first_zero_bit(lpid_inuse, KVMPPC_NR_LPIDS);
1534 		if (lpid >= nr_lpids) {
1535 			pr_err("%s: No LPIDs free\n", __func__);
1536 			return -ENOMEM;
1537 		}
1538 	} while (test_and_set_bit(lpid, lpid_inuse));
1539 
1540 	return lpid;
1541 }
1542 EXPORT_SYMBOL_GPL(kvmppc_alloc_lpid);
1543 
1544 void kvmppc_claim_lpid(long lpid)
1545 {
1546 	set_bit(lpid, lpid_inuse);
1547 }
1548 EXPORT_SYMBOL_GPL(kvmppc_claim_lpid);
1549 
1550 void kvmppc_free_lpid(long lpid)
1551 {
1552 	clear_bit(lpid, lpid_inuse);
1553 }
1554 EXPORT_SYMBOL_GPL(kvmppc_free_lpid);
1555 
1556 void kvmppc_init_lpid(unsigned long nr_lpids_param)
1557 {
1558 	nr_lpids = min_t(unsigned long, KVMPPC_NR_LPIDS, nr_lpids_param);
1559 	memset(lpid_inuse, 0, sizeof(lpid_inuse));
1560 }
1561 EXPORT_SYMBOL_GPL(kvmppc_init_lpid);
1562 
/*
 * Arch init hook: does nothing here.  @opaque is unused and the
 * function always reports success (0).
 */
int kvm_arch_init(void *opaque)
{
	return 0;
}
1567 
1568 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ppc_instr);
1569