xref: /openbmc/linux/arch/s390/kvm/kvm-s390.c (revision f7d84fa7)
1 /*
2  * hosting zSeries kernel virtual machines
3  *
4  * Copyright IBM Corp. 2008, 2009
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License (version 2 only)
8  * as published by the Free Software Foundation.
9  *
10  *    Author(s): Carsten Otte <cotte@de.ibm.com>
11  *               Christian Borntraeger <borntraeger@de.ibm.com>
12  *               Heiko Carstens <heiko.carstens@de.ibm.com>
13  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
14  *               Jason J. Herne <jjherne@us.ibm.com>
15  */
16 
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/moduleparam.h>
27 #include <linux/random.h>
28 #include <linux/slab.h>
29 #include <linux/timer.h>
30 #include <linux/vmalloc.h>
31 #include <linux/bitmap.h>
32 #include <linux/sched/signal.h>
33 
34 #include <asm/asm-offsets.h>
35 #include <asm/lowcore.h>
36 #include <asm/stp.h>
37 #include <asm/pgtable.h>
38 #include <asm/gmap.h>
39 #include <asm/nmi.h>
40 #include <asm/switch_to.h>
41 #include <asm/isc.h>
42 #include <asm/sclp.h>
43 #include <asm/cpacf.h>
44 #include <asm/timex.h>
45 #include "kvm-s390.h"
46 #include "gaccess.h"
47 
48 #define KMSG_COMPONENT "kvm-s390"
49 #undef pr_fmt
50 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
51 
52 #define CREATE_TRACE_POINTS
53 #include "trace.h"
54 #include "trace-s390.h"
55 
56 #define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
57 #define LOCAL_IRQS 32
58 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
59 			   (KVM_MAX_VCPUS + LOCAL_IRQS))
60 
61 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
62 
63 struct kvm_stats_debugfs_item debugfs_entries[] = {
64 	{ "userspace_handled", VCPU_STAT(exit_userspace) },
65 	{ "exit_null", VCPU_STAT(exit_null) },
66 	{ "exit_validity", VCPU_STAT(exit_validity) },
67 	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
68 	{ "exit_external_request", VCPU_STAT(exit_external_request) },
69 	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
70 	{ "exit_instruction", VCPU_STAT(exit_instruction) },
71 	{ "exit_pei", VCPU_STAT(exit_pei) },
72 	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
73 	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
74 	{ "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
75 	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
76 	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
77 	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
78 	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
79 	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
80 	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
81 	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
82 	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
83 	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
84 	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
85 	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
86 	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
87 	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
88 	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
89 	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
90 	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
91 	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
92 	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
93 	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
94 	{ "instruction_spx", VCPU_STAT(instruction_spx) },
95 	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
96 	{ "instruction_stap", VCPU_STAT(instruction_stap) },
97 	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
98 	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
99 	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
100 	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
101 	{ "instruction_essa", VCPU_STAT(instruction_essa) },
102 	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
103 	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
104 	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
105 	{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
106 	{ "instruction_sie", VCPU_STAT(instruction_sie) },
107 	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
108 	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
109 	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
110 	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
111 	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
112 	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
113 	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
114 	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
115 	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
116 	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
117 	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
118 	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
119 	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
120 	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
121 	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
122 	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
123 	{ "diagnose_10", VCPU_STAT(diagnose_10) },
124 	{ "diagnose_44", VCPU_STAT(diagnose_44) },
125 	{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
126 	{ "diagnose_258", VCPU_STAT(diagnose_258) },
127 	{ "diagnose_308", VCPU_STAT(diagnose_308) },
128 	{ "diagnose_500", VCPU_STAT(diagnose_500) },
129 	{ NULL }
130 };
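
/*
 * Illustrative expansion of the VCPU_STAT() helper above: an entry such as
 *
 *	{ "exit_null", VCPU_STAT(exit_null) },
 *
 * becomes
 *
 *	{ "exit_null", offsetof(struct kvm_vcpu, stat.exit_null), KVM_STAT_VCPU },
 *
 * so the generic KVM debugfs code can read the counter at that offset in
 * each struct kvm_vcpu and aggregate it across VCPUs.
 */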
131 
132 /* allow nested virtualization in KVM (if enabled by user space) */
133 static int nested;
134 module_param(nested, int, S_IRUGO);
135 MODULE_PARM_DESC(nested, "Nested virtualization support");
136 
137 /* upper facilities limit for kvm */
138 unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };
139 
140 unsigned long kvm_s390_fac_list_mask_size(void)
141 {
142 	BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
143 	return ARRAY_SIZE(kvm_s390_fac_list_mask);
144 }
145 
146 /* available cpu features supported by kvm */
147 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
148 /* available subfunctions indicated via query / "test bit" */
149 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
150 
151 static struct gmap_notifier gmap_notifier;
152 static struct gmap_notifier vsie_gmap_notifier;
153 debug_info_t *kvm_s390_dbf;
154 
155 /* Section: not file related */
156 int kvm_arch_hardware_enable(void)
157 {
158 	/* every s390 is virtualization enabled ;-) */
159 	return 0;
160 }
161 
162 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
163 			      unsigned long end);
164 
165 /*
166  * This callback is executed during stop_machine(). All CPUs are therefore
167  * temporarily stopped. In order not to change guest behavior, we have to
168  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
169  * so a CPU won't be stopped while calculating with the epoch.
170  */
171 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
172 			  void *v)
173 {
174 	struct kvm *kvm;
175 	struct kvm_vcpu *vcpu;
176 	int i;
177 	unsigned long long *delta = v;
178 
179 	list_for_each_entry(kvm, &vm_list, vm_list) {
180 		kvm->arch.epoch -= *delta;
181 		kvm_for_each_vcpu(i, vcpu, kvm) {
182 			vcpu->arch.sie_block->epoch -= *delta;
183 			if (vcpu->arch.cputm_enabled)
184 				vcpu->arch.cputm_start += *delta;
185 			if (vcpu->arch.vsie_block)
186 				vcpu->arch.vsie_block->epoch -= *delta;
187 		}
188 	}
189 	return NOTIFY_OK;
190 }
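
/*
 * A worked sketch of the invariant preserved above: the guest view of the
 * TOD clock is host_tod + epoch.  When STP steering moves the host clock by
 * *delta, subtracting *delta from each epoch keeps
 *
 *	(host_tod + *delta) + (epoch - *delta) == host_tod + epoch
 *
 * unchanged, and adding *delta to cputm_start keeps the accumulated CPU
 * timer value (see kvm_s390_get_cpu_timer()) stable as well.
 */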
191 
192 static struct notifier_block kvm_clock_notifier = {
193 	.notifier_call = kvm_clock_sync,
194 };
195 
196 int kvm_arch_hardware_setup(void)
197 {
198 	gmap_notifier.notifier_call = kvm_gmap_notifier;
199 	gmap_register_pte_notifier(&gmap_notifier);
200 	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
201 	gmap_register_pte_notifier(&vsie_gmap_notifier);
202 	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
203 				       &kvm_clock_notifier);
204 	return 0;
205 }
206 
207 void kvm_arch_hardware_unsetup(void)
208 {
209 	gmap_unregister_pte_notifier(&gmap_notifier);
210 	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
211 	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
212 					 &kvm_clock_notifier);
213 }
214 
215 static void allow_cpu_feat(unsigned long nr)
216 {
217 	set_bit_inv(nr, kvm_s390_available_cpu_feat);
218 }
219 
220 static inline int plo_test_bit(unsigned char nr)
221 {
222 	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
223 	int cc;
224 
225 	asm volatile(
226 		/* Parameter registers are ignored for "test bit" */
227 		"	plo	0,0,0,0(0)\n"
228 		"	ipm	%0\n"
229 		"	srl	%0,28\n"
230 		: "=d" (cc)
231 		: "d" (r0)
232 		: "cc");
233 	return cc == 0;
234 }
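
/*
 * A minimal sketch (assumed helper, not used elsewhere) of the MSB-0 bit
 * numbering used for the query blocks: subfunction bit "nr" is stored as
 * the (nr & 7)-th most significant bit of byte nr / 8, exactly as the PLO
 * loop in kvm_s390_cpu_feat_init() below records it.
 */
static inline void __maybe_unused plo_set_query_bit(u8 *query, unsigned char nr)
{
	query[nr >> 3] |= 0x80 >> (nr & 7);
}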
235 
236 static void kvm_s390_cpu_feat_init(void)
237 {
238 	int i;
239 
240 	for (i = 0; i < 256; ++i) {
241 		if (plo_test_bit(i))
242 			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
243 	}
244 
245 	if (test_facility(28)) /* TOD-clock steering */
246 		ptff(kvm_s390_available_subfunc.ptff,
247 		     sizeof(kvm_s390_available_subfunc.ptff),
248 		     PTFF_QAF);
249 
250 	if (test_facility(17)) { /* MSA */
251 		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
252 			      kvm_s390_available_subfunc.kmac);
253 		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
254 			      kvm_s390_available_subfunc.kmc);
255 		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
256 			      kvm_s390_available_subfunc.km);
257 		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
258 			      kvm_s390_available_subfunc.kimd);
259 		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
260 			      kvm_s390_available_subfunc.klmd);
261 	}
262 	if (test_facility(76)) /* MSA3 */
263 		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
264 			      kvm_s390_available_subfunc.pckmo);
265 	if (test_facility(77)) { /* MSA4 */
266 		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
267 			      kvm_s390_available_subfunc.kmctr);
268 		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
269 			      kvm_s390_available_subfunc.kmf);
270 		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
271 			      kvm_s390_available_subfunc.kmo);
272 		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
273 			      kvm_s390_available_subfunc.pcc);
274 	}
275 	if (test_facility(57)) /* MSA5 */
276 		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
277 			      kvm_s390_available_subfunc.ppno);
278 
279 	if (test_facility(146)) /* MSA8 */
280 		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
281 			      kvm_s390_available_subfunc.kma);
282 
283 	if (MACHINE_HAS_ESOP)
284 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
285 	/*
286 	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
287 	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
288 	 */
289 	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
290 	    !test_facility(3) || !nested)
291 		return;
292 	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
293 	if (sclp.has_64bscao)
294 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
295 	if (sclp.has_siif)
296 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
297 	if (sclp.has_gpere)
298 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
299 	if (sclp.has_gsls)
300 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
301 	if (sclp.has_ib)
302 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
303 	if (sclp.has_cei)
304 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
305 	if (sclp.has_ibs)
306 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
307 	if (sclp.has_kss)
308 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
309 	/*
310 	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
311 	 * all skey handling functions read/set the skey from the PGSTE
312 	 * instead of the real storage key.
313 	 *
314 	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will cause
315 	 * pages to be detected as preserved although they are resident.
316 	 *
317 	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
318 	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
319 	 *
320 	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
321 	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
322 	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
323 	 *
324 	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
325 	 * cannot easily shadow the SCA because of the ipte lock.
326 	 */
327 }
328 
329 int kvm_arch_init(void *opaque)
330 {
331 	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
332 	if (!kvm_s390_dbf)
333 		return -ENOMEM;
334 
335 	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
336 		debug_unregister(kvm_s390_dbf);
337 		return -ENOMEM;
338 	}
339 
340 	kvm_s390_cpu_feat_init();
341 
342 	/* Register floating interrupt controller interface. */
343 	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
344 }
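
/*
 * Sketch of how the trace buffer registered above is used (parameters per
 * the s390 debug feature, assumed here: 32 pages per area, one area, and an
 * entry size large enough for a format pointer plus arguments):
 *
 *	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
 *
 * becomes a debug_sprintf_event() record readable under
 * /sys/kernel/debug/s390dbf/kvm-trace/.
 */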
345 
346 void kvm_arch_exit(void)
347 {
348 	debug_unregister(kvm_s390_dbf);
349 }
350 
351 /* Section: device related */
352 long kvm_arch_dev_ioctl(struct file *filp,
353 			unsigned int ioctl, unsigned long arg)
354 {
355 	if (ioctl == KVM_S390_ENABLE_SIE)
356 		return s390_enable_sie();
357 	return -EINVAL;
358 }
359 
360 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
361 {
362 	int r;
363 
364 	switch (ext) {
365 	case KVM_CAP_S390_PSW:
366 	case KVM_CAP_S390_GMAP:
367 	case KVM_CAP_SYNC_MMU:
368 #ifdef CONFIG_KVM_S390_UCONTROL
369 	case KVM_CAP_S390_UCONTROL:
370 #endif
371 	case KVM_CAP_ASYNC_PF:
372 	case KVM_CAP_SYNC_REGS:
373 	case KVM_CAP_ONE_REG:
374 	case KVM_CAP_ENABLE_CAP:
375 	case KVM_CAP_S390_CSS_SUPPORT:
376 	case KVM_CAP_IOEVENTFD:
377 	case KVM_CAP_DEVICE_CTRL:
378 	case KVM_CAP_ENABLE_CAP_VM:
379 	case KVM_CAP_S390_IRQCHIP:
380 	case KVM_CAP_VM_ATTRIBUTES:
381 	case KVM_CAP_MP_STATE:
382 	case KVM_CAP_IMMEDIATE_EXIT:
383 	case KVM_CAP_S390_INJECT_IRQ:
384 	case KVM_CAP_S390_USER_SIGP:
385 	case KVM_CAP_S390_USER_STSI:
386 	case KVM_CAP_S390_SKEYS:
387 	case KVM_CAP_S390_IRQ_STATE:
388 	case KVM_CAP_S390_USER_INSTR0:
389 	case KVM_CAP_S390_AIS:
390 		r = 1;
391 		break;
392 	case KVM_CAP_S390_MEM_OP:
393 		r = MEM_OP_MAX_SIZE;
394 		break;
395 	case KVM_CAP_NR_VCPUS:
396 	case KVM_CAP_MAX_VCPUS:
397 		r = KVM_S390_BSCA_CPU_SLOTS;
398 		if (!kvm_s390_use_sca_entries())
399 			r = KVM_MAX_VCPUS;
400 		else if (sclp.has_esca && sclp.has_64bscao)
401 			r = KVM_S390_ESCA_CPU_SLOTS;
402 		break;
403 	case KVM_CAP_NR_MEMSLOTS:
404 		r = KVM_USER_MEM_SLOTS;
405 		break;
406 	case KVM_CAP_S390_COW:
407 		r = MACHINE_HAS_ESOP;
408 		break;
409 	case KVM_CAP_S390_VECTOR_REGISTERS:
410 		r = MACHINE_HAS_VX;
411 		break;
412 	case KVM_CAP_S390_RI:
413 		r = test_facility(64);
414 		break;
415 	case KVM_CAP_S390_GS:
416 		r = test_facility(133);
417 		break;
418 	default:
419 		r = 0;
420 	}
421 	return r;
422 }
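
/*
 * User-space sketch (illustration only) of probing one of the extensions
 * handled above, here the maximum KVM_S390_MEM_OP transfer size:
 *
 *	int max = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_MEM_OP);
 *
 * A return value of MEM_OP_MAX_SIZE (65536) means the memory op interface
 * is available; 0 means it is not.
 */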
423 
424 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
425 					struct kvm_memory_slot *memslot)
426 {
427 	gfn_t cur_gfn, last_gfn;
428 	unsigned long address;
429 	struct gmap *gmap = kvm->arch.gmap;
430 
431 	/* Loop over all guest pages */
432 	last_gfn = memslot->base_gfn + memslot->npages;
433 	for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
434 		address = gfn_to_hva_memslot(memslot, cur_gfn);
435 
436 		if (test_and_clear_guest_dirty(gmap->mm, address))
437 			mark_page_dirty(kvm, cur_gfn);
438 		if (fatal_signal_pending(current))
439 			return;
440 		cond_resched();
441 	}
442 }
443 
444 /* Section: vm related */
445 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
446 
447 /*
448  * Get (and clear) the dirty memory log for a memory slot.
449  */
450 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
451 			       struct kvm_dirty_log *log)
452 {
453 	int r;
454 	unsigned long n;
455 	struct kvm_memslots *slots;
456 	struct kvm_memory_slot *memslot;
457 	int is_dirty = 0;
458 
459 	if (kvm_is_ucontrol(kvm))
460 		return -EINVAL;
461 
462 	mutex_lock(&kvm->slots_lock);
463 
464 	r = -EINVAL;
465 	if (log->slot >= KVM_USER_MEM_SLOTS)
466 		goto out;
467 
468 	slots = kvm_memslots(kvm);
469 	memslot = id_to_memslot(slots, log->slot);
470 	r = -ENOENT;
471 	if (!memslot->dirty_bitmap)
472 		goto out;
473 
474 	kvm_s390_sync_dirty_log(kvm, memslot);
475 	r = kvm_get_dirty_log(kvm, log, &is_dirty);
476 	if (r)
477 		goto out;
478 
479 	/* Clear the dirty log */
480 	if (is_dirty) {
481 		n = kvm_dirty_bitmap_bytes(memslot);
482 		memset(memslot->dirty_bitmap, 0, n);
483 	}
484 	r = 0;
485 out:
486 	mutex_unlock(&kvm->slots_lock);
487 	return r;
488 }
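
/*
 * User-space sketch (illustration only, field names from struct
 * kvm_dirty_log) of driving the handler above:
 *
 *	struct kvm_dirty_log log = {
 *		.slot = slot_id,
 *		.dirty_bitmap = bitmap,
 *	};
 *	ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log);
 *
 * On success the bitmap holds one bit per guest page of the slot that was
 * dirtied since the previous call, and the kernel-side log is cleared.
 */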
489 
490 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
491 {
492 	unsigned int i;
493 	struct kvm_vcpu *vcpu;
494 
495 	kvm_for_each_vcpu(i, vcpu, kvm) {
496 		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
497 	}
498 }
499 
500 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
501 {
502 	int r;
503 
504 	if (cap->flags)
505 		return -EINVAL;
506 
507 	switch (cap->cap) {
508 	case KVM_CAP_S390_IRQCHIP:
509 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
510 		kvm->arch.use_irqchip = 1;
511 		r = 0;
512 		break;
513 	case KVM_CAP_S390_USER_SIGP:
514 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
515 		kvm->arch.user_sigp = 1;
516 		r = 0;
517 		break;
518 	case KVM_CAP_S390_VECTOR_REGISTERS:
519 		mutex_lock(&kvm->lock);
520 		if (kvm->created_vcpus) {
521 			r = -EBUSY;
522 		} else if (MACHINE_HAS_VX) {
523 			set_kvm_facility(kvm->arch.model.fac_mask, 129);
524 			set_kvm_facility(kvm->arch.model.fac_list, 129);
525 			if (test_facility(134)) {
526 				set_kvm_facility(kvm->arch.model.fac_mask, 134);
527 				set_kvm_facility(kvm->arch.model.fac_list, 134);
528 			}
529 			if (test_facility(135)) {
530 				set_kvm_facility(kvm->arch.model.fac_mask, 135);
531 				set_kvm_facility(kvm->arch.model.fac_list, 135);
532 			}
533 			r = 0;
534 		} else
535 			r = -EINVAL;
536 		mutex_unlock(&kvm->lock);
537 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
538 			 r ? "(not available)" : "(success)");
539 		break;
540 	case KVM_CAP_S390_RI:
541 		r = -EINVAL;
542 		mutex_lock(&kvm->lock);
543 		if (kvm->created_vcpus) {
544 			r = -EBUSY;
545 		} else if (test_facility(64)) {
546 			set_kvm_facility(kvm->arch.model.fac_mask, 64);
547 			set_kvm_facility(kvm->arch.model.fac_list, 64);
548 			r = 0;
549 		}
550 		mutex_unlock(&kvm->lock);
551 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
552 			 r ? "(not available)" : "(success)");
553 		break;
554 	case KVM_CAP_S390_AIS:
555 		mutex_lock(&kvm->lock);
556 		if (kvm->created_vcpus) {
557 			r = -EBUSY;
558 		} else {
559 			set_kvm_facility(kvm->arch.model.fac_mask, 72);
560 			set_kvm_facility(kvm->arch.model.fac_list, 72);
561 			r = 0;
562 		}
563 		mutex_unlock(&kvm->lock);
564 		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
565 			 r ? "(not available)" : "(success)");
566 		break;
567 	case KVM_CAP_S390_GS:
568 		r = -EINVAL;
569 		mutex_lock(&kvm->lock);
570 		if (atomic_read(&kvm->online_vcpus)) {
571 			r = -EBUSY;
572 		} else if (test_facility(133)) {
573 			set_kvm_facility(kvm->arch.model.fac_mask, 133);
574 			set_kvm_facility(kvm->arch.model.fac_list, 133);
575 			r = 0;
576 		}
577 		mutex_unlock(&kvm->lock);
578 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
579 			 r ? "(not available)" : "(success)");
580 		break;
581 	case KVM_CAP_S390_USER_STSI:
582 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
583 		kvm->arch.user_stsi = 1;
584 		r = 0;
585 		break;
586 	case KVM_CAP_S390_USER_INSTR0:
587 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
588 		kvm->arch.user_instr0 = 1;
589 		icpt_operexc_on_all_vcpus(kvm);
590 		r = 0;
591 		break;
592 	default:
593 		r = -EINVAL;
594 		break;
595 	}
596 	return r;
597 }
598 
599 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
600 {
601 	int ret;
602 
603 	switch (attr->attr) {
604 	case KVM_S390_VM_MEM_LIMIT_SIZE:
605 		ret = 0;
606 		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
607 			 kvm->arch.mem_limit);
608 		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
609 			ret = -EFAULT;
610 		break;
611 	default:
612 		ret = -ENXIO;
613 		break;
614 	}
615 	return ret;
616 }
617 
618 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
619 {
620 	int ret;
621 	unsigned int idx;
622 	switch (attr->attr) {
623 	case KVM_S390_VM_MEM_ENABLE_CMMA:
624 		ret = -ENXIO;
625 		if (!sclp.has_cmma)
626 			break;
627 
628 		ret = -EBUSY;
629 		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
630 		mutex_lock(&kvm->lock);
631 		if (!kvm->created_vcpus) {
632 			kvm->arch.use_cmma = 1;
633 			ret = 0;
634 		}
635 		mutex_unlock(&kvm->lock);
636 		break;
637 	case KVM_S390_VM_MEM_CLR_CMMA:
638 		ret = -ENXIO;
639 		if (!sclp.has_cmma)
640 			break;
641 		ret = -EINVAL;
642 		if (!kvm->arch.use_cmma)
643 			break;
644 
645 		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
646 		mutex_lock(&kvm->lock);
647 		idx = srcu_read_lock(&kvm->srcu);
648 		s390_reset_cmma(kvm->arch.gmap->mm);
649 		srcu_read_unlock(&kvm->srcu, idx);
650 		mutex_unlock(&kvm->lock);
651 		ret = 0;
652 		break;
653 	case KVM_S390_VM_MEM_LIMIT_SIZE: {
654 		unsigned long new_limit;
655 
656 		if (kvm_is_ucontrol(kvm))
657 			return -EINVAL;
658 
659 		if (get_user(new_limit, (u64 __user *)attr->addr))
660 			return -EFAULT;
661 
662 		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
663 		    new_limit > kvm->arch.mem_limit)
664 			return -E2BIG;
665 
666 		if (!new_limit)
667 			return -EINVAL;
668 
669 		/* gmap_create takes last usable address */
670 		if (new_limit != KVM_S390_NO_MEM_LIMIT)
671 			new_limit -= 1;
672 
673 		ret = -EBUSY;
674 		mutex_lock(&kvm->lock);
675 		if (!kvm->created_vcpus) {
676 			/* gmap_create will round the limit up */
677 			struct gmap *new = gmap_create(current->mm, new_limit);
678 
679 			if (!new) {
680 				ret = -ENOMEM;
681 			} else {
682 				gmap_remove(kvm->arch.gmap);
683 				new->private = kvm;
684 				kvm->arch.gmap = new;
685 				ret = 0;
686 			}
687 		}
688 		mutex_unlock(&kvm->lock);
689 		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
690 		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
691 			 (void *) kvm->arch.gmap->asce);
692 		break;
693 	}
694 	default:
695 		ret = -ENXIO;
696 		break;
697 	}
698 	return ret;
699 }
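
/*
 * User-space sketch (illustration only) of driving the limit handling
 * above: capping guest memory at 4 GiB via the VM device attributes.
 *
 *	__u64 limit = 4ULL << 30;
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_MEM_CTRL,
 *		.attr  = KVM_S390_VM_MEM_LIMIT_SIZE,
 *		.addr  = (__u64) &limit,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 *
 * gmap_create() is then called with limit - 1, the last usable guest
 * address.
 */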
700 
701 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
702 
703 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
704 {
705 	struct kvm_vcpu *vcpu;
706 	int i;
707 
708 	if (!test_kvm_facility(kvm, 76))
709 		return -EINVAL;
710 
711 	mutex_lock(&kvm->lock);
712 	switch (attr->attr) {
713 	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
714 		get_random_bytes(
715 			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
716 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
717 		kvm->arch.crypto.aes_kw = 1;
718 		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
719 		break;
720 	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
721 		get_random_bytes(
722 			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
723 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
724 		kvm->arch.crypto.dea_kw = 1;
725 		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
726 		break;
727 	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
728 		kvm->arch.crypto.aes_kw = 0;
729 		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
730 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
731 		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
732 		break;
733 	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
734 		kvm->arch.crypto.dea_kw = 0;
735 		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
736 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
737 		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
738 		break;
739 	default:
740 		mutex_unlock(&kvm->lock);
741 		return -ENXIO;
742 	}
743 
744 	kvm_for_each_vcpu(i, vcpu, kvm) {
745 		kvm_s390_vcpu_crypto_setup(vcpu);
746 		exit_sie(vcpu);
747 	}
748 	mutex_unlock(&kvm->lock);
749 	return 0;
750 }
751 
752 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
753 {
754 	u8 gtod_high;
755 
756 	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
757 					   sizeof(gtod_high)))
758 		return -EFAULT;
759 
760 	if (gtod_high != 0)
761 		return -EINVAL;
762 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
763 
764 	return 0;
765 }
766 
767 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
768 {
769 	u64 gtod;
770 
771 	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
772 		return -EFAULT;
773 
774 	kvm_s390_set_tod_clock(kvm, gtod);
775 	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
776 	return 0;
777 }
778 
779 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
780 {
781 	int ret;
782 
783 	if (attr->flags)
784 		return -EINVAL;
785 
786 	switch (attr->attr) {
787 	case KVM_S390_VM_TOD_HIGH:
788 		ret = kvm_s390_set_tod_high(kvm, attr);
789 		break;
790 	case KVM_S390_VM_TOD_LOW:
791 		ret = kvm_s390_set_tod_low(kvm, attr);
792 		break;
793 	default:
794 		ret = -ENXIO;
795 		break;
796 	}
797 	return ret;
798 }
799 
800 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
801 {
802 	u8 gtod_high = 0;
803 
804 	if (copy_to_user((void __user *)attr->addr, &gtod_high,
805 					 sizeof(gtod_high)))
806 		return -EFAULT;
807 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
808 
809 	return 0;
810 }
811 
812 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
813 {
814 	u64 gtod;
815 
816 	gtod = kvm_s390_get_tod_clock_fast(kvm);
817 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
818 		return -EFAULT;
819 	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
820 
821 	return 0;
822 }
823 
824 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
825 {
826 	int ret;
827 
828 	if (attr->flags)
829 		return -EINVAL;
830 
831 	switch (attr->attr) {
832 	case KVM_S390_VM_TOD_HIGH:
833 		ret = kvm_s390_get_tod_high(kvm, attr);
834 		break;
835 	case KVM_S390_VM_TOD_LOW:
836 		ret = kvm_s390_get_tod_low(kvm, attr);
837 		break;
838 	default:
839 		ret = -ENXIO;
840 		break;
841 	}
842 	return ret;
843 }
844 
845 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
846 {
847 	struct kvm_s390_vm_cpu_processor *proc;
848 	u16 lowest_ibc, unblocked_ibc;
849 	int ret = 0;
850 
851 	mutex_lock(&kvm->lock);
852 	if (kvm->created_vcpus) {
853 		ret = -EBUSY;
854 		goto out;
855 	}
856 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
857 	if (!proc) {
858 		ret = -ENOMEM;
859 		goto out;
860 	}
861 	if (!copy_from_user(proc, (void __user *)attr->addr,
862 			    sizeof(*proc))) {
863 		kvm->arch.model.cpuid = proc->cpuid;
864 		lowest_ibc = sclp.ibc >> 16 & 0xfff;
865 		unblocked_ibc = sclp.ibc & 0xfff;
866 		if (lowest_ibc && proc->ibc) {
867 			if (proc->ibc > unblocked_ibc)
868 				kvm->arch.model.ibc = unblocked_ibc;
869 			else if (proc->ibc < lowest_ibc)
870 				kvm->arch.model.ibc = lowest_ibc;
871 			else
872 				kvm->arch.model.ibc = proc->ibc;
873 		}
874 		memcpy(kvm->arch.model.fac_list, proc->fac_list,
875 		       S390_ARCH_FAC_LIST_SIZE_BYTE);
876 		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
877 			 kvm->arch.model.ibc,
878 			 kvm->arch.model.cpuid);
879 		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
880 			 kvm->arch.model.fac_list[0],
881 			 kvm->arch.model.fac_list[1],
882 			 kvm->arch.model.fac_list[2]);
883 	} else
884 		ret = -EFAULT;
885 	kfree(proc);
886 out:
887 	mutex_unlock(&kvm->lock);
888 	return ret;
889 }
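
/*
 * Worked example of the IBC clamping above (values chosen for illustration
 * only): with sclp.ibc = 0x02200240 the machine reports lowest_ibc = 0x220
 * and unblocked_ibc = 0x240; a requested proc->ibc of 0x300 is clamped to
 * 0x240, a request of 0x100 is raised to 0x220, and 0x230 is taken as is.
 */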
890 
891 static int kvm_s390_set_processor_feat(struct kvm *kvm,
892 				       struct kvm_device_attr *attr)
893 {
894 	struct kvm_s390_vm_cpu_feat data;
895 	int ret = -EBUSY;
896 
897 	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
898 		return -EFAULT;
899 	if (!bitmap_subset((unsigned long *) data.feat,
900 			   kvm_s390_available_cpu_feat,
901 			   KVM_S390_VM_CPU_FEAT_NR_BITS))
902 		return -EINVAL;
903 
904 	mutex_lock(&kvm->lock);
905 	if (!atomic_read(&kvm->online_vcpus)) {
906 		bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
907 			    KVM_S390_VM_CPU_FEAT_NR_BITS);
908 		ret = 0;
909 	}
910 	mutex_unlock(&kvm->lock);
911 	return ret;
912 }
913 
914 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
915 					  struct kvm_device_attr *attr)
916 {
917 	/*
918 	 * Once supported by kernel + hw, we have to store the subfunctions
919 	 * in kvm->arch and remember that user space configured them.
920 	 */
921 	return -ENXIO;
922 }
923 
924 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
925 {
926 	int ret = -ENXIO;
927 
928 	switch (attr->attr) {
929 	case KVM_S390_VM_CPU_PROCESSOR:
930 		ret = kvm_s390_set_processor(kvm, attr);
931 		break;
932 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
933 		ret = kvm_s390_set_processor_feat(kvm, attr);
934 		break;
935 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
936 		ret = kvm_s390_set_processor_subfunc(kvm, attr);
937 		break;
938 	}
939 	return ret;
940 }
941 
942 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
943 {
944 	struct kvm_s390_vm_cpu_processor *proc;
945 	int ret = 0;
946 
947 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
948 	if (!proc) {
949 		ret = -ENOMEM;
950 		goto out;
951 	}
952 	proc->cpuid = kvm->arch.model.cpuid;
953 	proc->ibc = kvm->arch.model.ibc;
954 	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
955 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
956 	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
957 		 kvm->arch.model.ibc,
958 		 kvm->arch.model.cpuid);
959 	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
960 		 kvm->arch.model.fac_list[0],
961 		 kvm->arch.model.fac_list[1],
962 		 kvm->arch.model.fac_list[2]);
963 	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
964 		ret = -EFAULT;
965 	kfree(proc);
966 out:
967 	return ret;
968 }
969 
970 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
971 {
972 	struct kvm_s390_vm_cpu_machine *mach;
973 	int ret = 0;
974 
975 	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
976 	if (!mach) {
977 		ret = -ENOMEM;
978 		goto out;
979 	}
980 	get_cpu_id((struct cpuid *) &mach->cpuid);
981 	mach->ibc = sclp.ibc;
982 	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
983 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
984 	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
985 	       sizeof(S390_lowcore.stfle_fac_list));
986 	VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
987 		 kvm->arch.model.ibc,
988 		 kvm->arch.model.cpuid);
989 	VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
990 		 mach->fac_mask[0],
991 		 mach->fac_mask[1],
992 		 mach->fac_mask[2]);
993 	VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
994 		 mach->fac_list[0],
995 		 mach->fac_list[1],
996 		 mach->fac_list[2]);
997 	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
998 		ret = -EFAULT;
999 	kfree(mach);
1000 out:
1001 	return ret;
1002 }
1003 
1004 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1005 				       struct kvm_device_attr *attr)
1006 {
1007 	struct kvm_s390_vm_cpu_feat data;
1008 
1009 	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1010 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1011 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1012 		return -EFAULT;
1013 	return 0;
1014 }
1015 
1016 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1017 				     struct kvm_device_attr *attr)
1018 {
1019 	struct kvm_s390_vm_cpu_feat data;
1020 
1021 	bitmap_copy((unsigned long *) data.feat,
1022 		    kvm_s390_available_cpu_feat,
1023 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1024 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1025 		return -EFAULT;
1026 	return 0;
1027 }
1028 
1029 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1030 					  struct kvm_device_attr *attr)
1031 {
1032 	/*
1033 	 * Once we can actually configure subfunctions (kernel + hw support),
1034 	 * we have to check if they were already set by user space, if so copy
1035 	 * them from kvm->arch.
1036 	 */
1037 	return -ENXIO;
1038 }
1039 
1040 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1041 					struct kvm_device_attr *attr)
1042 {
1043 	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1044 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1045 		return -EFAULT;
1046 	return 0;
1047 }

1048 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1049 {
1050 	int ret = -ENXIO;
1051 
1052 	switch (attr->attr) {
1053 	case KVM_S390_VM_CPU_PROCESSOR:
1054 		ret = kvm_s390_get_processor(kvm, attr);
1055 		break;
1056 	case KVM_S390_VM_CPU_MACHINE:
1057 		ret = kvm_s390_get_machine(kvm, attr);
1058 		break;
1059 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1060 		ret = kvm_s390_get_processor_feat(kvm, attr);
1061 		break;
1062 	case KVM_S390_VM_CPU_MACHINE_FEAT:
1063 		ret = kvm_s390_get_machine_feat(kvm, attr);
1064 		break;
1065 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1066 		ret = kvm_s390_get_processor_subfunc(kvm, attr);
1067 		break;
1068 	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1069 		ret = kvm_s390_get_machine_subfunc(kvm, attr);
1070 		break;
1071 	}
1072 	return ret;
1073 }
1074 
1075 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1076 {
1077 	int ret;
1078 
1079 	switch (attr->group) {
1080 	case KVM_S390_VM_MEM_CTRL:
1081 		ret = kvm_s390_set_mem_control(kvm, attr);
1082 		break;
1083 	case KVM_S390_VM_TOD:
1084 		ret = kvm_s390_set_tod(kvm, attr);
1085 		break;
1086 	case KVM_S390_VM_CPU_MODEL:
1087 		ret = kvm_s390_set_cpu_model(kvm, attr);
1088 		break;
1089 	case KVM_S390_VM_CRYPTO:
1090 		ret = kvm_s390_vm_set_crypto(kvm, attr);
1091 		break;
1092 	default:
1093 		ret = -ENXIO;
1094 		break;
1095 	}
1096 
1097 	return ret;
1098 }
1099 
1100 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1101 {
1102 	int ret;
1103 
1104 	switch (attr->group) {
1105 	case KVM_S390_VM_MEM_CTRL:
1106 		ret = kvm_s390_get_mem_control(kvm, attr);
1107 		break;
1108 	case KVM_S390_VM_TOD:
1109 		ret = kvm_s390_get_tod(kvm, attr);
1110 		break;
1111 	case KVM_S390_VM_CPU_MODEL:
1112 		ret = kvm_s390_get_cpu_model(kvm, attr);
1113 		break;
1114 	default:
1115 		ret = -ENXIO;
1116 		break;
1117 	}
1118 
1119 	return ret;
1120 }
1121 
1122 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1123 {
1124 	int ret;
1125 
1126 	switch (attr->group) {
1127 	case KVM_S390_VM_MEM_CTRL:
1128 		switch (attr->attr) {
1129 		case KVM_S390_VM_MEM_ENABLE_CMMA:
1130 		case KVM_S390_VM_MEM_CLR_CMMA:
1131 			ret = sclp.has_cmma ? 0 : -ENXIO;
1132 			break;
1133 		case KVM_S390_VM_MEM_LIMIT_SIZE:
1134 			ret = 0;
1135 			break;
1136 		default:
1137 			ret = -ENXIO;
1138 			break;
1139 		}
1140 		break;
1141 	case KVM_S390_VM_TOD:
1142 		switch (attr->attr) {
1143 		case KVM_S390_VM_TOD_LOW:
1144 		case KVM_S390_VM_TOD_HIGH:
1145 			ret = 0;
1146 			break;
1147 		default:
1148 			ret = -ENXIO;
1149 			break;
1150 		}
1151 		break;
1152 	case KVM_S390_VM_CPU_MODEL:
1153 		switch (attr->attr) {
1154 		case KVM_S390_VM_CPU_PROCESSOR:
1155 		case KVM_S390_VM_CPU_MACHINE:
1156 		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1157 		case KVM_S390_VM_CPU_MACHINE_FEAT:
1158 		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1159 			ret = 0;
1160 			break;
1161 		/* configuring subfunctions is not supported yet */
1162 		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1163 		default:
1164 			ret = -ENXIO;
1165 			break;
1166 		}
1167 		break;
1168 	case KVM_S390_VM_CRYPTO:
1169 		switch (attr->attr) {
1170 		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1171 		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1172 		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1173 		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1174 			ret = 0;
1175 			break;
1176 		default:
1177 			ret = -ENXIO;
1178 			break;
1179 		}
1180 		break;
1181 	default:
1182 		ret = -ENXIO;
1183 		break;
1184 	}
1185 
1186 	return ret;
1187 }
1188 
1189 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1190 {
1191 	uint8_t *keys;
1192 	uint64_t hva;
1193 	int i, r = 0;
1194 
1195 	if (args->flags != 0)
1196 		return -EINVAL;
1197 
1198 	/* Is this guest using storage keys? */
1199 	if (!mm_use_skey(current->mm))
1200 		return KVM_S390_GET_SKEYS_NONE;
1201 
1202 	/* Enforce sane limit on memory allocation */
1203 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1204 		return -EINVAL;
1205 
1206 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1207 	if (!keys)
1208 		return -ENOMEM;
1209 
1210 	down_read(&current->mm->mmap_sem);
1211 	for (i = 0; i < args->count; i++) {
1212 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1213 		if (kvm_is_error_hva(hva)) {
1214 			r = -EFAULT;
1215 			break;
1216 		}
1217 
1218 		r = get_guest_storage_key(current->mm, hva, &keys[i]);
1219 		if (r)
1220 			break;
1221 	}
1222 	up_read(&current->mm->mmap_sem);
1223 
1224 	if (!r) {
1225 		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1226 				 sizeof(uint8_t) * args->count);
1227 		if (r)
1228 			r = -EFAULT;
1229 	}
1230 
1231 	kvfree(keys);
1232 	return r;
1233 }
1234 
1235 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1236 {
1237 	uint8_t *keys;
1238 	uint64_t hva;
1239 	int i, r = 0;
1240 
1241 	if (args->flags != 0)
1242 		return -EINVAL;
1243 
1244 	/* Enforce sane limit on memory allocation */
1245 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1246 		return -EINVAL;
1247 
1248 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1249 	if (!keys)
1250 		return -ENOMEM;
1251 
1252 	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1253 			   sizeof(uint8_t) * args->count);
1254 	if (r) {
1255 		r = -EFAULT;
1256 		goto out;
1257 	}
1258 
1259 	/* Enable storage key handling for the guest */
1260 	r = s390_enable_skey();
1261 	if (r)
1262 		goto out;
1263 
1264 	down_read(&current->mm->mmap_sem);
1265 	for (i = 0; i < args->count; i++) {
1266 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1267 		if (kvm_is_error_hva(hva)) {
1268 			r = -EFAULT;
1269 			break;
1270 		}
1271 
1272 		/* Lowest order bit is reserved */
1273 		if (keys[i] & 0x01) {
1274 			r = -EINVAL;
1275 			break;
1276 		}
1277 
1278 		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1279 		if (r)
1280 			break;
1281 	}
1282 	up_read(&current->mm->mmap_sem);
1283 out:
1284 	kvfree(keys);
1285 	return r;
1286 }
1287 
1288 long kvm_arch_vm_ioctl(struct file *filp,
1289 		       unsigned int ioctl, unsigned long arg)
1290 {
1291 	struct kvm *kvm = filp->private_data;
1292 	void __user *argp = (void __user *)arg;
1293 	struct kvm_device_attr attr;
1294 	int r;
1295 
1296 	switch (ioctl) {
1297 	case KVM_S390_INTERRUPT: {
1298 		struct kvm_s390_interrupt s390int;
1299 
1300 		r = -EFAULT;
1301 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
1302 			break;
1303 		r = kvm_s390_inject_vm(kvm, &s390int);
1304 		break;
1305 	}
1306 	case KVM_ENABLE_CAP: {
1307 		struct kvm_enable_cap cap;
1308 		r = -EFAULT;
1309 		if (copy_from_user(&cap, argp, sizeof(cap)))
1310 			break;
1311 		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1312 		break;
1313 	}
1314 	case KVM_CREATE_IRQCHIP: {
1315 		struct kvm_irq_routing_entry routing;
1316 
1317 		r = -EINVAL;
1318 		if (kvm->arch.use_irqchip) {
1319 			/* Set up dummy routing. */
1320 			memset(&routing, 0, sizeof(routing));
1321 			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1322 		}
1323 		break;
1324 	}
1325 	case KVM_SET_DEVICE_ATTR: {
1326 		r = -EFAULT;
1327 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1328 			break;
1329 		r = kvm_s390_vm_set_attr(kvm, &attr);
1330 		break;
1331 	}
1332 	case KVM_GET_DEVICE_ATTR: {
1333 		r = -EFAULT;
1334 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1335 			break;
1336 		r = kvm_s390_vm_get_attr(kvm, &attr);
1337 		break;
1338 	}
1339 	case KVM_HAS_DEVICE_ATTR: {
1340 		r = -EFAULT;
1341 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1342 			break;
1343 		r = kvm_s390_vm_has_attr(kvm, &attr);
1344 		break;
1345 	}
1346 	case KVM_S390_GET_SKEYS: {
1347 		struct kvm_s390_skeys args;
1348 
1349 		r = -EFAULT;
1350 		if (copy_from_user(&args, argp,
1351 				   sizeof(struct kvm_s390_skeys)))
1352 			break;
1353 		r = kvm_s390_get_skeys(kvm, &args);
1354 		break;
1355 	}
1356 	case KVM_S390_SET_SKEYS: {
1357 		struct kvm_s390_skeys args;
1358 
1359 		r = -EFAULT;
1360 		if (copy_from_user(&args, argp,
1361 				   sizeof(struct kvm_s390_skeys)))
1362 			break;
1363 		r = kvm_s390_set_skeys(kvm, &args);
1364 		break;
1365 	}
1366 	default:
1367 		r = -ENOTTY;
1368 	}
1369 
1370 	return r;
1371 }
1372 
1373 static int kvm_s390_query_ap_config(u8 *config)
1374 {
1375 	u32 fcn_code = 0x04000000UL;
1376 	u32 cc = 0;
1377 
1378 	memset(config, 0, 128);
1379 	asm volatile(
1380 		"lgr 0,%1\n"
1381 		"lgr 2,%2\n"
1382 		".long 0xb2af0000\n"		/* PQAP(QCI) */
1383 		"0: ipm %0\n"
1384 		"srl %0,28\n"
1385 		"1:\n"
1386 		EX_TABLE(0b, 1b)
1387 		: "+r" (cc)
1388 		: "r" (fcn_code), "r" (config)
1389 		: "cc", "0", "2", "memory"
1390 	);
1391 
1392 	return cc;
1393 }
1394 
1395 static int kvm_s390_apxa_installed(void)
1396 {
1397 	u8 config[128];
1398 	int cc;
1399 
1400 	if (test_facility(12)) {
1401 		cc = kvm_s390_query_ap_config(config);
1402 
1403 		if (cc)
1404 			pr_err("PQAP(QCI) failed with cc=%d", cc);
1405 		else
1406 			return config[0] & 0x40;
1407 	}
1408 
1409 	return 0;
1410 }
1411 
1412 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1413 {
1414 	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1415 
1416 	if (kvm_s390_apxa_installed())
1417 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1418 	else
1419 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1420 }
1421 
1422 static u64 kvm_s390_get_initial_cpuid(void)
1423 {
1424 	struct cpuid cpuid;
1425 
1426 	get_cpu_id(&cpuid);
1427 	cpuid.version = 0xff;
1428 	return *((u64 *) &cpuid);
1429 }
1430 
1431 static void kvm_s390_crypto_init(struct kvm *kvm)
1432 {
1433 	if (!test_kvm_facility(kvm, 76))
1434 		return;
1435 
1436 	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1437 	kvm_s390_set_crycb_format(kvm);
1438 
1439 	/* Enable AES/DEA protected key functions by default */
1440 	kvm->arch.crypto.aes_kw = 1;
1441 	kvm->arch.crypto.dea_kw = 1;
1442 	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1443 			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1444 	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1445 			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1446 }
1447 
1448 static void sca_dispose(struct kvm *kvm)
1449 {
1450 	if (kvm->arch.use_esca)
1451 		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1452 	else
1453 		free_page((unsigned long)(kvm->arch.sca));
1454 	kvm->arch.sca = NULL;
1455 }
1456 
1457 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1458 {
1459 	gfp_t alloc_flags = GFP_KERNEL;
1460 	int i, rc;
1461 	char debug_name[16];
1462 	static unsigned long sca_offset;
1463 
1464 	rc = -EINVAL;
1465 #ifdef CONFIG_KVM_S390_UCONTROL
1466 	if (type & ~KVM_VM_S390_UCONTROL)
1467 		goto out_err;
1468 	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1469 		goto out_err;
1470 #else
1471 	if (type)
1472 		goto out_err;
1473 #endif
1474 
1475 	rc = s390_enable_sie();
1476 	if (rc)
1477 		goto out_err;
1478 
1479 	rc = -ENOMEM;
1480 
1481 	ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);
1482 
1483 	kvm->arch.use_esca = 0; /* start with basic SCA */
1484 	if (!sclp.has_64bscao)
1485 		alloc_flags |= GFP_DMA;
1486 	rwlock_init(&kvm->arch.sca_lock);
1487 	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1488 	if (!kvm->arch.sca)
1489 		goto out_err;
1490 	spin_lock(&kvm_lock);
1491 	sca_offset += 16;
1492 	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1493 		sca_offset = 0;
1494 	kvm->arch.sca = (struct bsca_block *)
1495 			((char *) kvm->arch.sca + sca_offset);
1496 	spin_unlock(&kvm_lock);
1497 
1498 	sprintf(debug_name, "kvm-%u", current->pid);
1499 
1500 	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1501 	if (!kvm->arch.dbf)
1502 		goto out_err;
1503 
1504 	kvm->arch.sie_page2 =
1505 	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1506 	if (!kvm->arch.sie_page2)
1507 		goto out_err;
1508 
1509 	/* Populate the facility mask initially. */
1510 	memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1511 	       sizeof(S390_lowcore.stfle_fac_list));
1512 	for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1513 		if (i < kvm_s390_fac_list_mask_size())
1514 			kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1515 		else
1516 			kvm->arch.model.fac_mask[i] = 0UL;
1517 	}
1518 
1519 	/* Populate the facility list initially. */
1520 	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1521 	memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1522 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1523 
1524 	set_kvm_facility(kvm->arch.model.fac_mask, 74);
1525 	set_kvm_facility(kvm->arch.model.fac_list, 74);
1526 
1527 	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1528 	kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1529 
1530 	kvm_s390_crypto_init(kvm);
1531 
1532 	mutex_init(&kvm->arch.float_int.ais_lock);
1533 	kvm->arch.float_int.simm = 0;
1534 	kvm->arch.float_int.nimm = 0;
1535 	spin_lock_init(&kvm->arch.float_int.lock);
1536 	for (i = 0; i < FIRQ_LIST_COUNT; i++)
1537 		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1538 	init_waitqueue_head(&kvm->arch.ipte_wq);
1539 	mutex_init(&kvm->arch.ipte_mutex);
1540 
1541 	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1542 	VM_EVENT(kvm, 3, "vm created with type %lu", type);
1543 
1544 	if (type & KVM_VM_S390_UCONTROL) {
1545 		kvm->arch.gmap = NULL;
1546 		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1547 	} else {
1548 		if (sclp.hamax == U64_MAX)
1549 			kvm->arch.mem_limit = TASK_SIZE_MAX;
1550 		else
1551 			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
1552 						    sclp.hamax + 1);
1553 		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
1554 		if (!kvm->arch.gmap)
1555 			goto out_err;
1556 		kvm->arch.gmap->private = kvm;
1557 		kvm->arch.gmap->pfault_enabled = 0;
1558 	}
1559 
1560 	kvm->arch.css_support = 0;
1561 	kvm->arch.use_irqchip = 0;
1562 	kvm->arch.epoch = 0;
1563 
1564 	spin_lock_init(&kvm->arch.start_stop_lock);
1565 	kvm_s390_vsie_init(kvm);
1566 	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1567 
1568 	return 0;
1569 out_err:
1570 	free_page((unsigned long)kvm->arch.sie_page2);
1571 	debug_unregister(kvm->arch.dbf);
1572 	sca_dispose(kvm);
1573 	KVM_EVENT(3, "creation of vm failed: %d", rc);
1574 	return rc;
1575 }
1576 
1577 bool kvm_arch_has_vcpu_debugfs(void)
1578 {
1579 	return false;
1580 }
1581 
1582 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
1583 {
1584 	return 0;
1585 }
1586 
1587 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1588 {
1589 	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1590 	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1591 	kvm_s390_clear_local_irqs(vcpu);
1592 	kvm_clear_async_pf_completion_queue(vcpu);
1593 	if (!kvm_is_ucontrol(vcpu->kvm))
1594 		sca_del_vcpu(vcpu);
1595 
1596 	if (kvm_is_ucontrol(vcpu->kvm))
1597 		gmap_remove(vcpu->arch.gmap);
1598 
1599 	if (vcpu->kvm->arch.use_cmma)
1600 		kvm_s390_vcpu_unsetup_cmma(vcpu);
1601 	free_page((unsigned long)(vcpu->arch.sie_block));
1602 
1603 	kvm_vcpu_uninit(vcpu);
1604 	kmem_cache_free(kvm_vcpu_cache, vcpu);
1605 }
1606 
1607 static void kvm_free_vcpus(struct kvm *kvm)
1608 {
1609 	unsigned int i;
1610 	struct kvm_vcpu *vcpu;
1611 
1612 	kvm_for_each_vcpu(i, vcpu, kvm)
1613 		kvm_arch_vcpu_destroy(vcpu);
1614 
1615 	mutex_lock(&kvm->lock);
1616 	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1617 		kvm->vcpus[i] = NULL;
1618 
1619 	atomic_set(&kvm->online_vcpus, 0);
1620 	mutex_unlock(&kvm->lock);
1621 }
1622 
1623 void kvm_arch_destroy_vm(struct kvm *kvm)
1624 {
1625 	kvm_free_vcpus(kvm);
1626 	sca_dispose(kvm);
1627 	debug_unregister(kvm->arch.dbf);
1628 	free_page((unsigned long)kvm->arch.sie_page2);
1629 	if (!kvm_is_ucontrol(kvm))
1630 		gmap_remove(kvm->arch.gmap);
1631 	kvm_s390_destroy_adapters(kvm);
1632 	kvm_s390_clear_float_irqs(kvm);
1633 	kvm_s390_vsie_destroy(kvm);
1634 	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
1635 }
1636 
1637 /* Section: vcpu related */
1638 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1639 {
1640 	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
1641 	if (!vcpu->arch.gmap)
1642 		return -ENOMEM;
1643 	vcpu->arch.gmap->private = vcpu->kvm;
1644 
1645 	return 0;
1646 }
1647 
1648 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
1649 {
1650 	if (!kvm_s390_use_sca_entries())
1651 		return;
1652 	read_lock(&vcpu->kvm->arch.sca_lock);
1653 	if (vcpu->kvm->arch.use_esca) {
1654 		struct esca_block *sca = vcpu->kvm->arch.sca;
1655 
1656 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1657 		sca->cpu[vcpu->vcpu_id].sda = 0;
1658 	} else {
1659 		struct bsca_block *sca = vcpu->kvm->arch.sca;
1660 
1661 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1662 		sca->cpu[vcpu->vcpu_id].sda = 0;
1663 	}
1664 	read_unlock(&vcpu->kvm->arch.sca_lock);
1665 }
1666 
1667 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
1668 {
1669 	if (!kvm_s390_use_sca_entries()) {
1670 		struct bsca_block *sca = vcpu->kvm->arch.sca;
1671 
1672 		/* we still need the basic sca for the ipte control */
1673 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1674 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1675 	}
1676 	read_lock(&vcpu->kvm->arch.sca_lock);
1677 	if (vcpu->kvm->arch.use_esca) {
1678 		struct esca_block *sca = vcpu->kvm->arch.sca;
1679 
1680 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1681 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1682 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
1683 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
1684 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1685 	} else {
1686 		struct bsca_block *sca = vcpu->kvm->arch.sca;
1687 
1688 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1689 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1690 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1691 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1692 	}
1693 	read_unlock(&vcpu->kvm->arch.sca_lock);
1694 }
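
/*
 * Sketch of the SCA origin encoding used above (example address chosen for
 * illustration): a 64-byte aligned SCA at 0x0000000123456700 is split into
 *
 *	scaoh = 0x00000001	bits 63..32
 *	scaol = 0x23456700	bits 31..0, low 6 bits masked for the ESCA
 *
 * so the SIE control block can address the shared SCA with two 32-bit
 * fields.
 */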
1695 
1696 /* Basic SCA to Extended SCA data copy routines */
1697 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
1698 {
1699 	d->sda = s->sda;
1700 	d->sigp_ctrl.c = s->sigp_ctrl.c;
1701 	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
1702 }
1703 
1704 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
1705 {
1706 	int i;
1707 
1708 	d->ipte_control = s->ipte_control;
1709 	d->mcn[0] = s->mcn;
1710 	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
1711 		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
1712 }
1713 
1714 static int sca_switch_to_extended(struct kvm *kvm)
1715 {
1716 	struct bsca_block *old_sca = kvm->arch.sca;
1717 	struct esca_block *new_sca;
1718 	struct kvm_vcpu *vcpu;
1719 	unsigned int vcpu_idx;
1720 	u32 scaol, scaoh;
1721 
1722 	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
1723 	if (!new_sca)
1724 		return -ENOMEM;
1725 
1726 	scaoh = (u32)((u64)(new_sca) >> 32);
1727 	scaol = (u32)(u64)(new_sca) & ~0x3fU;
1728 
1729 	kvm_s390_vcpu_block_all(kvm);
1730 	write_lock(&kvm->arch.sca_lock);
1731 
1732 	sca_copy_b_to_e(new_sca, old_sca);
1733 
1734 	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
1735 		vcpu->arch.sie_block->scaoh = scaoh;
1736 		vcpu->arch.sie_block->scaol = scaol;
1737 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
1738 	}
1739 	kvm->arch.sca = new_sca;
1740 	kvm->arch.use_esca = 1;
1741 
1742 	write_unlock(&kvm->arch.sca_lock);
1743 	kvm_s390_vcpu_unblock_all(kvm);
1744 
1745 	free_page((unsigned long)old_sca);
1746 
1747 	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
1748 		 old_sca, kvm->arch.sca);
1749 	return 0;
1750 }
1751 
1752 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
1753 {
1754 	int rc;
1755 
1756 	if (!kvm_s390_use_sca_entries()) {
1757 		if (id < KVM_MAX_VCPUS)
1758 			return true;
1759 		return false;
1760 	}
1761 	if (id < KVM_S390_BSCA_CPU_SLOTS)
1762 		return true;
1763 	if (!sclp.has_esca || !sclp.has_64bscao)
1764 		return false;
1765 
1766 	mutex_lock(&kvm->lock);
1767 	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
1768 	mutex_unlock(&kvm->lock);
1769 
1770 	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
1771 }
1772 
1773 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1774 {
1775 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1776 	kvm_clear_async_pf_completion_queue(vcpu);
1777 	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
1778 				    KVM_SYNC_GPRS |
1779 				    KVM_SYNC_ACRS |
1780 				    KVM_SYNC_CRS |
1781 				    KVM_SYNC_ARCH0 |
1782 				    KVM_SYNC_PFAULT;
1783 	kvm_s390_set_prefix(vcpu, 0);
1784 	if (test_kvm_facility(vcpu->kvm, 64))
1785 		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
1786 	if (test_kvm_facility(vcpu->kvm, 133))
1787 		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
1788 	/* fprs can be synchronized via vrs, even if the guest has no vx. With
1789 	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
1790 	 */
1791 	if (MACHINE_HAS_VX)
1792 		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
1793 	else
1794 		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
1795 
1796 	if (kvm_is_ucontrol(vcpu->kvm))
1797 		return __kvm_ucontrol_vcpu_init(vcpu);
1798 
1799 	return 0;
1800 }
1801 
1802 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1803 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1804 {
1805 	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
1806 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1807 	vcpu->arch.cputm_start = get_tod_clock_fast();
1808 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1809 }
1810 
1811 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1812 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1813 {
1814 	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
1815 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1816 	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1817 	vcpu->arch.cputm_start = 0;
1818 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1819 }
1820 
1821 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1822 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1823 {
1824 	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
1825 	vcpu->arch.cputm_enabled = true;
1826 	__start_cpu_timer_accounting(vcpu);
1827 }
1828 
1829 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1830 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1831 {
1832 	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
1833 	__stop_cpu_timer_accounting(vcpu);
1834 	vcpu->arch.cputm_enabled = false;
1835 }
1836 
1837 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1838 {
1839 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1840 	__enable_cpu_timer_accounting(vcpu);
1841 	preempt_enable();
1842 }
1843 
1844 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1845 {
1846 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1847 	__disable_cpu_timer_accounting(vcpu);
1848 	preempt_enable();
1849 }
1850 
1851 /* set the cpu timer - may only be called from the VCPU thread itself */
1852 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
1853 {
1854 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1855 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1856 	if (vcpu->arch.cputm_enabled)
1857 		vcpu->arch.cputm_start = get_tod_clock_fast();
1858 	vcpu->arch.sie_block->cputm = cputm;
1859 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1860 	preempt_enable();
1861 }
1862 
1863 /* update and get the cpu timer - can also be called from other VCPU threads */
1864 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
1865 {
1866 	unsigned int seq;
1867 	__u64 value;
1868 
1869 	if (unlikely(!vcpu->arch.cputm_enabled))
1870 		return vcpu->arch.sie_block->cputm;
1871 
1872 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1873 	do {
1874 		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
1875 		/*
1876 		 * If the writer would ever execute a read in the critical
1877 		 * section, e.g. in irq context, we have a deadlock.
1878 		 */
1879 		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
1880 		value = vcpu->arch.sie_block->cputm;
1881 		/* if cputm_start is 0, accounting is being started/stopped */
1882 		if (likely(vcpu->arch.cputm_start))
1883 			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1884 	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
1885 	preempt_enable();
1886 	return value;
1887 }
1888 
1889 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1890 {
1891 
1892 	gmap_enable(vcpu->arch.enabled_gmap);
1893 	atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1894 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1895 		__start_cpu_timer_accounting(vcpu);
1896 	vcpu->cpu = cpu;
1897 }
1898 
1899 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1900 {
1901 	vcpu->cpu = -1;
1902 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1903 		__stop_cpu_timer_accounting(vcpu);
1904 	atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1905 	vcpu->arch.enabled_gmap = gmap_get_enabled();
1906 	gmap_disable(vcpu->arch.enabled_gmap);
1907 
1908 }
1909 
1910 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
1911 {
1912 	/* this equals the initial cpu reset in the POP, but we don't switch to ESA */
1913 	vcpu->arch.sie_block->gpsw.mask = 0UL;
1914 	vcpu->arch.sie_block->gpsw.addr = 0UL;
1915 	kvm_s390_set_prefix(vcpu, 0);
1916 	kvm_s390_set_cpu_timer(vcpu, 0);
1917 	vcpu->arch.sie_block->ckc       = 0UL;
1918 	vcpu->arch.sie_block->todpr     = 0;
1919 	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
1920 	vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
1921 	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
1922 	/* make sure the new fpc will be lazily loaded */
1923 	save_fpu_regs();
1924 	current->thread.fpu.fpc = 0;
1925 	vcpu->arch.sie_block->gbea = 1;
1926 	vcpu->arch.sie_block->pp = 0;
1927 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1928 	kvm_clear_async_pf_completion_queue(vcpu);
1929 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
1930 		kvm_s390_vcpu_stop(vcpu);
1931 	kvm_s390_clear_local_irqs(vcpu);
1932 }
1933 
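/*
 * Finish VCPU setup that depends on VM-wide state: inherit the guest epoch,
 * attach the VM's gmap and add the VCPU to the SCA (unless this is a
 * ucontrol VM), and intercept the operation exception where required.
 */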
1934 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
1935 {
1936 	mutex_lock(&vcpu->kvm->lock);
1937 	preempt_disable();
1938 	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
1939 	preempt_enable();
1940 	mutex_unlock(&vcpu->kvm->lock);
1941 	if (!kvm_is_ucontrol(vcpu->kvm)) {
1942 		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
1943 		sca_add_vcpu(vcpu);
1944 	}
1945 	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
1946 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
1947 	/* make vcpu_load load the right gmap on the first trigger */
1948 	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
1949 }
1950 
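/*
 * Mirror the VM-wide crypto settings (AES/DEA key-wrapping controls and the
 * crypto control block address) into the VCPU's SIE block. This is only done
 * when facility 76 is available to the guest.
 */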
1951 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
1952 {
1953 	if (!test_kvm_facility(vcpu->kvm, 76))
1954 		return;
1955 
1956 	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
1957 
1958 	if (vcpu->kvm->arch.crypto.aes_kw)
1959 		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
1960 	if (vcpu->kvm->arch.crypto.dea_kw)
1961 		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
1962 
1963 	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
1964 }
1965 
1966 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
1967 {
1968 	free_page(vcpu->arch.sie_block->cbrlo);
1969 	vcpu->arch.sie_block->cbrlo = 0;
1970 }
1971 
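/*
 * Allocate the collaborative-memory-management buffer (cbrlo page) and
 * enable CMMA interpretation for this VCPU; PFMF interpretation is switched
 * off while CMMA is in use.
 */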
1972 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
1973 {
1974 	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
1975 	if (!vcpu->arch.sie_block->cbrlo)
1976 		return -ENOMEM;
1977 
1978 	vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
1979 	vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI;
1980 	return 0;
1981 }
1982 
1983 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
1984 {
1985 	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
1986 
1987 	vcpu->arch.sie_block->ibc = model->ibc;
1988 	if (test_kvm_facility(vcpu->kvm, 7))
1989 		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
1990 }
1991 
1992 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1993 {
1994 	int rc = 0;
1995 
1996 	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
1997 						    CPUSTAT_SM |
1998 						    CPUSTAT_STOPPED);
1999 
2000 	if (test_kvm_facility(vcpu->kvm, 78))
2001 		atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
2002 	else if (test_kvm_facility(vcpu->kvm, 8))
2003 		atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
2004 
2005 	kvm_s390_vcpu_setup_model(vcpu);
2006 
2007 	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2008 	if (MACHINE_HAS_ESOP)
2009 		vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2010 	if (test_kvm_facility(vcpu->kvm, 9))
2011 		vcpu->arch.sie_block->ecb |= ECB_SRSI;
2012 	if (test_kvm_facility(vcpu->kvm, 73))
2013 		vcpu->arch.sie_block->ecb |= ECB_TE;
2014 
2015 	if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
2016 		vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2017 	if (test_kvm_facility(vcpu->kvm, 130))
2018 		vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2019 	vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2020 	if (sclp.has_cei)
2021 		vcpu->arch.sie_block->eca |= ECA_CEI;
2022 	if (sclp.has_ib)
2023 		vcpu->arch.sie_block->eca |= ECA_IB;
2024 	if (sclp.has_siif)
2025 		vcpu->arch.sie_block->eca |= ECA_SII;
2026 	if (sclp.has_sigpif)
2027 		vcpu->arch.sie_block->eca |= ECA_SIGPI;
2028 	if (test_kvm_facility(vcpu->kvm, 129)) {
2029 		vcpu->arch.sie_block->eca |= ECA_VX;
2030 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2031 	}
2032 	vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2033 					| SDNXC;
2034 	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2035 
2036 	if (sclp.has_kss)
2037 		atomic_or(CPUSTAT_KSS, &vcpu->arch.sie_block->cpuflags);
2038 	else
2039 		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2040 
2041 	if (vcpu->kvm->arch.use_cmma) {
2042 		rc = kvm_s390_vcpu_setup_cmma(vcpu);
2043 		if (rc)
2044 			return rc;
2045 	}
2046 	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2047 	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2048 
2049 	kvm_s390_vcpu_crypto_setup(vcpu);
2050 
2051 	return rc;
2052 }
2053 
2054 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2055 				      unsigned int id)
2056 {
2057 	struct kvm_vcpu *vcpu;
2058 	struct sie_page *sie_page;
2059 	int rc = -EINVAL;
2060 
2061 	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2062 		goto out;
2063 
2064 	rc = -ENOMEM;
2065 
2066 	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2067 	if (!vcpu)
2068 		goto out;
2069 
2070 	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2071 	if (!sie_page)
2072 		goto out_free_cpu;
2073 
2074 	vcpu->arch.sie_block = &sie_page->sie_block;
2075 	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2076 
2077 	/* the real guest size will always be smaller than msl */
2078 	vcpu->arch.sie_block->mso = 0;
2079 	vcpu->arch.sie_block->msl = sclp.hamax;
2080 
2081 	vcpu->arch.sie_block->icpua = id;
2082 	spin_lock_init(&vcpu->arch.local_int.lock);
2083 	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
2084 	vcpu->arch.local_int.wq = &vcpu->wq;
2085 	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
2086 	seqcount_init(&vcpu->arch.cputm_seqcount);
2087 
2088 	rc = kvm_vcpu_init(vcpu, kvm, id);
2089 	if (rc)
2090 		goto out_free_sie_block;
2091 	VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2092 		 vcpu->arch.sie_block);
2093 	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2094 
2095 	return vcpu;
2096 out_free_sie_block:
2097 	free_page((unsigned long)(vcpu->arch.sie_block));
2098 out_free_cpu:
2099 	kmem_cache_free(kvm_vcpu_cache, vcpu);
2100 out:
2101 	return ERR_PTR(rc);
2102 }
2103 
2104 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
2105 {
2106 	return kvm_s390_vcpu_has_irq(vcpu, 0);
2107 }
2108 
2109 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2110 {
2111 	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2112 	exit_sie(vcpu);
2113 }
2114 
2115 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2116 {
2117 	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2118 }
2119 
2120 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2121 {
2122 	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2123 	exit_sie(vcpu);
2124 }
2125 
2126 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2127 {
2128 	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2129 }
2130 
2131 /*
2132  * Kick a guest cpu out of SIE and wait until SIE is not running.
2133  * If the CPU is not running (e.g. waiting while idle), the function
2134  * returns immediately. */
2135 void exit_sie(struct kvm_vcpu *vcpu)
2136 {
2137 	atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
2138 	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2139 		cpu_relax();
2140 }
2141 
2142 /* Kick a guest cpu out of SIE to process a request synchronously */
2143 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
2144 {
2145 	kvm_make_request(req, vcpu);
2146 	kvm_s390_vcpu_request(vcpu);
2147 }
2148 
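/*
 * Called by the gmap code when host mappings of guest memory are
 * invalidated. Only the (two-page) prefix area of each VCPU is of interest;
 * if it is hit, ask the VCPU to re-map its prefix via KVM_REQ_MMU_RELOAD.
 */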
2149 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2150 			      unsigned long end)
2151 {
2152 	struct kvm *kvm = gmap->private;
2153 	struct kvm_vcpu *vcpu;
2154 	unsigned long prefix;
2155 	int i;
2156 
2157 	if (gmap_is_shadow(gmap))
2158 		return;
2159 	if (start >= 1UL << 31)
2160 		/* We are only interested in prefix pages */
2161 		return;
2162 	kvm_for_each_vcpu(i, vcpu, kvm) {
2163 		/* match against both prefix pages */
2164 		prefix = kvm_s390_get_prefix(vcpu);
2165 		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2166 			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2167 				   start, end);
2168 			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2169 		}
2170 	}
2171 }
2172 
2173 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2174 {
2175 	/* kvm common code refers to this, but never calls it */
2176 	BUG();
2177 	return 0;
2178 }
2179 
2180 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2181 					   struct kvm_one_reg *reg)
2182 {
2183 	int r = -EINVAL;
2184 
2185 	switch (reg->id) {
2186 	case KVM_REG_S390_TODPR:
2187 		r = put_user(vcpu->arch.sie_block->todpr,
2188 			     (u32 __user *)reg->addr);
2189 		break;
2190 	case KVM_REG_S390_EPOCHDIFF:
2191 		r = put_user(vcpu->arch.sie_block->epoch,
2192 			     (u64 __user *)reg->addr);
2193 		break;
2194 	case KVM_REG_S390_CPU_TIMER:
2195 		r = put_user(kvm_s390_get_cpu_timer(vcpu),
2196 			     (u64 __user *)reg->addr);
2197 		break;
2198 	case KVM_REG_S390_CLOCK_COMP:
2199 		r = put_user(vcpu->arch.sie_block->ckc,
2200 			     (u64 __user *)reg->addr);
2201 		break;
2202 	case KVM_REG_S390_PFTOKEN:
2203 		r = put_user(vcpu->arch.pfault_token,
2204 			     (u64 __user *)reg->addr);
2205 		break;
2206 	case KVM_REG_S390_PFCOMPARE:
2207 		r = put_user(vcpu->arch.pfault_compare,
2208 			     (u64 __user *)reg->addr);
2209 		break;
2210 	case KVM_REG_S390_PFSELECT:
2211 		r = put_user(vcpu->arch.pfault_select,
2212 			     (u64 __user *)reg->addr);
2213 		break;
2214 	case KVM_REG_S390_PP:
2215 		r = put_user(vcpu->arch.sie_block->pp,
2216 			     (u64 __user *)reg->addr);
2217 		break;
2218 	case KVM_REG_S390_GBEA:
2219 		r = put_user(vcpu->arch.sie_block->gbea,
2220 			     (u64 __user *)reg->addr);
2221 		break;
2222 	default:
2223 		break;
2224 	}
2225 
2226 	return r;
2227 }
2228 
2229 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2230 					   struct kvm_one_reg *reg)
2231 {
2232 	int r = -EINVAL;
2233 	__u64 val;
2234 
2235 	switch (reg->id) {
2236 	case KVM_REG_S390_TODPR:
2237 		r = get_user(vcpu->arch.sie_block->todpr,
2238 			     (u32 __user *)reg->addr);
2239 		break;
2240 	case KVM_REG_S390_EPOCHDIFF:
2241 		r = get_user(vcpu->arch.sie_block->epoch,
2242 			     (u64 __user *)reg->addr);
2243 		break;
2244 	case KVM_REG_S390_CPU_TIMER:
2245 		r = get_user(val, (u64 __user *)reg->addr);
2246 		if (!r)
2247 			kvm_s390_set_cpu_timer(vcpu, val);
2248 		break;
2249 	case KVM_REG_S390_CLOCK_COMP:
2250 		r = get_user(vcpu->arch.sie_block->ckc,
2251 			     (u64 __user *)reg->addr);
2252 		break;
2253 	case KVM_REG_S390_PFTOKEN:
2254 		r = get_user(vcpu->arch.pfault_token,
2255 			     (u64 __user *)reg->addr);
2256 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2257 			kvm_clear_async_pf_completion_queue(vcpu);
2258 		break;
2259 	case KVM_REG_S390_PFCOMPARE:
2260 		r = get_user(vcpu->arch.pfault_compare,
2261 			     (u64 __user *)reg->addr);
2262 		break;
2263 	case KVM_REG_S390_PFSELECT:
2264 		r = get_user(vcpu->arch.pfault_select,
2265 			     (u64 __user *)reg->addr);
2266 		break;
2267 	case KVM_REG_S390_PP:
2268 		r = get_user(vcpu->arch.sie_block->pp,
2269 			     (u64 __user *)reg->addr);
2270 		break;
2271 	case KVM_REG_S390_GBEA:
2272 		r = get_user(vcpu->arch.sie_block->gbea,
2273 			     (u64 __user *)reg->addr);
2274 		break;
2275 	default:
2276 		break;
2277 	}
2278 
2279 	return r;
2280 }
2281 
2282 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2283 {
2284 	kvm_s390_vcpu_initial_reset(vcpu);
2285 	return 0;
2286 }
2287 
2288 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2289 {
2290 	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2291 	return 0;
2292 }
2293 
2294 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2295 {
2296 	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2297 	return 0;
2298 }
2299 
2300 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2301 				  struct kvm_sregs *sregs)
2302 {
2303 	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2304 	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2305 	return 0;
2306 }
2307 
2308 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2309 				  struct kvm_sregs *sregs)
2310 {
2311 	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2312 	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2313 	return 0;
2314 }
2315 
2316 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2317 {
2318 	if (test_fp_ctl(fpu->fpc))
2319 		return -EINVAL;
2320 	vcpu->run->s.regs.fpc = fpu->fpc;
2321 	if (MACHINE_HAS_VX)
2322 		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2323 				 (freg_t *) fpu->fprs);
2324 	else
2325 		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2326 	return 0;
2327 }
2328 
2329 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2330 {
2331 	/* make sure we have the latest values */
2332 	save_fpu_regs();
2333 	if (MACHINE_HAS_VX)
2334 		convert_vx_to_fp((freg_t *) fpu->fprs,
2335 				 (__vector128 *) vcpu->run->s.regs.vrs);
2336 	else
2337 		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2338 	fpu->fpc = vcpu->run->s.regs.fpc;
2339 	return 0;
2340 }
2341 
2342 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2343 {
2344 	int rc = 0;
2345 
2346 	if (!is_vcpu_stopped(vcpu))
2347 		rc = -EBUSY;
2348 	else {
2349 		vcpu->run->psw_mask = psw.mask;
2350 		vcpu->run->psw_addr = psw.addr;
2351 	}
2352 	return rc;
2353 }
2354 
2355 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2356 				  struct kvm_translation *tr)
2357 {
2358 	return -EINVAL; /* not implemented yet */
2359 }
2360 
2361 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2362 			      KVM_GUESTDBG_USE_HW_BP | \
2363 			      KVM_GUESTDBG_ENABLE)
2364 
2365 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2366 					struct kvm_guest_debug *dbg)
2367 {
2368 	int rc = 0;
2369 
2370 	vcpu->guest_debug = 0;
2371 	kvm_s390_clear_bp_data(vcpu);
2372 
2373 	if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2374 		return -EINVAL;
2375 	if (!sclp.has_gpere)
2376 		return -EINVAL;
2377 
2378 	if (dbg->control & KVM_GUESTDBG_ENABLE) {
2379 		vcpu->guest_debug = dbg->control;
2380 		/* enforce guest PER */
2381 		atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2382 
2383 		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2384 			rc = kvm_s390_import_bp_data(vcpu, dbg);
2385 	} else {
2386 		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2387 		vcpu->arch.guestdbg.last_bp = 0;
2388 	}
2389 
2390 	if (rc) {
2391 		vcpu->guest_debug = 0;
2392 		kvm_s390_clear_bp_data(vcpu);
2393 		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2394 	}
2395 
2396 	return rc;
2397 }
2398 
2399 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2400 				    struct kvm_mp_state *mp_state)
2401 {
2402 	/* CHECK_STOP and LOAD are not supported yet */
2403 	return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2404 				       KVM_MP_STATE_OPERATING;
2405 }
2406 
2407 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2408 				    struct kvm_mp_state *mp_state)
2409 {
2410 	int rc = 0;
2411 
2412 	/* user space knows about this interface - let it control the state */
2413 	vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2414 
2415 	switch (mp_state->mp_state) {
2416 	case KVM_MP_STATE_STOPPED:
2417 		kvm_s390_vcpu_stop(vcpu);
2418 		break;
2419 	case KVM_MP_STATE_OPERATING:
2420 		kvm_s390_vcpu_start(vcpu);
2421 		break;
2422 	case KVM_MP_STATE_LOAD:
2423 	case KVM_MP_STATE_CHECK_STOP:
2424 		/* fall through - CHECK_STOP and LOAD are not supported yet */
2425 	default:
2426 		rc = -ENXIO;
2427 	}
2428 
2429 	return rc;
2430 }
2431 
2432 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2433 {
2434 	return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2435 }
2436 
2437 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2438 {
2439 retry:
2440 	kvm_s390_vcpu_request_handled(vcpu);
2441 	if (!vcpu->requests)
2442 		return 0;
2443 	/*
2444 	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
2445 	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2446 	 * This ensures that the ipte instruction for this request has
2447 	 * already finished. We might race against a second unmapper that
2448 	 * wants to set the blocking bit. Let's just retry the request loop.
2449 	 */
2450 	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2451 		int rc;
2452 		rc = gmap_mprotect_notify(vcpu->arch.gmap,
2453 					  kvm_s390_get_prefix(vcpu),
2454 					  PAGE_SIZE * 2, PROT_WRITE);
2455 		if (rc) {
2456 			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
2457 			return rc;
2458 		}
2459 		goto retry;
2460 	}
2461 
2462 	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2463 		vcpu->arch.sie_block->ihcpu = 0xffff;
2464 		goto retry;
2465 	}
2466 
2467 	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2468 		if (!ibs_enabled(vcpu)) {
2469 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2470 			atomic_or(CPUSTAT_IBS,
2471 					&vcpu->arch.sie_block->cpuflags);
2472 		}
2473 		goto retry;
2474 	}
2475 
2476 	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2477 		if (ibs_enabled(vcpu)) {
2478 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2479 			atomic_andnot(CPUSTAT_IBS,
2480 					  &vcpu->arch.sie_block->cpuflags);
2481 		}
2482 		goto retry;
2483 	}
2484 
2485 	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
2486 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2487 		goto retry;
2488 	}
2489 
2490 	/* nothing to do, just clear the request */
2491 	kvm_clear_request(KVM_REQ_UNHALT, vcpu);
2492 
2493 	return 0;
2494 }
2495 
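/*
 * Set the guest TOD clock by adjusting the VM-wide epoch (guest TOD minus
 * host TOD). All VCPUs are blocked while their SIE epoch fields are updated
 * so that they observe a consistent clock.
 */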
2496 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2497 {
2498 	struct kvm_vcpu *vcpu;
2499 	int i;
2500 
2501 	mutex_lock(&kvm->lock);
2502 	preempt_disable();
2503 	kvm->arch.epoch = tod - get_tod_clock();
2504 	kvm_s390_vcpu_block_all(kvm);
2505 	kvm_for_each_vcpu(i, vcpu, kvm)
2506 		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2507 	kvm_s390_vcpu_unblock_all(kvm);
2508 	preempt_enable();
2509 	mutex_unlock(&kvm->lock);
2510 }
2511 
2512 /**
2513  * kvm_arch_fault_in_page - fault-in guest page if necessary
2514  * @vcpu: The corresponding virtual cpu
2515  * @gpa: Guest physical address
2516  * @writable: Whether the page should be writable or not
2517  *
2518  * Make sure that a guest page has been faulted-in on the host.
2519  *
2520  * Return: Zero on success, negative error code otherwise.
2521  */
2522 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2523 {
2524 	return gmap_fault(vcpu->arch.gmap, gpa,
2525 			  writable ? FAULT_FLAG_WRITE : 0);
2526 }
2527 
2528 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
2529 				      unsigned long token)
2530 {
2531 	struct kvm_s390_interrupt inti;
2532 	struct kvm_s390_irq irq;
2533 
2534 	if (start_token) {
2535 		irq.u.ext.ext_params2 = token;
2536 		irq.type = KVM_S390_INT_PFAULT_INIT;
2537 		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
2538 	} else {
2539 		inti.type = KVM_S390_INT_PFAULT_DONE;
2540 		inti.parm64 = token;
2541 		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
2542 	}
2543 }
2544 
2545 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
2546 				     struct kvm_async_pf *work)
2547 {
2548 	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
2549 	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
2550 }
2551 
2552 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
2553 				 struct kvm_async_pf *work)
2554 {
2555 	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
2556 	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
2557 }
2558 
2559 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
2560 			       struct kvm_async_pf *work)
2561 {
2562 	/* s390 will always inject the page directly */
2563 }
2564 
2565 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
2566 {
2567 	/*
2568 	 * s390 will always inject the page directly,
2569 	 * but we still want check_async_completion to clean up.
2570 	 */
2571 	return true;
2572 }
2573 
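/*
 * Try to arm an async pfault for the current host fault. This is only done
 * when userspace configured a pfault token and the guest can currently take
 * the notification: the PSW mask matches the pfault select/compare values,
 * external interrupts are enabled, no interrupt is already pending and the
 * relevant subclass is enabled in CR0.
 */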
2574 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
2575 {
2576 	hva_t hva;
2577 	struct kvm_arch_async_pf arch;
2578 	int rc;
2579 
2580 	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2581 		return 0;
2582 	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
2583 	    vcpu->arch.pfault_compare)
2584 		return 0;
2585 	if (psw_extint_disabled(vcpu))
2586 		return 0;
2587 	if (kvm_s390_vcpu_has_irq(vcpu, 0))
2588 		return 0;
2589 	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
2590 		return 0;
2591 	if (!vcpu->arch.gmap->pfault_enabled)
2592 		return 0;
2593 
2594 	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
2595 	hva += current->thread.gmap_addr & ~PAGE_MASK;
2596 	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
2597 		return 0;
2598 
2599 	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
2600 	return rc;
2601 }
2602 
2603 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
2604 {
2605 	int rc, cpuflags;
2606 
2607 	/*
2608 	 * On s390, notifications for arriving pages are delivered directly
2609 	 * to the guest, but the housekeeping for completed pfaults is
2610 	 * handled outside the worker.
2611 	 */
2612 	kvm_check_async_pf_completion(vcpu);
2613 
2614 	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
2615 	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
2616 
2617 	if (need_resched())
2618 		schedule();
2619 
2620 	if (test_cpu_flag(CIF_MCCK_PENDING))
2621 		s390_handle_mcck();
2622 
2623 	if (!kvm_is_ucontrol(vcpu->kvm)) {
2624 		rc = kvm_s390_deliver_pending_interrupts(vcpu);
2625 		if (rc)
2626 			return rc;
2627 	}
2628 
2629 	rc = kvm_s390_handle_requests(vcpu);
2630 	if (rc)
2631 		return rc;
2632 
2633 	if (guestdbg_enabled(vcpu)) {
2634 		kvm_s390_backup_guest_per_regs(vcpu);
2635 		kvm_s390_patch_guest_per_regs(vcpu);
2636 	}
2637 
2638 	vcpu->arch.sie_block->icptcode = 0;
2639 	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
2640 	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
2641 	trace_kvm_s390_sie_enter(vcpu, cpuflags);
2642 
2643 	return 0;
2644 }
2645 
2646 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
2647 {
2648 	struct kvm_s390_pgm_info pgm_info = {
2649 		.code = PGM_ADDRESSING,
2650 	};
2651 	u8 opcode, ilen;
2652 	int rc;
2653 
2654 	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
2655 	trace_kvm_s390_sie_fault(vcpu);
2656 
2657 	/*
2658 	 * We want to inject an addressing exception, which is defined as a
2659 	 * suppressing or terminating exception. However, since we came here
2660 	 * by a DAT access exception, the PSW still points to the faulting
2661 	 * instruction since DAT exceptions are nullifying. So we've got
2662 	 * to look up the current opcode to get the length of the instruction
2663 	 * to be able to forward the PSW.
2664 	 */
2665 	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
2666 	ilen = insn_length(opcode);
2667 	if (rc < 0) {
2668 		return rc;
2669 	} else if (rc) {
2670 		/* Instruction-Fetching Exceptions - we can't detect the ilen.
2671 		 * Forward by arbitrary ilc, injection will take care of
2672 		 * nullification if necessary.
2673 		 */
2674 		pgm_info = vcpu->arch.pgm;
2675 		ilen = 4;
2676 	}
2677 	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
2678 	kvm_s390_forward_psw(vcpu, ilen);
2679 	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
2680 }
2681 
2682 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
2683 {
2684 	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
2685 		   vcpu->arch.sie_block->icptcode);
2686 	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
2687 
2688 	if (guestdbg_enabled(vcpu))
2689 		kvm_s390_restore_guest_per_regs(vcpu);
2690 
2691 	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
2692 	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
2693 
2694 	if (vcpu->arch.sie_block->icptcode > 0) {
2695 		int rc = kvm_handle_sie_intercept(vcpu);
2696 
2697 		if (rc != -EOPNOTSUPP)
2698 			return rc;
2699 		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
2700 		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
2701 		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
2702 		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
2703 		return -EREMOTE;
2704 	} else if (exit_reason != -EFAULT) {
2705 		vcpu->stat.exit_null++;
2706 		return 0;
2707 	} else if (kvm_is_ucontrol(vcpu->kvm)) {
2708 		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
2709 		vcpu->run->s390_ucontrol.trans_exc_code =
2710 						current->thread.gmap_addr;
2711 		vcpu->run->s390_ucontrol.pgm_code = 0x10;
2712 		return -EREMOTE;
2713 	} else if (current->thread.gmap_pfault) {
2714 		trace_kvm_s390_major_guest_pfault(vcpu);
2715 		current->thread.gmap_pfault = 0;
2716 		if (kvm_arch_setup_async_pf(vcpu))
2717 			return 0;
2718 		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
2719 	}
2720 	return vcpu_post_run_fault_in_sie(vcpu);
2721 }
2722 
2723 static int __vcpu_run(struct kvm_vcpu *vcpu)
2724 {
2725 	int rc, exit_reason;
2726 
2727 	/*
2728 	 * We try to hold kvm->srcu during most of vcpu_run (except while
2729 	 * running the guest), so that memslots and other SRCU-protected data stay valid.
2730 	 */
2731 	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2732 
2733 	do {
2734 		rc = vcpu_pre_run(vcpu);
2735 		if (rc)
2736 			break;
2737 
2738 		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2739 		/*
2740 		 * As PF_VCPU will be used in the fault handler, there must be
2741 		 * no uaccess between guest_enter and guest_exit.
2742 		 */
2743 		local_irq_disable();
2744 		guest_enter_irqoff();
2745 		__disable_cpu_timer_accounting(vcpu);
2746 		local_irq_enable();
2747 		exit_reason = sie64a(vcpu->arch.sie_block,
2748 				     vcpu->run->s.regs.gprs);
2749 		local_irq_disable();
2750 		__enable_cpu_timer_accounting(vcpu);
2751 		guest_exit_irqoff();
2752 		local_irq_enable();
2753 		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2754 
2755 		rc = vcpu_post_run(vcpu, exit_reason);
2756 	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
2757 
2758 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2759 	return rc;
2760 }
2761 
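/*
 * Transfer the register state that userspace marked dirty from kvm_run into
 * the VCPU/SIE block and load the guest FP/vector, access and guarded-storage
 * registers before entering SIE.
 */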
2762 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2763 {
2764 	struct runtime_instr_cb *riccb;
2765 	struct gs_cb *gscb;
2766 
2767 	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
2768 	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
2769 	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
2770 	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
2771 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
2772 		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
2773 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
2774 		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
2775 		/* some control register changes require a tlb flush */
2776 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2777 	}
2778 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
2779 		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
2780 		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
2781 		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
2782 		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
2783 		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
2784 	}
2785 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
2786 		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
2787 		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
2788 		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
2789 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2790 			kvm_clear_async_pf_completion_queue(vcpu);
2791 	}
2792 	/*
2793 	 * If userspace sets the riccb (e.g. after migration) to a valid state,
2794 	 * we should enable RI here instead of doing the lazy enablement.
2795 	 */
2796 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
2797 	    test_kvm_facility(vcpu->kvm, 64) &&
2798 	    riccb->valid &&
2799 	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
2800 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
2801 		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
2802 	}
2803 	/*
2804 	 * If userspace sets the gscb (e.g. after migration) to non-zero,
2805 	 * we should enable GS here instead of doing the lazy enablement.
2806 	 */
2807 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
2808 	    test_kvm_facility(vcpu->kvm, 133) &&
2809 	    gscb->gssm &&
2810 	    !vcpu->arch.gs_enabled) {
2811 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
2812 		vcpu->arch.sie_block->ecb |= ECB_GS;
2813 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2814 		vcpu->arch.gs_enabled = 1;
2815 	}
2816 	save_access_regs(vcpu->arch.host_acrs);
2817 	restore_access_regs(vcpu->run->s.regs.acrs);
2818 	/* save host (userspace) fprs/vrs */
2819 	save_fpu_regs();
2820 	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
2821 	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
2822 	if (MACHINE_HAS_VX)
2823 		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
2824 	else
2825 		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
2826 	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
2827 	if (test_fp_ctl(current->thread.fpu.fpc))
2828 		/* User space provided an invalid FPC, let's clear it */
2829 		current->thread.fpu.fpc = 0;
2830 	if (MACHINE_HAS_GS) {
2831 		preempt_disable();
2832 		__ctl_set_bit(2, 4);
2833 		if (current->thread.gs_cb) {
2834 			vcpu->arch.host_gscb = current->thread.gs_cb;
2835 			save_gs_cb(vcpu->arch.host_gscb);
2836 		}
2837 		if (vcpu->arch.gs_enabled) {
2838 			current->thread.gs_cb = (struct gs_cb *)
2839 						&vcpu->run->s.regs.gscb;
2840 			restore_gs_cb(current->thread.gs_cb);
2841 		}
2842 		preempt_enable();
2843 	}
2844 
2845 	kvm_run->kvm_dirty_regs = 0;
2846 }
2847 
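/*
 * Write the current guest register state back into kvm_run and restore the
 * host FP/vector, access and guarded-storage state after SIE has been left.
 */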
2848 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2849 {
2850 	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
2851 	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
2852 	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
2853 	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
2854 	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
2855 	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
2856 	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
2857 	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
2858 	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
2859 	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
2860 	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
2861 	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
2862 	save_access_regs(vcpu->run->s.regs.acrs);
2863 	restore_access_regs(vcpu->arch.host_acrs);
2864 	/* Save guest register state */
2865 	save_fpu_regs();
2866 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
2867 	/* Restore will be done lazily at return */
2868 	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
2869 	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
2870 	if (MACHINE_HAS_GS) {
2871 		__ctl_set_bit(2, 4);
2872 		if (vcpu->arch.gs_enabled)
2873 			save_gs_cb(current->thread.gs_cb);
2874 		preempt_disable();
2875 		current->thread.gs_cb = vcpu->arch.host_gscb;
2876 		restore_gs_cb(vcpu->arch.host_gscb);
2877 		preempt_enable();
2878 		if (!vcpu->arch.host_gscb)
2879 			__ctl_clear_bit(2, 4);
2880 		vcpu->arch.host_gscb = NULL;
2881 	}
2882 
2883 }
2884 
2885 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2886 {
2887 	int rc;
2888 	sigset_t sigsaved;
2889 
2890 	if (kvm_run->immediate_exit)
2891 		return -EINTR;
2892 
2893 	if (guestdbg_exit_pending(vcpu)) {
2894 		kvm_s390_prepare_debug_exit(vcpu);
2895 		return 0;
2896 	}
2897 
2898 	if (vcpu->sigset_active)
2899 		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
2900 
2901 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
2902 		kvm_s390_vcpu_start(vcpu);
2903 	} else if (is_vcpu_stopped(vcpu)) {
2904 		pr_err_ratelimited("can't run stopped vcpu %d\n",
2905 				   vcpu->vcpu_id);
2906 		return -EINVAL;
2907 	}
2908 
2909 	sync_regs(vcpu, kvm_run);
2910 	enable_cpu_timer_accounting(vcpu);
2911 
2912 	might_fault();
2913 	rc = __vcpu_run(vcpu);
2914 
2915 	if (signal_pending(current) && !rc) {
2916 		kvm_run->exit_reason = KVM_EXIT_INTR;
2917 		rc = -EINTR;
2918 	}
2919 
2920 	if (guestdbg_exit_pending(vcpu) && !rc)  {
2921 		kvm_s390_prepare_debug_exit(vcpu);
2922 		rc = 0;
2923 	}
2924 
2925 	if (rc == -EREMOTE) {
2926 		/* userspace support is needed, kvm_run has been prepared */
2927 		rc = 0;
2928 	}
2929 
2930 	disable_cpu_timer_accounting(vcpu);
2931 	store_regs(vcpu, kvm_run);
2932 
2933 	if (vcpu->sigset_active)
2934 		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2935 
2936 	vcpu->stat.exit_userspace++;
2937 	return rc;
2938 }
2939 
2940 /*
2941  * store status at address
2942  * we have two special cases:
2943  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
2944  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
2945  */
2946 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
2947 {
2948 	unsigned char archmode = 1;
2949 	freg_t fprs[NUM_FPRS];
2950 	unsigned int px;
2951 	u64 clkcomp, cputm;
2952 	int rc;
2953 
2954 	px = kvm_s390_get_prefix(vcpu);
2955 	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
2956 		if (write_guest_abs(vcpu, 163, &archmode, 1))
2957 			return -EFAULT;
2958 		gpa = 0;
2959 	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
2960 		if (write_guest_real(vcpu, 163, &archmode, 1))
2961 			return -EFAULT;
2962 		gpa = px;
2963 	} else
2964 		gpa -= __LC_FPREGS_SAVE_AREA;
2965 
2966 	/* manually convert vector registers if necessary */
2967 	if (MACHINE_HAS_VX) {
2968 		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
2969 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2970 				     fprs, 128);
2971 	} else {
2972 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2973 				     vcpu->run->s.regs.fprs, 128);
2974 	}
2975 	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
2976 			      vcpu->run->s.regs.gprs, 128);
2977 	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
2978 			      &vcpu->arch.sie_block->gpsw, 16);
2979 	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
2980 			      &px, 4);
2981 	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
2982 			      &vcpu->run->s.regs.fpc, 4);
2983 	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
2984 			      &vcpu->arch.sie_block->todpr, 4);
2985 	cputm = kvm_s390_get_cpu_timer(vcpu);
2986 	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
2987 			      &cputm, 8);
2988 	clkcomp = vcpu->arch.sie_block->ckc >> 8;
2989 	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
2990 			      &clkcomp, 8);
2991 	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
2992 			      &vcpu->run->s.regs.acrs, 64);
2993 	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
2994 			      &vcpu->arch.sie_block->gcr, 128);
2995 	return rc ? -EFAULT : 0;
2996 }
2997 
2998 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
2999 {
3000 	/*
3001 	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
3002 	 * switch in the run ioctl. Let's update our copies before we save
3003 	 * them into the save area.
3004 	 */
3005 	save_fpu_regs();
3006 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3007 	save_access_regs(vcpu->run->s.regs.acrs);
3008 
3009 	return kvm_s390_store_status_unloaded(vcpu, addr);
3010 }
3011 
3012 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3013 {
3014 	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
3015 	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
3016 }
3017 
3018 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
3019 {
3020 	unsigned int i;
3021 	struct kvm_vcpu *vcpu;
3022 
3023 	kvm_for_each_vcpu(i, vcpu, kvm) {
3024 		__disable_ibs_on_vcpu(vcpu);
3025 	}
3026 }
3027 
3028 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3029 {
3030 	if (!sclp.has_ibs)
3031 		return;
3032 	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
3033 	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
3034 }
3035 
3036 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
3037 {
3038 	int i, online_vcpus, started_vcpus = 0;
3039 
3040 	if (!is_vcpu_stopped(vcpu))
3041 		return;
3042 
3043 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
3044 	/* Only one cpu at a time may enter/leave the STOPPED state. */
3045 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
3046 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3047 
3048 	for (i = 0; i < online_vcpus; i++) {
3049 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
3050 			started_vcpus++;
3051 	}
3052 
3053 	if (started_vcpus == 0) {
3054 		/* we're the only active VCPU -> speed it up */
3055 		__enable_ibs_on_vcpu(vcpu);
3056 	} else if (started_vcpus == 1) {
3057 		/*
3058 		 * As we are starting a second VCPU, we have to disable
3059 		 * the IBS facility on all VCPUs to remove potentially
3060 		 * outstanding ENABLE requests.
3061 		 */
3062 		__disable_ibs_on_all_vcpus(vcpu->kvm);
3063 	}
3064 
3065 	atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3066 	/*
3067 	 * Another VCPU might have used IBS while we were offline.
3068 	 * Let's play safe and flush the VCPU at startup.
3069 	 */
3070 	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3071 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3072 	return;
3073 }
3074 
3075 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
3076 {
3077 	int i, online_vcpus, started_vcpus = 0;
3078 	struct kvm_vcpu *started_vcpu = NULL;
3079 
3080 	if (is_vcpu_stopped(vcpu))
3081 		return;
3082 
3083 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
3084 	/* Only one cpu at a time may enter/leave the STOPPED state. */
3085 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
3086 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3087 
3088 	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
3089 	kvm_s390_clear_stop_irq(vcpu);
3090 
3091 	atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3092 	__disable_ibs_on_vcpu(vcpu);
3093 
3094 	for (i = 0; i < online_vcpus; i++) {
3095 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
3096 			started_vcpus++;
3097 			started_vcpu = vcpu->kvm->vcpus[i];
3098 		}
3099 	}
3100 
3101 	if (started_vcpus == 1) {
3102 		/*
3103 		 * As we only have one VCPU left, we want to enable the
3104 		 * IBS facility for that VCPU to speed it up.
3105 		 */
3106 		__enable_ibs_on_vcpu(started_vcpu);
3107 	}
3108 
3109 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3110 	return;
3111 }
3112 
3113 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3114 				     struct kvm_enable_cap *cap)
3115 {
3116 	int r;
3117 
3118 	if (cap->flags)
3119 		return -EINVAL;
3120 
3121 	switch (cap->cap) {
3122 	case KVM_CAP_S390_CSS_SUPPORT:
3123 		if (!vcpu->kvm->arch.css_support) {
3124 			vcpu->kvm->arch.css_support = 1;
3125 			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
3126 			trace_kvm_s390_enable_css(vcpu->kvm);
3127 		}
3128 		r = 0;
3129 		break;
3130 	default:
3131 		r = -EINVAL;
3132 		break;
3133 	}
3134 	return r;
3135 }
3136 
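/*
 * Handle the KVM_S390_MEM_OP ioctl: read or write guest logical memory via a
 * temporary buffer, or just check accessibility when
 * KVM_S390_MEMOP_F_CHECK_ONLY is set. On access exceptions the program
 * interrupt is optionally injected into the guest.
 */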
3137 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3138 				  struct kvm_s390_mem_op *mop)
3139 {
3140 	void __user *uaddr = (void __user *)mop->buf;
3141 	void *tmpbuf = NULL;
3142 	int r, srcu_idx;
3143 	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3144 				    | KVM_S390_MEMOP_F_CHECK_ONLY;
3145 
3146 	if (mop->flags & ~supported_flags)
3147 		return -EINVAL;
3148 
3149 	if (mop->size > MEM_OP_MAX_SIZE)
3150 		return -E2BIG;
3151 
3152 	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3153 		tmpbuf = vmalloc(mop->size);
3154 		if (!tmpbuf)
3155 			return -ENOMEM;
3156 	}
3157 
3158 	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3159 
3160 	switch (mop->op) {
3161 	case KVM_S390_MEMOP_LOGICAL_READ:
3162 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3163 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3164 					    mop->size, GACC_FETCH);
3165 			break;
3166 		}
3167 		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3168 		if (r == 0) {
3169 			if (copy_to_user(uaddr, tmpbuf, mop->size))
3170 				r = -EFAULT;
3171 		}
3172 		break;
3173 	case KVM_S390_MEMOP_LOGICAL_WRITE:
3174 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3175 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3176 					    mop->size, GACC_STORE);
3177 			break;
3178 		}
3179 		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3180 			r = -EFAULT;
3181 			break;
3182 		}
3183 		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3184 		break;
3185 	default:
3186 		r = -EINVAL;
3187 	}
3188 
3189 	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3190 
3191 	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3192 		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
3193 
3194 	vfree(tmpbuf);
3195 	return r;
3196 }
3197 
3198 long kvm_arch_vcpu_ioctl(struct file *filp,
3199 			 unsigned int ioctl, unsigned long arg)
3200 {
3201 	struct kvm_vcpu *vcpu = filp->private_data;
3202 	void __user *argp = (void __user *)arg;
3203 	int idx;
3204 	long r;
3205 
3206 	switch (ioctl) {
3207 	case KVM_S390_IRQ: {
3208 		struct kvm_s390_irq s390irq;
3209 
3210 		r = -EFAULT;
3211 		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3212 			break;
3213 		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3214 		break;
3215 	}
3216 	case KVM_S390_INTERRUPT: {
3217 		struct kvm_s390_interrupt s390int;
3218 		struct kvm_s390_irq s390irq;
3219 
3220 		r = -EFAULT;
3221 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
3222 			break;
3223 		if (s390int_to_s390irq(&s390int, &s390irq))
3224 			return -EINVAL;
3225 		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3226 		break;
3227 	}
3228 	case KVM_S390_STORE_STATUS:
3229 		idx = srcu_read_lock(&vcpu->kvm->srcu);
3230 		r = kvm_s390_vcpu_store_status(vcpu, arg);
3231 		srcu_read_unlock(&vcpu->kvm->srcu, idx);
3232 		break;
3233 	case KVM_S390_SET_INITIAL_PSW: {
3234 		psw_t psw;
3235 
3236 		r = -EFAULT;
3237 		if (copy_from_user(&psw, argp, sizeof(psw)))
3238 			break;
3239 		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3240 		break;
3241 	}
3242 	case KVM_S390_INITIAL_RESET:
3243 		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3244 		break;
3245 	case KVM_SET_ONE_REG:
3246 	case KVM_GET_ONE_REG: {
3247 		struct kvm_one_reg reg;
3248 		r = -EFAULT;
3249 		if (copy_from_user(&reg, argp, sizeof(reg)))
3250 			break;
3251 		if (ioctl == KVM_SET_ONE_REG)
3252 			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3253 		else
3254 			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3255 		break;
3256 	}
3257 #ifdef CONFIG_KVM_S390_UCONTROL
3258 	case KVM_S390_UCAS_MAP: {
3259 		struct kvm_s390_ucas_mapping ucasmap;
3260 
3261 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3262 			r = -EFAULT;
3263 			break;
3264 		}
3265 
3266 		if (!kvm_is_ucontrol(vcpu->kvm)) {
3267 			r = -EINVAL;
3268 			break;
3269 		}
3270 
3271 		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3272 				     ucasmap.vcpu_addr, ucasmap.length);
3273 		break;
3274 	}
3275 	case KVM_S390_UCAS_UNMAP: {
3276 		struct kvm_s390_ucas_mapping ucasmap;
3277 
3278 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3279 			r = -EFAULT;
3280 			break;
3281 		}
3282 
3283 		if (!kvm_is_ucontrol(vcpu->kvm)) {
3284 			r = -EINVAL;
3285 			break;
3286 		}
3287 
3288 		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3289 			ucasmap.length);
3290 		break;
3291 	}
3292 #endif
3293 	case KVM_S390_VCPU_FAULT: {
3294 		r = gmap_fault(vcpu->arch.gmap, arg, 0);
3295 		break;
3296 	}
3297 	case KVM_ENABLE_CAP:
3298 	{
3299 		struct kvm_enable_cap cap;
3300 		r = -EFAULT;
3301 		if (copy_from_user(&cap, argp, sizeof(cap)))
3302 			break;
3303 		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3304 		break;
3305 	}
3306 	case KVM_S390_MEM_OP: {
3307 		struct kvm_s390_mem_op mem_op;
3308 
3309 		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3310 			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3311 		else
3312 			r = -EFAULT;
3313 		break;
3314 	}
3315 	case KVM_S390_SET_IRQ_STATE: {
3316 		struct kvm_s390_irq_state irq_state;
3317 
3318 		r = -EFAULT;
3319 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3320 			break;
3321 		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3322 		    irq_state.len == 0 ||
3323 		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3324 			r = -EINVAL;
3325 			break;
3326 		}
3327 		r = kvm_s390_set_irq_state(vcpu,
3328 					   (void __user *) irq_state.buf,
3329 					   irq_state.len);
3330 		break;
3331 	}
3332 	case KVM_S390_GET_IRQ_STATE: {
3333 		struct kvm_s390_irq_state irq_state;
3334 
3335 		r = -EFAULT;
3336 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3337 			break;
3338 		if (irq_state.len == 0) {
3339 			r = -EINVAL;
3340 			break;
3341 		}
3342 		r = kvm_s390_get_irq_state(vcpu,
3343 					   (__u8 __user *)  irq_state.buf,
3344 					   irq_state.len);
3345 		break;
3346 	}
3347 	default:
3348 		r = -ENOTTY;
3349 	}
3350 	return r;
3351 }
3352 
3353 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3354 {
3355 #ifdef CONFIG_KVM_S390_UCONTROL
3356 	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3357 		 && (kvm_is_ucontrol(vcpu->kvm))) {
3358 		vmf->page = virt_to_page(vcpu->arch.sie_block);
3359 		get_page(vmf->page);
3360 		return 0;
3361 	}
3362 #endif
3363 	return VM_FAULT_SIGBUS;
3364 }
3365 
3366 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
3367 			    unsigned long npages)
3368 {
3369 	return 0;
3370 }
3371 
3372 /* Section: memory related */
3373 int kvm_arch_prepare_memory_region(struct kvm *kvm,
3374 				   struct kvm_memory_slot *memslot,
3375 				   const struct kvm_userspace_memory_region *mem,
3376 				   enum kvm_mr_change change)
3377 {
3378 	/* A few sanity checks. Memory slots have to start and end at a
3379 	   segment boundary (1MB). The memory in userland may be fragmented
3380 	   into various different vmas. It is okay to mmap() and munmap()
3381 	   stuff in this slot after doing this call at any time */
3382 
3383 	if (mem->userspace_addr & 0xffffful)
3384 		return -EINVAL;
3385 
3386 	if (mem->memory_size & 0xffffful)
3387 		return -EINVAL;
3388 
3389 	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
3390 		return -EINVAL;
3391 
3392 	return 0;
3393 }
3394 
3395 void kvm_arch_commit_memory_region(struct kvm *kvm,
3396 				const struct kvm_userspace_memory_region *mem,
3397 				const struct kvm_memory_slot *old,
3398 				const struct kvm_memory_slot *new,
3399 				enum kvm_mr_change change)
3400 {
3401 	int rc;
3402 
3403 	/* If the basics of the memslot do not change, we do not want
3404 	 * to update the gmap. Every update causes several unnecessary
3405 	 * segment translation exceptions. This is usually handled just
3406 	 * fine by the normal fault handler + gmap, but it will also
3407 	 * cause faults on the prefix page of running guest CPUs.
3408 	 */
3409 	if (old->userspace_addr == mem->userspace_addr &&
3410 	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
3411 	    old->npages * PAGE_SIZE == mem->memory_size)
3412 		return;
3413 
3414 	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
3415 		mem->guest_phys_addr, mem->memory_size);
3416 	if (rc)
3417 		pr_warn("failed to commit memory region\n");
3418 	return;
3419 }
3420 
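/*
 * Derive, from the SCLP hmfai field, which bits of facility-list word i may
 * be offered to guests: two bits per word select whether 48, 32, 16 or 0 of
 * the low-order bits remain set in the mask.
 */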
3421 static inline unsigned long nonhyp_mask(int i)
3422 {
3423 	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
3424 
3425 	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
3426 }
3427 
3428 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
3429 {
3430 	vcpu->valid_wakeup = false;
3431 }
3432 
3433 static int __init kvm_s390_init(void)
3434 {
3435 	int i;
3436 
3437 	if (!sclp.has_sief2) {
3438 		pr_info("SIE not available\n");
3439 		return -ENODEV;
3440 	}
3441 
3442 	for (i = 0; i < 16; i++)
3443 		kvm_s390_fac_list_mask[i] |=
3444 			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
3445 
3446 	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
3447 }
3448 
3449 static void __exit kvm_s390_exit(void)
3450 {
3451 	kvm_exit();
3452 }
3453 
3454 module_init(kvm_s390_init);
3455 module_exit(kvm_s390_exit);
3456 
3457 /*
3458  * Enable autoloading of the kvm module.
3459  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
3460  * since x86 takes a different approach.
3461  */
3462 #include <linux/miscdevice.h>
3463 MODULE_ALIAS_MISCDEV(KVM_MINOR);
3464 MODULE_ALIAS("devname:kvm");
3465