xref: /openbmc/linux/arch/s390/kvm/kvm-s390.c (revision ca90578000afb0d8f177ea36f7259a9c3640cf49)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * hosting IBM Z kernel virtual machines (s390x)
4  *
5  * Copyright IBM Corp. 2008, 2018
6  *
7  *    Author(s): Carsten Otte <cotte@de.ibm.com>
8  *               Christian Borntraeger <borntraeger@de.ibm.com>
9  *               Heiko Carstens <heiko.carstens@de.ibm.com>
10  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
11  *               Jason J. Herne <jjherne@us.ibm.com>
12  */
13 
14 #include <linux/compiler.h>
15 #include <linux/err.h>
16 #include <linux/fs.h>
17 #include <linux/hrtimer.h>
18 #include <linux/init.h>
19 #include <linux/kvm.h>
20 #include <linux/kvm_host.h>
21 #include <linux/mman.h>
22 #include <linux/module.h>
23 #include <linux/moduleparam.h>
24 #include <linux/random.h>
25 #include <linux/slab.h>
26 #include <linux/timer.h>
27 #include <linux/vmalloc.h>
28 #include <linux/bitmap.h>
29 #include <linux/sched/signal.h>
30 #include <linux/string.h>
31 
32 #include <asm/asm-offsets.h>
33 #include <asm/lowcore.h>
34 #include <asm/stp.h>
35 #include <asm/pgtable.h>
36 #include <asm/gmap.h>
37 #include <asm/nmi.h>
38 #include <asm/switch_to.h>
39 #include <asm/isc.h>
40 #include <asm/sclp.h>
41 #include <asm/cpacf.h>
42 #include <asm/timex.h>
43 #include "kvm-s390.h"
44 #include "gaccess.h"
45 
46 #define KMSG_COMPONENT "kvm-s390"
47 #undef pr_fmt
48 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
49 
50 #define CREATE_TRACE_POINTS
51 #include "trace.h"
52 #include "trace-s390.h"
53 
54 #define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
55 #define LOCAL_IRQS 32
56 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
57 			   (KVM_MAX_VCPUS + LOCAL_IRQS))
58 
59 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
60 #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
61 
62 struct kvm_stats_debugfs_item debugfs_entries[] = {
63 	{ "userspace_handled", VCPU_STAT(exit_userspace) },
64 	{ "exit_null", VCPU_STAT(exit_null) },
65 	{ "exit_validity", VCPU_STAT(exit_validity) },
66 	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
67 	{ "exit_external_request", VCPU_STAT(exit_external_request) },
68 	{ "exit_io_request", VCPU_STAT(exit_io_request) },
69 	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
70 	{ "exit_instruction", VCPU_STAT(exit_instruction) },
71 	{ "exit_pei", VCPU_STAT(exit_pei) },
72 	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
73 	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
74 	{ "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
75 	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
76 	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
77 	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
78 	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
79 	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
80 	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
81 	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
82 	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
83 	{ "deliver_ckc", VCPU_STAT(deliver_ckc) },
84 	{ "deliver_cputm", VCPU_STAT(deliver_cputm) },
85 	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
86 	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
87 	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
88 	{ "deliver_virtio", VCPU_STAT(deliver_virtio) },
89 	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
90 	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
91 	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
92 	{ "deliver_program", VCPU_STAT(deliver_program) },
93 	{ "deliver_io", VCPU_STAT(deliver_io) },
94 	{ "deliver_machine_check", VCPU_STAT(deliver_machine_check) },
95 	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
96 	{ "inject_ckc", VCPU_STAT(inject_ckc) },
97 	{ "inject_cputm", VCPU_STAT(inject_cputm) },
98 	{ "inject_external_call", VCPU_STAT(inject_external_call) },
99 	{ "inject_float_mchk", VM_STAT(inject_float_mchk) },
100 	{ "inject_emergency_signal", VCPU_STAT(inject_emergency_signal) },
101 	{ "inject_io", VM_STAT(inject_io) },
102 	{ "inject_mchk", VCPU_STAT(inject_mchk) },
103 	{ "inject_pfault_done", VM_STAT(inject_pfault_done) },
104 	{ "inject_program", VCPU_STAT(inject_program) },
105 	{ "inject_restart", VCPU_STAT(inject_restart) },
106 	{ "inject_service_signal", VM_STAT(inject_service_signal) },
107 	{ "inject_set_prefix", VCPU_STAT(inject_set_prefix) },
108 	{ "inject_stop_signal", VCPU_STAT(inject_stop_signal) },
109 	{ "inject_pfault_init", VCPU_STAT(inject_pfault_init) },
110 	{ "inject_virtio", VM_STAT(inject_virtio) },
111 	{ "instruction_epsw", VCPU_STAT(instruction_epsw) },
112 	{ "instruction_gs", VCPU_STAT(instruction_gs) },
113 	{ "instruction_io_other", VCPU_STAT(instruction_io_other) },
114 	{ "instruction_lpsw", VCPU_STAT(instruction_lpsw) },
115 	{ "instruction_lpswe", VCPU_STAT(instruction_lpswe) },
116 	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
117 	{ "instruction_ptff", VCPU_STAT(instruction_ptff) },
118 	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
119 	{ "instruction_sck", VCPU_STAT(instruction_sck) },
120 	{ "instruction_sckpf", VCPU_STAT(instruction_sckpf) },
121 	{ "instruction_spx", VCPU_STAT(instruction_spx) },
122 	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
123 	{ "instruction_stap", VCPU_STAT(instruction_stap) },
124 	{ "instruction_iske", VCPU_STAT(instruction_iske) },
125 	{ "instruction_ri", VCPU_STAT(instruction_ri) },
126 	{ "instruction_rrbe", VCPU_STAT(instruction_rrbe) },
127 	{ "instruction_sske", VCPU_STAT(instruction_sske) },
128 	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
129 	{ "instruction_essa", VCPU_STAT(instruction_essa) },
130 	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
131 	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
132 	{ "instruction_tb", VCPU_STAT(instruction_tb) },
133 	{ "instruction_tpi", VCPU_STAT(instruction_tpi) },
134 	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
135 	{ "instruction_tsch", VCPU_STAT(instruction_tsch) },
136 	{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
137 	{ "instruction_sie", VCPU_STAT(instruction_sie) },
138 	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
139 	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
140 	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
141 	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
142 	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
143 	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
144 	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
145 	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
146 	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
147 	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
148 	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
149 	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
150 	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
151 	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
152 	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
153 	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
154 	{ "instruction_diag_10", VCPU_STAT(diagnose_10) },
155 	{ "instruction_diag_44", VCPU_STAT(diagnose_44) },
156 	{ "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
157 	{ "instruction_diag_258", VCPU_STAT(diagnose_258) },
158 	{ "instruction_diag_308", VCPU_STAT(diagnose_308) },
159 	{ "instruction_diag_500", VCPU_STAT(diagnose_500) },
160 	{ "instruction_diag_other", VCPU_STAT(diagnose_other) },
161 	{ NULL }
162 };
163 
164 struct kvm_s390_tod_clock_ext {
165 	__u8 epoch_idx;
166 	__u64 tod;
167 	__u8 reserved[7];
168 } __packed;
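/*
 * Note: this mirrors the 16-byte value stored by STORE CLOCK EXTENDED
 * (which get_tod_clock_ext() uses below): one byte of epoch index, the
 * 64-bit TOD clock, and 7 trailing bytes that are not interpreted here.
 */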
169 
170 /* allow nested virtualization in KVM (if enabled by user space) */
171 static int nested;
172 module_param(nested, int, S_IRUGO);
173 MODULE_PARM_DESC(nested, "Nested virtualization support");
174 
175 
176 /*
177  * For now we handle at most 16 double words as this is what the s390 base
178  * kernel handles and stores in the prefix page. If we ever need to go beyond
179  * this, it will require code changes, but the external uapi can stay the same.
180  */
181 #define SIZE_INTERNAL 16
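/* i.e. 16 * 64 = 1024 facility bits are tracked internally */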
182 
183 /*
184  * Base facility mask defining the default facilities. Consists of the
185  * defines in FACILITIES_KVM and the non-hypervisor managed bits.
186  */
187 static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
188 /*
189  * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
190  * and covers the facilities that can be enabled via a cpu model.
191  */
192 static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
193 
194 static unsigned long kvm_s390_fac_size(void)
195 {
196 	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
197 	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
198 	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
199 		sizeof(S390_lowcore.stfle_fac_list));
200 
201 	return SIZE_INTERNAL;
202 }
203 
204 /* available cpu features supported by kvm */
205 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
206 /* available subfunctions indicated via query / "test bit" */
207 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
208 
209 static struct gmap_notifier gmap_notifier;
210 static struct gmap_notifier vsie_gmap_notifier;
211 debug_info_t *kvm_s390_dbf;
212 
213 /* Section: not file related */
214 int kvm_arch_hardware_enable(void)
215 {
216 	/* every s390 is virtualization enabled ;-) */
217 	return 0;
218 }
219 
220 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
221 			      unsigned long end);
222 
223 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
224 {
225 	u8 delta_idx = 0;
226 
227 	/*
228 	 * The TOD jumps by delta, we have to compensate this by adding
229 	 * -delta to the epoch.
230 	 */
231 	delta = -delta;
232 
233 	/* sign-extension - we're adding to signed values below */
234 	if ((s64)delta < 0)
235 		delta_idx = -1;
236 
237 	scb->epoch += delta;
238 	if (scb->ecd & ECD_MEF) {
239 		scb->epdx += delta_idx;
240 		if (scb->epoch < delta)
241 			scb->epdx += 1;
242 	}
243 }
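/*
 * Illustration (arbitrary numbers): a forward TOD jump of 1 gives
 * delta = 0xffffffffffffffff and delta_idx = -1.  Adding both to the
 * (epdx, epoch) pair subtracts exactly 1 from the extended epoch; the
 * "epoch < delta" test above supplies the carry whenever the 64-bit
 * addition wraps around.
 */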
244 
245 /*
246  * This callback is executed during stop_machine(). All CPUs are therefore
247  * temporarily stopped. In order not to change guest behavior, we have to
248  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
249  * so a CPU won't be stopped while calculating with the epoch.
250  */
251 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
252 			  void *v)
253 {
254 	struct kvm *kvm;
255 	struct kvm_vcpu *vcpu;
256 	int i;
257 	unsigned long long *delta = v;
258 
259 	list_for_each_entry(kvm, &vm_list, vm_list) {
260 		kvm_for_each_vcpu(i, vcpu, kvm) {
261 			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
262 			if (i == 0) {
263 				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
264 				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
265 			}
266 			if (vcpu->arch.cputm_enabled)
267 				vcpu->arch.cputm_start += *delta;
268 			if (vcpu->arch.vsie_block)
269 				kvm_clock_sync_scb(vcpu->arch.vsie_block,
270 						   *delta);
271 		}
272 	}
273 	return NOTIFY_OK;
274 }
275 
276 static struct notifier_block kvm_clock_notifier = {
277 	.notifier_call = kvm_clock_sync,
278 };
279 
280 int kvm_arch_hardware_setup(void)
281 {
282 	gmap_notifier.notifier_call = kvm_gmap_notifier;
283 	gmap_register_pte_notifier(&gmap_notifier);
284 	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
285 	gmap_register_pte_notifier(&vsie_gmap_notifier);
286 	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
287 				       &kvm_clock_notifier);
288 	return 0;
289 }
290 
291 void kvm_arch_hardware_unsetup(void)
292 {
293 	gmap_unregister_pte_notifier(&gmap_notifier);
294 	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
295 	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
296 					 &kvm_clock_notifier);
297 }
298 
299 static void allow_cpu_feat(unsigned long nr)
300 {
301 	set_bit_inv(nr, kvm_s390_available_cpu_feat);
302 }
303 
304 static inline int plo_test_bit(unsigned char nr)
305 {
306 	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
307 	int cc;
308 
309 	asm volatile(
310 		/* Parameter registers are ignored for "test bit" */
311 		"	plo	0,0,0,0(0)\n"
312 		"	ipm	%0\n"
313 		"	srl	%0,28\n"
314 		: "=d" (cc)
315 		: "d" (r0)
316 		: "cc");
317 	return cc == 0;
318 }
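/*
 * Note: ORing 0x100 into r0 selects PLO's "test bit" form, which only
 * asks whether function code nr is installed; condition code 0 then
 * means the function is available.
 */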
319 
320 static void kvm_s390_cpu_feat_init(void)
321 {
322 	int i;
323 
324 	for (i = 0; i < 256; ++i) {
325 		if (plo_test_bit(i))
326 			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
327 	}
328 
329 	if (test_facility(28)) /* TOD-clock steering */
330 		ptff(kvm_s390_available_subfunc.ptff,
331 		     sizeof(kvm_s390_available_subfunc.ptff),
332 		     PTFF_QAF);
333 
334 	if (test_facility(17)) { /* MSA */
335 		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
336 			      kvm_s390_available_subfunc.kmac);
337 		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
338 			      kvm_s390_available_subfunc.kmc);
339 		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
340 			      kvm_s390_available_subfunc.km);
341 		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
342 			      kvm_s390_available_subfunc.kimd);
343 		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
344 			      kvm_s390_available_subfunc.klmd);
345 	}
346 	if (test_facility(76)) /* MSA3 */
347 		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
348 			      kvm_s390_available_subfunc.pckmo);
349 	if (test_facility(77)) { /* MSA4 */
350 		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
351 			      kvm_s390_available_subfunc.kmctr);
352 		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
353 			      kvm_s390_available_subfunc.kmf);
354 		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
355 			      kvm_s390_available_subfunc.kmo);
356 		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
357 			      kvm_s390_available_subfunc.pcc);
358 	}
359 	if (test_facility(57)) /* MSA5 */
360 		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
361 			      kvm_s390_available_subfunc.ppno);
362 
363 	if (test_facility(146)) /* MSA8 */
364 		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
365 			      kvm_s390_available_subfunc.kma);
366 
367 	if (MACHINE_HAS_ESOP)
368 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
369 	/*
370 	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
371 	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
372 	 */
373 	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
374 	    !test_facility(3) || !nested)
375 		return;
376 	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
377 	if (sclp.has_64bscao)
378 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
379 	if (sclp.has_siif)
380 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
381 	if (sclp.has_gpere)
382 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
383 	if (sclp.has_gsls)
384 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
385 	if (sclp.has_ib)
386 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
387 	if (sclp.has_cei)
388 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
389 	if (sclp.has_ibs)
390 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
391 	if (sclp.has_kss)
392 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
393 	/*
394 	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
395 	 * all skey handling functions read/set the skey from the PGSTE
396 	 * instead of the real storage key.
397 	 *
398 	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will cause
399 	 * pages to be detected as preserved although they are resident.
400 	 *
401 	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
402 	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
403 	 *
404 	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
405 	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
406 	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
407 	 *
408 	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
409 	 * cannot easily shadow the SCA because of the ipte lock.
410 	 */
411 }
412 
413 int kvm_arch_init(void *opaque)
414 {
415 	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
416 	if (!kvm_s390_dbf)
417 		return -ENOMEM;
418 
419 	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
420 		debug_unregister(kvm_s390_dbf);
421 		return -ENOMEM;
422 	}
423 
424 	kvm_s390_cpu_feat_init();
425 
426 	/* Register floating interrupt controller interface. */
427 	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
428 }
429 
430 void kvm_arch_exit(void)
431 {
432 	debug_unregister(kvm_s390_dbf);
433 }
434 
435 /* Section: device related */
436 long kvm_arch_dev_ioctl(struct file *filp,
437 			unsigned int ioctl, unsigned long arg)
438 {
439 	if (ioctl == KVM_S390_ENABLE_SIE)
440 		return s390_enable_sie();
441 	return -EINVAL;
442 }
443 
444 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
445 {
446 	int r;
447 
448 	switch (ext) {
449 	case KVM_CAP_S390_PSW:
450 	case KVM_CAP_S390_GMAP:
451 	case KVM_CAP_SYNC_MMU:
452 #ifdef CONFIG_KVM_S390_UCONTROL
453 	case KVM_CAP_S390_UCONTROL:
454 #endif
455 	case KVM_CAP_ASYNC_PF:
456 	case KVM_CAP_SYNC_REGS:
457 	case KVM_CAP_ONE_REG:
458 	case KVM_CAP_ENABLE_CAP:
459 	case KVM_CAP_S390_CSS_SUPPORT:
460 	case KVM_CAP_IOEVENTFD:
461 	case KVM_CAP_DEVICE_CTRL:
462 	case KVM_CAP_ENABLE_CAP_VM:
463 	case KVM_CAP_S390_IRQCHIP:
464 	case KVM_CAP_VM_ATTRIBUTES:
465 	case KVM_CAP_MP_STATE:
466 	case KVM_CAP_IMMEDIATE_EXIT:
467 	case KVM_CAP_S390_INJECT_IRQ:
468 	case KVM_CAP_S390_USER_SIGP:
469 	case KVM_CAP_S390_USER_STSI:
470 	case KVM_CAP_S390_SKEYS:
471 	case KVM_CAP_S390_IRQ_STATE:
472 	case KVM_CAP_S390_USER_INSTR0:
473 	case KVM_CAP_S390_CMMA_MIGRATION:
474 	case KVM_CAP_S390_AIS:
475 	case KVM_CAP_S390_AIS_MIGRATION:
476 		r = 1;
477 		break;
478 	case KVM_CAP_S390_MEM_OP:
479 		r = MEM_OP_MAX_SIZE;
480 		break;
481 	case KVM_CAP_NR_VCPUS:
482 	case KVM_CAP_MAX_VCPUS:
483 		r = KVM_S390_BSCA_CPU_SLOTS;
484 		if (!kvm_s390_use_sca_entries())
485 			r = KVM_MAX_VCPUS;
486 		else if (sclp.has_esca && sclp.has_64bscao)
487 			r = KVM_S390_ESCA_CPU_SLOTS;
488 		break;
489 	case KVM_CAP_NR_MEMSLOTS:
490 		r = KVM_USER_MEM_SLOTS;
491 		break;
492 	case KVM_CAP_S390_COW:
493 		r = MACHINE_HAS_ESOP;
494 		break;
495 	case KVM_CAP_S390_VECTOR_REGISTERS:
496 		r = MACHINE_HAS_VX;
497 		break;
498 	case KVM_CAP_S390_RI:
499 		r = test_facility(64);
500 		break;
501 	case KVM_CAP_S390_GS:
502 		r = test_facility(133);
503 		break;
504 	case KVM_CAP_S390_BPB:
505 		r = test_facility(82);
506 		break;
507 	default:
508 		r = 0;
509 	}
510 	return r;
511 }
512 
513 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
514 					struct kvm_memory_slot *memslot)
515 {
516 	gfn_t cur_gfn, last_gfn;
517 	unsigned long address;
518 	struct gmap *gmap = kvm->arch.gmap;
519 
520 	/* Loop over all guest pages */
521 	last_gfn = memslot->base_gfn + memslot->npages;
522 	for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
523 		address = gfn_to_hva_memslot(memslot, cur_gfn);
524 
525 		if (test_and_clear_guest_dirty(gmap->mm, address))
526 			mark_page_dirty(kvm, cur_gfn);
527 		if (fatal_signal_pending(current))
528 			return;
529 		cond_resched();
530 	}
531 }
532 
533 /* Section: vm related */
534 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
535 
536 /*
537  * Get (and clear) the dirty memory log for a memory slot.
538  */
539 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
540 			       struct kvm_dirty_log *log)
541 {
542 	int r;
543 	unsigned long n;
544 	struct kvm_memslots *slots;
545 	struct kvm_memory_slot *memslot;
546 	int is_dirty = 0;
547 
548 	if (kvm_is_ucontrol(kvm))
549 		return -EINVAL;
550 
551 	mutex_lock(&kvm->slots_lock);
552 
553 	r = -EINVAL;
554 	if (log->slot >= KVM_USER_MEM_SLOTS)
555 		goto out;
556 
557 	slots = kvm_memslots(kvm);
558 	memslot = id_to_memslot(slots, log->slot);
559 	r = -ENOENT;
560 	if (!memslot->dirty_bitmap)
561 		goto out;
562 
563 	kvm_s390_sync_dirty_log(kvm, memslot);
564 	r = kvm_get_dirty_log(kvm, log, &is_dirty);
565 	if (r)
566 		goto out;
567 
568 	/* Clear the dirty log */
569 	if (is_dirty) {
570 		n = kvm_dirty_bitmap_bytes(memslot);
571 		memset(memslot->dirty_bitmap, 0, n);
572 	}
573 	r = 0;
574 out:
575 	mutex_unlock(&kvm->slots_lock);
576 	return r;
577 }
578 
579 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
580 {
581 	unsigned int i;
582 	struct kvm_vcpu *vcpu;
583 
584 	kvm_for_each_vcpu(i, vcpu, kvm) {
585 		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
586 	}
587 }
588 
589 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
590 {
591 	int r;
592 
593 	if (cap->flags)
594 		return -EINVAL;
595 
596 	switch (cap->cap) {
597 	case KVM_CAP_S390_IRQCHIP:
598 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
599 		kvm->arch.use_irqchip = 1;
600 		r = 0;
601 		break;
602 	case KVM_CAP_S390_USER_SIGP:
603 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
604 		kvm->arch.user_sigp = 1;
605 		r = 0;
606 		break;
607 	case KVM_CAP_S390_VECTOR_REGISTERS:
608 		mutex_lock(&kvm->lock);
609 		if (kvm->created_vcpus) {
610 			r = -EBUSY;
611 		} else if (MACHINE_HAS_VX) {
612 			set_kvm_facility(kvm->arch.model.fac_mask, 129);
613 			set_kvm_facility(kvm->arch.model.fac_list, 129);
614 			if (test_facility(134)) {
615 				set_kvm_facility(kvm->arch.model.fac_mask, 134);
616 				set_kvm_facility(kvm->arch.model.fac_list, 134);
617 			}
618 			if (test_facility(135)) {
619 				set_kvm_facility(kvm->arch.model.fac_mask, 135);
620 				set_kvm_facility(kvm->arch.model.fac_list, 135);
621 			}
622 			r = 0;
623 		} else
624 			r = -EINVAL;
625 		mutex_unlock(&kvm->lock);
626 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
627 			 r ? "(not available)" : "(success)");
628 		break;
629 	case KVM_CAP_S390_RI:
630 		r = -EINVAL;
631 		mutex_lock(&kvm->lock);
632 		if (kvm->created_vcpus) {
633 			r = -EBUSY;
634 		} else if (test_facility(64)) {
635 			set_kvm_facility(kvm->arch.model.fac_mask, 64);
636 			set_kvm_facility(kvm->arch.model.fac_list, 64);
637 			r = 0;
638 		}
639 		mutex_unlock(&kvm->lock);
640 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
641 			 r ? "(not available)" : "(success)");
642 		break;
643 	case KVM_CAP_S390_AIS:
644 		mutex_lock(&kvm->lock);
645 		if (kvm->created_vcpus) {
646 			r = -EBUSY;
647 		} else {
648 			set_kvm_facility(kvm->arch.model.fac_mask, 72);
649 			set_kvm_facility(kvm->arch.model.fac_list, 72);
650 			r = 0;
651 		}
652 		mutex_unlock(&kvm->lock);
653 		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
654 			 r ? "(not available)" : "(success)");
655 		break;
656 	case KVM_CAP_S390_GS:
657 		r = -EINVAL;
658 		mutex_lock(&kvm->lock);
659 		if (kvm->created_vcpus) {
660 			r = -EBUSY;
661 		} else if (test_facility(133)) {
662 			set_kvm_facility(kvm->arch.model.fac_mask, 133);
663 			set_kvm_facility(kvm->arch.model.fac_list, 133);
664 			r = 0;
665 		}
666 		mutex_unlock(&kvm->lock);
667 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
668 			 r ? "(not available)" : "(success)");
669 		break;
670 	case KVM_CAP_S390_USER_STSI:
671 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
672 		kvm->arch.user_stsi = 1;
673 		r = 0;
674 		break;
675 	case KVM_CAP_S390_USER_INSTR0:
676 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
677 		kvm->arch.user_instr0 = 1;
678 		icpt_operexc_on_all_vcpus(kvm);
679 		r = 0;
680 		break;
681 	default:
682 		r = -EINVAL;
683 		break;
684 	}
685 	return r;
686 }
687 
688 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
689 {
690 	int ret;
691 
692 	switch (attr->attr) {
693 	case KVM_S390_VM_MEM_LIMIT_SIZE:
694 		ret = 0;
695 		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
696 			 kvm->arch.mem_limit);
697 		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
698 			ret = -EFAULT;
699 		break;
700 	default:
701 		ret = -ENXIO;
702 		break;
703 	}
704 	return ret;
705 }
706 
707 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
708 {
709 	int ret;
710 	unsigned int idx;
711 	switch (attr->attr) {
712 	case KVM_S390_VM_MEM_ENABLE_CMMA:
713 		ret = -ENXIO;
714 		if (!sclp.has_cmma)
715 			break;
716 
717 		ret = -EBUSY;
718 		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
719 		mutex_lock(&kvm->lock);
720 		if (!kvm->created_vcpus) {
721 			kvm->arch.use_cmma = 1;
722 			/* Not compatible with cmma. */
723 			kvm->arch.use_pfmfi = 0;
724 			ret = 0;
725 		}
726 		mutex_unlock(&kvm->lock);
727 		break;
728 	case KVM_S390_VM_MEM_CLR_CMMA:
729 		ret = -ENXIO;
730 		if (!sclp.has_cmma)
731 			break;
732 		ret = -EINVAL;
733 		if (!kvm->arch.use_cmma)
734 			break;
735 
736 		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
737 		mutex_lock(&kvm->lock);
738 		idx = srcu_read_lock(&kvm->srcu);
739 		s390_reset_cmma(kvm->arch.gmap->mm);
740 		srcu_read_unlock(&kvm->srcu, idx);
741 		mutex_unlock(&kvm->lock);
742 		ret = 0;
743 		break;
744 	case KVM_S390_VM_MEM_LIMIT_SIZE: {
745 		unsigned long new_limit;
746 
747 		if (kvm_is_ucontrol(kvm))
748 			return -EINVAL;
749 
750 		if (get_user(new_limit, (u64 __user *)attr->addr))
751 			return -EFAULT;
752 
753 		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
754 		    new_limit > kvm->arch.mem_limit)
755 			return -E2BIG;
756 
757 		if (!new_limit)
758 			return -EINVAL;
759 
760 		/* gmap_create takes last usable address */
761 		if (new_limit != KVM_S390_NO_MEM_LIMIT)
762 			new_limit -= 1;
763 
764 		ret = -EBUSY;
765 		mutex_lock(&kvm->lock);
766 		if (!kvm->created_vcpus) {
767 			/* gmap_create will round the limit up */
768 			struct gmap *new = gmap_create(current->mm, new_limit);
769 
770 			if (!new) {
771 				ret = -ENOMEM;
772 			} else {
773 				gmap_remove(kvm->arch.gmap);
774 				new->private = kvm;
775 				kvm->arch.gmap = new;
776 				ret = 0;
777 			}
778 		}
779 		mutex_unlock(&kvm->lock);
780 		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
781 		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
782 			 (void *) kvm->arch.gmap->asce);
783 		break;
784 	}
785 	default:
786 		ret = -ENXIO;
787 		break;
788 	}
789 	return ret;
790 }
791 
792 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
793 
794 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
795 {
796 	struct kvm_vcpu *vcpu;
797 	int i;
798 
799 	if (!test_kvm_facility(kvm, 76))
800 		return -EINVAL;
801 
802 	mutex_lock(&kvm->lock);
803 	switch (attr->attr) {
804 	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
805 		get_random_bytes(
806 			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
807 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
808 		kvm->arch.crypto.aes_kw = 1;
809 		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
810 		break;
811 	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
812 		get_random_bytes(
813 			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
814 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
815 		kvm->arch.crypto.dea_kw = 1;
816 		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
817 		break;
818 	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
819 		kvm->arch.crypto.aes_kw = 0;
820 		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
821 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
822 		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
823 		break;
824 	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
825 		kvm->arch.crypto.dea_kw = 0;
826 		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
827 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
828 		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
829 		break;
830 	default:
831 		mutex_unlock(&kvm->lock);
832 		return -ENXIO;
833 	}
834 
835 	kvm_for_each_vcpu(i, vcpu, kvm) {
836 		kvm_s390_vcpu_crypto_setup(vcpu);
837 		exit_sie(vcpu);
838 	}
839 	mutex_unlock(&kvm->lock);
840 	return 0;
841 }
842 
843 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
844 {
845 	int cx;
846 	struct kvm_vcpu *vcpu;
847 
848 	kvm_for_each_vcpu(cx, vcpu, kvm)
849 		kvm_s390_sync_request(req, vcpu);
850 }
851 
852 /*
853  * Must be called with kvm->srcu held to avoid races on memslots, and with
854  * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
855  */
856 static int kvm_s390_vm_start_migration(struct kvm *kvm)
857 {
858 	struct kvm_s390_migration_state *mgs;
859 	struct kvm_memory_slot *ms;
860 	/* should be the only one */
861 	struct kvm_memslots *slots;
862 	unsigned long ram_pages;
863 	int slotnr;
864 
865 	/* migration mode already enabled */
866 	if (kvm->arch.migration_state)
867 		return 0;
868 
869 	slots = kvm_memslots(kvm);
870 	if (!slots || !slots->used_slots)
871 		return -EINVAL;
872 
873 	mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
874 	if (!mgs)
875 		return -ENOMEM;
876 	kvm->arch.migration_state = mgs;
877 
878 	if (kvm->arch.use_cmma) {
879 		/*
880 		 * Get the first slot. They are reverse sorted by base_gfn, so
881 		 * the first slot is also the one at the end of the address
882 		 * space. We have verified above that at least one slot is
883 		 * present.
884 		 */
885 		ms = slots->memslots;
886 		/* round up so we only use full longs */
887 		ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
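		/*
		 * Illustration (arbitrary numbers): a slot ending at gfn
		 * 0x100001 yields ram_pages = roundup(0x100001, 64) = 0x100040,
		 * so the vmalloc() below asks for 0x100040 / 8 = 131080 bytes.
		 */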
888 		/* allocate enough bytes to store all the bits */
889 		mgs->pgste_bitmap = vmalloc(ram_pages / 8);
890 		if (!mgs->pgste_bitmap) {
891 			kfree(mgs);
892 			kvm->arch.migration_state = NULL;
893 			return -ENOMEM;
894 		}
895 
896 		mgs->bitmap_size = ram_pages;
897 		atomic64_set(&mgs->dirty_pages, ram_pages);
898 		/* mark all the pages in active slots as dirty */
899 		for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
900 			ms = slots->memslots + slotnr;
901 			bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
902 		}
903 
904 		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
905 	}
906 	return 0;
907 }
908 
909 /*
910  * Must be called with kvm->slots_lock to avoid races with ourselves and
911  * kvm_s390_vm_start_migration.
912  */
913 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
914 {
915 	struct kvm_s390_migration_state *mgs;
916 
917 	/* migration mode already disabled */
918 	if (!kvm->arch.migration_state)
919 		return 0;
920 	mgs = kvm->arch.migration_state;
921 	kvm->arch.migration_state = NULL;
922 
923 	if (kvm->arch.use_cmma) {
924 		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
925 		/* We have to wait for the essa emulation to finish */
926 		synchronize_srcu(&kvm->srcu);
927 		vfree(mgs->pgste_bitmap);
928 	}
929 	kfree(mgs);
930 	return 0;
931 }
932 
933 static int kvm_s390_vm_set_migration(struct kvm *kvm,
934 				     struct kvm_device_attr *attr)
935 {
936 	int res = -ENXIO;
937 
938 	mutex_lock(&kvm->slots_lock);
939 	switch (attr->attr) {
940 	case KVM_S390_VM_MIGRATION_START:
941 		res = kvm_s390_vm_start_migration(kvm);
942 		break;
943 	case KVM_S390_VM_MIGRATION_STOP:
944 		res = kvm_s390_vm_stop_migration(kvm);
945 		break;
946 	default:
947 		break;
948 	}
949 	mutex_unlock(&kvm->slots_lock);
950 
951 	return res;
952 }
953 
954 static int kvm_s390_vm_get_migration(struct kvm *kvm,
955 				     struct kvm_device_attr *attr)
956 {
957 	u64 mig = (kvm->arch.migration_state != NULL);
958 
959 	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
960 		return -ENXIO;
961 
962 	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
963 		return -EFAULT;
964 	return 0;
965 }
966 
967 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
968 {
969 	struct kvm_s390_vm_tod_clock gtod;
970 
971 	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
972 		return -EFAULT;
973 
974 	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
975 		return -EINVAL;
976 	kvm_s390_set_tod_clock(kvm, &gtod);
977 
978 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
979 		gtod.epoch_idx, gtod.tod);
980 
981 	return 0;
982 }
983 
984 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
985 {
986 	u8 gtod_high;
987 
988 	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
989 					   sizeof(gtod_high)))
990 		return -EFAULT;
991 
992 	if (gtod_high != 0)
993 		return -EINVAL;
994 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
995 
996 	return 0;
997 }
998 
999 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1000 {
1001 	struct kvm_s390_vm_tod_clock gtod = { 0 };
1002 
1003 	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1004 			   sizeof(gtod.tod)))
1005 		return -EFAULT;
1006 
1007 	kvm_s390_set_tod_clock(kvm, &gtod);
1008 	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1009 	return 0;
1010 }
1011 
1012 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1013 {
1014 	int ret;
1015 
1016 	if (attr->flags)
1017 		return -EINVAL;
1018 
1019 	switch (attr->attr) {
1020 	case KVM_S390_VM_TOD_EXT:
1021 		ret = kvm_s390_set_tod_ext(kvm, attr);
1022 		break;
1023 	case KVM_S390_VM_TOD_HIGH:
1024 		ret = kvm_s390_set_tod_high(kvm, attr);
1025 		break;
1026 	case KVM_S390_VM_TOD_LOW:
1027 		ret = kvm_s390_set_tod_low(kvm, attr);
1028 		break;
1029 	default:
1030 		ret = -ENXIO;
1031 		break;
1032 	}
1033 	return ret;
1034 }
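/*
 * Illustrative userspace sketch (hypothetical variable names): the TOD
 * attributes above are driven via KVM_SET_DEVICE_ATTR on the VM fd, e.g.
 *
 *	__u64 tod = 0x1234;
 *	struct kvm_device_attr attr = {
 *		.group	= KVM_S390_VM_TOD,
 *		.attr	= KVM_S390_VM_TOD_LOW,
 *		.addr	= (__u64)&tod,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */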
1035 
1036 static void kvm_s390_get_tod_clock_ext(struct kvm *kvm,
1037 					struct kvm_s390_vm_tod_clock *gtod)
1038 {
1039 	struct kvm_s390_tod_clock_ext htod;
1040 
1041 	preempt_disable();
1042 
1043 	get_tod_clock_ext((char *)&htod);
1044 
1045 	gtod->tod = htod.tod + kvm->arch.epoch;
1046 	gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
1047 
1048 	if (gtod->tod < htod.tod)
1049 		gtod->epoch_idx += 1;
1050 
1051 	preempt_enable();
1052 }
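/*
 * Note: "gtod->tod < htod.tod" detects that the unsigned 64-bit addition
 * of the epoch wrapped around, in which case the carry is propagated
 * into the epoch index.
 */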
1053 
1054 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1055 {
1056 	struct kvm_s390_vm_tod_clock gtod;
1057 
1058 	memset(&gtod, 0, sizeof(gtod));
1059 
1060 	if (test_kvm_facility(kvm, 139))
1061 		kvm_s390_get_tod_clock_ext(kvm, &gtod);
1062 	else
1063 		gtod.tod = kvm_s390_get_tod_clock_fast(kvm);
1064 
1065 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1066 		return -EFAULT;
1067 
1068 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1069 		gtod.epoch_idx, gtod.tod);
1070 	return 0;
1071 }
1072 
1073 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1074 {
1075 	u8 gtod_high = 0;
1076 
1077 	if (copy_to_user((void __user *)attr->addr, &gtod_high,
1078 					 sizeof(gtod_high)))
1079 		return -EFAULT;
1080 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1081 
1082 	return 0;
1083 }
1084 
1085 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1086 {
1087 	u64 gtod;
1088 
1089 	gtod = kvm_s390_get_tod_clock_fast(kvm);
1090 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1091 		return -EFAULT;
1092 	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1093 
1094 	return 0;
1095 }
1096 
1097 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1098 {
1099 	int ret;
1100 
1101 	if (attr->flags)
1102 		return -EINVAL;
1103 
1104 	switch (attr->attr) {
1105 	case KVM_S390_VM_TOD_EXT:
1106 		ret = kvm_s390_get_tod_ext(kvm, attr);
1107 		break;
1108 	case KVM_S390_VM_TOD_HIGH:
1109 		ret = kvm_s390_get_tod_high(kvm, attr);
1110 		break;
1111 	case KVM_S390_VM_TOD_LOW:
1112 		ret = kvm_s390_get_tod_low(kvm, attr);
1113 		break;
1114 	default:
1115 		ret = -ENXIO;
1116 		break;
1117 	}
1118 	return ret;
1119 }
1120 
1121 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1122 {
1123 	struct kvm_s390_vm_cpu_processor *proc;
1124 	u16 lowest_ibc, unblocked_ibc;
1125 	int ret = 0;
1126 
1127 	mutex_lock(&kvm->lock);
1128 	if (kvm->created_vcpus) {
1129 		ret = -EBUSY;
1130 		goto out;
1131 	}
1132 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1133 	if (!proc) {
1134 		ret = -ENOMEM;
1135 		goto out;
1136 	}
1137 	if (!copy_from_user(proc, (void __user *)attr->addr,
1138 			    sizeof(*proc))) {
1139 		kvm->arch.model.cpuid = proc->cpuid;
1140 		lowest_ibc = sclp.ibc >> 16 & 0xfff;
1141 		unblocked_ibc = sclp.ibc & 0xfff;
1142 		if (lowest_ibc && proc->ibc) {
1143 			if (proc->ibc > unblocked_ibc)
1144 				kvm->arch.model.ibc = unblocked_ibc;
1145 			else if (proc->ibc < lowest_ibc)
1146 				kvm->arch.model.ibc = lowest_ibc;
1147 			else
1148 				kvm->arch.model.ibc = proc->ibc;
1149 		}
1150 		memcpy(kvm->arch.model.fac_list, proc->fac_list,
1151 		       S390_ARCH_FAC_LIST_SIZE_BYTE);
1152 		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1153 			 kvm->arch.model.ibc,
1154 			 kvm->arch.model.cpuid);
1155 		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1156 			 kvm->arch.model.fac_list[0],
1157 			 kvm->arch.model.fac_list[1],
1158 			 kvm->arch.model.fac_list[2]);
1159 	} else
1160 		ret = -EFAULT;
1161 	kfree(proc);
1162 out:
1163 	mutex_unlock(&kvm->lock);
1164 	return ret;
1165 }
1166 
1167 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1168 				       struct kvm_device_attr *attr)
1169 {
1170 	struct kvm_s390_vm_cpu_feat data;
1171 
1172 	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1173 		return -EFAULT;
1174 	if (!bitmap_subset((unsigned long *) data.feat,
1175 			   kvm_s390_available_cpu_feat,
1176 			   KVM_S390_VM_CPU_FEAT_NR_BITS))
1177 		return -EINVAL;
1178 
1179 	mutex_lock(&kvm->lock);
1180 	if (kvm->created_vcpus) {
1181 		mutex_unlock(&kvm->lock);
1182 		return -EBUSY;
1183 	}
1184 	bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1185 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1186 	mutex_unlock(&kvm->lock);
1187 	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1188 			 data.feat[0],
1189 			 data.feat[1],
1190 			 data.feat[2]);
1191 	return 0;
1192 }
1193 
1194 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1195 					  struct kvm_device_attr *attr)
1196 {
1197 	/*
1198 	 * Once supported by kernel + hw, we have to store the subfunctions
1199 	 * in kvm->arch and remember that user space configured them.
1200 	 */
1201 	return -ENXIO;
1202 }
1203 
1204 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1205 {
1206 	int ret = -ENXIO;
1207 
1208 	switch (attr->attr) {
1209 	case KVM_S390_VM_CPU_PROCESSOR:
1210 		ret = kvm_s390_set_processor(kvm, attr);
1211 		break;
1212 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1213 		ret = kvm_s390_set_processor_feat(kvm, attr);
1214 		break;
1215 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1216 		ret = kvm_s390_set_processor_subfunc(kvm, attr);
1217 		break;
1218 	}
1219 	return ret;
1220 }
1221 
1222 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1223 {
1224 	struct kvm_s390_vm_cpu_processor *proc;
1225 	int ret = 0;
1226 
1227 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1228 	if (!proc) {
1229 		ret = -ENOMEM;
1230 		goto out;
1231 	}
1232 	proc->cpuid = kvm->arch.model.cpuid;
1233 	proc->ibc = kvm->arch.model.ibc;
1234 	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1235 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1236 	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1237 		 kvm->arch.model.ibc,
1238 		 kvm->arch.model.cpuid);
1239 	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1240 		 kvm->arch.model.fac_list[0],
1241 		 kvm->arch.model.fac_list[1],
1242 		 kvm->arch.model.fac_list[2]);
1243 	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1244 		ret = -EFAULT;
1245 	kfree(proc);
1246 out:
1247 	return ret;
1248 }
1249 
1250 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1251 {
1252 	struct kvm_s390_vm_cpu_machine *mach;
1253 	int ret = 0;
1254 
1255 	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1256 	if (!mach) {
1257 		ret = -ENOMEM;
1258 		goto out;
1259 	}
1260 	get_cpu_id((struct cpuid *) &mach->cpuid);
1261 	mach->ibc = sclp.ibc;
1262 	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1263 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1264 	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1265 	       sizeof(S390_lowcore.stfle_fac_list));
1266 	VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1267 		 kvm->arch.model.ibc,
1268 		 kvm->arch.model.cpuid);
1269 	VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1270 		 mach->fac_mask[0],
1271 		 mach->fac_mask[1],
1272 		 mach->fac_mask[2]);
1273 	VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1274 		 mach->fac_list[0],
1275 		 mach->fac_list[1],
1276 		 mach->fac_list[2]);
1277 	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1278 		ret = -EFAULT;
1279 	kfree(mach);
1280 out:
1281 	return ret;
1282 }
1283 
1284 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1285 				       struct kvm_device_attr *attr)
1286 {
1287 	struct kvm_s390_vm_cpu_feat data;
1288 
1289 	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1290 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1291 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1292 		return -EFAULT;
1293 	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1294 			 data.feat[0],
1295 			 data.feat[1],
1296 			 data.feat[2]);
1297 	return 0;
1298 }
1299 
1300 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1301 				     struct kvm_device_attr *attr)
1302 {
1303 	struct kvm_s390_vm_cpu_feat data;
1304 
1305 	bitmap_copy((unsigned long *) data.feat,
1306 		    kvm_s390_available_cpu_feat,
1307 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1308 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1309 		return -EFAULT;
1310 	VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1311 			 data.feat[0],
1312 			 data.feat[1],
1313 			 data.feat[2]);
1314 	return 0;
1315 }
1316 
1317 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1318 					  struct kvm_device_attr *attr)
1319 {
1320 	/*
1321 	 * Once we can actually configure subfunctions (kernel + hw support),
1322 	 * we have to check if they were already set by user space, if so copy
1323 	 * them from kvm->arch.
1324 	 */
1325 	return -ENXIO;
1326 }
1327 
1328 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1329 					struct kvm_device_attr *attr)
1330 {
1331 	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1332 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1333 		return -EFAULT;
1334 	return 0;
1335 }
1336 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1337 {
1338 	int ret = -ENXIO;
1339 
1340 	switch (attr->attr) {
1341 	case KVM_S390_VM_CPU_PROCESSOR:
1342 		ret = kvm_s390_get_processor(kvm, attr);
1343 		break;
1344 	case KVM_S390_VM_CPU_MACHINE:
1345 		ret = kvm_s390_get_machine(kvm, attr);
1346 		break;
1347 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1348 		ret = kvm_s390_get_processor_feat(kvm, attr);
1349 		break;
1350 	case KVM_S390_VM_CPU_MACHINE_FEAT:
1351 		ret = kvm_s390_get_machine_feat(kvm, attr);
1352 		break;
1353 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1354 		ret = kvm_s390_get_processor_subfunc(kvm, attr);
1355 		break;
1356 	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1357 		ret = kvm_s390_get_machine_subfunc(kvm, attr);
1358 		break;
1359 	}
1360 	return ret;
1361 }
1362 
1363 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1364 {
1365 	int ret;
1366 
1367 	switch (attr->group) {
1368 	case KVM_S390_VM_MEM_CTRL:
1369 		ret = kvm_s390_set_mem_control(kvm, attr);
1370 		break;
1371 	case KVM_S390_VM_TOD:
1372 		ret = kvm_s390_set_tod(kvm, attr);
1373 		break;
1374 	case KVM_S390_VM_CPU_MODEL:
1375 		ret = kvm_s390_set_cpu_model(kvm, attr);
1376 		break;
1377 	case KVM_S390_VM_CRYPTO:
1378 		ret = kvm_s390_vm_set_crypto(kvm, attr);
1379 		break;
1380 	case KVM_S390_VM_MIGRATION:
1381 		ret = kvm_s390_vm_set_migration(kvm, attr);
1382 		break;
1383 	default:
1384 		ret = -ENXIO;
1385 		break;
1386 	}
1387 
1388 	return ret;
1389 }
1390 
1391 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1392 {
1393 	int ret;
1394 
1395 	switch (attr->group) {
1396 	case KVM_S390_VM_MEM_CTRL:
1397 		ret = kvm_s390_get_mem_control(kvm, attr);
1398 		break;
1399 	case KVM_S390_VM_TOD:
1400 		ret = kvm_s390_get_tod(kvm, attr);
1401 		break;
1402 	case KVM_S390_VM_CPU_MODEL:
1403 		ret = kvm_s390_get_cpu_model(kvm, attr);
1404 		break;
1405 	case KVM_S390_VM_MIGRATION:
1406 		ret = kvm_s390_vm_get_migration(kvm, attr);
1407 		break;
1408 	default:
1409 		ret = -ENXIO;
1410 		break;
1411 	}
1412 
1413 	return ret;
1414 }
1415 
1416 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1417 {
1418 	int ret;
1419 
1420 	switch (attr->group) {
1421 	case KVM_S390_VM_MEM_CTRL:
1422 		switch (attr->attr) {
1423 		case KVM_S390_VM_MEM_ENABLE_CMMA:
1424 		case KVM_S390_VM_MEM_CLR_CMMA:
1425 			ret = sclp.has_cmma ? 0 : -ENXIO;
1426 			break;
1427 		case KVM_S390_VM_MEM_LIMIT_SIZE:
1428 			ret = 0;
1429 			break;
1430 		default:
1431 			ret = -ENXIO;
1432 			break;
1433 		}
1434 		break;
1435 	case KVM_S390_VM_TOD:
1436 		switch (attr->attr) {
1437 		case KVM_S390_VM_TOD_LOW:
1438 		case KVM_S390_VM_TOD_HIGH:
1439 			ret = 0;
1440 			break;
1441 		default:
1442 			ret = -ENXIO;
1443 			break;
1444 		}
1445 		break;
1446 	case KVM_S390_VM_CPU_MODEL:
1447 		switch (attr->attr) {
1448 		case KVM_S390_VM_CPU_PROCESSOR:
1449 		case KVM_S390_VM_CPU_MACHINE:
1450 		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1451 		case KVM_S390_VM_CPU_MACHINE_FEAT:
1452 		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1453 			ret = 0;
1454 			break;
1455 		/* configuring subfunctions is not supported yet */
1456 		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1457 		default:
1458 			ret = -ENXIO;
1459 			break;
1460 		}
1461 		break;
1462 	case KVM_S390_VM_CRYPTO:
1463 		switch (attr->attr) {
1464 		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1465 		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1466 		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1467 		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1468 			ret = 0;
1469 			break;
1470 		default:
1471 			ret = -ENXIO;
1472 			break;
1473 		}
1474 		break;
1475 	case KVM_S390_VM_MIGRATION:
1476 		ret = 0;
1477 		break;
1478 	default:
1479 		ret = -ENXIO;
1480 		break;
1481 	}
1482 
1483 	return ret;
1484 }
1485 
1486 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1487 {
1488 	uint8_t *keys;
1489 	uint64_t hva;
1490 	int srcu_idx, i, r = 0;
1491 
1492 	if (args->flags != 0)
1493 		return -EINVAL;
1494 
1495 	/* Is this guest using storage keys? */
1496 	if (!mm_use_skey(current->mm))
1497 		return KVM_S390_GET_SKEYS_NONE;
1498 
1499 	/* Enforce sane limit on memory allocation */
1500 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1501 		return -EINVAL;
1502 
1503 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1504 	if (!keys)
1505 		return -ENOMEM;
1506 
1507 	down_read(&current->mm->mmap_sem);
1508 	srcu_idx = srcu_read_lock(&kvm->srcu);
1509 	for (i = 0; i < args->count; i++) {
1510 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1511 		if (kvm_is_error_hva(hva)) {
1512 			r = -EFAULT;
1513 			break;
1514 		}
1515 
1516 		r = get_guest_storage_key(current->mm, hva, &keys[i]);
1517 		if (r)
1518 			break;
1519 	}
1520 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1521 	up_read(&current->mm->mmap_sem);
1522 
1523 	if (!r) {
1524 		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1525 				 sizeof(uint8_t) * args->count);
1526 		if (r)
1527 			r = -EFAULT;
1528 	}
1529 
1530 	kvfree(keys);
1531 	return r;
1532 }
1533 
1534 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1535 {
1536 	uint8_t *keys;
1537 	uint64_t hva;
1538 	int srcu_idx, i, r = 0;
1539 
1540 	if (args->flags != 0)
1541 		return -EINVAL;
1542 
1543 	/* Enforce sane limit on memory allocation */
1544 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1545 		return -EINVAL;
1546 
1547 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1548 	if (!keys)
1549 		return -ENOMEM;
1550 
1551 	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1552 			   sizeof(uint8_t) * args->count);
1553 	if (r) {
1554 		r = -EFAULT;
1555 		goto out;
1556 	}
1557 
1558 	/* Enable storage key handling for the guest */
1559 	r = s390_enable_skey();
1560 	if (r)
1561 		goto out;
1562 
1563 	down_read(&current->mm->mmap_sem);
1564 	srcu_idx = srcu_read_lock(&kvm->srcu);
1565 	for (i = 0; i < args->count; i++) {
1566 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1567 		if (kvm_is_error_hva(hva)) {
1568 			r = -EFAULT;
1569 			break;
1570 		}
1571 
1572 		/* Lowest order bit is reserved */
1573 		if (keys[i] & 0x01) {
1574 			r = -EINVAL;
1575 			break;
1576 		}
1577 
1578 		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1579 		if (r)
1580 			break;
1581 	}
1582 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1583 	up_read(&current->mm->mmap_sem);
1584 out:
1585 	kvfree(keys);
1586 	return r;
1587 }
1588 
1589 /*
1590  * Base address and length must be sent at the start of each block, therefore
1591  * it's cheaper to send some clean data, as long as it's less than the size of
1592  * two longs.
1593  */
1594 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
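/*
 * On 64-bit s390 this is 16 pages: with one result byte per page,
 * transmitting a gap of up to 16 clean pages costs no more than starting
 * a new block, whose header is two longs (16 bytes) of address + length.
 */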
1595 /* for consistency */
1596 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1597 
1598 /*
1599  * This function searches for the next page with dirty CMMA attributes, and
1600  * saves the attributes in the buffer up to either the end of the buffer or
1601  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
1602  * no trailing clean bytes are saved.
1603  * In case no dirty bits were found, or if CMMA was not enabled or used, the
1604  * output buffer will indicate 0 as length.
1605  */
1606 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
1607 				  struct kvm_s390_cmma_log *args)
1608 {
1609 	struct kvm_s390_migration_state *s = kvm->arch.migration_state;
1610 	unsigned long bufsize, hva, pgstev, i, next, cur;
1611 	int srcu_idx, peek, r = 0, rr;
1612 	u8 *res;
1613 
1614 	cur = args->start_gfn;
1615 	i = next = pgstev = 0;
1616 
1617 	if (unlikely(!kvm->arch.use_cmma))
1618 		return -ENXIO;
1619 	/* Invalid/unsupported flags were specified */
1620 	if (args->flags & ~KVM_S390_CMMA_PEEK)
1621 		return -EINVAL;
1622 	/* Migration mode query, and we are not doing a migration */
1623 	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
1624 	if (!peek && !s)
1625 		return -EINVAL;
1626 	/* CMMA is disabled or was not used, or the buffer has length zero */
1627 	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
1628 	if (!bufsize || !kvm->mm->context.uses_cmm) {
1629 		memset(args, 0, sizeof(*args));
1630 		return 0;
1631 	}
1632 
1633 	if (!peek) {
1634 		/* We are not peeking, and there are no dirty pages */
1635 		if (!atomic64_read(&s->dirty_pages)) {
1636 			memset(args, 0, sizeof(*args));
1637 			return 0;
1638 		}
1639 		cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
1640 				    args->start_gfn);
1641 		if (cur >= s->bitmap_size)	/* nothing found, loop back */
1642 			cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
1643 		if (cur >= s->bitmap_size) {	/* again! (very unlikely) */
1644 			memset(args, 0, sizeof(*args));
1645 			return 0;
1646 		}
1647 		next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
1648 	}
1649 
1650 	res = vmalloc(bufsize);
1651 	if (!res)
1652 		return -ENOMEM;
1653 
1654 	args->start_gfn = cur;
1655 
1656 	down_read(&kvm->mm->mmap_sem);
1657 	srcu_idx = srcu_read_lock(&kvm->srcu);
1658 	while (i < bufsize) {
1659 		hva = gfn_to_hva(kvm, cur);
1660 		if (kvm_is_error_hva(hva)) {
1661 			r = -EFAULT;
1662 			break;
1663 		}
1664 		/* decrement only if we actually flipped the bit to 0 */
1665 		if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
1666 			atomic64_dec(&s->dirty_pages);
1667 		r = get_pgste(kvm->mm, hva, &pgstev);
1668 		if (r < 0)
1669 			pgstev = 0;
1670 		/* save the value */
1671 		res[i++] = (pgstev >> 24) & 0x43;
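		/*
		 * 0x43 is (_PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT) >> 24,
		 * i.e. only the usage state and the NODAT bit are reported;
		 * kvm_s390_set_cmma_bits() accepts the same bits back.
		 */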
1672 		/*
1673 		 * if the next bit is too far away, stop.
1674 		 * if we reached the previous "next", find the next one
1675 		 */
1676 		if (!peek) {
1677 			if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
1678 				break;
1679 			if (cur == next)
1680 				next = find_next_bit(s->pgste_bitmap,
1681 						     s->bitmap_size, cur + 1);
1682 		/* reached the end of the bitmap or of the buffer, stop */
1683 			if ((next >= s->bitmap_size) ||
1684 			    (next >= args->start_gfn + bufsize))
1685 				break;
1686 		}
1687 		cur++;
1688 	}
1689 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1690 	up_read(&kvm->mm->mmap_sem);
1691 	args->count = i;
1692 	args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;
1693 
1694 	rr = copy_to_user((void __user *)args->values, res, args->count);
1695 	if (rr)
1696 		r = -EFAULT;
1697 
1698 	vfree(res);
1699 	return r;
1700 }
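/*
 * Illustrative userspace sketch (hypothetical buffer): in migration mode
 * the dirty CMMA values are read back with KVM_S390_GET_CMMA_BITS, e.g.
 *
 *	__u8 buf[4096];
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = 0,
 *		.count	   = sizeof(buf),
 *		.flags	   = 0,
 *		.values	   = (__u64)buf,
 *	};
 *	ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log);
 *	(on return, log.start_gfn and log.count describe the returned block)
 */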
1701 
1702 /*
1703  * This function sets the CMMA attributes for the given pages. If the input
1704  * buffer has zero length, no action is taken, otherwise the attributes are
1705  * set and the mm->context.uses_cmm flag is set.
1706  */
1707 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
1708 				  const struct kvm_s390_cmma_log *args)
1709 {
1710 	unsigned long hva, mask, pgstev, i;
1711 	uint8_t *bits;
1712 	int srcu_idx, r = 0;
1713 
1714 	mask = args->mask;
1715 
1716 	if (!kvm->arch.use_cmma)
1717 		return -ENXIO;
1718 	/* invalid/unsupported flags */
1719 	if (args->flags != 0)
1720 		return -EINVAL;
1721 	/* Enforce sane limit on memory allocation */
1722 	if (args->count > KVM_S390_CMMA_SIZE_MAX)
1723 		return -EINVAL;
1724 	/* Nothing to do */
1725 	if (args->count == 0)
1726 		return 0;
1727 
1728 	bits = vmalloc(sizeof(*bits) * args->count);
1729 	if (!bits)
1730 		return -ENOMEM;
1731 
1732 	r = copy_from_user(bits, (void __user *)args->values, args->count);
1733 	if (r) {
1734 		r = -EFAULT;
1735 		goto out;
1736 	}
1737 
1738 	down_read(&kvm->mm->mmap_sem);
1739 	srcu_idx = srcu_read_lock(&kvm->srcu);
1740 	for (i = 0; i < args->count; i++) {
1741 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1742 		if (kvm_is_error_hva(hva)) {
1743 			r = -EFAULT;
1744 			break;
1745 		}
1746 
1747 		pgstev = bits[i];
1748 		pgstev = pgstev << 24;
1749 		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
1750 		set_pgste_bits(kvm->mm, hva, mask, pgstev);
1751 	}
1752 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1753 	up_read(&kvm->mm->mmap_sem);
1754 
1755 	if (!kvm->mm->context.uses_cmm) {
1756 		down_write(&kvm->mm->mmap_sem);
1757 		kvm->mm->context.uses_cmm = 1;
1758 		up_write(&kvm->mm->mmap_sem);
1759 	}
1760 out:
1761 	vfree(bits);
1762 	return r;
1763 }
1764 
1765 long kvm_arch_vm_ioctl(struct file *filp,
1766 		       unsigned int ioctl, unsigned long arg)
1767 {
1768 	struct kvm *kvm = filp->private_data;
1769 	void __user *argp = (void __user *)arg;
1770 	struct kvm_device_attr attr;
1771 	int r;
1772 
1773 	switch (ioctl) {
1774 	case KVM_S390_INTERRUPT: {
1775 		struct kvm_s390_interrupt s390int;
1776 
1777 		r = -EFAULT;
1778 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
1779 			break;
1780 		r = kvm_s390_inject_vm(kvm, &s390int);
1781 		break;
1782 	}
1783 	case KVM_ENABLE_CAP: {
1784 		struct kvm_enable_cap cap;
1785 		r = -EFAULT;
1786 		if (copy_from_user(&cap, argp, sizeof(cap)))
1787 			break;
1788 		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1789 		break;
1790 	}
1791 	case KVM_CREATE_IRQCHIP: {
1792 		struct kvm_irq_routing_entry routing;
1793 
1794 		r = -EINVAL;
1795 		if (kvm->arch.use_irqchip) {
1796 			/* Set up dummy routing. */
1797 			memset(&routing, 0, sizeof(routing));
1798 			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1799 		}
1800 		break;
1801 	}
1802 	case KVM_SET_DEVICE_ATTR: {
1803 		r = -EFAULT;
1804 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1805 			break;
1806 		r = kvm_s390_vm_set_attr(kvm, &attr);
1807 		break;
1808 	}
1809 	case KVM_GET_DEVICE_ATTR: {
1810 		r = -EFAULT;
1811 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1812 			break;
1813 		r = kvm_s390_vm_get_attr(kvm, &attr);
1814 		break;
1815 	}
1816 	case KVM_HAS_DEVICE_ATTR: {
1817 		r = -EFAULT;
1818 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1819 			break;
1820 		r = kvm_s390_vm_has_attr(kvm, &attr);
1821 		break;
1822 	}
1823 	case KVM_S390_GET_SKEYS: {
1824 		struct kvm_s390_skeys args;
1825 
1826 		r = -EFAULT;
1827 		if (copy_from_user(&args, argp,
1828 				   sizeof(struct kvm_s390_skeys)))
1829 			break;
1830 		r = kvm_s390_get_skeys(kvm, &args);
1831 		break;
1832 	}
1833 	case KVM_S390_SET_SKEYS: {
1834 		struct kvm_s390_skeys args;
1835 
1836 		r = -EFAULT;
1837 		if (copy_from_user(&args, argp,
1838 				   sizeof(struct kvm_s390_skeys)))
1839 			break;
1840 		r = kvm_s390_set_skeys(kvm, &args);
1841 		break;
1842 	}
1843 	case KVM_S390_GET_CMMA_BITS: {
1844 		struct kvm_s390_cmma_log args;
1845 
1846 		r = -EFAULT;
1847 		if (copy_from_user(&args, argp, sizeof(args)))
1848 			break;
1849 		mutex_lock(&kvm->slots_lock);
1850 		r = kvm_s390_get_cmma_bits(kvm, &args);
1851 		mutex_unlock(&kvm->slots_lock);
1852 		if (!r) {
1853 			r = copy_to_user(argp, &args, sizeof(args));
1854 			if (r)
1855 				r = -EFAULT;
1856 		}
1857 		break;
1858 	}
1859 	case KVM_S390_SET_CMMA_BITS: {
1860 		struct kvm_s390_cmma_log args;
1861 
1862 		r = -EFAULT;
1863 		if (copy_from_user(&args, argp, sizeof(args)))
1864 			break;
1865 		mutex_lock(&kvm->slots_lock);
1866 		r = kvm_s390_set_cmma_bits(kvm, &args);
1867 		mutex_unlock(&kvm->slots_lock);
1868 		break;
1869 	}
1870 	default:
1871 		r = -ENOTTY;
1872 	}
1873 
1874 	return r;
1875 }
1876 
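/*
 * Query the AP configuration by executing PQAP with the QCI function
 * code (0x04000000) in the inline assembly below. The 128-byte result
 * block is stored in @config and the condition code of the instruction
 * is returned; a non-zero cc means the query did not complete.
 */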
1877 static int kvm_s390_query_ap_config(u8 *config)
1878 {
1879 	u32 fcn_code = 0x04000000UL;
1880 	u32 cc = 0;
1881 
1882 	memset(config, 0, 128);
1883 	asm volatile(
1884 		"lgr 0,%1\n"
1885 		"lgr 2,%2\n"
1886 		".long 0xb2af0000\n"		/* PQAP(QCI) */
1887 		"0: ipm %0\n"
1888 		"srl %0,28\n"
1889 		"1:\n"
1890 		EX_TABLE(0b, 1b)
1891 		: "+r" (cc)
1892 		: "r" (fcn_code), "r" (config)
1893 		: "cc", "0", "2", "memory"
1894 	);
1895 
1896 	return cc;
1897 }
1898 
1899 static int kvm_s390_apxa_installed(void)
1900 {
1901 	u8 config[128];
1902 	int cc;
1903 
1904 	if (test_facility(12)) {
1905 		cc = kvm_s390_query_ap_config(config);
1906 
1907 		if (cc)
1908 			pr_err("PQAP(QCI) failed with cc=%d", cc);
1909 		else
1910 			return config[0] & 0x40;
1911 	}
1912 
1913 	return 0;
1914 }
1915 
1916 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1917 {
1918 	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1919 
1920 	if (kvm_s390_apxa_installed())
1921 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1922 	else
1923 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1924 }
1925 
1926 static u64 kvm_s390_get_initial_cpuid(void)
1927 {
1928 	struct cpuid cpuid;
1929 
1930 	get_cpu_id(&cpuid);
1931 	cpuid.version = 0xff;
1932 	return *((u64 *) &cpuid);
1933 }
1934 
1935 static void kvm_s390_crypto_init(struct kvm *kvm)
1936 {
1937 	if (!test_kvm_facility(kvm, 76))
1938 		return;
1939 
1940 	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1941 	kvm_s390_set_crycb_format(kvm);
1942 
1943 	/* Enable AES/DEA protected key functions by default */
1944 	kvm->arch.crypto.aes_kw = 1;
1945 	kvm->arch.crypto.dea_kw = 1;
1946 	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1947 			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1948 	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1949 			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1950 }
1951 
1952 static void sca_dispose(struct kvm *kvm)
1953 {
1954 	if (kvm->arch.use_esca)
1955 		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1956 	else
1957 		free_page((unsigned long)(kvm->arch.sca));
1958 	kvm->arch.sca = NULL;
1959 }
1960 
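/*
 * Set up the architecture specific parts of a new VM: the SCA, the debug
 * feature, the shared sie_page2 with the facility lists, the CPU model,
 * crypto and floating interrupt state, and - unless this is a ucontrol
 * VM - the guest address space (gmap) with its memory limit.
 */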
1961 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1962 {
1963 	gfp_t alloc_flags = GFP_KERNEL;
1964 	int i, rc;
1965 	char debug_name[16];
1966 	static unsigned long sca_offset;
1967 
1968 	rc = -EINVAL;
1969 #ifdef CONFIG_KVM_S390_UCONTROL
1970 	if (type & ~KVM_VM_S390_UCONTROL)
1971 		goto out_err;
1972 	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1973 		goto out_err;
1974 #else
1975 	if (type)
1976 		goto out_err;
1977 #endif
1978 
1979 	rc = s390_enable_sie();
1980 	if (rc)
1981 		goto out_err;
1982 
1983 	rc = -ENOMEM;
1984 
1985 	kvm->arch.use_esca = 0; /* start with basic SCA */
1986 	if (!sclp.has_64bscao)
1987 		alloc_flags |= GFP_DMA;
1988 	rwlock_init(&kvm->arch.sca_lock);
1989 	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1990 	if (!kvm->arch.sca)
1991 		goto out_err;
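	/*
	 * Stagger each VM's basic SCA at a different 16-byte offset within
	 * its page (presumably so that the SCAs of different VMs do not all
	 * start on the same cache line); sca_offset wraps around before the
	 * block would cross the page boundary.
	 */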
1992 	spin_lock(&kvm_lock);
1993 	sca_offset += 16;
1994 	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1995 		sca_offset = 0;
1996 	kvm->arch.sca = (struct bsca_block *)
1997 			((char *) kvm->arch.sca + sca_offset);
1998 	spin_unlock(&kvm_lock);
1999 
2000 	sprintf(debug_name, "kvm-%u", current->pid);
2001 
2002 	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2003 	if (!kvm->arch.dbf)
2004 		goto out_err;
2005 
2006 	BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2007 	kvm->arch.sie_page2 =
2008 	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
2009 	if (!kvm->arch.sie_page2)
2010 		goto out_err;
2011 
2012 	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2013 
2014 	for (i = 0; i < kvm_s390_fac_size(); i++) {
2015 		kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
2016 					      (kvm_s390_fac_base[i] |
2017 					       kvm_s390_fac_ext[i]);
2018 		kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
2019 					      kvm_s390_fac_base[i];
2020 	}
2021 
2022 	/* we are always in czam mode - even on pre z14 machines */
2023 	set_kvm_facility(kvm->arch.model.fac_mask, 138);
2024 	set_kvm_facility(kvm->arch.model.fac_list, 138);
2025 	/* we emulate STHYI in kvm */
2026 	set_kvm_facility(kvm->arch.model.fac_mask, 74);
2027 	set_kvm_facility(kvm->arch.model.fac_list, 74);
2028 	if (MACHINE_HAS_TLB_GUEST) {
2029 		set_kvm_facility(kvm->arch.model.fac_mask, 147);
2030 		set_kvm_facility(kvm->arch.model.fac_list, 147);
2031 	}
2032 
2033 	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2034 	kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2035 
2036 	kvm_s390_crypto_init(kvm);
2037 
2038 	mutex_init(&kvm->arch.float_int.ais_lock);
2039 	kvm->arch.float_int.simm = 0;
2040 	kvm->arch.float_int.nimm = 0;
2041 	spin_lock_init(&kvm->arch.float_int.lock);
2042 	for (i = 0; i < FIRQ_LIST_COUNT; i++)
2043 		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2044 	init_waitqueue_head(&kvm->arch.ipte_wq);
2045 	mutex_init(&kvm->arch.ipte_mutex);
2046 
2047 	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2048 	VM_EVENT(kvm, 3, "vm created with type %lu", type);
2049 
2050 	if (type & KVM_VM_S390_UCONTROL) {
2051 		kvm->arch.gmap = NULL;
2052 		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2053 	} else {
2054 		if (sclp.hamax == U64_MAX)
2055 			kvm->arch.mem_limit = TASK_SIZE_MAX;
2056 		else
2057 			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2058 						    sclp.hamax + 1);
2059 		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2060 		if (!kvm->arch.gmap)
2061 			goto out_err;
2062 		kvm->arch.gmap->private = kvm;
2063 		kvm->arch.gmap->pfault_enabled = 0;
2064 	}
2065 
2066 	kvm->arch.css_support = 0;
2067 	kvm->arch.use_irqchip = 0;
2068 	kvm->arch.use_pfmfi = sclp.has_pfmfi;
2069 	kvm->arch.epoch = 0;
2070 
2071 	spin_lock_init(&kvm->arch.start_stop_lock);
2072 	kvm_s390_vsie_init(kvm);
2073 	kvm_s390_gisa_init(kvm);
2074 	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2075 
2076 	return 0;
2077 out_err:
2078 	free_page((unsigned long)kvm->arch.sie_page2);
2079 	debug_unregister(kvm->arch.dbf);
2080 	sca_dispose(kvm);
2081 	KVM_EVENT(3, "creation of vm failed: %d", rc);
2082 	return rc;
2083 }
2084 
2085 bool kvm_arch_has_vcpu_debugfs(void)
2086 {
2087 	return false;
2088 }
2089 
2090 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
2091 {
2092 	return 0;
2093 }
2094 
2095 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2096 {
2097 	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2098 	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2099 	kvm_s390_clear_local_irqs(vcpu);
2100 	kvm_clear_async_pf_completion_queue(vcpu);
2101 	if (!kvm_is_ucontrol(vcpu->kvm))
2102 		sca_del_vcpu(vcpu);
2103 
2104 	if (kvm_is_ucontrol(vcpu->kvm))
2105 		gmap_remove(vcpu->arch.gmap);
2106 
2107 	if (vcpu->kvm->arch.use_cmma)
2108 		kvm_s390_vcpu_unsetup_cmma(vcpu);
2109 	free_page((unsigned long)(vcpu->arch.sie_block));
2110 
2111 	kvm_vcpu_uninit(vcpu);
2112 	kmem_cache_free(kvm_vcpu_cache, vcpu);
2113 }
2114 
2115 static void kvm_free_vcpus(struct kvm *kvm)
2116 {
2117 	unsigned int i;
2118 	struct kvm_vcpu *vcpu;
2119 
2120 	kvm_for_each_vcpu(i, vcpu, kvm)
2121 		kvm_arch_vcpu_destroy(vcpu);
2122 
2123 	mutex_lock(&kvm->lock);
2124 	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2125 		kvm->vcpus[i] = NULL;
2126 
2127 	atomic_set(&kvm->online_vcpus, 0);
2128 	mutex_unlock(&kvm->lock);
2129 }
2130 
2131 void kvm_arch_destroy_vm(struct kvm *kvm)
2132 {
2133 	kvm_free_vcpus(kvm);
2134 	sca_dispose(kvm);
2135 	debug_unregister(kvm->arch.dbf);
2136 	kvm_s390_gisa_destroy(kvm);
2137 	free_page((unsigned long)kvm->arch.sie_page2);
2138 	if (!kvm_is_ucontrol(kvm))
2139 		gmap_remove(kvm->arch.gmap);
2140 	kvm_s390_destroy_adapters(kvm);
2141 	kvm_s390_clear_float_irqs(kvm);
2142 	kvm_s390_vsie_destroy(kvm);
2143 	if (kvm->arch.migration_state) {
2144 		vfree(kvm->arch.migration_state->pgste_bitmap);
2145 		kfree(kvm->arch.migration_state);
2146 	}
2147 	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2148 }
2149 
2150 /* Section: vcpu related */
2151 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2152 {
2153 	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2154 	if (!vcpu->arch.gmap)
2155 		return -ENOMEM;
2156 	vcpu->arch.gmap->private = vcpu->kvm;
2157 
2158 	return 0;
2159 }
2160 
2161 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2162 {
2163 	if (!kvm_s390_use_sca_entries())
2164 		return;
2165 	read_lock(&vcpu->kvm->arch.sca_lock);
2166 	if (vcpu->kvm->arch.use_esca) {
2167 		struct esca_block *sca = vcpu->kvm->arch.sca;
2168 
2169 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2170 		sca->cpu[vcpu->vcpu_id].sda = 0;
2171 	} else {
2172 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2173 
2174 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2175 		sca->cpu[vcpu->vcpu_id].sda = 0;
2176 	}
2177 	read_unlock(&vcpu->kvm->arch.sca_lock);
2178 }
2179 
2180 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2181 {
2182 	if (!kvm_s390_use_sca_entries()) {
2183 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2184 
2185 		/* we still need the basic sca for the ipte control */
2186 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2187 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2188 		return;
2189 	}
2190 	read_lock(&vcpu->kvm->arch.sca_lock);
2191 	if (vcpu->kvm->arch.use_esca) {
2192 		struct esca_block *sca = vcpu->kvm->arch.sca;
2193 
2194 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2195 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2196 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2197 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2198 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2199 	} else {
2200 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2201 
2202 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2203 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2204 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2205 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2206 	}
2207 	read_unlock(&vcpu->kvm->arch.sca_lock);
2208 }
2209 
2210 /* Basic SCA to Extended SCA data copy routines */
2211 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2212 {
2213 	d->sda = s->sda;
2214 	d->sigp_ctrl.c = s->sigp_ctrl.c;
2215 	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2216 }
2217 
2218 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2219 {
2220 	int i;
2221 
2222 	d->ipte_control = s->ipte_control;
2223 	d->mcn[0] = s->mcn;
2224 	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2225 		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2226 }
2227 
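/*
 * Replace the basic SCA with an extended SCA: all VCPUs are blocked and
 * the sca_lock is held for writing while the existing entries are copied
 * and every SIE control block is pointed at the new origin, so no VCPU
 * can enter SIE with a stale SCA pointer.
 */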
2228 static int sca_switch_to_extended(struct kvm *kvm)
2229 {
2230 	struct bsca_block *old_sca = kvm->arch.sca;
2231 	struct esca_block *new_sca;
2232 	struct kvm_vcpu *vcpu;
2233 	unsigned int vcpu_idx;
2234 	u32 scaol, scaoh;
2235 
2236 	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2237 	if (!new_sca)
2238 		return -ENOMEM;
2239 
2240 	scaoh = (u32)((u64)(new_sca) >> 32);
2241 	scaol = (u32)(u64)(new_sca) & ~0x3fU;
2242 
2243 	kvm_s390_vcpu_block_all(kvm);
2244 	write_lock(&kvm->arch.sca_lock);
2245 
2246 	sca_copy_b_to_e(new_sca, old_sca);
2247 
2248 	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2249 		vcpu->arch.sie_block->scaoh = scaoh;
2250 		vcpu->arch.sie_block->scaol = scaol;
2251 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2252 	}
2253 	kvm->arch.sca = new_sca;
2254 	kvm->arch.use_esca = 1;
2255 
2256 	write_unlock(&kvm->arch.sca_lock);
2257 	kvm_s390_vcpu_unblock_all(kvm);
2258 
2259 	free_page((unsigned long)old_sca);
2260 
2261 	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2262 		 old_sca, kvm->arch.sca);
2263 	return 0;
2264 }
2265 
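/*
 * Check whether a VCPU with the given id fits into the current SCA and
 * switch to the extended SCA on demand if the id exceeds the basic SCA
 * slots and the machine supports ESCA with 64-bit SCA origins.
 */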
2266 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2267 {
2268 	int rc;
2269 
2270 	if (!kvm_s390_use_sca_entries()) {
2271 		if (id < KVM_MAX_VCPUS)
2272 			return true;
2273 		return false;
2274 	}
2275 	if (id < KVM_S390_BSCA_CPU_SLOTS)
2276 		return true;
2277 	if (!sclp.has_esca || !sclp.has_64bscao)
2278 		return false;
2279 
2280 	mutex_lock(&kvm->lock);
2281 	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2282 	mutex_unlock(&kvm->lock);
2283 
2284 	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2285 }
2286 
2287 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2288 {
2289 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2290 	kvm_clear_async_pf_completion_queue(vcpu);
2291 	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2292 				    KVM_SYNC_GPRS |
2293 				    KVM_SYNC_ACRS |
2294 				    KVM_SYNC_CRS |
2295 				    KVM_SYNC_ARCH0 |
2296 				    KVM_SYNC_PFAULT;
2297 	kvm_s390_set_prefix(vcpu, 0);
2298 	if (test_kvm_facility(vcpu->kvm, 64))
2299 		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2300 	if (test_kvm_facility(vcpu->kvm, 82))
2301 		vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
2302 	if (test_kvm_facility(vcpu->kvm, 133))
2303 		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2304 	/* fprs can be synchronized via vrs, even if the guest has no vx. With
2305 	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2306 	 */
2307 	if (MACHINE_HAS_VX)
2308 		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2309 	else
2310 		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2311 
2312 	if (kvm_is_ucontrol(vcpu->kvm))
2313 		return __kvm_ucontrol_vcpu_init(vcpu);
2314 
2315 	return 0;
2316 }
2317 
2318 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2319 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2320 {
2321 	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2322 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2323 	vcpu->arch.cputm_start = get_tod_clock_fast();
2324 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2325 }
2326 
2327 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2328 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2329 {
2330 	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2331 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2332 	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2333 	vcpu->arch.cputm_start = 0;
2334 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2335 }
2336 
2337 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2338 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2339 {
2340 	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2341 	vcpu->arch.cputm_enabled = true;
2342 	__start_cpu_timer_accounting(vcpu);
2343 }
2344 
2345 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2346 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2347 {
2348 	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2349 	__stop_cpu_timer_accounting(vcpu);
2350 	vcpu->arch.cputm_enabled = false;
2351 }
2352 
2353 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2354 {
2355 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2356 	__enable_cpu_timer_accounting(vcpu);
2357 	preempt_enable();
2358 }
2359 
2360 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2361 {
2362 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2363 	__disable_cpu_timer_accounting(vcpu);
2364 	preempt_enable();
2365 }
2366 
2367 /* set the cpu timer - may only be called from the VCPU thread itself */
2368 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2369 {
2370 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2371 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2372 	if (vcpu->arch.cputm_enabled)
2373 		vcpu->arch.cputm_start = get_tod_clock_fast();
2374 	vcpu->arch.sie_block->cputm = cputm;
2375 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2376 	preempt_enable();
2377 }
2378 
2379 /* update and get the cpu timer - can also be called from other VCPU threads */
2380 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2381 {
2382 	unsigned int seq;
2383 	__u64 value;
2384 
2385 	if (unlikely(!vcpu->arch.cputm_enabled))
2386 		return vcpu->arch.sie_block->cputm;
2387 
2388 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2389 	do {
2390 		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2391 		/*
2392 		 * If the writer would ever execute a read in the critical
2393 		 * section, e.g. in irq context, we have a deadlock.
2394 		 */
2395 		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2396 		value = vcpu->arch.sie_block->cputm;
2397 		/* if cputm_start is 0, accounting is being started/stopped */
2398 		if (likely(vcpu->arch.cputm_start))
2399 			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2400 	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2401 	preempt_enable();
2402 	return value;
2403 }
2404 
2405 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2406 {
2407 
2408 	gmap_enable(vcpu->arch.enabled_gmap);
2409 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
2410 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2411 		__start_cpu_timer_accounting(vcpu);
2412 	vcpu->cpu = cpu;
2413 }
2414 
2415 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2416 {
2417 	vcpu->cpu = -1;
2418 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2419 		__stop_cpu_timer_accounting(vcpu);
2420 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
2421 	vcpu->arch.enabled_gmap = gmap_get_enabled();
2422 	gmap_disable(vcpu->arch.enabled_gmap);
2423 
2424 }
2425 
2426 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2427 {
2428 	/* this equals initial cpu reset in pop, but we don't switch to ESA */
2429 	vcpu->arch.sie_block->gpsw.mask = 0UL;
2430 	vcpu->arch.sie_block->gpsw.addr = 0UL;
2431 	kvm_s390_set_prefix(vcpu, 0);
2432 	kvm_s390_set_cpu_timer(vcpu, 0);
2433 	vcpu->arch.sie_block->ckc       = 0UL;
2434 	vcpu->arch.sie_block->todpr     = 0;
2435 	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2436 	vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
2437 	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
2438 	/* make sure the new fpc will be lazily loaded */
2439 	save_fpu_regs();
2440 	current->thread.fpu.fpc = 0;
2441 	vcpu->arch.sie_block->gbea = 1;
2442 	vcpu->arch.sie_block->pp = 0;
2443 	vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
2444 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2445 	kvm_clear_async_pf_completion_queue(vcpu);
2446 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2447 		kvm_s390_vcpu_stop(vcpu);
2448 	kvm_s390_clear_local_irqs(vcpu);
2449 }
2450 
2451 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2452 {
2453 	mutex_lock(&vcpu->kvm->lock);
2454 	preempt_disable();
2455 	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2456 	vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
2457 	preempt_enable();
2458 	mutex_unlock(&vcpu->kvm->lock);
2459 	if (!kvm_is_ucontrol(vcpu->kvm)) {
2460 		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2461 		sca_add_vcpu(vcpu);
2462 	}
2463 	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2464 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2465 	/* make vcpu_load load the right gmap on the first trigger */
2466 	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2467 }
2468 
2469 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2470 {
2471 	if (!test_kvm_facility(vcpu->kvm, 76))
2472 		return;
2473 
2474 	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2475 
2476 	if (vcpu->kvm->arch.crypto.aes_kw)
2477 		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2478 	if (vcpu->kvm->arch.crypto.dea_kw)
2479 		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2480 
2481 	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2482 }
2483 
2484 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2485 {
2486 	free_page(vcpu->arch.sie_block->cbrlo);
2487 	vcpu->arch.sie_block->cbrlo = 0;
2488 }
2489 
2490 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2491 {
2492 	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2493 	if (!vcpu->arch.sie_block->cbrlo)
2494 		return -ENOMEM;
2495 	return 0;
2496 }
2497 
2498 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2499 {
2500 	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2501 
2502 	vcpu->arch.sie_block->ibc = model->ibc;
2503 	if (test_kvm_facility(vcpu->kvm, 7))
2504 		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2505 }
2506 
2507 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2508 {
2509 	int rc = 0;
2510 
2511 	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2512 						    CPUSTAT_SM |
2513 						    CPUSTAT_STOPPED);
2514 
2515 	if (test_kvm_facility(vcpu->kvm, 78))
2516 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
2517 	else if (test_kvm_facility(vcpu->kvm, 8))
2518 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
2519 
2520 	kvm_s390_vcpu_setup_model(vcpu);
2521 
2522 	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2523 	if (MACHINE_HAS_ESOP)
2524 		vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2525 	if (test_kvm_facility(vcpu->kvm, 9))
2526 		vcpu->arch.sie_block->ecb |= ECB_SRSI;
2527 	if (test_kvm_facility(vcpu->kvm, 73))
2528 		vcpu->arch.sie_block->ecb |= ECB_TE;
2529 
2530 	if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
2531 		vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2532 	if (test_kvm_facility(vcpu->kvm, 130))
2533 		vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2534 	vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2535 	if (sclp.has_cei)
2536 		vcpu->arch.sie_block->eca |= ECA_CEI;
2537 	if (sclp.has_ib)
2538 		vcpu->arch.sie_block->eca |= ECA_IB;
2539 	if (sclp.has_siif)
2540 		vcpu->arch.sie_block->eca |= ECA_SII;
2541 	if (sclp.has_sigpif)
2542 		vcpu->arch.sie_block->eca |= ECA_SIGPI;
2543 	if (test_kvm_facility(vcpu->kvm, 129)) {
2544 		vcpu->arch.sie_block->eca |= ECA_VX;
2545 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2546 	}
2547 	if (test_kvm_facility(vcpu->kvm, 139))
2548 		vcpu->arch.sie_block->ecd |= ECD_MEF;
2549 
2550 	if (vcpu->arch.sie_block->gd) {
2551 		vcpu->arch.sie_block->eca |= ECA_AIV;
2552 		VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
2553 			   vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
2554 	}
2555 	vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2556 					| SDNXC;
2557 	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2558 
2559 	if (sclp.has_kss)
2560 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
2561 	else
2562 		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2563 
2564 	if (vcpu->kvm->arch.use_cmma) {
2565 		rc = kvm_s390_vcpu_setup_cmma(vcpu);
2566 		if (rc)
2567 			return rc;
2568 	}
2569 	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2570 	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2571 
2572 	kvm_s390_vcpu_crypto_setup(vcpu);
2573 
2574 	return rc;
2575 }
2576 
2577 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2578 				      unsigned int id)
2579 {
2580 	struct kvm_vcpu *vcpu;
2581 	struct sie_page *sie_page;
2582 	int rc = -EINVAL;
2583 
2584 	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2585 		goto out;
2586 
2587 	rc = -ENOMEM;
2588 
2589 	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2590 	if (!vcpu)
2591 		goto out;
2592 
2593 	BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
2594 	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2595 	if (!sie_page)
2596 		goto out_free_cpu;
2597 
2598 	vcpu->arch.sie_block = &sie_page->sie_block;
2599 	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2600 
2601 	/* the real guest size will always be smaller than msl */
2602 	vcpu->arch.sie_block->mso = 0;
2603 	vcpu->arch.sie_block->msl = sclp.hamax;
2604 
2605 	vcpu->arch.sie_block->icpua = id;
2606 	spin_lock_init(&vcpu->arch.local_int.lock);
2607 	vcpu->arch.sie_block->gd = (u32)(u64)kvm->arch.gisa;
2608 	if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
2609 		vcpu->arch.sie_block->gd |= GISA_FORMAT1;
2610 	seqcount_init(&vcpu->arch.cputm_seqcount);
2611 
2612 	rc = kvm_vcpu_init(vcpu, kvm, id);
2613 	if (rc)
2614 		goto out_free_sie_block;
2615 	VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2616 		 vcpu->arch.sie_block);
2617 	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2618 
2619 	return vcpu;
2620 out_free_sie_block:
2621 	free_page((unsigned long)(vcpu->arch.sie_block));
2622 out_free_cpu:
2623 	kmem_cache_free(kvm_vcpu_cache, vcpu);
2624 out:
2625 	return ERR_PTR(rc);
2626 }
2627 
2628 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
2629 {
2630 	return kvm_s390_vcpu_has_irq(vcpu, 0);
2631 }
2632 
2633 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
2634 {
2635 	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
2636 }
2637 
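/*
 * Prevent a VCPU from (re-)entering SIE and kick it out of SIE if it is
 * currently running; kvm_s390_vcpu_unblock() lifts the block again.
 */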
2638 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2639 {
2640 	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2641 	exit_sie(vcpu);
2642 }
2643 
2644 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2645 {
2646 	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2647 }
2648 
2649 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2650 {
2651 	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2652 	exit_sie(vcpu);
2653 }
2654 
2655 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2656 {
2657 	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2658 }
2659 
2660 /*
2661  * Kick a guest cpu out of SIE and wait until SIE is not running.
2662  * If the CPU is not running (e.g. waiting as idle) the function will
2663  * return immediately. */
2664 void exit_sie(struct kvm_vcpu *vcpu)
2665 {
2666 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
2667 	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2668 		cpu_relax();
2669 }
2670 
2671 /* Kick a guest cpu out of SIE to process a request synchronously */
2672 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
2673 {
2674 	kvm_make_request(req, vcpu);
2675 	kvm_s390_vcpu_request(vcpu);
2676 }
2677 
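/*
 * gmap invalidation callback: if the invalidated range overlaps the
 * prefix pages of a VCPU, request an MMU reload for it so that the ipte
 * notifier for the prefix is re-armed before the next SIE entry.
 */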
2678 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2679 			      unsigned long end)
2680 {
2681 	struct kvm *kvm = gmap->private;
2682 	struct kvm_vcpu *vcpu;
2683 	unsigned long prefix;
2684 	int i;
2685 
2686 	if (gmap_is_shadow(gmap))
2687 		return;
2688 	if (start >= 1UL << 31)
2689 		/* We are only interested in prefix pages */
2690 		return;
2691 	kvm_for_each_vcpu(i, vcpu, kvm) {
2692 		/* match against both prefix pages */
2693 		prefix = kvm_s390_get_prefix(vcpu);
2694 		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2695 			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2696 				   start, end);
2697 			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2698 		}
2699 	}
2700 }
2701 
2702 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2703 {
2704 	/* kvm common code refers to this, but never calls it */
2705 	BUG();
2706 	return 0;
2707 }
2708 
2709 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2710 					   struct kvm_one_reg *reg)
2711 {
2712 	int r = -EINVAL;
2713 
2714 	switch (reg->id) {
2715 	case KVM_REG_S390_TODPR:
2716 		r = put_user(vcpu->arch.sie_block->todpr,
2717 			     (u32 __user *)reg->addr);
2718 		break;
2719 	case KVM_REG_S390_EPOCHDIFF:
2720 		r = put_user(vcpu->arch.sie_block->epoch,
2721 			     (u64 __user *)reg->addr);
2722 		break;
2723 	case KVM_REG_S390_CPU_TIMER:
2724 		r = put_user(kvm_s390_get_cpu_timer(vcpu),
2725 			     (u64 __user *)reg->addr);
2726 		break;
2727 	case KVM_REG_S390_CLOCK_COMP:
2728 		r = put_user(vcpu->arch.sie_block->ckc,
2729 			     (u64 __user *)reg->addr);
2730 		break;
2731 	case KVM_REG_S390_PFTOKEN:
2732 		r = put_user(vcpu->arch.pfault_token,
2733 			     (u64 __user *)reg->addr);
2734 		break;
2735 	case KVM_REG_S390_PFCOMPARE:
2736 		r = put_user(vcpu->arch.pfault_compare,
2737 			     (u64 __user *)reg->addr);
2738 		break;
2739 	case KVM_REG_S390_PFSELECT:
2740 		r = put_user(vcpu->arch.pfault_select,
2741 			     (u64 __user *)reg->addr);
2742 		break;
2743 	case KVM_REG_S390_PP:
2744 		r = put_user(vcpu->arch.sie_block->pp,
2745 			     (u64 __user *)reg->addr);
2746 		break;
2747 	case KVM_REG_S390_GBEA:
2748 		r = put_user(vcpu->arch.sie_block->gbea,
2749 			     (u64 __user *)reg->addr);
2750 		break;
2751 	default:
2752 		break;
2753 	}
2754 
2755 	return r;
2756 }
2757 
2758 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2759 					   struct kvm_one_reg *reg)
2760 {
2761 	int r = -EINVAL;
2762 	__u64 val;
2763 
2764 	switch (reg->id) {
2765 	case KVM_REG_S390_TODPR:
2766 		r = get_user(vcpu->arch.sie_block->todpr,
2767 			     (u32 __user *)reg->addr);
2768 		break;
2769 	case KVM_REG_S390_EPOCHDIFF:
2770 		r = get_user(vcpu->arch.sie_block->epoch,
2771 			     (u64 __user *)reg->addr);
2772 		break;
2773 	case KVM_REG_S390_CPU_TIMER:
2774 		r = get_user(val, (u64 __user *)reg->addr);
2775 		if (!r)
2776 			kvm_s390_set_cpu_timer(vcpu, val);
2777 		break;
2778 	case KVM_REG_S390_CLOCK_COMP:
2779 		r = get_user(vcpu->arch.sie_block->ckc,
2780 			     (u64 __user *)reg->addr);
2781 		break;
2782 	case KVM_REG_S390_PFTOKEN:
2783 		r = get_user(vcpu->arch.pfault_token,
2784 			     (u64 __user *)reg->addr);
2785 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2786 			kvm_clear_async_pf_completion_queue(vcpu);
2787 		break;
2788 	case KVM_REG_S390_PFCOMPARE:
2789 		r = get_user(vcpu->arch.pfault_compare,
2790 			     (u64 __user *)reg->addr);
2791 		break;
2792 	case KVM_REG_S390_PFSELECT:
2793 		r = get_user(vcpu->arch.pfault_select,
2794 			     (u64 __user *)reg->addr);
2795 		break;
2796 	case KVM_REG_S390_PP:
2797 		r = get_user(vcpu->arch.sie_block->pp,
2798 			     (u64 __user *)reg->addr);
2799 		break;
2800 	case KVM_REG_S390_GBEA:
2801 		r = get_user(vcpu->arch.sie_block->gbea,
2802 			     (u64 __user *)reg->addr);
2803 		break;
2804 	default:
2805 		break;
2806 	}
2807 
2808 	return r;
2809 }
2810 
2811 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2812 {
2813 	kvm_s390_vcpu_initial_reset(vcpu);
2814 	return 0;
2815 }
2816 
2817 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2818 {
2819 	vcpu_load(vcpu);
2820 	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2821 	vcpu_put(vcpu);
2822 	return 0;
2823 }
2824 
2825 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2826 {
2827 	vcpu_load(vcpu);
2828 	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2829 	vcpu_put(vcpu);
2830 	return 0;
2831 }
2832 
2833 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2834 				  struct kvm_sregs *sregs)
2835 {
2836 	vcpu_load(vcpu);
2837 
2838 	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2839 	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2840 
2841 	vcpu_put(vcpu);
2842 	return 0;
2843 }
2844 
2845 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2846 				  struct kvm_sregs *sregs)
2847 {
2848 	vcpu_load(vcpu);
2849 
2850 	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2851 	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2852 
2853 	vcpu_put(vcpu);
2854 	return 0;
2855 }
2856 
2857 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2858 {
2859 	int ret = 0;
2860 
2861 	vcpu_load(vcpu);
2862 
2863 	if (test_fp_ctl(fpu->fpc)) {
2864 		ret = -EINVAL;
2865 		goto out;
2866 	}
2867 	vcpu->run->s.regs.fpc = fpu->fpc;
2868 	if (MACHINE_HAS_VX)
2869 		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2870 				 (freg_t *) fpu->fprs);
2871 	else
2872 		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2873 
2874 out:
2875 	vcpu_put(vcpu);
2876 	return ret;
2877 }
2878 
2879 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2880 {
2881 	vcpu_load(vcpu);
2882 
2883 	/* make sure we have the latest values */
2884 	save_fpu_regs();
2885 	if (MACHINE_HAS_VX)
2886 		convert_vx_to_fp((freg_t *) fpu->fprs,
2887 				 (__vector128 *) vcpu->run->s.regs.vrs);
2888 	else
2889 		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2890 	fpu->fpc = vcpu->run->s.regs.fpc;
2891 
2892 	vcpu_put(vcpu);
2893 	return 0;
2894 }
2895 
2896 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2897 {
2898 	int rc = 0;
2899 
2900 	if (!is_vcpu_stopped(vcpu))
2901 		rc = -EBUSY;
2902 	else {
2903 		vcpu->run->psw_mask = psw.mask;
2904 		vcpu->run->psw_addr = psw.addr;
2905 	}
2906 	return rc;
2907 }
2908 
2909 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2910 				  struct kvm_translation *tr)
2911 {
2912 	return -EINVAL; /* not implemented yet */
2913 }
2914 
2915 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2916 			      KVM_GUESTDBG_USE_HW_BP | \
2917 			      KVM_GUESTDBG_ENABLE)
2918 
2919 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2920 					struct kvm_guest_debug *dbg)
2921 {
2922 	int rc = 0;
2923 
2924 	vcpu_load(vcpu);
2925 
2926 	vcpu->guest_debug = 0;
2927 	kvm_s390_clear_bp_data(vcpu);
2928 
2929 	if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
2930 		rc = -EINVAL;
2931 		goto out;
2932 	}
2933 	if (!sclp.has_gpere) {
2934 		rc = -EINVAL;
2935 		goto out;
2936 	}
2937 
2938 	if (dbg->control & KVM_GUESTDBG_ENABLE) {
2939 		vcpu->guest_debug = dbg->control;
2940 		/* enforce guest PER */
2941 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
2942 
2943 		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2944 			rc = kvm_s390_import_bp_data(vcpu, dbg);
2945 	} else {
2946 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
2947 		vcpu->arch.guestdbg.last_bp = 0;
2948 	}
2949 
2950 	if (rc) {
2951 		vcpu->guest_debug = 0;
2952 		kvm_s390_clear_bp_data(vcpu);
2953 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
2954 	}
2955 
2956 out:
2957 	vcpu_put(vcpu);
2958 	return rc;
2959 }
2960 
2961 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2962 				    struct kvm_mp_state *mp_state)
2963 {
2964 	int ret;
2965 
2966 	vcpu_load(vcpu);
2967 
2968 	/* CHECK_STOP and LOAD are not supported yet */
2969 	ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2970 				      KVM_MP_STATE_OPERATING;
2971 
2972 	vcpu_put(vcpu);
2973 	return ret;
2974 }
2975 
2976 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2977 				    struct kvm_mp_state *mp_state)
2978 {
2979 	int rc = 0;
2980 
2981 	vcpu_load(vcpu);
2982 
2983 	/* user space knows about this interface - let it control the state */
2984 	vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2985 
2986 	switch (mp_state->mp_state) {
2987 	case KVM_MP_STATE_STOPPED:
2988 		kvm_s390_vcpu_stop(vcpu);
2989 		break;
2990 	case KVM_MP_STATE_OPERATING:
2991 		kvm_s390_vcpu_start(vcpu);
2992 		break;
2993 	case KVM_MP_STATE_LOAD:
2994 	case KVM_MP_STATE_CHECK_STOP:
2995 		/* fall through - CHECK_STOP and LOAD are not supported yet */
2996 	default:
2997 		rc = -ENXIO;
2998 	}
2999 
3000 	vcpu_put(vcpu);
3001 	return rc;
3002 }
3003 
3004 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3005 {
3006 	return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3007 }
3008 
3009 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3010 {
3011 retry:
3012 	kvm_s390_vcpu_request_handled(vcpu);
3013 	if (!kvm_request_pending(vcpu))
3014 		return 0;
3015 	/*
3016 	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
3017 	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3018 	 * This ensures that the ipte instruction for this request has
3019 	 * already finished. We might race against a second unmapper that
3020 	 * wants to set the blocking bit. Let's just retry the request loop.
3021 	 */
3022 	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3023 		int rc;
3024 		rc = gmap_mprotect_notify(vcpu->arch.gmap,
3025 					  kvm_s390_get_prefix(vcpu),
3026 					  PAGE_SIZE * 2, PROT_WRITE);
3027 		if (rc) {
3028 			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3029 			return rc;
3030 		}
3031 		goto retry;
3032 	}
3033 
3034 	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3035 		vcpu->arch.sie_block->ihcpu = 0xffff;
3036 		goto retry;
3037 	}
3038 
3039 	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3040 		if (!ibs_enabled(vcpu)) {
3041 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3042 			kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3043 		}
3044 		goto retry;
3045 	}
3046 
3047 	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3048 		if (ibs_enabled(vcpu)) {
3049 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3050 			kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3051 		}
3052 		goto retry;
3053 	}
3054 
3055 	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3056 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3057 		goto retry;
3058 	}
3059 
3060 	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3061 		/*
3062 		 * Disable CMM virtualization; we will emulate the ESSA
3063 		 * instruction manually, in order to provide additional
3064 		 * functionalities needed for live migration.
3065 		 */
3066 		vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3067 		goto retry;
3068 	}
3069 
3070 	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3071 		/*
3072 		 * Re-enable CMM virtualization if CMMA is available and
3073 		 * CMM has been used.
3074 		 */
3075 		if ((vcpu->kvm->arch.use_cmma) &&
3076 		    (vcpu->kvm->mm->context.uses_cmm))
3077 			vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3078 		goto retry;
3079 	}
3080 
3081 	/* nothing to do, just clear the request */
3082 	kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3083 
3084 	return 0;
3085 }
3086 
3087 void kvm_s390_set_tod_clock(struct kvm *kvm,
3088 			    const struct kvm_s390_vm_tod_clock *gtod)
3089 {
3090 	struct kvm_vcpu *vcpu;
3091 	struct kvm_s390_tod_clock_ext htod;
3092 	int i;
3093 
3094 	mutex_lock(&kvm->lock);
3095 	preempt_disable();
3096 
3097 	get_tod_clock_ext((char *)&htod);
3098 
3099 	kvm->arch.epoch = gtod->tod - htod.tod;
3100 	kvm->arch.epdx = 0;
3101 	if (test_kvm_facility(kvm, 139)) {
3102 		kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
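		/*
		 * The 64-bit TOD subtraction above may have wrapped; if so,
		 * propagate the borrow into the epoch index so that the
		 * combined 128-bit (epdx:epoch) difference stays correct.
		 */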
3103 		if (kvm->arch.epoch > gtod->tod)
3104 			kvm->arch.epdx -= 1;
3105 	}
3106 
3107 	kvm_s390_vcpu_block_all(kvm);
3108 	kvm_for_each_vcpu(i, vcpu, kvm) {
3109 		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3110 		vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
3111 	}
3112 
3113 	kvm_s390_vcpu_unblock_all(kvm);
3114 	preempt_enable();
3115 	mutex_unlock(&kvm->lock);
3116 }
3117 
3118 /**
3119  * kvm_arch_fault_in_page - fault-in guest page if necessary
3120  * @vcpu: The corresponding virtual cpu
3121  * @gpa: Guest physical address
3122  * @writable: Whether the page should be writable or not
3123  *
3124  * Make sure that a guest page has been faulted-in on the host.
3125  *
3126  * Return: Zero on success, negative error code otherwise.
3127  */
3128 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3129 {
3130 	return gmap_fault(vcpu->arch.gmap, gpa,
3131 			  writable ? FAULT_FLAG_WRITE : 0);
3132 }
3133 
3134 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3135 				      unsigned long token)
3136 {
3137 	struct kvm_s390_interrupt inti;
3138 	struct kvm_s390_irq irq;
3139 
3140 	if (start_token) {
3141 		irq.u.ext.ext_params2 = token;
3142 		irq.type = KVM_S390_INT_PFAULT_INIT;
3143 		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3144 	} else {
3145 		inti.type = KVM_S390_INT_PFAULT_DONE;
3146 		inti.parm64 = token;
3147 		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3148 	}
3149 }
3150 
3151 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3152 				     struct kvm_async_pf *work)
3153 {
3154 	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3155 	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3156 }
3157 
3158 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3159 				 struct kvm_async_pf *work)
3160 {
3161 	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3162 	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3163 }
3164 
3165 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3166 			       struct kvm_async_pf *work)
3167 {
3168 	/* s390 will always inject the page directly */
3169 }
3170 
3171 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3172 {
3173 	/*
3174 	 * s390 will always inject the page directly,
3175 	 * but we still want check_async_completion to clean up
3176 	 */
3177 	return true;
3178 }
3179 
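/*
 * Arm an async page fault for the current host fault, provided the guest
 * has pfault handling enabled (valid token, matching PSW mask/compare,
 * external interrupts and the relevant CR0 subclass bit on) and no
 * interrupt is already pending. A non-zero return means the fault will
 * be completed asynchronously.
 */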
3180 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3181 {
3182 	hva_t hva;
3183 	struct kvm_arch_async_pf arch;
3184 	int rc;
3185 
3186 	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3187 		return 0;
3188 	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3189 	    vcpu->arch.pfault_compare)
3190 		return 0;
3191 	if (psw_extint_disabled(vcpu))
3192 		return 0;
3193 	if (kvm_s390_vcpu_has_irq(vcpu, 0))
3194 		return 0;
3195 	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
3196 		return 0;
3197 	if (!vcpu->arch.gmap->pfault_enabled)
3198 		return 0;
3199 
3200 	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3201 	hva += current->thread.gmap_addr & ~PAGE_MASK;
3202 	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3203 		return 0;
3204 
3205 	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3206 	return rc;
3207 }
3208 
3209 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3210 {
3211 	int rc, cpuflags;
3212 
3213 	/*
3214 	 * On s390 notifications for arriving pages will be delivered directly
3215 	 * to the guest, but the housekeeping for completed pfaults is
3216 	 * handled outside the worker.
3217 	 */
3218 	kvm_check_async_pf_completion(vcpu);
3219 
3220 	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3221 	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3222 
3223 	if (need_resched())
3224 		schedule();
3225 
3226 	if (test_cpu_flag(CIF_MCCK_PENDING))
3227 		s390_handle_mcck();
3228 
3229 	if (!kvm_is_ucontrol(vcpu->kvm)) {
3230 		rc = kvm_s390_deliver_pending_interrupts(vcpu);
3231 		if (rc)
3232 			return rc;
3233 	}
3234 
3235 	rc = kvm_s390_handle_requests(vcpu);
3236 	if (rc)
3237 		return rc;
3238 
3239 	if (guestdbg_enabled(vcpu)) {
3240 		kvm_s390_backup_guest_per_regs(vcpu);
3241 		kvm_s390_patch_guest_per_regs(vcpu);
3242 	}
3243 
3244 	vcpu->arch.sie_block->icptcode = 0;
3245 	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3246 	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3247 	trace_kvm_s390_sie_enter(vcpu, cpuflags);
3248 
3249 	return 0;
3250 }
3251 
3252 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3253 {
3254 	struct kvm_s390_pgm_info pgm_info = {
3255 		.code = PGM_ADDRESSING,
3256 	};
3257 	u8 opcode, ilen;
3258 	int rc;
3259 
3260 	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3261 	trace_kvm_s390_sie_fault(vcpu);
3262 
3263 	/*
3264 	 * We want to inject an addressing exception, which is defined as a
3265 	 * suppressing or terminating exception. However, since we came here
3266 	 * by a DAT access exception, the PSW still points to the faulting
3267 	 * instruction since DAT exceptions are nullifying. So we've got
3268 	 * to look up the current opcode to get the length of the instruction
3269 	 * to be able to forward the PSW.
3270 	 */
3271 	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3272 	ilen = insn_length(opcode);
3273 	if (rc < 0) {
3274 		return rc;
3275 	} else if (rc) {
3276 		/* Instruction-Fetching Exceptions - we can't detect the ilen.
3277 		 * Forward by arbitrary ilc, injection will take care of
3278 		 * nullification if necessary.
3279 		 */
3280 		pgm_info = vcpu->arch.pgm;
3281 		ilen = 4;
3282 	}
3283 	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3284 	kvm_s390_forward_psw(vcpu, ilen);
3285 	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3286 }
3287 
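/*
 * Post-process a SIE exit: reinject machine checks that were reported
 * via -EINTR, hand intercepts to the intercept handlers (or to userspace
 * as KVM_EXIT_S390_SIEIC), report ucontrol faults to userspace, and turn
 * host page faults into async pfaults or a synchronous fault-in.
 */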
3288 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3289 {
3290 	struct mcck_volatile_info *mcck_info;
3291 	struct sie_page *sie_page;
3292 
3293 	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3294 		   vcpu->arch.sie_block->icptcode);
3295 	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3296 
3297 	if (guestdbg_enabled(vcpu))
3298 		kvm_s390_restore_guest_per_regs(vcpu);
3299 
3300 	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3301 	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3302 
3303 	if (exit_reason == -EINTR) {
3304 		VCPU_EVENT(vcpu, 3, "%s", "machine check");
3305 		sie_page = container_of(vcpu->arch.sie_block,
3306 					struct sie_page, sie_block);
3307 		mcck_info = &sie_page->mcck_info;
3308 		kvm_s390_reinject_machine_check(vcpu, mcck_info);
3309 		return 0;
3310 	}
3311 
3312 	if (vcpu->arch.sie_block->icptcode > 0) {
3313 		int rc = kvm_handle_sie_intercept(vcpu);
3314 
3315 		if (rc != -EOPNOTSUPP)
3316 			return rc;
3317 		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3318 		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3319 		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3320 		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3321 		return -EREMOTE;
3322 	} else if (exit_reason != -EFAULT) {
3323 		vcpu->stat.exit_null++;
3324 		return 0;
3325 	} else if (kvm_is_ucontrol(vcpu->kvm)) {
3326 		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3327 		vcpu->run->s390_ucontrol.trans_exc_code =
3328 						current->thread.gmap_addr;
3329 		vcpu->run->s390_ucontrol.pgm_code = 0x10;
3330 		return -EREMOTE;
3331 	} else if (current->thread.gmap_pfault) {
3332 		trace_kvm_s390_major_guest_pfault(vcpu);
3333 		current->thread.gmap_pfault = 0;
3334 		if (kvm_arch_setup_async_pf(vcpu))
3335 			return 0;
3336 		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3337 	}
3338 	return vcpu_post_run_fault_in_sie(vcpu);
3339 }
3340 
3341 static int __vcpu_run(struct kvm_vcpu *vcpu)
3342 {
3343 	int rc, exit_reason;
3344 
3345 	/*
3346 	 * We try to hold kvm->srcu during most of vcpu_run (except while
3347 	 * running the guest), so that memslots and other data are protected.
3348 	 */
3349 	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3350 
3351 	do {
3352 		rc = vcpu_pre_run(vcpu);
3353 		if (rc)
3354 			break;
3355 
3356 		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3357 		/*
3358 		 * As PF_VCPU will be used in the fault handler, there must be
3359 		 * no uaccess between guest_enter and guest_exit.
3360 		 */
3361 		local_irq_disable();
3362 		guest_enter_irqoff();
3363 		__disable_cpu_timer_accounting(vcpu);
3364 		local_irq_enable();
3365 		exit_reason = sie64a(vcpu->arch.sie_block,
3366 				     vcpu->run->s.regs.gprs);
3367 		local_irq_disable();
3368 		__enable_cpu_timer_accounting(vcpu);
3369 		guest_exit_irqoff();
3370 		local_irq_enable();
3371 		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3372 
3373 		rc = vcpu_post_run(vcpu, exit_reason);
3374 	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3375 
3376 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3377 	return rc;
3378 }
3379 
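/*
 * Load the register state that userspace provided in kvm_run into the
 * SIE control block and into the lazily switched FPU, access register
 * and guarded storage context before entering the guest.
 */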
3380 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3381 {
3382 	struct runtime_instr_cb *riccb;
3383 	struct gs_cb *gscb;
3384 
3385 	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3386 	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3387 	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3388 	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3389 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3390 		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3391 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3392 		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3393 		/* some control register changes require a tlb flush */
3394 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3395 	}
3396 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3397 		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3398 		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3399 		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3400 		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3401 		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3402 	}
3403 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3404 		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3405 		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3406 		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3407 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3408 			kvm_clear_async_pf_completion_queue(vcpu);
3409 	}
3410 	/*
3411 	 * If userspace sets the riccb (e.g. after migration) to a valid state,
3412 	 * we should enable RI here instead of doing the lazy enablement.
3413 	 */
3414 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3415 	    test_kvm_facility(vcpu->kvm, 64) &&
3416 	    riccb->v &&
3417 	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3418 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3419 		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3420 	}
3421 	/*
3422 	 * If userspace sets the gscb (e.g. after migration) to non-zero,
3423 	 * we should enable GS here instead of doing the lazy enablement.
3424 	 */
3425 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3426 	    test_kvm_facility(vcpu->kvm, 133) &&
3427 	    gscb->gssm &&
3428 	    !vcpu->arch.gs_enabled) {
3429 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3430 		vcpu->arch.sie_block->ecb |= ECB_GS;
3431 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3432 		vcpu->arch.gs_enabled = 1;
3433 	}
3434 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
3435 	    test_kvm_facility(vcpu->kvm, 82)) {
3436 		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3437 		vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
3438 	}
3439 	save_access_regs(vcpu->arch.host_acrs);
3440 	restore_access_regs(vcpu->run->s.regs.acrs);
3441 	/* save host (userspace) fprs/vrs */
3442 	save_fpu_regs();
3443 	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3444 	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
3445 	if (MACHINE_HAS_VX)
3446 		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3447 	else
3448 		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3449 	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3450 	if (test_fp_ctl(current->thread.fpu.fpc))
3451 		/* User space provided an invalid FPC, let's clear it */
3452 		current->thread.fpu.fpc = 0;
3453 	if (MACHINE_HAS_GS) {
3454 		preempt_disable();
3455 		__ctl_set_bit(2, 4);
3456 		if (current->thread.gs_cb) {
3457 			vcpu->arch.host_gscb = current->thread.gs_cb;
3458 			save_gs_cb(vcpu->arch.host_gscb);
3459 		}
3460 		if (vcpu->arch.gs_enabled) {
3461 			current->thread.gs_cb = (struct gs_cb *)
3462 						&vcpu->run->s.regs.gscb;
3463 			restore_gs_cb(current->thread.gs_cb);
3464 		}
3465 		preempt_enable();
3466 	}
3467 
3468 	kvm_run->kvm_dirty_regs = 0;
3469 }
3470 
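/*
 * Copy the guest register state back into kvm_run for userspace and
 * restore the host FPU, access register and guarded storage context that
 * sync_regs() saved away.
 */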
3471 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3472 {
3473 	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3474 	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3475 	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3476 	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3477 	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3478 	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3479 	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3480 	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3481 	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3482 	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3483 	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3484 	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3485 	kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
3486 	save_access_regs(vcpu->run->s.regs.acrs);
3487 	restore_access_regs(vcpu->arch.host_acrs);
3488 	/* Save guest register state */
3489 	save_fpu_regs();
3490 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3491 	/* Restore will be done lazily at return */
3492 	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3493 	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3494 	if (MACHINE_HAS_GS) {
3495 		__ctl_set_bit(2, 4);
3496 		if (vcpu->arch.gs_enabled)
3497 			save_gs_cb(current->thread.gs_cb);
3498 		preempt_disable();
3499 		current->thread.gs_cb = vcpu->arch.host_gscb;
3500 		restore_gs_cb(vcpu->arch.host_gscb);
3501 		preempt_enable();
3502 		if (!vcpu->arch.host_gscb)
3503 			__ctl_clear_bit(2, 4);
3504 		vcpu->arch.host_gscb = NULL;
3505 	}
3506 
3507 }
3508 
3509 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3510 {
3511 	int rc;
3512 
3513 	if (kvm_run->immediate_exit)
3514 		return -EINTR;
3515 
3516 	vcpu_load(vcpu);
3517 
3518 	if (guestdbg_exit_pending(vcpu)) {
3519 		kvm_s390_prepare_debug_exit(vcpu);
3520 		rc = 0;
3521 		goto out;
3522 	}
3523 
3524 	kvm_sigset_activate(vcpu);
3525 
3526 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
3527 		kvm_s390_vcpu_start(vcpu);
3528 	} else if (is_vcpu_stopped(vcpu)) {
3529 		pr_err_ratelimited("can't run stopped vcpu %d\n",
3530 				   vcpu->vcpu_id);
3531 		rc = -EINVAL;
3532 		goto out;
3533 	}
3534 
3535 	sync_regs(vcpu, kvm_run);
3536 	enable_cpu_timer_accounting(vcpu);
3537 
3538 	might_fault();
3539 	rc = __vcpu_run(vcpu);
3540 
3541 	if (signal_pending(current) && !rc) {
3542 		kvm_run->exit_reason = KVM_EXIT_INTR;
3543 		rc = -EINTR;
3544 	}
3545 
3546 	if (guestdbg_exit_pending(vcpu) && !rc)  {
3547 		kvm_s390_prepare_debug_exit(vcpu);
3548 		rc = 0;
3549 	}
3550 
3551 	if (rc == -EREMOTE) {
3552 		/* userspace support is needed, kvm_run has been prepared */
3553 		rc = 0;
3554 	}
3555 
3556 	disable_cpu_timer_accounting(vcpu);
3557 	store_regs(vcpu, kvm_run);
3558 
3559 	kvm_sigset_deactivate(vcpu);
3560 
3561 	vcpu->stat.exit_userspace++;
3562 out:
3563 	vcpu_put(vcpu);
3564 	return rc;
3565 }
3566 
3567 /*
3568  * store status at address
3569  * we have two special cases:
3570  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
3571  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
3572  */
3573 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
3574 {
3575 	unsigned char archmode = 1;
3576 	freg_t fprs[NUM_FPRS];
3577 	unsigned int px;
3578 	u64 clkcomp, cputm;
3579 	int rc;
3580 
3581 	px = kvm_s390_get_prefix(vcpu);
3582 	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
3583 		if (write_guest_abs(vcpu, 163, &archmode, 1))
3584 			return -EFAULT;
3585 		gpa = 0;
3586 	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
3587 		if (write_guest_real(vcpu, 163, &archmode, 1))
3588 			return -EFAULT;
3589 		gpa = px;
3590 	} else
3591 		gpa -= __LC_FPREGS_SAVE_AREA;
3592 
3593 	/* manually convert vector registers if necessary */
3594 	if (MACHINE_HAS_VX) {
3595 		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
3596 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3597 				     fprs, 128);
3598 	} else {
3599 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3600 				     vcpu->run->s.regs.fprs, 128);
3601 	}
3602 	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
3603 			      vcpu->run->s.regs.gprs, 128);
3604 	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
3605 			      &vcpu->arch.sie_block->gpsw, 16);
3606 	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
3607 			      &px, 4);
3608 	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
3609 			      &vcpu->run->s.regs.fpc, 4);
3610 	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
3611 			      &vcpu->arch.sie_block->todpr, 4);
3612 	cputm = kvm_s390_get_cpu_timer(vcpu);
3613 	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
3614 			      &cputm, 8);
3615 	clkcomp = vcpu->arch.sie_block->ckc >> 8;
3616 	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
3617 			      &clkcomp, 8);
3618 	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
3619 			      &vcpu->run->s.regs.acrs, 64);
3620 	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
3621 			      &vcpu->arch.sie_block->gcr, 128);
3622 	return rc ? -EFAULT : 0;
3623 }
3624 
3625 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
3626 {
3627 	/*
3628 	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
3629 	 * switch in the run ioctl. Let's update our copies before we save
3630 	 * them into the save area
3631 	 */
3632 	save_fpu_regs();
3633 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3634 	save_access_regs(vcpu->run->s.regs.acrs);
3635 
3636 	return kvm_s390_store_status_unloaded(vcpu, addr);
3637 }
3638 
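/*
 * A minimal userspace sketch (vcpu_fd assumed to come from KVM_CREATE_VCPU):
 * the KVM_S390_STORE_STATUS ioctl passes the target address directly as the
 * argument, so the special values handled above can be used as-is:
 *
 *	ioctl(vcpu_fd, KVM_S390_STORE_STATUS, KVM_S390_STORE_STATUS_NOADDR);
 *	ioctl(vcpu_fd, KVM_S390_STORE_STATUS, KVM_S390_STORE_STATUS_PREFIXED);
 */
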
3639 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3640 {
3641 	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
3642 	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
3643 }
3644 
3645 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
3646 {
3647 	unsigned int i;
3648 	struct kvm_vcpu *vcpu;
3649 
3650 	kvm_for_each_vcpu(i, vcpu, kvm) {
3651 		__disable_ibs_on_vcpu(vcpu);
3652 	}
3653 }
3654 
3655 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3656 {
3657 	if (!sclp.has_ibs)
3658 		return;
3659 	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
3660 	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
3661 }
3662 
3663 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
3664 {
3665 	int i, online_vcpus, started_vcpus = 0;
3666 
3667 	if (!is_vcpu_stopped(vcpu))
3668 		return;
3669 
3670 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
3671 	/* Only one cpu at a time may enter/leave the STOPPED state. */
3672 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
3673 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3674 
3675 	for (i = 0; i < online_vcpus; i++) {
3676 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
3677 			started_vcpus++;
3678 	}
3679 
3680 	if (started_vcpus == 0) {
3681 		/* we're the only active VCPU -> speed it up */
3682 		__enable_ibs_on_vcpu(vcpu);
3683 	} else if (started_vcpus == 1) {
3684 		/*
3685 		 * As we are starting a second VCPU, we have to disable
3686 		 * the IBS facility on all VCPUs to remove potentially
3687 		 * outstanding ENABLE requests.
3688 		 */
3689 		__disable_ibs_on_all_vcpus(vcpu->kvm);
3690 	}
3691 
3692 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
3693 	/*
3694 	 * Another VCPU might have used IBS while we were offline.
3695 	 * Let's play safe and flush the VCPU at startup.
3696 	 */
3697 	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3698 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3699 	return;
3700 }
3701 
3702 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
3703 {
3704 	int i, online_vcpus, started_vcpus = 0;
3705 	struct kvm_vcpu *started_vcpu = NULL;
3706 
3707 	if (is_vcpu_stopped(vcpu))
3708 		return;
3709 
3710 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
3711 	/* Only one cpu at a time may enter/leave the STOPPED state. */
3712 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
3713 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3714 
3715 	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
3716 	kvm_s390_clear_stop_irq(vcpu);
3717 
3718 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
3719 	__disable_ibs_on_vcpu(vcpu);
3720 
3721 	for (i = 0; i < online_vcpus; i++) {
3722 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
3723 			started_vcpus++;
3724 			started_vcpu = vcpu->kvm->vcpus[i];
3725 		}
3726 	}
3727 
3728 	if (started_vcpus == 1) {
3729 		/*
3730 		 * As we only have one VCPU left, we want to enable the
3731 		 * IBS facility for that VCPU to speed it up.
3732 		 */
3733 		__enable_ibs_on_vcpu(started_vcpu);
3734 	}
3735 
3736 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3737 	return;
3738 }
3739 
3740 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3741 				     struct kvm_enable_cap *cap)
3742 {
3743 	int r;
3744 
3745 	if (cap->flags)
3746 		return -EINVAL;
3747 
3748 	switch (cap->cap) {
3749 	case KVM_CAP_S390_CSS_SUPPORT:
3750 		if (!vcpu->kvm->arch.css_support) {
3751 			vcpu->kvm->arch.css_support = 1;
3752 			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
3753 			trace_kvm_s390_enable_css(vcpu->kvm);
3754 		}
3755 		r = 0;
3756 		break;
3757 	default:
3758 		r = -EINVAL;
3759 		break;
3760 	}
3761 	return r;
3762 }
3763 
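/*
 * A minimal userspace sketch (vcpu_fd assumed): enabling the one per-vcpu
 * capability handled above; cap->flags must be zero:
 *
 *	struct kvm_enable_cap cap = {
 *		.cap = KVM_CAP_S390_CSS_SUPPORT,
 *	};
 *	ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);
 */
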
3764 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3765 				  struct kvm_s390_mem_op *mop)
3766 {
3767 	void __user *uaddr = (void __user *)mop->buf;
3768 	void *tmpbuf = NULL;
3769 	int r, srcu_idx;
3770 	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3771 				    | KVM_S390_MEMOP_F_CHECK_ONLY;
3772 
3773 	if (mop->flags & ~supported_flags)
3774 		return -EINVAL;
3775 
3776 	if (mop->size > MEM_OP_MAX_SIZE)
3777 		return -E2BIG;
3778 
3779 	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3780 		tmpbuf = vmalloc(mop->size);
3781 		if (!tmpbuf)
3782 			return -ENOMEM;
3783 	}
3784 
3785 	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3786 
3787 	switch (mop->op) {
3788 	case KVM_S390_MEMOP_LOGICAL_READ:
3789 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3790 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3791 					    mop->size, GACC_FETCH);
3792 			break;
3793 		}
3794 		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3795 		if (r == 0) {
3796 			if (copy_to_user(uaddr, tmpbuf, mop->size))
3797 				r = -EFAULT;
3798 		}
3799 		break;
3800 	case KVM_S390_MEMOP_LOGICAL_WRITE:
3801 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3802 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3803 					    mop->size, GACC_STORE);
3804 			break;
3805 		}
3806 		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3807 			r = -EFAULT;
3808 			break;
3809 		}
3810 		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3811 		break;
3812 	default:
3813 		r = -EINVAL;
3814 	}
3815 
3816 	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3817 
3818 	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3819 		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
3820 
3821 	vfree(tmpbuf);
3822 	return r;
3823 }
3824 
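/*
 * A minimal userspace sketch (vcpu_fd and the guest address 0x10000 are
 * assumptions): reading guest memory through the handler above:
 *
 *	unsigned char buf[512];
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = 0x10000,		// guest logical address
 *		.size  = sizeof(buf),
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf   = (unsigned long)buf,	// user buffer to fill
 *		.ar    = 0,			// access register number
 *	};
 *	ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
 */
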
3825 long kvm_arch_vcpu_async_ioctl(struct file *filp,
3826 			       unsigned int ioctl, unsigned long arg)
3827 {
3828 	struct kvm_vcpu *vcpu = filp->private_data;
3829 	void __user *argp = (void __user *)arg;
3830 
3831 	switch (ioctl) {
3832 	case KVM_S390_IRQ: {
3833 		struct kvm_s390_irq s390irq;
3834 
3835 		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3836 			return -EFAULT;
3837 		return kvm_s390_inject_vcpu(vcpu, &s390irq);
3838 	}
3839 	case KVM_S390_INTERRUPT: {
3840 		struct kvm_s390_interrupt s390int;
3841 		struct kvm_s390_irq s390irq;
3842 
3843 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
3844 			return -EFAULT;
3845 		if (s390int_to_s390irq(&s390int, &s390irq))
3846 			return -EINVAL;
3847 		return kvm_s390_inject_vcpu(vcpu, &s390irq);
3848 	}
3849 	}
3850 	return -ENOIOCTLCMD;
3851 }
3852 
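/*
 * A minimal userspace sketch (vcpu_fd assumed): injecting an emergency
 * signal through the KVM_S390_IRQ path handled above, which runs without
 * vcpu_load():
 *
 *	struct kvm_s390_irq irq = {
 *		.type = KVM_S390_INT_EMERGENCY,
 *		.u.emerg.code = 0,	// source cpu address (assumption: cpu 0)
 *	};
 *	ioctl(vcpu_fd, KVM_S390_IRQ, &irq);
 */
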
3853 long kvm_arch_vcpu_ioctl(struct file *filp,
3854 			 unsigned int ioctl, unsigned long arg)
3855 {
3856 	struct kvm_vcpu *vcpu = filp->private_data;
3857 	void __user *argp = (void __user *)arg;
3858 	int idx;
3859 	long r;
3860 
3861 	vcpu_load(vcpu);
3862 
3863 	switch (ioctl) {
3864 	case KVM_S390_STORE_STATUS:
3865 		idx = srcu_read_lock(&vcpu->kvm->srcu);
3866 		r = kvm_s390_vcpu_store_status(vcpu, arg);
3867 		srcu_read_unlock(&vcpu->kvm->srcu, idx);
3868 		break;
3869 	case KVM_S390_SET_INITIAL_PSW: {
3870 		psw_t psw;
3871 
3872 		r = -EFAULT;
3873 		if (copy_from_user(&psw, argp, sizeof(psw)))
3874 			break;
3875 		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3876 		break;
3877 	}
3878 	case KVM_S390_INITIAL_RESET:
3879 		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3880 		break;
3881 	case KVM_SET_ONE_REG:
3882 	case KVM_GET_ONE_REG: {
3883 		struct kvm_one_reg reg;
3884 		r = -EFAULT;
3885 		if (copy_from_user(&reg, argp, sizeof(reg)))
3886 			break;
3887 		if (ioctl == KVM_SET_ONE_REG)
3888 			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3889 		else
3890 			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3891 		break;
3892 	}
3893 #ifdef CONFIG_KVM_S390_UCONTROL
3894 	case KVM_S390_UCAS_MAP: {
3895 		struct kvm_s390_ucas_mapping ucasmap;
3896 
3897 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3898 			r = -EFAULT;
3899 			break;
3900 		}
3901 
3902 		if (!kvm_is_ucontrol(vcpu->kvm)) {
3903 			r = -EINVAL;
3904 			break;
3905 		}
3906 
3907 		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3908 				     ucasmap.vcpu_addr, ucasmap.length);
3909 		break;
3910 	}
3911 	case KVM_S390_UCAS_UNMAP: {
3912 		struct kvm_s390_ucas_mapping ucasmap;
3913 
3914 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3915 			r = -EFAULT;
3916 			break;
3917 		}
3918 
3919 		if (!kvm_is_ucontrol(vcpu->kvm)) {
3920 			r = -EINVAL;
3921 			break;
3922 		}
3923 
3924 		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3925 			ucasmap.length);
3926 		break;
3927 	}
3928 #endif
3929 	case KVM_S390_VCPU_FAULT: {
3930 		r = gmap_fault(vcpu->arch.gmap, arg, 0);
3931 		break;
3932 	}
3933 	case KVM_ENABLE_CAP:
3934 	{
3935 		struct kvm_enable_cap cap;
3936 		r = -EFAULT;
3937 		if (copy_from_user(&cap, argp, sizeof(cap)))
3938 			break;
3939 		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3940 		break;
3941 	}
3942 	case KVM_S390_MEM_OP: {
3943 		struct kvm_s390_mem_op mem_op;
3944 
3945 		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3946 			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3947 		else
3948 			r = -EFAULT;
3949 		break;
3950 	}
3951 	case KVM_S390_SET_IRQ_STATE: {
3952 		struct kvm_s390_irq_state irq_state;
3953 
3954 		r = -EFAULT;
3955 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3956 			break;
3957 		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3958 		    irq_state.len == 0 ||
3959 		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3960 			r = -EINVAL;
3961 			break;
3962 		}
3963 		/* do not use irq_state.flags, it will break old QEMUs */
3964 		r = kvm_s390_set_irq_state(vcpu,
3965 					   (void __user *) irq_state.buf,
3966 					   irq_state.len);
3967 		break;
3968 	}
3969 	case KVM_S390_GET_IRQ_STATE: {
3970 		struct kvm_s390_irq_state irq_state;
3971 
3972 		r = -EFAULT;
3973 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3974 			break;
3975 		if (irq_state.len == 0) {
3976 			r = -EINVAL;
3977 			break;
3978 		}
3979 		/* do not use irq_state.flags, it will break old QEMUs */
3980 		r = kvm_s390_get_irq_state(vcpu,
3981 					   (__u8 __user *)  irq_state.buf,
3982 					   irq_state.len);
3983 		break;
3984 	}
3985 	default:
3986 		r = -ENOTTY;
3987 	}
3988 
3989 	vcpu_put(vcpu);
3990 	return r;
3991 }
3992 
3993 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3994 {
3995 #ifdef CONFIG_KVM_S390_UCONTROL
3996 	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3997 		 && (kvm_is_ucontrol(vcpu->kvm))) {
3998 		vmf->page = virt_to_page(vcpu->arch.sie_block);
3999 		get_page(vmf->page);
4000 		return 0;
4001 	}
4002 #endif
4003 	return VM_FAULT_SIGBUS;
4004 }
4005 
4006 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
4007 			    unsigned long npages)
4008 {
4009 	return 0;
4010 }
4011 
4012 /* Section: memory related */
4013 int kvm_arch_prepare_memory_region(struct kvm *kvm,
4014 				   struct kvm_memory_slot *memslot,
4015 				   const struct kvm_userspace_memory_region *mem,
4016 				   enum kvm_mr_change change)
4017 {
4018 	/* A few sanity checks. Memory slots have to start and end on a
4019 	   segment boundary (1MB). The memory in userland may be fragmented
4020 	   into various different vmas. It is okay to mmap() and munmap()
4021 	   memory in this slot at any time after doing this call */
4022 
4023 	if (mem->userspace_addr & 0xffffful)
4024 		return -EINVAL;
4025 
4026 	if (mem->memory_size & 0xffffful)
4027 		return -EINVAL;
4028 
4029 	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
4030 		return -EINVAL;
4031 
4032 	return 0;
4033 }
4034 
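/*
 * A minimal userspace sketch (kvm_vm_fd and backing are assumptions, with
 * backing being a 1MB-aligned anonymous mapping): a memslot that satisfies
 * the checks above, i.e. size and addresses aligned to the 1MB segment
 * boundary:
 *
 *	struct kvm_userspace_memory_region region = {
 *		.slot            = 0,
 *		.guest_phys_addr = 0,
 *		.memory_size     = 256UL << 20,		// 256MB
 *		.userspace_addr  = (unsigned long)backing,
 *	};
 *	ioctl(kvm_vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
 */
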
4035 void kvm_arch_commit_memory_region(struct kvm *kvm,
4036 				const struct kvm_userspace_memory_region *mem,
4037 				const struct kvm_memory_slot *old,
4038 				const struct kvm_memory_slot *new,
4039 				enum kvm_mr_change change)
4040 {
4041 	int rc;
4042 
4043 	/* If the basics of the memslot do not change, we do not want
4044 	 * to update the gmap. Every update causes several unnecessary
4045 	 * segment translation exceptions. This is usually handled just
4046 	 * fine by the normal fault handler + gmap, but it will also
4047 	 * cause faults on the prefix page of running guest CPUs.
4048 	 */
4049 	if (old->userspace_addr == mem->userspace_addr &&
4050 	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
4051 	    old->npages * PAGE_SIZE == mem->memory_size)
4052 		return;
4053 
4054 	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
4055 		mem->guest_phys_addr, mem->memory_size);
4056 	if (rc)
4057 		pr_warn("failed to commit memory region\n");
4058 	return;
4059 }
4060 
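/*
 * nonhyp_mask() extracts the i-th 2-bit field of sclp.hmfai (counted from
 * the most significant bit) and converts it into a mask of 48, 32, 16 or 0
 * low-order one bits; kvm_s390_init() below ANDs this mask with the host
 * facility list before merging it into kvm_s390_fac_base.
 */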
4061 static inline unsigned long nonhyp_mask(int i)
4062 {
4063 	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
4064 
4065 	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
4066 }
4067 
4068 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
4069 {
4070 	vcpu->valid_wakeup = false;
4071 }
4072 
4073 static int __init kvm_s390_init(void)
4074 {
4075 	int i;
4076 
4077 	if (!sclp.has_sief2) {
4078 		pr_info("SIE not available\n");
4079 		return -ENODEV;
4080 	}
4081 
4082 	for (i = 0; i < 16; i++)
4083 		kvm_s390_fac_base[i] |=
4084 			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
4085 
4086 	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
4087 }
4088 
4089 static void __exit kvm_s390_exit(void)
4090 {
4091 	kvm_exit();
4092 }
4093 
4094 module_init(kvm_s390_init);
4095 module_exit(kvm_s390_exit);
4096 
4097 /*
4098  * Enable autoloading of the kvm module.
4099  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
4100  * since x86 takes a different approach.
4101  */
4102 #include <linux/miscdevice.h>
4103 MODULE_ALIAS_MISCDEV(KVM_MINOR);
4104 MODULE_ALIAS("devname:kvm");
4105