xref: /openbmc/linux/arch/s390/kvm/kvm-s390.c (revision 9fb29c73)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * hosting IBM Z kernel virtual machines (s390x)
4  *
5  * Copyright IBM Corp. 2008, 2018
6  *
7  *    Author(s): Carsten Otte <cotte@de.ibm.com>
8  *               Christian Borntraeger <borntraeger@de.ibm.com>
9  *               Heiko Carstens <heiko.carstens@de.ibm.com>
10  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
11  *               Jason J. Herne <jjherne@us.ibm.com>
12  */
13 
14 #define KMSG_COMPONENT "kvm-s390"
15 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
16 
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/moduleparam.h>
27 #include <linux/random.h>
28 #include <linux/slab.h>
29 #include <linux/timer.h>
30 #include <linux/vmalloc.h>
31 #include <linux/bitmap.h>
32 #include <linux/sched/signal.h>
33 #include <linux/string.h>
34 
35 #include <asm/asm-offsets.h>
36 #include <asm/lowcore.h>
37 #include <asm/stp.h>
38 #include <asm/pgtable.h>
39 #include <asm/gmap.h>
40 #include <asm/nmi.h>
41 #include <asm/switch_to.h>
42 #include <asm/isc.h>
43 #include <asm/sclp.h>
44 #include <asm/cpacf.h>
45 #include <asm/timex.h>
46 #include <asm/ap.h>
47 #include "kvm-s390.h"
48 #include "gaccess.h"
49 
50 #define CREATE_TRACE_POINTS
51 #include "trace.h"
52 #include "trace-s390.h"
53 
#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
/* Byte size of a buffer holding (KVM_MAX_VCPUS + LOCAL_IRQS) kvm_s390_irq entries */
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))

/*
 * Helpers for the debugfs_entries[] table below. Each expands to TWO
 * initializer values: the offset of counter "x" inside the stat member of
 * struct kvm_vcpu (resp. struct kvm), followed by the matching
 * KVM_STAT_VCPU / KVM_STAT_VM kind.
 */
#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
61 
/*
 * Table of statistics counters. Every entry pairs a name string with the
 * offset/kind produced by VCPU_STAT() (per-VCPU counter) or VM_STAT()
 * (per-VM counter). The table is terminated by a { NULL } entry.
 * NOTE(review): presumably consumed by the generic KVM code to create the
 * debugfs files - the consumer is not part of this file.
 */
struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "userspace_handled", VCPU_STAT(exit_userspace) },
	{ "exit_null", VCPU_STAT(exit_null) },
	{ "exit_validity", VCPU_STAT(exit_validity) },
	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
	{ "exit_external_request", VCPU_STAT(exit_external_request) },
	{ "exit_io_request", VCPU_STAT(exit_io_request) },
	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
	{ "exit_instruction", VCPU_STAT(exit_instruction) },
	{ "exit_pei", VCPU_STAT(exit_pei) },
	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
	{ "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
	{ "deliver_ckc", VCPU_STAT(deliver_ckc) },
	{ "deliver_cputm", VCPU_STAT(deliver_cputm) },
	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
	{ "deliver_virtio", VCPU_STAT(deliver_virtio) },
	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
	{ "deliver_program", VCPU_STAT(deliver_program) },
	{ "deliver_io", VCPU_STAT(deliver_io) },
	{ "deliver_machine_check", VCPU_STAT(deliver_machine_check) },
	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
	{ "inject_ckc", VCPU_STAT(inject_ckc) },
	{ "inject_cputm", VCPU_STAT(inject_cputm) },
	{ "inject_external_call", VCPU_STAT(inject_external_call) },
	{ "inject_float_mchk", VM_STAT(inject_float_mchk) },
	{ "inject_emergency_signal", VCPU_STAT(inject_emergency_signal) },
	{ "inject_io", VM_STAT(inject_io) },
	{ "inject_mchk", VCPU_STAT(inject_mchk) },
	{ "inject_pfault_done", VM_STAT(inject_pfault_done) },
	{ "inject_program", VCPU_STAT(inject_program) },
	{ "inject_restart", VCPU_STAT(inject_restart) },
	{ "inject_service_signal", VM_STAT(inject_service_signal) },
	{ "inject_set_prefix", VCPU_STAT(inject_set_prefix) },
	{ "inject_stop_signal", VCPU_STAT(inject_stop_signal) },
	{ "inject_pfault_init", VCPU_STAT(inject_pfault_init) },
	{ "inject_virtio", VM_STAT(inject_virtio) },
	{ "instruction_epsw", VCPU_STAT(instruction_epsw) },
	{ "instruction_gs", VCPU_STAT(instruction_gs) },
	{ "instruction_io_other", VCPU_STAT(instruction_io_other) },
	{ "instruction_lpsw", VCPU_STAT(instruction_lpsw) },
	{ "instruction_lpswe", VCPU_STAT(instruction_lpswe) },
	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
	{ "instruction_ptff", VCPU_STAT(instruction_ptff) },
	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
	{ "instruction_sck", VCPU_STAT(instruction_sck) },
	{ "instruction_sckpf", VCPU_STAT(instruction_sckpf) },
	{ "instruction_spx", VCPU_STAT(instruction_spx) },
	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
	{ "instruction_stap", VCPU_STAT(instruction_stap) },
	{ "instruction_iske", VCPU_STAT(instruction_iske) },
	{ "instruction_ri", VCPU_STAT(instruction_ri) },
	{ "instruction_rrbe", VCPU_STAT(instruction_rrbe) },
	{ "instruction_sske", VCPU_STAT(instruction_sske) },
	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
	{ "instruction_essa", VCPU_STAT(instruction_essa) },
	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
	{ "instruction_tb", VCPU_STAT(instruction_tb) },
	{ "instruction_tpi", VCPU_STAT(instruction_tpi) },
	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
	{ "instruction_tsch", VCPU_STAT(instruction_tsch) },
	{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
	{ "instruction_sie", VCPU_STAT(instruction_sie) },
	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
	{ "instruction_diag_10", VCPU_STAT(diagnose_10) },
	{ "instruction_diag_44", VCPU_STAT(diagnose_44) },
	{ "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
	{ "instruction_diag_258", VCPU_STAT(diagnose_258) },
	{ "instruction_diag_308", VCPU_STAT(diagnose_308) },
	{ "instruction_diag_500", VCPU_STAT(diagnose_500) },
	{ "instruction_diag_other", VCPU_STAT(diagnose_other) },
	{ NULL }
};
163 
/*
 * Packed 16-byte record: one byte epoch index, a 64-bit TOD value and seven
 * reserved bytes. __packed keeps "tod" directly behind "epoch_idx" without
 * alignment padding.
 * NOTE(review): presumably the layout of an extended TOD clock store - the
 * user of this struct is outside this part of the file, confirm there.
 */
struct kvm_s390_tod_clock_ext {
	__u8 epoch_idx;
	__u64 tod;
	__u8 reserved[7];
} __packed;
169 
170 /* allow nested virtualization in KVM (if enabled by user space) */
171 static int nested;
172 module_param(nested, int, S_IRUGO);
173 MODULE_PARM_DESC(nested, "Nested virtualization support");
174 
175 /* allow 1m huge page guest backing, if !nested */
176 static int hpage;
177 module_param(hpage, int, 0444);
178 MODULE_PARM_DESC(hpage, "1m huge page backing support");
179 
/*
 * For now we handle at most 16 double words as this is what the s390 base
 * kernel handles and stores in the prefix page. If we ever need to go beyond
 * this, this requires changes to code, but the external uapi can stay.
 */
#define SIZE_INTERNAL 16

/*
 * Base feature mask that defines default mask for facilities. Consists of the
 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
 * Array elements not covered by the initializer are zero.
 */
static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
/*
 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
 * and defines the facilities that can be enabled via a cpu model.
 * Array elements not covered by the initializer are zero.
 */
static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
197 
198 static unsigned long kvm_s390_fac_size(void)
199 {
200 	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
201 	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
202 	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
203 		sizeof(S390_lowcore.stfle_fac_list));
204 
205 	return SIZE_INTERNAL;
206 }
207 
/* available cpu features supported by kvm; bits are set via allow_cpu_feat() */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit"; filled in kvm_s390_cpu_feat_init() */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

/* pte notifiers, set up and registered in kvm_arch_hardware_setup() */
static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
/* debug feature handle, registered in kvm_arch_init() */
debug_info_t *kvm_s390_dbf;
216 
217 /* Section: not file related */
/*
 * Arch hook for enabling virtualization support. Nothing has to be done
 * here, so this always returns 0.
 */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}
223 
224 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
225 			      unsigned long end);
226 
227 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
228 {
229 	u8 delta_idx = 0;
230 
231 	/*
232 	 * The TOD jumps by delta, we have to compensate this by adding
233 	 * -delta to the epoch.
234 	 */
235 	delta = -delta;
236 
237 	/* sign-extension - we're adding to signed values below */
238 	if ((s64)delta < 0)
239 		delta_idx = -1;
240 
241 	scb->epoch += delta;
242 	if (scb->ecd & ECD_MEF) {
243 		scb->epdx += delta_idx;
244 		if (scb->epoch < delta)
245 			scb->epdx += 1;
246 	}
247 }
248 
249 /*
250  * This callback is executed during stop_machine(). All CPUs are therefore
251  * temporarily stopped. In order not to change guest behavior, we have to
252  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
253  * so a CPU won't be stopped while calculating with the epoch.
254  */
255 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
256 			  void *v)
257 {
258 	struct kvm *kvm;
259 	struct kvm_vcpu *vcpu;
260 	int i;
261 	unsigned long long *delta = v;
262 
263 	list_for_each_entry(kvm, &vm_list, vm_list) {
264 		kvm_for_each_vcpu(i, vcpu, kvm) {
265 			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
266 			if (i == 0) {
267 				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
268 				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
269 			}
270 			if (vcpu->arch.cputm_enabled)
271 				vcpu->arch.cputm_start += *delta;
272 			if (vcpu->arch.vsie_block)
273 				kvm_clock_sync_scb(vcpu->arch.vsie_block,
274 						   *delta);
275 		}
276 	}
277 	return NOTIFY_OK;
278 }
279 
/*
 * Notifier block that calls kvm_clock_sync(). It is added to the
 * s390_epoch_delta_notifier chain in kvm_arch_hardware_setup() and removed
 * again in kvm_arch_hardware_unsetup().
 */
static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};
283 
284 int kvm_arch_hardware_setup(void)
285 {
286 	gmap_notifier.notifier_call = kvm_gmap_notifier;
287 	gmap_register_pte_notifier(&gmap_notifier);
288 	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
289 	gmap_register_pte_notifier(&vsie_gmap_notifier);
290 	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
291 				       &kvm_clock_notifier);
292 	return 0;
293 }
294 
/*
 * Counterpart of kvm_arch_hardware_setup(): unregisters both gmap pte
 * notifiers and removes kvm_clock_notifier from the epoch delta chain.
 */
void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_pte_notifier(&gmap_notifier);
	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);
}
302 
/* Mark cpu feature @nr as available in kvm_s390_available_cpu_feat. */
static void allow_cpu_feat(unsigned long nr)
{
	set_bit_inv(nr, kvm_s390_available_cpu_feat);
}
307 
/*
 * Issue PLO in "test bit" mode for function code @nr.
 *
 * Register 0 is loaded with @nr | 0x100, the PLO instruction is executed and
 * the resulting condition code is extracted (ipm + shift by 28).
 *
 * Returns 1 if the condition code is 0, otherwise 0.
 */
static inline int plo_test_bit(unsigned char nr)
{
	/* bound to general register 0, which carries the function code */
	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
	int cc;

	asm volatile(
		/* Parameter registers are ignored for "test bit" */
		"	plo	0,0,0,0(0)\n"
		"	ipm	%0\n"
		"	srl	%0,28\n"
		: "=d" (cc)
		: "d" (r0)
		: "cc");
	return cc == 0;
}
323 
/*
 * Fill kvm_s390_available_subfunc and kvm_s390_available_cpu_feat.
 *
 * First the subfunction query results (PLO, PTFF and the CPACF instructions)
 * are collected, each guarded by the facility bit tested in front of it.
 * Then the cpu features are enabled: ESOP depends only on the machine, all
 * remaining features are offered only if the prerequisites for nested
 * virtualization are met and the "nested" module parameter is set.
 */
static void kvm_s390_cpu_feat_init(void)
{
	int i;

	/* function code i is stored MSB first: byte i / 8, mask 0x80 >> (i % 8) */
	for (i = 0; i < 256; ++i) {
		if (plo_test_bit(i))
			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
	}

	if (test_facility(28)) /* TOD-clock steering */
		ptff(kvm_s390_available_subfunc.ptff,
		     sizeof(kvm_s390_available_subfunc.ptff),
		     PTFF_QAF);

	if (test_facility(17)) { /* MSA */
		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmac);
		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmc);
		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.km);
		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kimd);
		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.klmd);
	}
	if (test_facility(76)) /* MSA3 */
		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pckmo);
	if (test_facility(77)) { /* MSA4 */
		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmctr);
		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmf);
		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmo);
		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pcc);
	}
	if (test_facility(57)) /* MSA5 */
		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.ppno);

	if (test_facility(146)) /* MSA8 */
		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kma);

	if (MACHINE_HAS_ESOP)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
	/*
	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
	 */
	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
	    !test_facility(3) || !nested)
		return;
	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
	/* has_64bscao is already known to be set here, see the check above */
	if (sclp.has_64bscao)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
	if (sclp.has_siif)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
	if (sclp.has_gpere)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
	if (sclp.has_gsls)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
	if (sclp.has_ib)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
	if (sclp.has_cei)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
	if (sclp.has_ibs)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
	if (sclp.has_kss)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
	/*
	 * The features below are intentionally NOT enabled:
	 *
	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
	 * all skey handling functions read/set the skey from the PGSTE
	 * instead of the real storage key.
	 *
	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
	 * pages being detected as preserved although they are resident.
	 *
	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
	 *
	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
	 *
	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
	 * cannot easily shadow the SCA because of the ipte lock.
	 */
}
416 
417 int kvm_arch_init(void *opaque)
418 {
419 	int rc;
420 
421 	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
422 	if (!kvm_s390_dbf)
423 		return -ENOMEM;
424 
425 	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
426 		rc = -ENOMEM;
427 		goto out_debug_unreg;
428 	}
429 
430 	kvm_s390_cpu_feat_init();
431 
432 	/* Register floating interrupt controller interface. */
433 	rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
434 	if (rc) {
435 		pr_err("Failed to register FLIC rc=%d\n", rc);
436 		goto out_debug_unreg;
437 	}
438 	return 0;
439 
440 out_debug_unreg:
441 	debug_unregister(kvm_s390_dbf);
442 	return rc;
443 }
444 
/* Unregister the debug feature that was registered in kvm_arch_init(). */
void kvm_arch_exit(void)
{
	debug_unregister(kvm_s390_dbf);
}
449 
450 /* Section: device related */
451 long kvm_arch_dev_ioctl(struct file *filp,
452 			unsigned int ioctl, unsigned long arg)
453 {
454 	if (ioctl == KVM_S390_ENABLE_SIE)
455 		return s390_enable_sie();
456 	return -EINVAL;
457 }
458 
459 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
460 {
461 	int r;
462 
463 	switch (ext) {
464 	case KVM_CAP_S390_PSW:
465 	case KVM_CAP_S390_GMAP:
466 	case KVM_CAP_SYNC_MMU:
467 #ifdef CONFIG_KVM_S390_UCONTROL
468 	case KVM_CAP_S390_UCONTROL:
469 #endif
470 	case KVM_CAP_ASYNC_PF:
471 	case KVM_CAP_SYNC_REGS:
472 	case KVM_CAP_ONE_REG:
473 	case KVM_CAP_ENABLE_CAP:
474 	case KVM_CAP_S390_CSS_SUPPORT:
475 	case KVM_CAP_IOEVENTFD:
476 	case KVM_CAP_DEVICE_CTRL:
477 	case KVM_CAP_S390_IRQCHIP:
478 	case KVM_CAP_VM_ATTRIBUTES:
479 	case KVM_CAP_MP_STATE:
480 	case KVM_CAP_IMMEDIATE_EXIT:
481 	case KVM_CAP_S390_INJECT_IRQ:
482 	case KVM_CAP_S390_USER_SIGP:
483 	case KVM_CAP_S390_USER_STSI:
484 	case KVM_CAP_S390_SKEYS:
485 	case KVM_CAP_S390_IRQ_STATE:
486 	case KVM_CAP_S390_USER_INSTR0:
487 	case KVM_CAP_S390_CMMA_MIGRATION:
488 	case KVM_CAP_S390_AIS:
489 	case KVM_CAP_S390_AIS_MIGRATION:
490 		r = 1;
491 		break;
492 	case KVM_CAP_S390_HPAGE_1M:
493 		r = 0;
494 		if (hpage && !kvm_is_ucontrol(kvm))
495 			r = 1;
496 		break;
497 	case KVM_CAP_S390_MEM_OP:
498 		r = MEM_OP_MAX_SIZE;
499 		break;
500 	case KVM_CAP_NR_VCPUS:
501 	case KVM_CAP_MAX_VCPUS:
502 		r = KVM_S390_BSCA_CPU_SLOTS;
503 		if (!kvm_s390_use_sca_entries())
504 			r = KVM_MAX_VCPUS;
505 		else if (sclp.has_esca && sclp.has_64bscao)
506 			r = KVM_S390_ESCA_CPU_SLOTS;
507 		break;
508 	case KVM_CAP_NR_MEMSLOTS:
509 		r = KVM_USER_MEM_SLOTS;
510 		break;
511 	case KVM_CAP_S390_COW:
512 		r = MACHINE_HAS_ESOP;
513 		break;
514 	case KVM_CAP_S390_VECTOR_REGISTERS:
515 		r = MACHINE_HAS_VX;
516 		break;
517 	case KVM_CAP_S390_RI:
518 		r = test_facility(64);
519 		break;
520 	case KVM_CAP_S390_GS:
521 		r = test_facility(133);
522 		break;
523 	case KVM_CAP_S390_BPB:
524 		r = test_facility(82);
525 		break;
526 	default:
527 		r = 0;
528 	}
529 	return r;
530 }
531 
532 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
533 				    struct kvm_memory_slot *memslot)
534 {
535 	int i;
536 	gfn_t cur_gfn, last_gfn;
537 	unsigned long gaddr, vmaddr;
538 	struct gmap *gmap = kvm->arch.gmap;
539 	DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
540 
541 	/* Loop over all guest segments */
542 	cur_gfn = memslot->base_gfn;
543 	last_gfn = memslot->base_gfn + memslot->npages;
544 	for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
545 		gaddr = gfn_to_gpa(cur_gfn);
546 		vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
547 		if (kvm_is_error_hva(vmaddr))
548 			continue;
549 
550 		bitmap_zero(bitmap, _PAGE_ENTRIES);
551 		gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
552 		for (i = 0; i < _PAGE_ENTRIES; i++) {
553 			if (test_bit(i, bitmap))
554 				mark_page_dirty(kvm, cur_gfn + i);
555 		}
556 
557 		if (fatal_signal_pending(current))
558 			return;
559 		cond_resched();
560 	}
561 }
562 
563 /* Section: vm related */
564 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
565 
566 /*
567  * Get (and clear) the dirty memory log for a memory slot.
568  */
569 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
570 			       struct kvm_dirty_log *log)
571 {
572 	int r;
573 	unsigned long n;
574 	struct kvm_memslots *slots;
575 	struct kvm_memory_slot *memslot;
576 	int is_dirty = 0;
577 
578 	if (kvm_is_ucontrol(kvm))
579 		return -EINVAL;
580 
581 	mutex_lock(&kvm->slots_lock);
582 
583 	r = -EINVAL;
584 	if (log->slot >= KVM_USER_MEM_SLOTS)
585 		goto out;
586 
587 	slots = kvm_memslots(kvm);
588 	memslot = id_to_memslot(slots, log->slot);
589 	r = -ENOENT;
590 	if (!memslot->dirty_bitmap)
591 		goto out;
592 
593 	kvm_s390_sync_dirty_log(kvm, memslot);
594 	r = kvm_get_dirty_log(kvm, log, &is_dirty);
595 	if (r)
596 		goto out;
597 
598 	/* Clear the dirty log */
599 	if (is_dirty) {
600 		n = kvm_dirty_bitmap_bytes(memslot);
601 		memset(memslot->dirty_bitmap, 0, n);
602 	}
603 	r = 0;
604 out:
605 	mutex_unlock(&kvm->slots_lock);
606 	return r;
607 }
608 
609 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
610 {
611 	unsigned int i;
612 	struct kvm_vcpu *vcpu;
613 
614 	kvm_for_each_vcpu(i, vcpu, kvm) {
615 		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
616 	}
617 }
618 
/*
 * Enable a VM capability requested by user space.
 *
 * Returns 0 on success, -EINVAL if flags are set, the capability is unknown
 * or its prerequisites are not met, and -EBUSY if a capability that changes
 * the facility model is requested after VCPUs have been created.
 *
 * Capabilities that modify the facility mask/list or the memory backing are
 * handled under kvm->lock so that the created_vcpus check and the change
 * happen together.
 */
int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	int r;

	/* no flags are defined for any of the capabilities below */
	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_IRQCHIP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
		kvm->arch.use_irqchip = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_SIGP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
		kvm->arch.user_sigp = 1;
		r = 0;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (MACHINE_HAS_VX) {
			set_kvm_facility(kvm->arch.model.fac_mask, 129);
			set_kvm_facility(kvm->arch.model.fac_list, 129);
			/* facilities 134 and 135 are added only if the host has them */
			if (test_facility(134)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 134);
				set_kvm_facility(kvm->arch.model.fac_list, 134);
			}
			if (test_facility(135)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 135);
				set_kvm_facility(kvm->arch.model.fac_list, 135);
			}
			r = 0;
		} else
			r = -EINVAL;
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_RI:
		/* stays -EINVAL if the host lacks facility 64 */
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(64)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 64);
			set_kvm_facility(kvm->arch.model.fac_list, 64);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_AIS:
		/* facility 72 is set without testing a host facility */
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else {
			set_kvm_facility(kvm->arch.model.fac_mask, 72);
			set_kvm_facility(kvm->arch.model.fac_list, 72);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_GS:
		/* stays -EINVAL if the host lacks facility 133 */
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(133)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 133);
			set_kvm_facility(kvm->arch.model.fac_list, 133);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_HPAGE_1M:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus)
			r = -EBUSY;
		/* needs the hpage module parameter; excludes cmma and ucontrol VMs */
		else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
			r = -EINVAL;
		else {
			r = 0;
			down_write(&kvm->mm->mmap_sem);
			kvm->mm->context.allow_gmap_hpage_1m = 1;
			up_write(&kvm->mm->mmap_sem);
			/*
			 * We might have to create fake 4k page
			 * tables. To avoid that the hardware works on
			 * stale PGSTEs, we emulate these instructions.
			 */
			kvm->arch.use_skf = 0;
			kvm->arch.use_pfmfi = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_USER_STSI:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
		kvm->arch.user_stsi = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_INSTR0:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
		kvm->arch.user_instr0 = 1;
		/* notify VCPUs that already exist */
		icpt_operexc_on_all_vcpus(kvm);
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
740 
741 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
742 {
743 	int ret;
744 
745 	switch (attr->attr) {
746 	case KVM_S390_VM_MEM_LIMIT_SIZE:
747 		ret = 0;
748 		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
749 			 kvm->arch.mem_limit);
750 		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
751 			ret = -EFAULT;
752 		break;
753 	default:
754 		ret = -ENXIO;
755 		break;
756 	}
757 	return ret;
758 }
759 
/*
 * Change a memory control attribute of the VM.
 *
 * KVM_S390_VM_MEM_ENABLE_CMMA: switch on CMMA; only before the first VCPU
 *	is created and not together with 1m huge page backing.
 * KVM_S390_VM_MEM_CLR_CMMA: reset the CMMA states of the guest.
 * KVM_S390_VM_MEM_LIMIT_SIZE: replace the gmap by a new one created for the
 *	limit read from user space; only before the first VCPU is created.
 *
 * Returns 0 on success or a negative error code: -ENXIO for unknown
 * attributes or missing CMMA support, -EBUSY if VCPUs already exist,
 * -EINVAL, -EFAULT, -E2BIG or -ENOMEM as noted at the individual checks.
 */
static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;
	unsigned int idx;
	switch (attr->attr) {
	case KVM_S390_VM_MEM_ENABLE_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus)
			ret = -EBUSY;
		/* cmma and 1m huge page backing exclude each other */
		else if (kvm->mm->context.allow_gmap_hpage_1m)
			ret = -EINVAL;
		else {
			kvm->arch.use_cmma = 1;
			/* Not compatible with cmma. */
			kvm->arch.use_pfmfi = 0;
			ret = 0;
		}
		mutex_unlock(&kvm->lock);
		break;
	case KVM_S390_VM_MEM_CLR_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;
		/* nothing to reset if cmma was never enabled for this VM */
		ret = -EINVAL;
		if (!kvm->arch.use_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
		mutex_lock(&kvm->lock);
		idx = srcu_read_lock(&kvm->srcu);
		s390_reset_cmma(kvm->arch.gmap->mm);
		srcu_read_unlock(&kvm->srcu, idx);
		mutex_unlock(&kvm->lock);
		ret = 0;
		break;
	case KVM_S390_VM_MEM_LIMIT_SIZE: {
		unsigned long new_limit;

		/* not supported for ucontrol VMs */
		if (kvm_is_ucontrol(kvm))
			return -EINVAL;

		if (get_user(new_limit, (u64 __user *)attr->addr))
			return -EFAULT;

		/* an existing limit can only be lowered, never raised */
		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
		    new_limit > kvm->arch.mem_limit)
			return -E2BIG;

		if (!new_limit)
			return -EINVAL;

		/* gmap_create takes last usable address */
		if (new_limit != KVM_S390_NO_MEM_LIMIT)
			new_limit -= 1;

		/* stays -EBUSY if VCPUs have already been created */
		ret = -EBUSY;
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			/* gmap_create will round the limit up */
			struct gmap *new = gmap_create(current->mm, new_limit);

			if (!new) {
				ret = -ENOMEM;
			} else {
				/* drop the old gmap and install the new one */
				gmap_remove(kvm->arch.gmap);
				new->private = kvm;
				kvm->arch.gmap = new;
				ret = 0;
			}
		}
		mutex_unlock(&kvm->lock);
		/* both events are logged regardless of the value of ret */
		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
			 (void *) kvm->arch.gmap->asce);
		break;
	}
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
847 
848 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
849 
850 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
851 {
852 	struct kvm_vcpu *vcpu;
853 	int i;
854 
855 	kvm_s390_vcpu_block_all(kvm);
856 
857 	kvm_for_each_vcpu(i, vcpu, kvm) {
858 		kvm_s390_vcpu_crypto_setup(vcpu);
859 		/* recreate the shadow crycb by leaving the VSIE handler */
860 		kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
861 	}
862 
863 	kvm_s390_vcpu_unblock_all(kvm);
864 }
865 
866 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
867 {
868 	mutex_lock(&kvm->lock);
869 	switch (attr->attr) {
870 	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
871 		if (!test_kvm_facility(kvm, 76)) {
872 			mutex_unlock(&kvm->lock);
873 			return -EINVAL;
874 		}
875 		get_random_bytes(
876 			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
877 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
878 		kvm->arch.crypto.aes_kw = 1;
879 		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
880 		break;
881 	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
882 		if (!test_kvm_facility(kvm, 76)) {
883 			mutex_unlock(&kvm->lock);
884 			return -EINVAL;
885 		}
886 		get_random_bytes(
887 			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
888 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
889 		kvm->arch.crypto.dea_kw = 1;
890 		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
891 		break;
892 	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
893 		if (!test_kvm_facility(kvm, 76)) {
894 			mutex_unlock(&kvm->lock);
895 			return -EINVAL;
896 		}
897 		kvm->arch.crypto.aes_kw = 0;
898 		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
899 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
900 		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
901 		break;
902 	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
903 		if (!test_kvm_facility(kvm, 76)) {
904 			mutex_unlock(&kvm->lock);
905 			return -EINVAL;
906 		}
907 		kvm->arch.crypto.dea_kw = 0;
908 		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
909 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
910 		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
911 		break;
912 	case KVM_S390_VM_CRYPTO_ENABLE_APIE:
913 		if (!ap_instructions_available()) {
914 			mutex_unlock(&kvm->lock);
915 			return -EOPNOTSUPP;
916 		}
917 		kvm->arch.crypto.apie = 1;
918 		break;
919 	case KVM_S390_VM_CRYPTO_DISABLE_APIE:
920 		if (!ap_instructions_available()) {
921 			mutex_unlock(&kvm->lock);
922 			return -EOPNOTSUPP;
923 		}
924 		kvm->arch.crypto.apie = 0;
925 		break;
926 	default:
927 		mutex_unlock(&kvm->lock);
928 		return -ENXIO;
929 	}
930 
931 	kvm_s390_vcpu_crypto_reset_all(kvm);
932 	mutex_unlock(&kvm->lock);
933 	return 0;
934 }
935 
936 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
937 {
938 	int cx;
939 	struct kvm_vcpu *vcpu;
940 
941 	kvm_for_each_vcpu(cx, vcpu, kvm)
942 		kvm_s390_sync_request(req, vcpu);
943 }
944 
945 /*
946  * Must be called with kvm->srcu held to avoid races on memslots, and with
947  * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
948  */
949 static int kvm_s390_vm_start_migration(struct kvm *kvm)
950 {
951 	struct kvm_memory_slot *ms;
952 	struct kvm_memslots *slots;
953 	unsigned long ram_pages = 0;
954 	int slotnr;
955 
956 	/* migration mode already enabled */
957 	if (kvm->arch.migration_mode)
958 		return 0;
959 	slots = kvm_memslots(kvm);
960 	if (!slots || !slots->used_slots)
961 		return -EINVAL;
962 
963 	if (!kvm->arch.use_cmma) {
964 		kvm->arch.migration_mode = 1;
965 		return 0;
966 	}
967 	/* mark all the pages in active slots as dirty */
968 	for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
969 		ms = slots->memslots + slotnr;
970 		/*
971 		 * The second half of the bitmap is only used on x86,
972 		 * and would be wasted otherwise, so we put it to good
973 		 * use here to keep track of the state of the storage
974 		 * attributes.
975 		 */
976 		memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
977 		ram_pages += ms->npages;
978 	}
979 	atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
980 	kvm->arch.migration_mode = 1;
981 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
982 	return 0;
983 }
984 
985 /*
986  * Must be called with kvm->slots_lock to avoid races with ourselves and
987  * kvm_s390_vm_start_migration.
988  */
989 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
990 {
991 	/* migration mode already disabled */
992 	if (!kvm->arch.migration_mode)
993 		return 0;
994 	kvm->arch.migration_mode = 0;
995 	if (kvm->arch.use_cmma)
996 		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
997 	return 0;
998 }
999 
1000 static int kvm_s390_vm_set_migration(struct kvm *kvm,
1001 				     struct kvm_device_attr *attr)
1002 {
1003 	int res = -ENXIO;
1004 
1005 	mutex_lock(&kvm->slots_lock);
1006 	switch (attr->attr) {
1007 	case KVM_S390_VM_MIGRATION_START:
1008 		res = kvm_s390_vm_start_migration(kvm);
1009 		break;
1010 	case KVM_S390_VM_MIGRATION_STOP:
1011 		res = kvm_s390_vm_stop_migration(kvm);
1012 		break;
1013 	default:
1014 		break;
1015 	}
1016 	mutex_unlock(&kvm->slots_lock);
1017 
1018 	return res;
1019 }
1020 
1021 static int kvm_s390_vm_get_migration(struct kvm *kvm,
1022 				     struct kvm_device_attr *attr)
1023 {
1024 	u64 mig = kvm->arch.migration_mode;
1025 
1026 	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1027 		return -ENXIO;
1028 
1029 	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
1030 		return -EFAULT;
1031 	return 0;
1032 }
1033 
1034 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1035 {
1036 	struct kvm_s390_vm_tod_clock gtod;
1037 
1038 	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1039 		return -EFAULT;
1040 
1041 	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1042 		return -EINVAL;
1043 	kvm_s390_set_tod_clock(kvm, &gtod);
1044 
1045 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1046 		gtod.epoch_idx, gtod.tod);
1047 
1048 	return 0;
1049 }
1050 
1051 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1052 {
1053 	u8 gtod_high;
1054 
1055 	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1056 					   sizeof(gtod_high)))
1057 		return -EFAULT;
1058 
1059 	if (gtod_high != 0)
1060 		return -EINVAL;
1061 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1062 
1063 	return 0;
1064 }
1065 
1066 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1067 {
1068 	struct kvm_s390_vm_tod_clock gtod = { 0 };
1069 
1070 	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1071 			   sizeof(gtod.tod)))
1072 		return -EFAULT;
1073 
1074 	kvm_s390_set_tod_clock(kvm, &gtod);
1075 	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1076 	return 0;
1077 }
1078 
1079 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1080 {
1081 	int ret;
1082 
1083 	if (attr->flags)
1084 		return -EINVAL;
1085 
1086 	switch (attr->attr) {
1087 	case KVM_S390_VM_TOD_EXT:
1088 		ret = kvm_s390_set_tod_ext(kvm, attr);
1089 		break;
1090 	case KVM_S390_VM_TOD_HIGH:
1091 		ret = kvm_s390_set_tod_high(kvm, attr);
1092 		break;
1093 	case KVM_S390_VM_TOD_LOW:
1094 		ret = kvm_s390_set_tod_low(kvm, attr);
1095 		break;
1096 	default:
1097 		ret = -ENXIO;
1098 		break;
1099 	}
1100 	return ret;
1101 }
1102 
1103 static void kvm_s390_get_tod_clock(struct kvm *kvm,
1104 				   struct kvm_s390_vm_tod_clock *gtod)
1105 {
1106 	struct kvm_s390_tod_clock_ext htod;
1107 
1108 	preempt_disable();
1109 
1110 	get_tod_clock_ext((char *)&htod);
1111 
1112 	gtod->tod = htod.tod + kvm->arch.epoch;
1113 	gtod->epoch_idx = 0;
1114 	if (test_kvm_facility(kvm, 139)) {
1115 		gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
1116 		if (gtod->tod < htod.tod)
1117 			gtod->epoch_idx += 1;
1118 	}
1119 
1120 	preempt_enable();
1121 }
1122 
1123 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1124 {
1125 	struct kvm_s390_vm_tod_clock gtod;
1126 
1127 	memset(&gtod, 0, sizeof(gtod));
1128 	kvm_s390_get_tod_clock(kvm, &gtod);
1129 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1130 		return -EFAULT;
1131 
1132 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1133 		gtod.epoch_idx, gtod.tod);
1134 	return 0;
1135 }
1136 
1137 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1138 {
1139 	u8 gtod_high = 0;
1140 
1141 	if (copy_to_user((void __user *)attr->addr, &gtod_high,
1142 					 sizeof(gtod_high)))
1143 		return -EFAULT;
1144 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1145 
1146 	return 0;
1147 }
1148 
1149 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1150 {
1151 	u64 gtod;
1152 
1153 	gtod = kvm_s390_get_tod_clock_fast(kvm);
1154 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1155 		return -EFAULT;
1156 	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1157 
1158 	return 0;
1159 }
1160 
1161 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1162 {
1163 	int ret;
1164 
1165 	if (attr->flags)
1166 		return -EINVAL;
1167 
1168 	switch (attr->attr) {
1169 	case KVM_S390_VM_TOD_EXT:
1170 		ret = kvm_s390_get_tod_ext(kvm, attr);
1171 		break;
1172 	case KVM_S390_VM_TOD_HIGH:
1173 		ret = kvm_s390_get_tod_high(kvm, attr);
1174 		break;
1175 	case KVM_S390_VM_TOD_LOW:
1176 		ret = kvm_s390_get_tod_low(kvm, attr);
1177 		break;
1178 	default:
1179 		ret = -ENXIO;
1180 		break;
1181 	}
1182 	return ret;
1183 }
1184 
1185 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1186 {
1187 	struct kvm_s390_vm_cpu_processor *proc;
1188 	u16 lowest_ibc, unblocked_ibc;
1189 	int ret = 0;
1190 
1191 	mutex_lock(&kvm->lock);
1192 	if (kvm->created_vcpus) {
1193 		ret = -EBUSY;
1194 		goto out;
1195 	}
1196 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1197 	if (!proc) {
1198 		ret = -ENOMEM;
1199 		goto out;
1200 	}
1201 	if (!copy_from_user(proc, (void __user *)attr->addr,
1202 			    sizeof(*proc))) {
1203 		kvm->arch.model.cpuid = proc->cpuid;
1204 		lowest_ibc = sclp.ibc >> 16 & 0xfff;
1205 		unblocked_ibc = sclp.ibc & 0xfff;
1206 		if (lowest_ibc && proc->ibc) {
1207 			if (proc->ibc > unblocked_ibc)
1208 				kvm->arch.model.ibc = unblocked_ibc;
1209 			else if (proc->ibc < lowest_ibc)
1210 				kvm->arch.model.ibc = lowest_ibc;
1211 			else
1212 				kvm->arch.model.ibc = proc->ibc;
1213 		}
1214 		memcpy(kvm->arch.model.fac_list, proc->fac_list,
1215 		       S390_ARCH_FAC_LIST_SIZE_BYTE);
1216 		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1217 			 kvm->arch.model.ibc,
1218 			 kvm->arch.model.cpuid);
1219 		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1220 			 kvm->arch.model.fac_list[0],
1221 			 kvm->arch.model.fac_list[1],
1222 			 kvm->arch.model.fac_list[2]);
1223 	} else
1224 		ret = -EFAULT;
1225 	kfree(proc);
1226 out:
1227 	mutex_unlock(&kvm->lock);
1228 	return ret;
1229 }
1230 
1231 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1232 				       struct kvm_device_attr *attr)
1233 {
1234 	struct kvm_s390_vm_cpu_feat data;
1235 
1236 	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1237 		return -EFAULT;
1238 	if (!bitmap_subset((unsigned long *) data.feat,
1239 			   kvm_s390_available_cpu_feat,
1240 			   KVM_S390_VM_CPU_FEAT_NR_BITS))
1241 		return -EINVAL;
1242 
1243 	mutex_lock(&kvm->lock);
1244 	if (kvm->created_vcpus) {
1245 		mutex_unlock(&kvm->lock);
1246 		return -EBUSY;
1247 	}
1248 	bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1249 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1250 	mutex_unlock(&kvm->lock);
1251 	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1252 			 data.feat[0],
1253 			 data.feat[1],
1254 			 data.feat[2]);
1255 	return 0;
1256 }
1257 
1258 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1259 					  struct kvm_device_attr *attr)
1260 {
1261 	/*
1262 	 * Once supported by kernel + hw, we have to store the subfunctions
1263 	 * in kvm->arch and remember that user space configured them.
1264 	 */
1265 	return -ENXIO;
1266 }
1267 
1268 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1269 {
1270 	int ret = -ENXIO;
1271 
1272 	switch (attr->attr) {
1273 	case KVM_S390_VM_CPU_PROCESSOR:
1274 		ret = kvm_s390_set_processor(kvm, attr);
1275 		break;
1276 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1277 		ret = kvm_s390_set_processor_feat(kvm, attr);
1278 		break;
1279 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1280 		ret = kvm_s390_set_processor_subfunc(kvm, attr);
1281 		break;
1282 	}
1283 	return ret;
1284 }
1285 
1286 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1287 {
1288 	struct kvm_s390_vm_cpu_processor *proc;
1289 	int ret = 0;
1290 
1291 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1292 	if (!proc) {
1293 		ret = -ENOMEM;
1294 		goto out;
1295 	}
1296 	proc->cpuid = kvm->arch.model.cpuid;
1297 	proc->ibc = kvm->arch.model.ibc;
1298 	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1299 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1300 	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1301 		 kvm->arch.model.ibc,
1302 		 kvm->arch.model.cpuid);
1303 	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1304 		 kvm->arch.model.fac_list[0],
1305 		 kvm->arch.model.fac_list[1],
1306 		 kvm->arch.model.fac_list[2]);
1307 	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1308 		ret = -EFAULT;
1309 	kfree(proc);
1310 out:
1311 	return ret;
1312 }
1313 
1314 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1315 {
1316 	struct kvm_s390_vm_cpu_machine *mach;
1317 	int ret = 0;
1318 
1319 	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1320 	if (!mach) {
1321 		ret = -ENOMEM;
1322 		goto out;
1323 	}
1324 	get_cpu_id((struct cpuid *) &mach->cpuid);
1325 	mach->ibc = sclp.ibc;
1326 	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1327 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1328 	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1329 	       sizeof(S390_lowcore.stfle_fac_list));
1330 	VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1331 		 kvm->arch.model.ibc,
1332 		 kvm->arch.model.cpuid);
1333 	VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1334 		 mach->fac_mask[0],
1335 		 mach->fac_mask[1],
1336 		 mach->fac_mask[2]);
1337 	VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1338 		 mach->fac_list[0],
1339 		 mach->fac_list[1],
1340 		 mach->fac_list[2]);
1341 	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1342 		ret = -EFAULT;
1343 	kfree(mach);
1344 out:
1345 	return ret;
1346 }
1347 
1348 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1349 				       struct kvm_device_attr *attr)
1350 {
1351 	struct kvm_s390_vm_cpu_feat data;
1352 
1353 	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1354 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1355 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1356 		return -EFAULT;
1357 	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1358 			 data.feat[0],
1359 			 data.feat[1],
1360 			 data.feat[2]);
1361 	return 0;
1362 }
1363 
1364 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1365 				     struct kvm_device_attr *attr)
1366 {
1367 	struct kvm_s390_vm_cpu_feat data;
1368 
1369 	bitmap_copy((unsigned long *) data.feat,
1370 		    kvm_s390_available_cpu_feat,
1371 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1372 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1373 		return -EFAULT;
1374 	VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1375 			 data.feat[0],
1376 			 data.feat[1],
1377 			 data.feat[2]);
1378 	return 0;
1379 }
1380 
1381 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1382 					  struct kvm_device_attr *attr)
1383 {
1384 	/*
1385 	 * Once we can actually configure subfunctions (kernel + hw support),
1386 	 * we have to check if they were already set by user space, if so copy
1387 	 * them from kvm->arch.
1388 	 */
1389 	return -ENXIO;
1390 }
1391 
1392 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1393 					struct kvm_device_attr *attr)
1394 {
1395 	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1396 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1397 		return -EFAULT;
1398 	return 0;
1399 }
1400 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1401 {
1402 	int ret = -ENXIO;
1403 
1404 	switch (attr->attr) {
1405 	case KVM_S390_VM_CPU_PROCESSOR:
1406 		ret = kvm_s390_get_processor(kvm, attr);
1407 		break;
1408 	case KVM_S390_VM_CPU_MACHINE:
1409 		ret = kvm_s390_get_machine(kvm, attr);
1410 		break;
1411 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1412 		ret = kvm_s390_get_processor_feat(kvm, attr);
1413 		break;
1414 	case KVM_S390_VM_CPU_MACHINE_FEAT:
1415 		ret = kvm_s390_get_machine_feat(kvm, attr);
1416 		break;
1417 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1418 		ret = kvm_s390_get_processor_subfunc(kvm, attr);
1419 		break;
1420 	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1421 		ret = kvm_s390_get_machine_subfunc(kvm, attr);
1422 		break;
1423 	}
1424 	return ret;
1425 }
1426 
1427 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1428 {
1429 	int ret;
1430 
1431 	switch (attr->group) {
1432 	case KVM_S390_VM_MEM_CTRL:
1433 		ret = kvm_s390_set_mem_control(kvm, attr);
1434 		break;
1435 	case KVM_S390_VM_TOD:
1436 		ret = kvm_s390_set_tod(kvm, attr);
1437 		break;
1438 	case KVM_S390_VM_CPU_MODEL:
1439 		ret = kvm_s390_set_cpu_model(kvm, attr);
1440 		break;
1441 	case KVM_S390_VM_CRYPTO:
1442 		ret = kvm_s390_vm_set_crypto(kvm, attr);
1443 		break;
1444 	case KVM_S390_VM_MIGRATION:
1445 		ret = kvm_s390_vm_set_migration(kvm, attr);
1446 		break;
1447 	default:
1448 		ret = -ENXIO;
1449 		break;
1450 	}
1451 
1452 	return ret;
1453 }
1454 
1455 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1456 {
1457 	int ret;
1458 
1459 	switch (attr->group) {
1460 	case KVM_S390_VM_MEM_CTRL:
1461 		ret = kvm_s390_get_mem_control(kvm, attr);
1462 		break;
1463 	case KVM_S390_VM_TOD:
1464 		ret = kvm_s390_get_tod(kvm, attr);
1465 		break;
1466 	case KVM_S390_VM_CPU_MODEL:
1467 		ret = kvm_s390_get_cpu_model(kvm, attr);
1468 		break;
1469 	case KVM_S390_VM_MIGRATION:
1470 		ret = kvm_s390_vm_get_migration(kvm, attr);
1471 		break;
1472 	default:
1473 		ret = -ENXIO;
1474 		break;
1475 	}
1476 
1477 	return ret;
1478 }
1479 
1480 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1481 {
1482 	int ret;
1483 
1484 	switch (attr->group) {
1485 	case KVM_S390_VM_MEM_CTRL:
1486 		switch (attr->attr) {
1487 		case KVM_S390_VM_MEM_ENABLE_CMMA:
1488 		case KVM_S390_VM_MEM_CLR_CMMA:
1489 			ret = sclp.has_cmma ? 0 : -ENXIO;
1490 			break;
1491 		case KVM_S390_VM_MEM_LIMIT_SIZE:
1492 			ret = 0;
1493 			break;
1494 		default:
1495 			ret = -ENXIO;
1496 			break;
1497 		}
1498 		break;
1499 	case KVM_S390_VM_TOD:
1500 		switch (attr->attr) {
1501 		case KVM_S390_VM_TOD_LOW:
1502 		case KVM_S390_VM_TOD_HIGH:
1503 			ret = 0;
1504 			break;
1505 		default:
1506 			ret = -ENXIO;
1507 			break;
1508 		}
1509 		break;
1510 	case KVM_S390_VM_CPU_MODEL:
1511 		switch (attr->attr) {
1512 		case KVM_S390_VM_CPU_PROCESSOR:
1513 		case KVM_S390_VM_CPU_MACHINE:
1514 		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1515 		case KVM_S390_VM_CPU_MACHINE_FEAT:
1516 		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1517 			ret = 0;
1518 			break;
1519 		/* configuring subfunctions is not supported yet */
1520 		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1521 		default:
1522 			ret = -ENXIO;
1523 			break;
1524 		}
1525 		break;
1526 	case KVM_S390_VM_CRYPTO:
1527 		switch (attr->attr) {
1528 		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1529 		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1530 		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1531 		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1532 			ret = 0;
1533 			break;
1534 		case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1535 		case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1536 			ret = ap_instructions_available() ? 0 : -ENXIO;
1537 			break;
1538 		default:
1539 			ret = -ENXIO;
1540 			break;
1541 		}
1542 		break;
1543 	case KVM_S390_VM_MIGRATION:
1544 		ret = 0;
1545 		break;
1546 	default:
1547 		ret = -ENXIO;
1548 		break;
1549 	}
1550 
1551 	return ret;
1552 }
1553 
1554 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1555 {
1556 	uint8_t *keys;
1557 	uint64_t hva;
1558 	int srcu_idx, i, r = 0;
1559 
1560 	if (args->flags != 0)
1561 		return -EINVAL;
1562 
1563 	/* Is this guest using storage keys? */
1564 	if (!mm_uses_skeys(current->mm))
1565 		return KVM_S390_GET_SKEYS_NONE;
1566 
1567 	/* Enforce sane limit on memory allocation */
1568 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1569 		return -EINVAL;
1570 
1571 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1572 	if (!keys)
1573 		return -ENOMEM;
1574 
1575 	down_read(&current->mm->mmap_sem);
1576 	srcu_idx = srcu_read_lock(&kvm->srcu);
1577 	for (i = 0; i < args->count; i++) {
1578 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1579 		if (kvm_is_error_hva(hva)) {
1580 			r = -EFAULT;
1581 			break;
1582 		}
1583 
1584 		r = get_guest_storage_key(current->mm, hva, &keys[i]);
1585 		if (r)
1586 			break;
1587 	}
1588 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1589 	up_read(&current->mm->mmap_sem);
1590 
1591 	if (!r) {
1592 		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1593 				 sizeof(uint8_t) * args->count);
1594 		if (r)
1595 			r = -EFAULT;
1596 	}
1597 
1598 	kvfree(keys);
1599 	return r;
1600 }
1601 
1602 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1603 {
1604 	uint8_t *keys;
1605 	uint64_t hva;
1606 	int srcu_idx, i, r = 0;
1607 	bool unlocked;
1608 
1609 	if (args->flags != 0)
1610 		return -EINVAL;
1611 
1612 	/* Enforce sane limit on memory allocation */
1613 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1614 		return -EINVAL;
1615 
1616 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1617 	if (!keys)
1618 		return -ENOMEM;
1619 
1620 	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1621 			   sizeof(uint8_t) * args->count);
1622 	if (r) {
1623 		r = -EFAULT;
1624 		goto out;
1625 	}
1626 
1627 	/* Enable storage key handling for the guest */
1628 	r = s390_enable_skey();
1629 	if (r)
1630 		goto out;
1631 
1632 	i = 0;
1633 	down_read(&current->mm->mmap_sem);
1634 	srcu_idx = srcu_read_lock(&kvm->srcu);
1635         while (i < args->count) {
1636 		unlocked = false;
1637 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1638 		if (kvm_is_error_hva(hva)) {
1639 			r = -EFAULT;
1640 			break;
1641 		}
1642 
1643 		/* Lowest order bit is reserved */
1644 		if (keys[i] & 0x01) {
1645 			r = -EINVAL;
1646 			break;
1647 		}
1648 
1649 		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1650 		if (r) {
1651 			r = fixup_user_fault(current, current->mm, hva,
1652 					     FAULT_FLAG_WRITE, &unlocked);
1653 			if (r)
1654 				break;
1655 		}
1656 		if (!r)
1657 			i++;
1658 	}
1659 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1660 	up_read(&current->mm->mmap_sem);
1661 out:
1662 	kvfree(keys);
1663 	return r;
1664 }
1665 
1666 /*
1667  * Base address and length must be sent at the start of each block, therefore
1668  * it's cheaper to send some clean data, as long as it's less than the size of
1669  * two longs.
1670  */
1671 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1672 /* for consistency */
1673 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1674 
1675 /*
1676  * Similar to gfn_to_memslot, but returns the index of a memslot also when the
1677  * address falls in a hole. In that case the index of one of the memslots
1678  * bordering the hole is returned.
1679  */
1680 static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1681 {
1682 	int start = 0, end = slots->used_slots;
1683 	int slot = atomic_read(&slots->lru_slot);
1684 	struct kvm_memory_slot *memslots = slots->memslots;
1685 
1686 	if (gfn >= memslots[slot].base_gfn &&
1687 	    gfn < memslots[slot].base_gfn + memslots[slot].npages)
1688 		return slot;
1689 
1690 	while (start < end) {
1691 		slot = start + (end - start) / 2;
1692 
1693 		if (gfn >= memslots[slot].base_gfn)
1694 			end = slot;
1695 		else
1696 			start = slot + 1;
1697 	}
1698 
1699 	if (gfn >= memslots[start].base_gfn &&
1700 	    gfn < memslots[start].base_gfn + memslots[start].npages) {
1701 		atomic_set(&slots->lru_slot, start);
1702 	}
1703 
1704 	return start;
1705 }
1706 
1707 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1708 			      u8 *res, unsigned long bufsize)
1709 {
1710 	unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1711 
1712 	args->count = 0;
1713 	while (args->count < bufsize) {
1714 		hva = gfn_to_hva(kvm, cur_gfn);
1715 		/*
1716 		 * We return an error if the first value was invalid, but we
1717 		 * return successfully if at least one value was copied.
1718 		 */
1719 		if (kvm_is_error_hva(hva))
1720 			return args->count ? 0 : -EFAULT;
1721 		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1722 			pgstev = 0;
1723 		res[args->count++] = (pgstev >> 24) & 0x43;
1724 		cur_gfn++;
1725 	}
1726 
1727 	return 0;
1728 }
1729 
1730 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
1731 					      unsigned long cur_gfn)
1732 {
1733 	int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
1734 	struct kvm_memory_slot *ms = slots->memslots + slotidx;
1735 	unsigned long ofs = cur_gfn - ms->base_gfn;
1736 
1737 	if (ms->base_gfn + ms->npages <= cur_gfn) {
1738 		slotidx--;
1739 		/* If we are above the highest slot, wrap around */
1740 		if (slotidx < 0)
1741 			slotidx = slots->used_slots - 1;
1742 
1743 		ms = slots->memslots + slotidx;
1744 		ofs = 0;
1745 	}
1746 	ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
1747 	while ((slotidx > 0) && (ofs >= ms->npages)) {
1748 		slotidx--;
1749 		ms = slots->memslots + slotidx;
1750 		ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
1751 	}
1752 	return ms->base_gfn + ofs;
1753 }
1754 
1755 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1756 			     u8 *res, unsigned long bufsize)
1757 {
1758 	unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
1759 	struct kvm_memslots *slots = kvm_memslots(kvm);
1760 	struct kvm_memory_slot *ms;
1761 
1762 	cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
1763 	ms = gfn_to_memslot(kvm, cur_gfn);
1764 	args->count = 0;
1765 	args->start_gfn = cur_gfn;
1766 	if (!ms)
1767 		return 0;
1768 	next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
1769 	mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
1770 
1771 	while (args->count < bufsize) {
1772 		hva = gfn_to_hva(kvm, cur_gfn);
1773 		if (kvm_is_error_hva(hva))
1774 			return 0;
1775 		/* Decrement only if we actually flipped the bit to 0 */
1776 		if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
1777 			atomic64_dec(&kvm->arch.cmma_dirty_pages);
1778 		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1779 			pgstev = 0;
1780 		/* Save the value */
1781 		res[args->count++] = (pgstev >> 24) & 0x43;
1782 		/* If the next bit is too far away, stop. */
1783 		if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
1784 			return 0;
1785 		/* If we reached the previous "next", find the next one */
1786 		if (cur_gfn == next_gfn)
1787 			next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
1788 		/* Reached the end of memory or of the buffer, stop */
1789 		if ((next_gfn >= mem_end) ||
1790 		    (next_gfn - args->start_gfn >= bufsize))
1791 			return 0;
1792 		cur_gfn++;
1793 		/* Reached the end of the current memslot, take the next one. */
1794 		if (cur_gfn - ms->base_gfn >= ms->npages) {
1795 			ms = gfn_to_memslot(kvm, cur_gfn);
1796 			if (!ms)
1797 				return 0;
1798 		}
1799 	}
1800 	return 0;
1801 }
1802 
1803 /*
1804  * This function searches for the next page with dirty CMMA attributes, and
1805  * saves the attributes in the buffer up to either the end of the buffer or
1806  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
1807  * no trailing clean bytes are saved.
1808  * In case no dirty bits were found, or if CMMA was not enabled or used, the
1809  * output buffer will indicate 0 as length.
1810  */
1811 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
1812 				  struct kvm_s390_cmma_log *args)
1813 {
1814 	unsigned long bufsize;
1815 	int srcu_idx, peek, ret;
1816 	u8 *values;
1817 
1818 	if (!kvm->arch.use_cmma)
1819 		return -ENXIO;
1820 	/* Invalid/unsupported flags were specified */
1821 	if (args->flags & ~KVM_S390_CMMA_PEEK)
1822 		return -EINVAL;
1823 	/* Migration mode query, and we are not doing a migration */
1824 	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
1825 	if (!peek && !kvm->arch.migration_mode)
1826 		return -EINVAL;
1827 	/* CMMA is disabled or was not used, or the buffer has length zero */
1828 	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
1829 	if (!bufsize || !kvm->mm->context.uses_cmm) {
1830 		memset(args, 0, sizeof(*args));
1831 		return 0;
1832 	}
1833 	/* We are not peeking, and there are no dirty pages */
1834 	if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
1835 		memset(args, 0, sizeof(*args));
1836 		return 0;
1837 	}
1838 
1839 	values = vmalloc(bufsize);
1840 	if (!values)
1841 		return -ENOMEM;
1842 
1843 	down_read(&kvm->mm->mmap_sem);
1844 	srcu_idx = srcu_read_lock(&kvm->srcu);
1845 	if (peek)
1846 		ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
1847 	else
1848 		ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
1849 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1850 	up_read(&kvm->mm->mmap_sem);
1851 
1852 	if (kvm->arch.migration_mode)
1853 		args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
1854 	else
1855 		args->remaining = 0;
1856 
1857 	if (copy_to_user((void __user *)args->values, values, args->count))
1858 		ret = -EFAULT;
1859 
1860 	vfree(values);
1861 	return ret;
1862 }
1863 
1864 /*
1865  * This function sets the CMMA attributes for the given pages. If the input
1866  * buffer has zero length, no action is taken, otherwise the attributes are
1867  * set and the mm->context.uses_cmm flag is set.
1868  */
1869 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
1870 				  const struct kvm_s390_cmma_log *args)
1871 {
1872 	unsigned long hva, mask, pgstev, i;
1873 	uint8_t *bits;
1874 	int srcu_idx, r = 0;
1875 
1876 	mask = args->mask;
1877 
1878 	if (!kvm->arch.use_cmma)
1879 		return -ENXIO;
1880 	/* invalid/unsupported flags */
1881 	if (args->flags != 0)
1882 		return -EINVAL;
1883 	/* Enforce sane limit on memory allocation */
1884 	if (args->count > KVM_S390_CMMA_SIZE_MAX)
1885 		return -EINVAL;
1886 	/* Nothing to do */
1887 	if (args->count == 0)
1888 		return 0;
1889 
1890 	bits = vmalloc(array_size(sizeof(*bits), args->count));
1891 	if (!bits)
1892 		return -ENOMEM;
1893 
1894 	r = copy_from_user(bits, (void __user *)args->values, args->count);
1895 	if (r) {
1896 		r = -EFAULT;
1897 		goto out;
1898 	}
1899 
1900 	down_read(&kvm->mm->mmap_sem);
1901 	srcu_idx = srcu_read_lock(&kvm->srcu);
1902 	for (i = 0; i < args->count; i++) {
1903 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1904 		if (kvm_is_error_hva(hva)) {
1905 			r = -EFAULT;
1906 			break;
1907 		}
1908 
1909 		pgstev = bits[i];
1910 		pgstev = pgstev << 24;
1911 		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
1912 		set_pgste_bits(kvm->mm, hva, mask, pgstev);
1913 	}
1914 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1915 	up_read(&kvm->mm->mmap_sem);
1916 
1917 	if (!kvm->mm->context.uses_cmm) {
1918 		down_write(&kvm->mm->mmap_sem);
1919 		kvm->mm->context.uses_cmm = 1;
1920 		up_write(&kvm->mm->mmap_sem);
1921 	}
1922 out:
1923 	vfree(bits);
1924 	return r;
1925 }
1926 
1927 long kvm_arch_vm_ioctl(struct file *filp,
1928 		       unsigned int ioctl, unsigned long arg)
1929 {
1930 	struct kvm *kvm = filp->private_data;
1931 	void __user *argp = (void __user *)arg;
1932 	struct kvm_device_attr attr;
1933 	int r;
1934 
1935 	switch (ioctl) {
1936 	case KVM_S390_INTERRUPT: {
1937 		struct kvm_s390_interrupt s390int;
1938 
1939 		r = -EFAULT;
1940 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
1941 			break;
1942 		r = kvm_s390_inject_vm(kvm, &s390int);
1943 		break;
1944 	}
1945 	case KVM_CREATE_IRQCHIP: {
1946 		struct kvm_irq_routing_entry routing;
1947 
1948 		r = -EINVAL;
1949 		if (kvm->arch.use_irqchip) {
1950 			/* Set up dummy routing. */
1951 			memset(&routing, 0, sizeof(routing));
1952 			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1953 		}
1954 		break;
1955 	}
1956 	case KVM_SET_DEVICE_ATTR: {
1957 		r = -EFAULT;
1958 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1959 			break;
1960 		r = kvm_s390_vm_set_attr(kvm, &attr);
1961 		break;
1962 	}
1963 	case KVM_GET_DEVICE_ATTR: {
1964 		r = -EFAULT;
1965 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1966 			break;
1967 		r = kvm_s390_vm_get_attr(kvm, &attr);
1968 		break;
1969 	}
1970 	case KVM_HAS_DEVICE_ATTR: {
1971 		r = -EFAULT;
1972 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1973 			break;
1974 		r = kvm_s390_vm_has_attr(kvm, &attr);
1975 		break;
1976 	}
1977 	case KVM_S390_GET_SKEYS: {
1978 		struct kvm_s390_skeys args;
1979 
1980 		r = -EFAULT;
1981 		if (copy_from_user(&args, argp,
1982 				   sizeof(struct kvm_s390_skeys)))
1983 			break;
1984 		r = kvm_s390_get_skeys(kvm, &args);
1985 		break;
1986 	}
1987 	case KVM_S390_SET_SKEYS: {
1988 		struct kvm_s390_skeys args;
1989 
1990 		r = -EFAULT;
1991 		if (copy_from_user(&args, argp,
1992 				   sizeof(struct kvm_s390_skeys)))
1993 			break;
1994 		r = kvm_s390_set_skeys(kvm, &args);
1995 		break;
1996 	}
1997 	case KVM_S390_GET_CMMA_BITS: {
1998 		struct kvm_s390_cmma_log args;
1999 
2000 		r = -EFAULT;
2001 		if (copy_from_user(&args, argp, sizeof(args)))
2002 			break;
2003 		mutex_lock(&kvm->slots_lock);
2004 		r = kvm_s390_get_cmma_bits(kvm, &args);
2005 		mutex_unlock(&kvm->slots_lock);
2006 		if (!r) {
2007 			r = copy_to_user(argp, &args, sizeof(args));
2008 			if (r)
2009 				r = -EFAULT;
2010 		}
2011 		break;
2012 	}
2013 	case KVM_S390_SET_CMMA_BITS: {
2014 		struct kvm_s390_cmma_log args;
2015 
2016 		r = -EFAULT;
2017 		if (copy_from_user(&args, argp, sizeof(args)))
2018 			break;
2019 		mutex_lock(&kvm->slots_lock);
2020 		r = kvm_s390_set_cmma_bits(kvm, &args);
2021 		mutex_unlock(&kvm->slots_lock);
2022 		break;
2023 	}
2024 	default:
2025 		r = -ENOTTY;
2026 	}
2027 
2028 	return r;
2029 }
2030 
2031 static int kvm_s390_apxa_installed(void)
2032 {
2033 	struct ap_config_info info;
2034 
2035 	if (ap_instructions_available()) {
2036 		if (ap_qci(&info) == 0)
2037 			return info.apxa;
2038 	}
2039 
2040 	return 0;
2041 }
2042 
2043 /*
2044  * The format of the crypto control block (CRYCB) is specified in the 3 low
2045  * order bits of the CRYCB designation (CRYCBD) field as follows:
2046  * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2047  *	     AP extended addressing (APXA) facility are installed.
2048  * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2049  * Format 2: Both the APXA and MSAX3 facilities are installed
2050  */
2051 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2052 {
2053 	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2054 
2055 	/* Clear the CRYCB format bits - i.e., set format 0 by default */
2056 	kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2057 
2058 	/* Check whether MSAX3 is installed */
2059 	if (!test_kvm_facility(kvm, 76))
2060 		return;
2061 
2062 	if (kvm_s390_apxa_installed())
2063 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2064 	else
2065 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2066 }
2067 
2068 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2069 			       unsigned long *aqm, unsigned long *adm)
2070 {
2071 	struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2072 
2073 	mutex_lock(&kvm->lock);
2074 	kvm_s390_vcpu_block_all(kvm);
2075 
2076 	switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2077 	case CRYCB_FORMAT2: /* APCB1 use 256 bits */
2078 		memcpy(crycb->apcb1.apm, apm, 32);
2079 		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2080 			 apm[0], apm[1], apm[2], apm[3]);
2081 		memcpy(crycb->apcb1.aqm, aqm, 32);
2082 		VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2083 			 aqm[0], aqm[1], aqm[2], aqm[3]);
2084 		memcpy(crycb->apcb1.adm, adm, 32);
2085 		VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2086 			 adm[0], adm[1], adm[2], adm[3]);
2087 		break;
2088 	case CRYCB_FORMAT1:
2089 	case CRYCB_FORMAT0: /* Fall through both use APCB0 */
2090 		memcpy(crycb->apcb0.apm, apm, 8);
2091 		memcpy(crycb->apcb0.aqm, aqm, 2);
2092 		memcpy(crycb->apcb0.adm, adm, 2);
2093 		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2094 			 apm[0], *((unsigned short *)aqm),
2095 			 *((unsigned short *)adm));
2096 		break;
2097 	default:	/* Can not happen */
2098 		break;
2099 	}
2100 
2101 	/* recreate the shadow crycb for each vcpu */
2102 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2103 	kvm_s390_vcpu_unblock_all(kvm);
2104 	mutex_unlock(&kvm->lock);
2105 }
2106 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
2107 
2108 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2109 {
2110 	mutex_lock(&kvm->lock);
2111 	kvm_s390_vcpu_block_all(kvm);
2112 
2113 	memset(&kvm->arch.crypto.crycb->apcb0, 0,
2114 	       sizeof(kvm->arch.crypto.crycb->apcb0));
2115 	memset(&kvm->arch.crypto.crycb->apcb1, 0,
2116 	       sizeof(kvm->arch.crypto.crycb->apcb1));
2117 
2118 	VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2119 	/* recreate the shadow crycb for each vcpu */
2120 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2121 	kvm_s390_vcpu_unblock_all(kvm);
2122 	mutex_unlock(&kvm->lock);
2123 }
2124 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2125 
2126 static u64 kvm_s390_get_initial_cpuid(void)
2127 {
2128 	struct cpuid cpuid;
2129 
2130 	get_cpu_id(&cpuid);
2131 	cpuid.version = 0xff;
2132 	return *((u64 *) &cpuid);
2133 }
2134 
2135 static void kvm_s390_crypto_init(struct kvm *kvm)
2136 {
2137 	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2138 	kvm_s390_set_crycb_format(kvm);
2139 
2140 	if (!test_kvm_facility(kvm, 76))
2141 		return;
2142 
2143 	/* Enable AES/DEA protected key functions by default */
2144 	kvm->arch.crypto.aes_kw = 1;
2145 	kvm->arch.crypto.dea_kw = 1;
2146 	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2147 			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2148 	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2149 			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2150 }
2151 
2152 static void sca_dispose(struct kvm *kvm)
2153 {
2154 	if (kvm->arch.use_esca)
2155 		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2156 	else
2157 		free_page((unsigned long)(kvm->arch.sca));
2158 	kvm->arch.sca = NULL;
2159 }
2160 
2161 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2162 {
2163 	gfp_t alloc_flags = GFP_KERNEL;
2164 	int i, rc;
2165 	char debug_name[16];
2166 	static unsigned long sca_offset;
2167 
2168 	rc = -EINVAL;
2169 #ifdef CONFIG_KVM_S390_UCONTROL
2170 	if (type & ~KVM_VM_S390_UCONTROL)
2171 		goto out_err;
2172 	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2173 		goto out_err;
2174 #else
2175 	if (type)
2176 		goto out_err;
2177 #endif
2178 
2179 	rc = s390_enable_sie();
2180 	if (rc)
2181 		goto out_err;
2182 
2183 	rc = -ENOMEM;
2184 
2185 	if (!sclp.has_64bscao)
2186 		alloc_flags |= GFP_DMA;
2187 	rwlock_init(&kvm->arch.sca_lock);
2188 	/* start with basic SCA */
2189 	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2190 	if (!kvm->arch.sca)
2191 		goto out_err;
2192 	spin_lock(&kvm_lock);
2193 	sca_offset += 16;
2194 	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2195 		sca_offset = 0;
2196 	kvm->arch.sca = (struct bsca_block *)
2197 			((char *) kvm->arch.sca + sca_offset);
2198 	spin_unlock(&kvm_lock);
2199 
2200 	sprintf(debug_name, "kvm-%u", current->pid);
2201 
2202 	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2203 	if (!kvm->arch.dbf)
2204 		goto out_err;
2205 
2206 	BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2207 	kvm->arch.sie_page2 =
2208 	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
2209 	if (!kvm->arch.sie_page2)
2210 		goto out_err;
2211 
2212 	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2213 
2214 	for (i = 0; i < kvm_s390_fac_size(); i++) {
2215 		kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
2216 					      (kvm_s390_fac_base[i] |
2217 					       kvm_s390_fac_ext[i]);
2218 		kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
2219 					      kvm_s390_fac_base[i];
2220 	}
2221 
2222 	/* we are always in czam mode - even on pre z14 machines */
2223 	set_kvm_facility(kvm->arch.model.fac_mask, 138);
2224 	set_kvm_facility(kvm->arch.model.fac_list, 138);
2225 	/* we emulate STHYI in kvm */
2226 	set_kvm_facility(kvm->arch.model.fac_mask, 74);
2227 	set_kvm_facility(kvm->arch.model.fac_list, 74);
2228 	if (MACHINE_HAS_TLB_GUEST) {
2229 		set_kvm_facility(kvm->arch.model.fac_mask, 147);
2230 		set_kvm_facility(kvm->arch.model.fac_list, 147);
2231 	}
2232 
2233 	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2234 	kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2235 
2236 	kvm_s390_crypto_init(kvm);
2237 
2238 	mutex_init(&kvm->arch.float_int.ais_lock);
2239 	spin_lock_init(&kvm->arch.float_int.lock);
2240 	for (i = 0; i < FIRQ_LIST_COUNT; i++)
2241 		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2242 	init_waitqueue_head(&kvm->arch.ipte_wq);
2243 	mutex_init(&kvm->arch.ipte_mutex);
2244 
2245 	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2246 	VM_EVENT(kvm, 3, "vm created with type %lu", type);
2247 
2248 	if (type & KVM_VM_S390_UCONTROL) {
2249 		kvm->arch.gmap = NULL;
2250 		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2251 	} else {
2252 		if (sclp.hamax == U64_MAX)
2253 			kvm->arch.mem_limit = TASK_SIZE_MAX;
2254 		else
2255 			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2256 						    sclp.hamax + 1);
2257 		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2258 		if (!kvm->arch.gmap)
2259 			goto out_err;
2260 		kvm->arch.gmap->private = kvm;
2261 		kvm->arch.gmap->pfault_enabled = 0;
2262 	}
2263 
2264 	kvm->arch.use_pfmfi = sclp.has_pfmfi;
2265 	kvm->arch.use_skf = sclp.has_skey;
2266 	spin_lock_init(&kvm->arch.start_stop_lock);
2267 	kvm_s390_vsie_init(kvm);
2268 	kvm_s390_gisa_init(kvm);
2269 	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2270 
2271 	return 0;
2272 out_err:
2273 	free_page((unsigned long)kvm->arch.sie_page2);
2274 	debug_unregister(kvm->arch.dbf);
2275 	sca_dispose(kvm);
2276 	KVM_EVENT(3, "creation of vm failed: %d", rc);
2277 	return rc;
2278 }
2279 
/* s390 reports no architecture specific per-vcpu debugfs: always false. */
bool kvm_arch_has_vcpu_debugfs(void)
{
	return false;
}
2284 
/* Nothing to create for a vcpu on s390; @vcpu is unused and 0 is returned. */
int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
{
	return 0;
}
2289 
2290 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2291 {
2292 	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2293 	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2294 	kvm_s390_clear_local_irqs(vcpu);
2295 	kvm_clear_async_pf_completion_queue(vcpu);
2296 	if (!kvm_is_ucontrol(vcpu->kvm))
2297 		sca_del_vcpu(vcpu);
2298 
2299 	if (kvm_is_ucontrol(vcpu->kvm))
2300 		gmap_remove(vcpu->arch.gmap);
2301 
2302 	if (vcpu->kvm->arch.use_cmma)
2303 		kvm_s390_vcpu_unsetup_cmma(vcpu);
2304 	free_page((unsigned long)(vcpu->arch.sie_block));
2305 
2306 	kvm_vcpu_uninit(vcpu);
2307 	kmem_cache_free(kvm_vcpu_cache, vcpu);
2308 }
2309 
2310 static void kvm_free_vcpus(struct kvm *kvm)
2311 {
2312 	unsigned int i;
2313 	struct kvm_vcpu *vcpu;
2314 
2315 	kvm_for_each_vcpu(i, vcpu, kvm)
2316 		kvm_arch_vcpu_destroy(vcpu);
2317 
2318 	mutex_lock(&kvm->lock);
2319 	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2320 		kvm->vcpus[i] = NULL;
2321 
2322 	atomic_set(&kvm->online_vcpus, 0);
2323 	mutex_unlock(&kvm->lock);
2324 }
2325 
2326 void kvm_arch_destroy_vm(struct kvm *kvm)
2327 {
2328 	kvm_free_vcpus(kvm);
2329 	sca_dispose(kvm);
2330 	debug_unregister(kvm->arch.dbf);
2331 	kvm_s390_gisa_destroy(kvm);
2332 	free_page((unsigned long)kvm->arch.sie_page2);
2333 	if (!kvm_is_ucontrol(kvm))
2334 		gmap_remove(kvm->arch.gmap);
2335 	kvm_s390_destroy_adapters(kvm);
2336 	kvm_s390_clear_float_irqs(kvm);
2337 	kvm_s390_vsie_destroy(kvm);
2338 	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2339 }
2340 
2341 /* Section: vcpu related */
2342 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2343 {
2344 	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2345 	if (!vcpu->arch.gmap)
2346 		return -ENOMEM;
2347 	vcpu->arch.gmap->private = vcpu->kvm;
2348 
2349 	return 0;
2350 }
2351 
2352 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2353 {
2354 	if (!kvm_s390_use_sca_entries())
2355 		return;
2356 	read_lock(&vcpu->kvm->arch.sca_lock);
2357 	if (vcpu->kvm->arch.use_esca) {
2358 		struct esca_block *sca = vcpu->kvm->arch.sca;
2359 
2360 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2361 		sca->cpu[vcpu->vcpu_id].sda = 0;
2362 	} else {
2363 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2364 
2365 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2366 		sca->cpu[vcpu->vcpu_id].sda = 0;
2367 	}
2368 	read_unlock(&vcpu->kvm->arch.sca_lock);
2369 }
2370 
2371 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2372 {
2373 	if (!kvm_s390_use_sca_entries()) {
2374 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2375 
2376 		/* we still need the basic sca for the ipte control */
2377 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2378 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2379 		return;
2380 	}
2381 	read_lock(&vcpu->kvm->arch.sca_lock);
2382 	if (vcpu->kvm->arch.use_esca) {
2383 		struct esca_block *sca = vcpu->kvm->arch.sca;
2384 
2385 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2386 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2387 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2388 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2389 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2390 	} else {
2391 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2392 
2393 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2394 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2395 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2396 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2397 	}
2398 	read_unlock(&vcpu->kvm->arch.sca_lock);
2399 }
2400 
2401 /* Basic SCA to Extended SCA data copy routines */
2402 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2403 {
2404 	d->sda = s->sda;
2405 	d->sigp_ctrl.c = s->sigp_ctrl.c;
2406 	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2407 }
2408 
2409 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2410 {
2411 	int i;
2412 
2413 	d->ipte_control = s->ipte_control;
2414 	d->mcn[0] = s->mcn;
2415 	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2416 		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2417 }
2418 
2419 static int sca_switch_to_extended(struct kvm *kvm)
2420 {
2421 	struct bsca_block *old_sca = kvm->arch.sca;
2422 	struct esca_block *new_sca;
2423 	struct kvm_vcpu *vcpu;
2424 	unsigned int vcpu_idx;
2425 	u32 scaol, scaoh;
2426 
2427 	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2428 	if (!new_sca)
2429 		return -ENOMEM;
2430 
2431 	scaoh = (u32)((u64)(new_sca) >> 32);
2432 	scaol = (u32)(u64)(new_sca) & ~0x3fU;
2433 
2434 	kvm_s390_vcpu_block_all(kvm);
2435 	write_lock(&kvm->arch.sca_lock);
2436 
2437 	sca_copy_b_to_e(new_sca, old_sca);
2438 
2439 	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2440 		vcpu->arch.sie_block->scaoh = scaoh;
2441 		vcpu->arch.sie_block->scaol = scaol;
2442 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2443 	}
2444 	kvm->arch.sca = new_sca;
2445 	kvm->arch.use_esca = 1;
2446 
2447 	write_unlock(&kvm->arch.sca_lock);
2448 	kvm_s390_vcpu_unblock_all(kvm);
2449 
2450 	free_page((unsigned long)old_sca);
2451 
2452 	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2453 		 old_sca, kvm->arch.sca);
2454 	return 0;
2455 }
2456 
2457 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2458 {
2459 	int rc;
2460 
2461 	if (!kvm_s390_use_sca_entries()) {
2462 		if (id < KVM_MAX_VCPUS)
2463 			return true;
2464 		return false;
2465 	}
2466 	if (id < KVM_S390_BSCA_CPU_SLOTS)
2467 		return true;
2468 	if (!sclp.has_esca || !sclp.has_64bscao)
2469 		return false;
2470 
2471 	mutex_lock(&kvm->lock);
2472 	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2473 	mutex_unlock(&kvm->lock);
2474 
2475 	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2476 }
2477 
2478 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2479 {
2480 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2481 	kvm_clear_async_pf_completion_queue(vcpu);
2482 	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2483 				    KVM_SYNC_GPRS |
2484 				    KVM_SYNC_ACRS |
2485 				    KVM_SYNC_CRS |
2486 				    KVM_SYNC_ARCH0 |
2487 				    KVM_SYNC_PFAULT;
2488 	kvm_s390_set_prefix(vcpu, 0);
2489 	if (test_kvm_facility(vcpu->kvm, 64))
2490 		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2491 	if (test_kvm_facility(vcpu->kvm, 82))
2492 		vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
2493 	if (test_kvm_facility(vcpu->kvm, 133))
2494 		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2495 	if (test_kvm_facility(vcpu->kvm, 156))
2496 		vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
2497 	/* fprs can be synchronized via vrs, even if the guest has no vx. With
2498 	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2499 	 */
2500 	if (MACHINE_HAS_VX)
2501 		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2502 	else
2503 		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2504 
2505 	if (kvm_is_ucontrol(vcpu->kvm))
2506 		return __kvm_ucontrol_vcpu_init(vcpu);
2507 
2508 	return 0;
2509 }
2510 
2511 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2512 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2513 {
2514 	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2515 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2516 	vcpu->arch.cputm_start = get_tod_clock_fast();
2517 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2518 }
2519 
2520 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2521 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2522 {
2523 	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2524 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2525 	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2526 	vcpu->arch.cputm_start = 0;
2527 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2528 }
2529 
2530 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2531 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2532 {
2533 	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2534 	vcpu->arch.cputm_enabled = true;
2535 	__start_cpu_timer_accounting(vcpu);
2536 }
2537 
2538 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2539 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2540 {
2541 	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2542 	__stop_cpu_timer_accounting(vcpu);
2543 	vcpu->arch.cputm_enabled = false;
2544 }
2545 
/* Preemption safe wrapper around __enable_cpu_timer_accounting(). */
static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	/* protect from TOD sync and vcpu_load/put */
	preempt_disable();
	__enable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
2552 
/* Preemption safe wrapper around __disable_cpu_timer_accounting(). */
static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	/* protect from TOD sync and vcpu_load/put */
	preempt_disable();
	__disable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
2559 
2560 /* set the cpu timer - may only be called from the VCPU thread itself */
2561 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2562 {
2563 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2564 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2565 	if (vcpu->arch.cputm_enabled)
2566 		vcpu->arch.cputm_start = get_tod_clock_fast();
2567 	vcpu->arch.sie_block->cputm = cputm;
2568 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2569 	preempt_enable();
2570 }
2571 
2572 /* update and get the cpu timer - can also be called from other VCPU threads */
2573 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2574 {
2575 	unsigned int seq;
2576 	__u64 value;
2577 
2578 	if (unlikely(!vcpu->arch.cputm_enabled))
2579 		return vcpu->arch.sie_block->cputm;
2580 
2581 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2582 	do {
2583 		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2584 		/*
2585 		 * If the writer would ever execute a read in the critical
2586 		 * section, e.g. in irq context, we have a deadlock.
2587 		 */
2588 		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2589 		value = vcpu->arch.sie_block->cputm;
2590 		/* if cputm_start is 0, accounting is being started/stopped */
2591 		if (likely(vcpu->arch.cputm_start))
2592 			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2593 	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2594 	preempt_enable();
2595 	return value;
2596 }
2597 
2598 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2599 {
2600 
2601 	gmap_enable(vcpu->arch.enabled_gmap);
2602 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
2603 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2604 		__start_cpu_timer_accounting(vcpu);
2605 	vcpu->cpu = cpu;
2606 }
2607 
2608 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2609 {
2610 	vcpu->cpu = -1;
2611 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2612 		__stop_cpu_timer_accounting(vcpu);
2613 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
2614 	vcpu->arch.enabled_gmap = gmap_get_enabled();
2615 	gmap_disable(vcpu->arch.enabled_gmap);
2616 
2617 }
2618 
2619 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2620 {
2621 	/* this equals initial cpu reset in pop, but we don't switch to ESA */
2622 	vcpu->arch.sie_block->gpsw.mask = 0UL;
2623 	vcpu->arch.sie_block->gpsw.addr = 0UL;
2624 	kvm_s390_set_prefix(vcpu, 0);
2625 	kvm_s390_set_cpu_timer(vcpu, 0);
2626 	vcpu->arch.sie_block->ckc       = 0UL;
2627 	vcpu->arch.sie_block->todpr     = 0;
2628 	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2629 	vcpu->arch.sie_block->gcr[0]  = CR0_UNUSED_56 |
2630 					CR0_INTERRUPT_KEY_SUBMASK |
2631 					CR0_MEASUREMENT_ALERT_SUBMASK;
2632 	vcpu->arch.sie_block->gcr[14] = CR14_UNUSED_32 |
2633 					CR14_UNUSED_33 |
2634 					CR14_EXTERNAL_DAMAGE_SUBMASK;
2635 	/* make sure the new fpc will be lazily loaded */
2636 	save_fpu_regs();
2637 	current->thread.fpu.fpc = 0;
2638 	vcpu->arch.sie_block->gbea = 1;
2639 	vcpu->arch.sie_block->pp = 0;
2640 	vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
2641 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2642 	kvm_clear_async_pf_completion_queue(vcpu);
2643 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2644 		kvm_s390_vcpu_stop(vcpu);
2645 	kvm_s390_clear_local_irqs(vcpu);
2646 }
2647 
2648 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2649 {
2650 	mutex_lock(&vcpu->kvm->lock);
2651 	preempt_disable();
2652 	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2653 	vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
2654 	preempt_enable();
2655 	mutex_unlock(&vcpu->kvm->lock);
2656 	if (!kvm_is_ucontrol(vcpu->kvm)) {
2657 		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2658 		sca_add_vcpu(vcpu);
2659 	}
2660 	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2661 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2662 	/* make vcpu_load load the right gmap on the first trigger */
2663 	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2664 }
2665 
2666 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2667 {
2668 	/*
2669 	 * If the AP instructions are not being interpreted and the MSAX3
2670 	 * facility is not configured for the guest, there is nothing to set up.
2671 	 */
2672 	if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
2673 		return;
2674 
2675 	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2676 	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2677 	vcpu->arch.sie_block->eca &= ~ECA_APIE;
2678 
2679 	if (vcpu->kvm->arch.crypto.apie)
2680 		vcpu->arch.sie_block->eca |= ECA_APIE;
2681 
2682 	/* Set up protected key support */
2683 	if (vcpu->kvm->arch.crypto.aes_kw)
2684 		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2685 	if (vcpu->kvm->arch.crypto.dea_kw)
2686 		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2687 }
2688 
2689 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2690 {
2691 	free_page(vcpu->arch.sie_block->cbrlo);
2692 	vcpu->arch.sie_block->cbrlo = 0;
2693 }
2694 
2695 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2696 {
2697 	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2698 	if (!vcpu->arch.sie_block->cbrlo)
2699 		return -ENOMEM;
2700 	return 0;
2701 }
2702 
2703 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2704 {
2705 	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2706 
2707 	vcpu->arch.sie_block->ibc = model->ibc;
2708 	if (test_kvm_facility(vcpu->kvm, 7))
2709 		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2710 }
2711 
2712 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2713 {
2714 	int rc = 0;
2715 
2716 	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2717 						    CPUSTAT_SM |
2718 						    CPUSTAT_STOPPED);
2719 
2720 	if (test_kvm_facility(vcpu->kvm, 78))
2721 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
2722 	else if (test_kvm_facility(vcpu->kvm, 8))
2723 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
2724 
2725 	kvm_s390_vcpu_setup_model(vcpu);
2726 
2727 	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2728 	if (MACHINE_HAS_ESOP)
2729 		vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2730 	if (test_kvm_facility(vcpu->kvm, 9))
2731 		vcpu->arch.sie_block->ecb |= ECB_SRSI;
2732 	if (test_kvm_facility(vcpu->kvm, 73))
2733 		vcpu->arch.sie_block->ecb |= ECB_TE;
2734 
2735 	if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
2736 		vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2737 	if (test_kvm_facility(vcpu->kvm, 130))
2738 		vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2739 	vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2740 	if (sclp.has_cei)
2741 		vcpu->arch.sie_block->eca |= ECA_CEI;
2742 	if (sclp.has_ib)
2743 		vcpu->arch.sie_block->eca |= ECA_IB;
2744 	if (sclp.has_siif)
2745 		vcpu->arch.sie_block->eca |= ECA_SII;
2746 	if (sclp.has_sigpif)
2747 		vcpu->arch.sie_block->eca |= ECA_SIGPI;
2748 	if (test_kvm_facility(vcpu->kvm, 129)) {
2749 		vcpu->arch.sie_block->eca |= ECA_VX;
2750 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2751 	}
2752 	if (test_kvm_facility(vcpu->kvm, 139))
2753 		vcpu->arch.sie_block->ecd |= ECD_MEF;
2754 	if (test_kvm_facility(vcpu->kvm, 156))
2755 		vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
2756 	if (vcpu->arch.sie_block->gd) {
2757 		vcpu->arch.sie_block->eca |= ECA_AIV;
2758 		VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
2759 			   vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
2760 	}
2761 	vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2762 					| SDNXC;
2763 	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2764 
2765 	if (sclp.has_kss)
2766 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
2767 	else
2768 		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2769 
2770 	if (vcpu->kvm->arch.use_cmma) {
2771 		rc = kvm_s390_vcpu_setup_cmma(vcpu);
2772 		if (rc)
2773 			return rc;
2774 	}
2775 	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2776 	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2777 
2778 	vcpu->arch.sie_block->hpid = HPID_KVM;
2779 
2780 	kvm_s390_vcpu_crypto_setup(vcpu);
2781 
2782 	return rc;
2783 }
2784 
2785 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2786 				      unsigned int id)
2787 {
2788 	struct kvm_vcpu *vcpu;
2789 	struct sie_page *sie_page;
2790 	int rc = -EINVAL;
2791 
2792 	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2793 		goto out;
2794 
2795 	rc = -ENOMEM;
2796 
2797 	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2798 	if (!vcpu)
2799 		goto out;
2800 
2801 	BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
2802 	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2803 	if (!sie_page)
2804 		goto out_free_cpu;
2805 
2806 	vcpu->arch.sie_block = &sie_page->sie_block;
2807 	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2808 
2809 	/* the real guest size will always be smaller than msl */
2810 	vcpu->arch.sie_block->mso = 0;
2811 	vcpu->arch.sie_block->msl = sclp.hamax;
2812 
2813 	vcpu->arch.sie_block->icpua = id;
2814 	spin_lock_init(&vcpu->arch.local_int.lock);
2815 	vcpu->arch.sie_block->gd = (u32)(u64)kvm->arch.gisa;
2816 	if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
2817 		vcpu->arch.sie_block->gd |= GISA_FORMAT1;
2818 	seqcount_init(&vcpu->arch.cputm_seqcount);
2819 
2820 	rc = kvm_vcpu_init(vcpu, kvm, id);
2821 	if (rc)
2822 		goto out_free_sie_block;
2823 	VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2824 		 vcpu->arch.sie_block);
2825 	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2826 
2827 	return vcpu;
2828 out_free_sie_block:
2829 	free_page((unsigned long)(vcpu->arch.sie_block));
2830 out_free_cpu:
2831 	kmem_cache_free(kvm_vcpu_cache, vcpu);
2832 out:
2833 	return ERR_PTR(rc);
2834 }
2835 
/* Returns the result of kvm_s390_vcpu_has_irq(vcpu, 0) for the vcpu. */
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	return kvm_s390_vcpu_has_irq(vcpu, 0);
}
2840 
2841 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
2842 {
2843 	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
2844 }
2845 
2846 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2847 {
2848 	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2849 	exit_sie(vcpu);
2850 }
2851 
2852 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2853 {
2854 	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2855 }
2856 
2857 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2858 {
2859 	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2860 	exit_sie(vcpu);
2861 }
2862 
2863 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
2864 {
2865 	return atomic_read(&vcpu->arch.sie_block->prog20) &
2866 	       (PROG_BLOCK_SIE | PROG_REQUEST);
2867 }
2868 
2869 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2870 {
2871 	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2872 }
2873 
2874 /*
2875  * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
2876  * If the CPU is not running (e.g. waiting as idle) the function will
2877  * return immediately. */
2878 void exit_sie(struct kvm_vcpu *vcpu)
2879 {
2880 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
2881 	kvm_s390_vsie_kick(vcpu);
2882 	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2883 		cpu_relax();
2884 }
2885 
/*
 * Post request @req for the vcpu and kick the guest cpu out of SIE so that
 * the request is processed synchronously.
 */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
	kvm_make_request(req, vcpu);
	kvm_s390_vcpu_request(vcpu);
}
2892 
2893 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2894 			      unsigned long end)
2895 {
2896 	struct kvm *kvm = gmap->private;
2897 	struct kvm_vcpu *vcpu;
2898 	unsigned long prefix;
2899 	int i;
2900 
2901 	if (gmap_is_shadow(gmap))
2902 		return;
2903 	if (start >= 1UL << 31)
2904 		/* We are only interested in prefix pages */
2905 		return;
2906 	kvm_for_each_vcpu(i, vcpu, kvm) {
2907 		/* match against both prefix pages */
2908 		prefix = kvm_s390_get_prefix(vcpu);
2909 		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2910 			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2911 				   start, end);
2912 			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2913 		}
2914 	}
2915 }
2916 
/*
 * Not expected to be called on s390: kvm common code refers to this, but
 * never calls it. Calling it triggers BUG().
 */
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	BUG();
	return 0;
}
2923 
2924 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2925 					   struct kvm_one_reg *reg)
2926 {
2927 	int r = -EINVAL;
2928 
2929 	switch (reg->id) {
2930 	case KVM_REG_S390_TODPR:
2931 		r = put_user(vcpu->arch.sie_block->todpr,
2932 			     (u32 __user *)reg->addr);
2933 		break;
2934 	case KVM_REG_S390_EPOCHDIFF:
2935 		r = put_user(vcpu->arch.sie_block->epoch,
2936 			     (u64 __user *)reg->addr);
2937 		break;
2938 	case KVM_REG_S390_CPU_TIMER:
2939 		r = put_user(kvm_s390_get_cpu_timer(vcpu),
2940 			     (u64 __user *)reg->addr);
2941 		break;
2942 	case KVM_REG_S390_CLOCK_COMP:
2943 		r = put_user(vcpu->arch.sie_block->ckc,
2944 			     (u64 __user *)reg->addr);
2945 		break;
2946 	case KVM_REG_S390_PFTOKEN:
2947 		r = put_user(vcpu->arch.pfault_token,
2948 			     (u64 __user *)reg->addr);
2949 		break;
2950 	case KVM_REG_S390_PFCOMPARE:
2951 		r = put_user(vcpu->arch.pfault_compare,
2952 			     (u64 __user *)reg->addr);
2953 		break;
2954 	case KVM_REG_S390_PFSELECT:
2955 		r = put_user(vcpu->arch.pfault_select,
2956 			     (u64 __user *)reg->addr);
2957 		break;
2958 	case KVM_REG_S390_PP:
2959 		r = put_user(vcpu->arch.sie_block->pp,
2960 			     (u64 __user *)reg->addr);
2961 		break;
2962 	case KVM_REG_S390_GBEA:
2963 		r = put_user(vcpu->arch.sie_block->gbea,
2964 			     (u64 __user *)reg->addr);
2965 		break;
2966 	default:
2967 		break;
2968 	}
2969 
2970 	return r;
2971 }
2972 
2973 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2974 					   struct kvm_one_reg *reg)
2975 {
2976 	int r = -EINVAL;
2977 	__u64 val;
2978 
2979 	switch (reg->id) {
2980 	case KVM_REG_S390_TODPR:
2981 		r = get_user(vcpu->arch.sie_block->todpr,
2982 			     (u32 __user *)reg->addr);
2983 		break;
2984 	case KVM_REG_S390_EPOCHDIFF:
2985 		r = get_user(vcpu->arch.sie_block->epoch,
2986 			     (u64 __user *)reg->addr);
2987 		break;
2988 	case KVM_REG_S390_CPU_TIMER:
2989 		r = get_user(val, (u64 __user *)reg->addr);
2990 		if (!r)
2991 			kvm_s390_set_cpu_timer(vcpu, val);
2992 		break;
2993 	case KVM_REG_S390_CLOCK_COMP:
2994 		r = get_user(vcpu->arch.sie_block->ckc,
2995 			     (u64 __user *)reg->addr);
2996 		break;
2997 	case KVM_REG_S390_PFTOKEN:
2998 		r = get_user(vcpu->arch.pfault_token,
2999 			     (u64 __user *)reg->addr);
3000 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3001 			kvm_clear_async_pf_completion_queue(vcpu);
3002 		break;
3003 	case KVM_REG_S390_PFCOMPARE:
3004 		r = get_user(vcpu->arch.pfault_compare,
3005 			     (u64 __user *)reg->addr);
3006 		break;
3007 	case KVM_REG_S390_PFSELECT:
3008 		r = get_user(vcpu->arch.pfault_select,
3009 			     (u64 __user *)reg->addr);
3010 		break;
3011 	case KVM_REG_S390_PP:
3012 		r = get_user(vcpu->arch.sie_block->pp,
3013 			     (u64 __user *)reg->addr);
3014 		break;
3015 	case KVM_REG_S390_GBEA:
3016 		r = get_user(vcpu->arch.sie_block->gbea,
3017 			     (u64 __user *)reg->addr);
3018 		break;
3019 	default:
3020 		break;
3021 	}
3022 
3023 	return r;
3024 }
3025 
/* Run the s390 initial reset on @vcpu; success is reported unconditionally. */
static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
	kvm_s390_vcpu_initial_reset(vcpu);

	return 0;
}
3031 
3032 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3033 {
3034 	vcpu_load(vcpu);
3035 	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3036 	vcpu_put(vcpu);
3037 	return 0;
3038 }
3039 
3040 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3041 {
3042 	vcpu_load(vcpu);
3043 	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3044 	vcpu_put(vcpu);
3045 	return 0;
3046 }
3047 
3048 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3049 				  struct kvm_sregs *sregs)
3050 {
3051 	vcpu_load(vcpu);
3052 
3053 	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3054 	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3055 
3056 	vcpu_put(vcpu);
3057 	return 0;
3058 }
3059 
3060 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3061 				  struct kvm_sregs *sregs)
3062 {
3063 	vcpu_load(vcpu);
3064 
3065 	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3066 	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3067 
3068 	vcpu_put(vcpu);
3069 	return 0;
3070 }
3071 
3072 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3073 {
3074 	int ret = 0;
3075 
3076 	vcpu_load(vcpu);
3077 
3078 	if (test_fp_ctl(fpu->fpc)) {
3079 		ret = -EINVAL;
3080 		goto out;
3081 	}
3082 	vcpu->run->s.regs.fpc = fpu->fpc;
3083 	if (MACHINE_HAS_VX)
3084 		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3085 				 (freg_t *) fpu->fprs);
3086 	else
3087 		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3088 
3089 out:
3090 	vcpu_put(vcpu);
3091 	return ret;
3092 }
3093 
3094 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3095 {
3096 	vcpu_load(vcpu);
3097 
3098 	/* make sure we have the latest values */
3099 	save_fpu_regs();
3100 	if (MACHINE_HAS_VX)
3101 		convert_vx_to_fp((freg_t *) fpu->fprs,
3102 				 (__vector128 *) vcpu->run->s.regs.vrs);
3103 	else
3104 		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3105 	fpu->fpc = vcpu->run->s.regs.fpc;
3106 
3107 	vcpu_put(vcpu);
3108 	return 0;
3109 }
3110 
3111 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3112 {
3113 	int rc = 0;
3114 
3115 	if (!is_vcpu_stopped(vcpu))
3116 		rc = -EBUSY;
3117 	else {
3118 		vcpu->run->psw_mask = psw.mask;
3119 		vcpu->run->psw_addr = psw.addr;
3120 	}
3121 	return rc;
3122 }
3123 
3124 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3125 				  struct kvm_translation *tr)
3126 {
3127 	return -EINVAL; /* not implemented yet */
3128 }
3129 
3130 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3131 			      KVM_GUESTDBG_USE_HW_BP | \
3132 			      KVM_GUESTDBG_ENABLE)
3133 
3134 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3135 					struct kvm_guest_debug *dbg)
3136 {
3137 	int rc = 0;
3138 
3139 	vcpu_load(vcpu);
3140 
3141 	vcpu->guest_debug = 0;
3142 	kvm_s390_clear_bp_data(vcpu);
3143 
3144 	if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3145 		rc = -EINVAL;
3146 		goto out;
3147 	}
3148 	if (!sclp.has_gpere) {
3149 		rc = -EINVAL;
3150 		goto out;
3151 	}
3152 
3153 	if (dbg->control & KVM_GUESTDBG_ENABLE) {
3154 		vcpu->guest_debug = dbg->control;
3155 		/* enforce guest PER */
3156 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3157 
3158 		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3159 			rc = kvm_s390_import_bp_data(vcpu, dbg);
3160 	} else {
3161 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3162 		vcpu->arch.guestdbg.last_bp = 0;
3163 	}
3164 
3165 	if (rc) {
3166 		vcpu->guest_debug = 0;
3167 		kvm_s390_clear_bp_data(vcpu);
3168 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3169 	}
3170 
3171 out:
3172 	vcpu_put(vcpu);
3173 	return rc;
3174 }
3175 
3176 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3177 				    struct kvm_mp_state *mp_state)
3178 {
3179 	int ret;
3180 
3181 	vcpu_load(vcpu);
3182 
3183 	/* CHECK_STOP and LOAD are not supported yet */
3184 	ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3185 				      KVM_MP_STATE_OPERATING;
3186 
3187 	vcpu_put(vcpu);
3188 	return ret;
3189 }
3190 
3191 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3192 				    struct kvm_mp_state *mp_state)
3193 {
3194 	int rc = 0;
3195 
3196 	vcpu_load(vcpu);
3197 
3198 	/* user space knows about this interface - let it control the state */
3199 	vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3200 
3201 	switch (mp_state->mp_state) {
3202 	case KVM_MP_STATE_STOPPED:
3203 		kvm_s390_vcpu_stop(vcpu);
3204 		break;
3205 	case KVM_MP_STATE_OPERATING:
3206 		kvm_s390_vcpu_start(vcpu);
3207 		break;
3208 	case KVM_MP_STATE_LOAD:
3209 	case KVM_MP_STATE_CHECK_STOP:
3210 		/* fall through - CHECK_STOP and LOAD are not supported yet */
3211 	default:
3212 		rc = -ENXIO;
3213 	}
3214 
3215 	vcpu_put(vcpu);
3216 	return rc;
3217 }
3218 
3219 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3220 {
3221 	return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3222 }
3223 
3224 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3225 {
3226 retry:
3227 	kvm_s390_vcpu_request_handled(vcpu);
3228 	if (!kvm_request_pending(vcpu))
3229 		return 0;
3230 	/*
3231 	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
3232 	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3233 	 * This ensures that the ipte instruction for this request has
3234 	 * already finished. We might race against a second unmapper that
3235 	 * wants to set the blocking bit. Lets just retry the request loop.
3236 	 */
3237 	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3238 		int rc;
3239 		rc = gmap_mprotect_notify(vcpu->arch.gmap,
3240 					  kvm_s390_get_prefix(vcpu),
3241 					  PAGE_SIZE * 2, PROT_WRITE);
3242 		if (rc) {
3243 			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3244 			return rc;
3245 		}
3246 		goto retry;
3247 	}
3248 
3249 	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3250 		vcpu->arch.sie_block->ihcpu = 0xffff;
3251 		goto retry;
3252 	}
3253 
3254 	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3255 		if (!ibs_enabled(vcpu)) {
3256 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3257 			kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3258 		}
3259 		goto retry;
3260 	}
3261 
3262 	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3263 		if (ibs_enabled(vcpu)) {
3264 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3265 			kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3266 		}
3267 		goto retry;
3268 	}
3269 
3270 	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3271 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3272 		goto retry;
3273 	}
3274 
3275 	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3276 		/*
3277 		 * Disable CMM virtualization; we will emulate the ESSA
3278 		 * instruction manually, in order to provide additional
3279 		 * functionalities needed for live migration.
3280 		 */
3281 		vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3282 		goto retry;
3283 	}
3284 
3285 	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3286 		/*
3287 		 * Re-enable CMM virtualization if CMMA is available and
3288 		 * CMM has been used.
3289 		 */
3290 		if ((vcpu->kvm->arch.use_cmma) &&
3291 		    (vcpu->kvm->mm->context.uses_cmm))
3292 			vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3293 		goto retry;
3294 	}
3295 
3296 	/* nothing to do, just clear the request */
3297 	kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3298 	/* we left the vsie handler, nothing to do, just clear the request */
3299 	kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
3300 
3301 	return 0;
3302 }
3303 
3304 void kvm_s390_set_tod_clock(struct kvm *kvm,
3305 			    const struct kvm_s390_vm_tod_clock *gtod)
3306 {
3307 	struct kvm_vcpu *vcpu;
3308 	struct kvm_s390_tod_clock_ext htod;
3309 	int i;
3310 
3311 	mutex_lock(&kvm->lock);
3312 	preempt_disable();
3313 
3314 	get_tod_clock_ext((char *)&htod);
3315 
3316 	kvm->arch.epoch = gtod->tod - htod.tod;
3317 	kvm->arch.epdx = 0;
3318 	if (test_kvm_facility(kvm, 139)) {
3319 		kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
3320 		if (kvm->arch.epoch > gtod->tod)
3321 			kvm->arch.epdx -= 1;
3322 	}
3323 
3324 	kvm_s390_vcpu_block_all(kvm);
3325 	kvm_for_each_vcpu(i, vcpu, kvm) {
3326 		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3327 		vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
3328 	}
3329 
3330 	kvm_s390_vcpu_unblock_all(kvm);
3331 	preempt_enable();
3332 	mutex_unlock(&kvm->lock);
3333 }
3334 
3335 /**
3336  * kvm_arch_fault_in_page - fault-in guest page if necessary
3337  * @vcpu: The corresponding virtual cpu
3338  * @gpa: Guest physical address
3339  * @writable: Whether the page should be writable or not
3340  *
3341  * Make sure that a guest page has been faulted-in on the host.
3342  *
3343  * Return: Zero on success, negative error code otherwise.
3344  */
3345 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3346 {
3347 	return gmap_fault(vcpu->arch.gmap, gpa,
3348 			  writable ? FAULT_FLAG_WRITE : 0);
3349 }
3350 
3351 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3352 				      unsigned long token)
3353 {
3354 	struct kvm_s390_interrupt inti;
3355 	struct kvm_s390_irq irq;
3356 
3357 	if (start_token) {
3358 		irq.u.ext.ext_params2 = token;
3359 		irq.type = KVM_S390_INT_PFAULT_INIT;
3360 		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3361 	} else {
3362 		inti.type = KVM_S390_INT_PFAULT_DONE;
3363 		inti.parm64 = token;
3364 		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3365 	}
3366 }
3367 
3368 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3369 				     struct kvm_async_pf *work)
3370 {
3371 	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3372 	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3373 }
3374 
3375 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3376 				 struct kvm_async_pf *work)
3377 {
3378 	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3379 	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3380 }
3381 
/*
 * Intentionally empty: s390 always injects the page directly, so there is
 * nothing left to do once the page is ready.
 */
void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
}
3387 
3388 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3389 {
3390 	/*
3391 	 * s390 will always inject the page directly,
3392 	 * but we still want check_async_completion to cleanup
3393 	 */
3394 	return true;
3395 }
3396 
3397 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3398 {
3399 	hva_t hva;
3400 	struct kvm_arch_async_pf arch;
3401 	int rc;
3402 
3403 	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3404 		return 0;
3405 	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3406 	    vcpu->arch.pfault_compare)
3407 		return 0;
3408 	if (psw_extint_disabled(vcpu))
3409 		return 0;
3410 	if (kvm_s390_vcpu_has_irq(vcpu, 0))
3411 		return 0;
3412 	if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
3413 		return 0;
3414 	if (!vcpu->arch.gmap->pfault_enabled)
3415 		return 0;
3416 
3417 	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3418 	hva += current->thread.gmap_addr & ~PAGE_MASK;
3419 	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3420 		return 0;
3421 
3422 	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3423 	return rc;
3424 }
3425 
3426 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3427 {
3428 	int rc, cpuflags;
3429 
3430 	/*
3431 	 * On s390 notifications for arriving pages will be delivered directly
3432 	 * to the guest but the house keeping for completed pfaults is
3433 	 * handled outside the worker.
3434 	 */
3435 	kvm_check_async_pf_completion(vcpu);
3436 
3437 	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3438 	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3439 
3440 	if (need_resched())
3441 		schedule();
3442 
3443 	if (test_cpu_flag(CIF_MCCK_PENDING))
3444 		s390_handle_mcck();
3445 
3446 	if (!kvm_is_ucontrol(vcpu->kvm)) {
3447 		rc = kvm_s390_deliver_pending_interrupts(vcpu);
3448 		if (rc)
3449 			return rc;
3450 	}
3451 
3452 	rc = kvm_s390_handle_requests(vcpu);
3453 	if (rc)
3454 		return rc;
3455 
3456 	if (guestdbg_enabled(vcpu)) {
3457 		kvm_s390_backup_guest_per_regs(vcpu);
3458 		kvm_s390_patch_guest_per_regs(vcpu);
3459 	}
3460 
3461 	vcpu->arch.sie_block->icptcode = 0;
3462 	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3463 	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3464 	trace_kvm_s390_sie_enter(vcpu, cpuflags);
3465 
3466 	return 0;
3467 }
3468 
3469 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3470 {
3471 	struct kvm_s390_pgm_info pgm_info = {
3472 		.code = PGM_ADDRESSING,
3473 	};
3474 	u8 opcode, ilen;
3475 	int rc;
3476 
3477 	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3478 	trace_kvm_s390_sie_fault(vcpu);
3479 
3480 	/*
3481 	 * We want to inject an addressing exception, which is defined as a
3482 	 * suppressing or terminating exception. However, since we came here
3483 	 * by a DAT access exception, the PSW still points to the faulting
3484 	 * instruction since DAT exceptions are nullifying. So we've got
3485 	 * to look up the current opcode to get the length of the instruction
3486 	 * to be able to forward the PSW.
3487 	 */
3488 	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3489 	ilen = insn_length(opcode);
3490 	if (rc < 0) {
3491 		return rc;
3492 	} else if (rc) {
3493 		/* Instruction-Fetching Exceptions - we can't detect the ilen.
3494 		 * Forward by arbitrary ilc, injection will take care of
3495 		 * nullification if necessary.
3496 		 */
3497 		pgm_info = vcpu->arch.pgm;
3498 		ilen = 4;
3499 	}
3500 	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3501 	kvm_s390_forward_psw(vcpu, ilen);
3502 	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3503 }
3504 
3505 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3506 {
3507 	struct mcck_volatile_info *mcck_info;
3508 	struct sie_page *sie_page;
3509 
3510 	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3511 		   vcpu->arch.sie_block->icptcode);
3512 	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3513 
3514 	if (guestdbg_enabled(vcpu))
3515 		kvm_s390_restore_guest_per_regs(vcpu);
3516 
3517 	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3518 	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3519 
3520 	if (exit_reason == -EINTR) {
3521 		VCPU_EVENT(vcpu, 3, "%s", "machine check");
3522 		sie_page = container_of(vcpu->arch.sie_block,
3523 					struct sie_page, sie_block);
3524 		mcck_info = &sie_page->mcck_info;
3525 		kvm_s390_reinject_machine_check(vcpu, mcck_info);
3526 		return 0;
3527 	}
3528 
3529 	if (vcpu->arch.sie_block->icptcode > 0) {
3530 		int rc = kvm_handle_sie_intercept(vcpu);
3531 
3532 		if (rc != -EOPNOTSUPP)
3533 			return rc;
3534 		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3535 		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3536 		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3537 		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3538 		return -EREMOTE;
3539 	} else if (exit_reason != -EFAULT) {
3540 		vcpu->stat.exit_null++;
3541 		return 0;
3542 	} else if (kvm_is_ucontrol(vcpu->kvm)) {
3543 		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3544 		vcpu->run->s390_ucontrol.trans_exc_code =
3545 						current->thread.gmap_addr;
3546 		vcpu->run->s390_ucontrol.pgm_code = 0x10;
3547 		return -EREMOTE;
3548 	} else if (current->thread.gmap_pfault) {
3549 		trace_kvm_s390_major_guest_pfault(vcpu);
3550 		current->thread.gmap_pfault = 0;
3551 		if (kvm_arch_setup_async_pf(vcpu))
3552 			return 0;
3553 		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3554 	}
3555 	return vcpu_post_run_fault_in_sie(vcpu);
3556 }
3557 
3558 static int __vcpu_run(struct kvm_vcpu *vcpu)
3559 {
3560 	int rc, exit_reason;
3561 
3562 	/*
3563 	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
3564 	 * ning the guest), so that memslots (and other stuff) are protected
3565 	 */
3566 	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3567 
3568 	do {
3569 		rc = vcpu_pre_run(vcpu);
3570 		if (rc)
3571 			break;
3572 
3573 		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3574 		/*
3575 		 * As PF_VCPU will be used in fault handler, between
3576 		 * guest_enter and guest_exit should be no uaccess.
3577 		 */
3578 		local_irq_disable();
3579 		guest_enter_irqoff();
3580 		__disable_cpu_timer_accounting(vcpu);
3581 		local_irq_enable();
3582 		exit_reason = sie64a(vcpu->arch.sie_block,
3583 				     vcpu->run->s.regs.gprs);
3584 		local_irq_disable();
3585 		__enable_cpu_timer_accounting(vcpu);
3586 		guest_exit_irqoff();
3587 		local_irq_enable();
3588 		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3589 
3590 		rc = vcpu_post_run(vcpu, exit_reason);
3591 	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3592 
3593 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3594 	return rc;
3595 }
3596 
3597 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3598 {
3599 	struct runtime_instr_cb *riccb;
3600 	struct gs_cb *gscb;
3601 
3602 	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3603 	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3604 	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3605 	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3606 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3607 		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3608 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3609 		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3610 		/* some control register changes require a tlb flush */
3611 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3612 	}
3613 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3614 		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3615 		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3616 		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3617 		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3618 		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3619 	}
3620 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3621 		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3622 		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3623 		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3624 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3625 			kvm_clear_async_pf_completion_queue(vcpu);
3626 	}
3627 	/*
3628 	 * If userspace sets the riccb (e.g. after migration) to a valid state,
3629 	 * we should enable RI here instead of doing the lazy enablement.
3630 	 */
3631 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3632 	    test_kvm_facility(vcpu->kvm, 64) &&
3633 	    riccb->v &&
3634 	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3635 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3636 		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3637 	}
3638 	/*
3639 	 * If userspace sets the gscb (e.g. after migration) to non-zero,
3640 	 * we should enable GS here instead of doing the lazy enablement.
3641 	 */
3642 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3643 	    test_kvm_facility(vcpu->kvm, 133) &&
3644 	    gscb->gssm &&
3645 	    !vcpu->arch.gs_enabled) {
3646 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3647 		vcpu->arch.sie_block->ecb |= ECB_GS;
3648 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3649 		vcpu->arch.gs_enabled = 1;
3650 	}
3651 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
3652 	    test_kvm_facility(vcpu->kvm, 82)) {
3653 		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3654 		vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
3655 	}
3656 	save_access_regs(vcpu->arch.host_acrs);
3657 	restore_access_regs(vcpu->run->s.regs.acrs);
3658 	/* save host (userspace) fprs/vrs */
3659 	save_fpu_regs();
3660 	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3661 	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
3662 	if (MACHINE_HAS_VX)
3663 		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3664 	else
3665 		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3666 	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3667 	if (test_fp_ctl(current->thread.fpu.fpc))
3668 		/* User space provided an invalid FPC, let's clear it */
3669 		current->thread.fpu.fpc = 0;
3670 	if (MACHINE_HAS_GS) {
3671 		preempt_disable();
3672 		__ctl_set_bit(2, 4);
3673 		if (current->thread.gs_cb) {
3674 			vcpu->arch.host_gscb = current->thread.gs_cb;
3675 			save_gs_cb(vcpu->arch.host_gscb);
3676 		}
3677 		if (vcpu->arch.gs_enabled) {
3678 			current->thread.gs_cb = (struct gs_cb *)
3679 						&vcpu->run->s.regs.gscb;
3680 			restore_gs_cb(current->thread.gs_cb);
3681 		}
3682 		preempt_enable();
3683 	}
3684 	/* SIE will load etoken directly from SDNX and therefore kvm_run */
3685 
3686 	kvm_run->kvm_dirty_regs = 0;
3687 }
3688 
3689 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3690 {
3691 	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3692 	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3693 	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3694 	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3695 	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3696 	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3697 	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3698 	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3699 	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3700 	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3701 	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3702 	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3703 	kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
3704 	save_access_regs(vcpu->run->s.regs.acrs);
3705 	restore_access_regs(vcpu->arch.host_acrs);
3706 	/* Save guest register state */
3707 	save_fpu_regs();
3708 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3709 	/* Restore will be done lazily at return */
3710 	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3711 	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3712 	if (MACHINE_HAS_GS) {
3713 		__ctl_set_bit(2, 4);
3714 		if (vcpu->arch.gs_enabled)
3715 			save_gs_cb(current->thread.gs_cb);
3716 		preempt_disable();
3717 		current->thread.gs_cb = vcpu->arch.host_gscb;
3718 		restore_gs_cb(vcpu->arch.host_gscb);
3719 		preempt_enable();
3720 		if (!vcpu->arch.host_gscb)
3721 			__ctl_clear_bit(2, 4);
3722 		vcpu->arch.host_gscb = NULL;
3723 	}
3724 	/* SIE will save etoken directly into SDNX and therefore kvm_run */
3725 }
3726 
3727 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3728 {
3729 	int rc;
3730 
3731 	if (kvm_run->immediate_exit)
3732 		return -EINTR;
3733 
3734 	vcpu_load(vcpu);
3735 
3736 	if (guestdbg_exit_pending(vcpu)) {
3737 		kvm_s390_prepare_debug_exit(vcpu);
3738 		rc = 0;
3739 		goto out;
3740 	}
3741 
3742 	kvm_sigset_activate(vcpu);
3743 
3744 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
3745 		kvm_s390_vcpu_start(vcpu);
3746 	} else if (is_vcpu_stopped(vcpu)) {
3747 		pr_err_ratelimited("can't run stopped vcpu %d\n",
3748 				   vcpu->vcpu_id);
3749 		rc = -EINVAL;
3750 		goto out;
3751 	}
3752 
3753 	sync_regs(vcpu, kvm_run);
3754 	enable_cpu_timer_accounting(vcpu);
3755 
3756 	might_fault();
3757 	rc = __vcpu_run(vcpu);
3758 
3759 	if (signal_pending(current) && !rc) {
3760 		kvm_run->exit_reason = KVM_EXIT_INTR;
3761 		rc = -EINTR;
3762 	}
3763 
3764 	if (guestdbg_exit_pending(vcpu) && !rc)  {
3765 		kvm_s390_prepare_debug_exit(vcpu);
3766 		rc = 0;
3767 	}
3768 
3769 	if (rc == -EREMOTE) {
3770 		/* userspace support is needed, kvm_run has been prepared */
3771 		rc = 0;
3772 	}
3773 
3774 	disable_cpu_timer_accounting(vcpu);
3775 	store_regs(vcpu, kvm_run);
3776 
3777 	kvm_sigset_deactivate(vcpu);
3778 
3779 	vcpu->stat.exit_userspace++;
3780 out:
3781 	vcpu_put(vcpu);
3782 	return rc;
3783 }
3784 
3785 /*
3786  * store status at address
3787  * we use have two special cases:
3788  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
3789  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
3790  */
3791 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
3792 {
3793 	unsigned char archmode = 1;
3794 	freg_t fprs[NUM_FPRS];
3795 	unsigned int px;
3796 	u64 clkcomp, cputm;
3797 	int rc;
3798 
3799 	px = kvm_s390_get_prefix(vcpu);
3800 	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
3801 		if (write_guest_abs(vcpu, 163, &archmode, 1))
3802 			return -EFAULT;
3803 		gpa = 0;
3804 	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
3805 		if (write_guest_real(vcpu, 163, &archmode, 1))
3806 			return -EFAULT;
3807 		gpa = px;
3808 	} else
3809 		gpa -= __LC_FPREGS_SAVE_AREA;
3810 
3811 	/* manually convert vector registers if necessary */
3812 	if (MACHINE_HAS_VX) {
3813 		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
3814 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3815 				     fprs, 128);
3816 	} else {
3817 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3818 				     vcpu->run->s.regs.fprs, 128);
3819 	}
3820 	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
3821 			      vcpu->run->s.regs.gprs, 128);
3822 	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
3823 			      &vcpu->arch.sie_block->gpsw, 16);
3824 	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
3825 			      &px, 4);
3826 	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
3827 			      &vcpu->run->s.regs.fpc, 4);
3828 	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
3829 			      &vcpu->arch.sie_block->todpr, 4);
3830 	cputm = kvm_s390_get_cpu_timer(vcpu);
3831 	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
3832 			      &cputm, 8);
3833 	clkcomp = vcpu->arch.sie_block->ckc >> 8;
3834 	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
3835 			      &clkcomp, 8);
3836 	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
3837 			      &vcpu->run->s.regs.acrs, 64);
3838 	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
3839 			      &vcpu->arch.sie_block->gcr, 128);
3840 	return rc ? -EFAULT : 0;
3841 }
3842 
3843 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
3844 {
3845 	/*
3846 	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
3847 	 * switch in the run ioctl. Let's update our copies before we save
3848 	 * it into the save area
3849 	 */
3850 	save_fpu_regs();
3851 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3852 	save_access_regs(vcpu->run->s.regs.acrs);
3853 
3854 	return kvm_s390_store_status_unloaded(vcpu, addr);
3855 }
3856 
3857 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3858 {
3859 	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
3860 	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
3861 }
3862 
3863 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
3864 {
3865 	unsigned int i;
3866 	struct kvm_vcpu *vcpu;
3867 
3868 	kvm_for_each_vcpu(i, vcpu, kvm) {
3869 		__disable_ibs_on_vcpu(vcpu);
3870 	}
3871 }
3872 
3873 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3874 {
3875 	if (!sclp.has_ibs)
3876 		return;
3877 	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
3878 	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
3879 }
3880 
3881 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
3882 {
3883 	int i, online_vcpus, started_vcpus = 0;
3884 
3885 	if (!is_vcpu_stopped(vcpu))
3886 		return;
3887 
3888 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
3889 	/* Only one cpu at a time may enter/leave the STOPPED state. */
3890 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
3891 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3892 
3893 	for (i = 0; i < online_vcpus; i++) {
3894 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
3895 			started_vcpus++;
3896 	}
3897 
3898 	if (started_vcpus == 0) {
3899 		/* we're the only active VCPU -> speed it up */
3900 		__enable_ibs_on_vcpu(vcpu);
3901 	} else if (started_vcpus == 1) {
3902 		/*
3903 		 * As we are starting a second VCPU, we have to disable
3904 		 * the IBS facility on all VCPUs to remove potentially
3905 		 * oustanding ENABLE requests.
3906 		 */
3907 		__disable_ibs_on_all_vcpus(vcpu->kvm);
3908 	}
3909 
3910 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
3911 	/*
3912 	 * Another VCPU might have used IBS while we were offline.
3913 	 * Let's play safe and flush the VCPU at startup.
3914 	 */
3915 	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3916 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3917 	return;
3918 }
3919 
3920 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
3921 {
3922 	int i, online_vcpus, started_vcpus = 0;
3923 	struct kvm_vcpu *started_vcpu = NULL;
3924 
3925 	if (is_vcpu_stopped(vcpu))
3926 		return;
3927 
3928 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
3929 	/* Only one cpu at a time may enter/leave the STOPPED state. */
3930 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
3931 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3932 
3933 	/* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
3934 	kvm_s390_clear_stop_irq(vcpu);
3935 
3936 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
3937 	__disable_ibs_on_vcpu(vcpu);
3938 
3939 	for (i = 0; i < online_vcpus; i++) {
3940 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
3941 			started_vcpus++;
3942 			started_vcpu = vcpu->kvm->vcpus[i];
3943 		}
3944 	}
3945 
3946 	if (started_vcpus == 1) {
3947 		/*
3948 		 * As we only have one VCPU left, we want to enable the
3949 		 * IBS facility for that VCPU to speed it up.
3950 		 */
3951 		__enable_ibs_on_vcpu(started_vcpu);
3952 	}
3953 
3954 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3955 	return;
3956 }
3957 
3958 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3959 				     struct kvm_enable_cap *cap)
3960 {
3961 	int r;
3962 
3963 	if (cap->flags)
3964 		return -EINVAL;
3965 
3966 	switch (cap->cap) {
3967 	case KVM_CAP_S390_CSS_SUPPORT:
3968 		if (!vcpu->kvm->arch.css_support) {
3969 			vcpu->kvm->arch.css_support = 1;
3970 			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
3971 			trace_kvm_s390_enable_css(vcpu->kvm);
3972 		}
3973 		r = 0;
3974 		break;
3975 	default:
3976 		r = -EINVAL;
3977 		break;
3978 	}
3979 	return r;
3980 }
3981 
3982 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3983 				  struct kvm_s390_mem_op *mop)
3984 {
3985 	void __user *uaddr = (void __user *)mop->buf;
3986 	void *tmpbuf = NULL;
3987 	int r, srcu_idx;
3988 	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3989 				    | KVM_S390_MEMOP_F_CHECK_ONLY;
3990 
3991 	if (mop->flags & ~supported_flags)
3992 		return -EINVAL;
3993 
3994 	if (mop->size > MEM_OP_MAX_SIZE)
3995 		return -E2BIG;
3996 
3997 	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3998 		tmpbuf = vmalloc(mop->size);
3999 		if (!tmpbuf)
4000 			return -ENOMEM;
4001 	}
4002 
4003 	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4004 
4005 	switch (mop->op) {
4006 	case KVM_S390_MEMOP_LOGICAL_READ:
4007 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4008 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4009 					    mop->size, GACC_FETCH);
4010 			break;
4011 		}
4012 		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4013 		if (r == 0) {
4014 			if (copy_to_user(uaddr, tmpbuf, mop->size))
4015 				r = -EFAULT;
4016 		}
4017 		break;
4018 	case KVM_S390_MEMOP_LOGICAL_WRITE:
4019 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4020 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4021 					    mop->size, GACC_STORE);
4022 			break;
4023 		}
4024 		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4025 			r = -EFAULT;
4026 			break;
4027 		}
4028 		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4029 		break;
4030 	default:
4031 		r = -EINVAL;
4032 	}
4033 
4034 	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
4035 
4036 	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4037 		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
4038 
4039 	vfree(tmpbuf);
4040 	return r;
4041 }
4042 
4043 long kvm_arch_vcpu_async_ioctl(struct file *filp,
4044 			       unsigned int ioctl, unsigned long arg)
4045 {
4046 	struct kvm_vcpu *vcpu = filp->private_data;
4047 	void __user *argp = (void __user *)arg;
4048 
4049 	switch (ioctl) {
4050 	case KVM_S390_IRQ: {
4051 		struct kvm_s390_irq s390irq;
4052 
4053 		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4054 			return -EFAULT;
4055 		return kvm_s390_inject_vcpu(vcpu, &s390irq);
4056 	}
4057 	case KVM_S390_INTERRUPT: {
4058 		struct kvm_s390_interrupt s390int;
4059 		struct kvm_s390_irq s390irq;
4060 
4061 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
4062 			return -EFAULT;
4063 		if (s390int_to_s390irq(&s390int, &s390irq))
4064 			return -EINVAL;
4065 		return kvm_s390_inject_vcpu(vcpu, &s390irq);
4066 	}
4067 	}
4068 	return -ENOIOCTLCMD;
4069 }
4070 
4071 long kvm_arch_vcpu_ioctl(struct file *filp,
4072 			 unsigned int ioctl, unsigned long arg)
4073 {
4074 	struct kvm_vcpu *vcpu = filp->private_data;
4075 	void __user *argp = (void __user *)arg;
4076 	int idx;
4077 	long r;
4078 
4079 	vcpu_load(vcpu);
4080 
4081 	switch (ioctl) {
4082 	case KVM_S390_STORE_STATUS:
4083 		idx = srcu_read_lock(&vcpu->kvm->srcu);
4084 		r = kvm_s390_vcpu_store_status(vcpu, arg);
4085 		srcu_read_unlock(&vcpu->kvm->srcu, idx);
4086 		break;
4087 	case KVM_S390_SET_INITIAL_PSW: {
4088 		psw_t psw;
4089 
4090 		r = -EFAULT;
4091 		if (copy_from_user(&psw, argp, sizeof(psw)))
4092 			break;
4093 		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4094 		break;
4095 	}
4096 	case KVM_S390_INITIAL_RESET:
4097 		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4098 		break;
4099 	case KVM_SET_ONE_REG:
4100 	case KVM_GET_ONE_REG: {
4101 		struct kvm_one_reg reg;
4102 		r = -EFAULT;
4103 		if (copy_from_user(&reg, argp, sizeof(reg)))
4104 			break;
4105 		if (ioctl == KVM_SET_ONE_REG)
4106 			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4107 		else
4108 			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4109 		break;
4110 	}
4111 #ifdef CONFIG_KVM_S390_UCONTROL
4112 	case KVM_S390_UCAS_MAP: {
4113 		struct kvm_s390_ucas_mapping ucasmap;
4114 
4115 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4116 			r = -EFAULT;
4117 			break;
4118 		}
4119 
4120 		if (!kvm_is_ucontrol(vcpu->kvm)) {
4121 			r = -EINVAL;
4122 			break;
4123 		}
4124 
4125 		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4126 				     ucasmap.vcpu_addr, ucasmap.length);
4127 		break;
4128 	}
4129 	case KVM_S390_UCAS_UNMAP: {
4130 		struct kvm_s390_ucas_mapping ucasmap;
4131 
4132 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4133 			r = -EFAULT;
4134 			break;
4135 		}
4136 
4137 		if (!kvm_is_ucontrol(vcpu->kvm)) {
4138 			r = -EINVAL;
4139 			break;
4140 		}
4141 
4142 		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
4143 			ucasmap.length);
4144 		break;
4145 	}
4146 #endif
4147 	case KVM_S390_VCPU_FAULT: {
4148 		r = gmap_fault(vcpu->arch.gmap, arg, 0);
4149 		break;
4150 	}
4151 	case KVM_ENABLE_CAP:
4152 	{
4153 		struct kvm_enable_cap cap;
4154 		r = -EFAULT;
4155 		if (copy_from_user(&cap, argp, sizeof(cap)))
4156 			break;
4157 		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4158 		break;
4159 	}
4160 	case KVM_S390_MEM_OP: {
4161 		struct kvm_s390_mem_op mem_op;
4162 
4163 		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4164 			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
4165 		else
4166 			r = -EFAULT;
4167 		break;
4168 	}
4169 	case KVM_S390_SET_IRQ_STATE: {
4170 		struct kvm_s390_irq_state irq_state;
4171 
4172 		r = -EFAULT;
4173 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4174 			break;
4175 		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4176 		    irq_state.len == 0 ||
4177 		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4178 			r = -EINVAL;
4179 			break;
4180 		}
4181 		/* do not use irq_state.flags, it will break old QEMUs */
4182 		r = kvm_s390_set_irq_state(vcpu,
4183 					   (void __user *) irq_state.buf,
4184 					   irq_state.len);
4185 		break;
4186 	}
4187 	case KVM_S390_GET_IRQ_STATE: {
4188 		struct kvm_s390_irq_state irq_state;
4189 
4190 		r = -EFAULT;
4191 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4192 			break;
4193 		if (irq_state.len == 0) {
4194 			r = -EINVAL;
4195 			break;
4196 		}
4197 		/* do not use irq_state.flags, it will break old QEMUs */
4198 		r = kvm_s390_get_irq_state(vcpu,
4199 					   (__u8 __user *)  irq_state.buf,
4200 					   irq_state.len);
4201 		break;
4202 	}
4203 	default:
4204 		r = -ENOTTY;
4205 	}
4206 
4207 	vcpu_put(vcpu);
4208 	return r;
4209 }
4210 
4211 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
4212 {
4213 #ifdef CONFIG_KVM_S390_UCONTROL
4214 	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
4215 		 && (kvm_is_ucontrol(vcpu->kvm))) {
4216 		vmf->page = virt_to_page(vcpu->arch.sie_block);
4217 		get_page(vmf->page);
4218 		return 0;
4219 	}
4220 #endif
4221 	return VM_FAULT_SIGBUS;
4222 }
4223 
/*
 * Arch hook invoked when a memory slot is created.  This implementation does
 * no per-slot setup and always reports success (0).
 */
int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}
4229 
4230 /* Section: memory related */
4231 int kvm_arch_prepare_memory_region(struct kvm *kvm,
4232 				   struct kvm_memory_slot *memslot,
4233 				   const struct kvm_userspace_memory_region *mem,
4234 				   enum kvm_mr_change change)
4235 {
4236 	/* A few sanity checks. We can have memory slots which have to be
4237 	   located/ended at a segment boundary (1MB). The memory in userland is
4238 	   ok to be fragmented into various different vmas. It is okay to mmap()
4239 	   and munmap() stuff in this slot after doing this call at any time */
4240 
4241 	if (mem->userspace_addr & 0xffffful)
4242 		return -EINVAL;
4243 
4244 	if (mem->memory_size & 0xffffful)
4245 		return -EINVAL;
4246 
4247 	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
4248 		return -EINVAL;
4249 
4250 	return 0;
4251 }
4252 
4253 void kvm_arch_commit_memory_region(struct kvm *kvm,
4254 				const struct kvm_userspace_memory_region *mem,
4255 				const struct kvm_memory_slot *old,
4256 				const struct kvm_memory_slot *new,
4257 				enum kvm_mr_change change)
4258 {
4259 	int rc;
4260 
4261 	/* If the basics of the memslot do not change, we do not want
4262 	 * to update the gmap. Every update causes several unnecessary
4263 	 * segment translation exceptions. This is usually handled just
4264 	 * fine by the normal fault handler + gmap, but it will also
4265 	 * cause faults on the prefix page of running guest CPUs.
4266 	 */
4267 	if (old->userspace_addr == mem->userspace_addr &&
4268 	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
4269 	    old->npages * PAGE_SIZE == mem->memory_size)
4270 		return;
4271 
4272 	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
4273 		mem->guest_phys_addr, mem->memory_size);
4274 	if (rc)
4275 		pr_warn("failed to commit memory region\n");
4276 	return;
4277 }
4278 
4279 static inline unsigned long nonhyp_mask(int i)
4280 {
4281 	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
4282 
4283 	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
4284 }
4285 
4286 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
4287 {
4288 	vcpu->valid_wakeup = false;
4289 }
4290 
4291 static int __init kvm_s390_init(void)
4292 {
4293 	int i;
4294 
4295 	if (!sclp.has_sief2) {
4296 		pr_info("SIE not available\n");
4297 		return -ENODEV;
4298 	}
4299 
4300 	if (nested && hpage) {
4301 		pr_info("nested (vSIE) and hpage (huge page backing) can currently not be activated concurrently");
4302 		return -EINVAL;
4303 	}
4304 
4305 	for (i = 0; i < 16; i++)
4306 		kvm_s390_fac_base[i] |=
4307 			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
4308 
4309 	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
4310 }
4311 
4312 static void __exit kvm_s390_exit(void)
4313 {
4314 	kvm_exit();
4315 }
4316 
/* Register the module's load and unload entry points. */
module_init(kvm_s390_init);
module_exit(kvm_s390_exit);
4319 
4320 /*
4321  * Enable autoloading of the kvm module.
4322  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
4323  * since x86 takes a different approach.
4324  */
4325 #include <linux/miscdevice.h>
4326 MODULE_ALIAS_MISCDEV(KVM_MINOR);
4327 MODULE_ALIAS("devname:kvm");
4328