xref: /openbmc/linux/arch/s390/kvm/kvm-s390.c (revision e3d786a3)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * hosting IBM Z kernel virtual machines (s390x)
4  *
5  * Copyright IBM Corp. 2008, 2018
6  *
7  *    Author(s): Carsten Otte <cotte@de.ibm.com>
8  *               Christian Borntraeger <borntraeger@de.ibm.com>
9  *               Heiko Carstens <heiko.carstens@de.ibm.com>
10  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
11  *               Jason J. Herne <jjherne@us.ibm.com>
12  */
13 
14 #include <linux/compiler.h>
15 #include <linux/err.h>
16 #include <linux/fs.h>
17 #include <linux/hrtimer.h>
18 #include <linux/init.h>
19 #include <linux/kvm.h>
20 #include <linux/kvm_host.h>
21 #include <linux/mman.h>
22 #include <linux/module.h>
23 #include <linux/moduleparam.h>
24 #include <linux/random.h>
25 #include <linux/slab.h>
26 #include <linux/timer.h>
27 #include <linux/vmalloc.h>
28 #include <linux/bitmap.h>
29 #include <linux/sched/signal.h>
30 #include <linux/string.h>
31 
32 #include <asm/asm-offsets.h>
33 #include <asm/lowcore.h>
34 #include <asm/stp.h>
35 #include <asm/pgtable.h>
36 #include <asm/gmap.h>
37 #include <asm/nmi.h>
38 #include <asm/switch_to.h>
39 #include <asm/isc.h>
40 #include <asm/sclp.h>
41 #include <asm/cpacf.h>
42 #include <asm/timex.h>
43 #include <asm/ap.h>
44 #include "kvm-s390.h"
45 #include "gaccess.h"
46 
47 #define KMSG_COMPONENT "kvm-s390"
48 #undef pr_fmt
49 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
50 
51 #define CREATE_TRACE_POINTS
52 #include "trace.h"
53 #include "trace-s390.h"
54 
55 #define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
56 #define LOCAL_IRQS 32
57 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
58 			   (KVM_MAX_VCPUS + LOCAL_IRQS))
59 
60 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
61 #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
62 
63 struct kvm_stats_debugfs_item debugfs_entries[] = {
64 	{ "userspace_handled", VCPU_STAT(exit_userspace) },
65 	{ "exit_null", VCPU_STAT(exit_null) },
66 	{ "exit_validity", VCPU_STAT(exit_validity) },
67 	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
68 	{ "exit_external_request", VCPU_STAT(exit_external_request) },
69 	{ "exit_io_request", VCPU_STAT(exit_io_request) },
70 	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
71 	{ "exit_instruction", VCPU_STAT(exit_instruction) },
72 	{ "exit_pei", VCPU_STAT(exit_pei) },
73 	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
74 	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
75 	{ "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
76 	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
77 	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
78 	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
79 	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
80 	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
81 	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
82 	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
83 	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
84 	{ "deliver_ckc", VCPU_STAT(deliver_ckc) },
85 	{ "deliver_cputm", VCPU_STAT(deliver_cputm) },
86 	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
87 	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
88 	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
89 	{ "deliver_virtio", VCPU_STAT(deliver_virtio) },
90 	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
91 	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
92 	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
93 	{ "deliver_program", VCPU_STAT(deliver_program) },
94 	{ "deliver_io", VCPU_STAT(deliver_io) },
95 	{ "deliver_machine_check", VCPU_STAT(deliver_machine_check) },
96 	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
97 	{ "inject_ckc", VCPU_STAT(inject_ckc) },
98 	{ "inject_cputm", VCPU_STAT(inject_cputm) },
99 	{ "inject_external_call", VCPU_STAT(inject_external_call) },
100 	{ "inject_float_mchk", VM_STAT(inject_float_mchk) },
101 	{ "inject_emergency_signal", VCPU_STAT(inject_emergency_signal) },
102 	{ "inject_io", VM_STAT(inject_io) },
103 	{ "inject_mchk", VCPU_STAT(inject_mchk) },
104 	{ "inject_pfault_done", VM_STAT(inject_pfault_done) },
105 	{ "inject_program", VCPU_STAT(inject_program) },
106 	{ "inject_restart", VCPU_STAT(inject_restart) },
107 	{ "inject_service_signal", VM_STAT(inject_service_signal) },
108 	{ "inject_set_prefix", VCPU_STAT(inject_set_prefix) },
109 	{ "inject_stop_signal", VCPU_STAT(inject_stop_signal) },
110 	{ "inject_pfault_init", VCPU_STAT(inject_pfault_init) },
111 	{ "inject_virtio", VM_STAT(inject_virtio) },
112 	{ "instruction_epsw", VCPU_STAT(instruction_epsw) },
113 	{ "instruction_gs", VCPU_STAT(instruction_gs) },
114 	{ "instruction_io_other", VCPU_STAT(instruction_io_other) },
115 	{ "instruction_lpsw", VCPU_STAT(instruction_lpsw) },
116 	{ "instruction_lpswe", VCPU_STAT(instruction_lpswe) },
117 	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
118 	{ "instruction_ptff", VCPU_STAT(instruction_ptff) },
119 	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
120 	{ "instruction_sck", VCPU_STAT(instruction_sck) },
121 	{ "instruction_sckpf", VCPU_STAT(instruction_sckpf) },
122 	{ "instruction_spx", VCPU_STAT(instruction_spx) },
123 	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
124 	{ "instruction_stap", VCPU_STAT(instruction_stap) },
125 	{ "instruction_iske", VCPU_STAT(instruction_iske) },
126 	{ "instruction_ri", VCPU_STAT(instruction_ri) },
127 	{ "instruction_rrbe", VCPU_STAT(instruction_rrbe) },
128 	{ "instruction_sske", VCPU_STAT(instruction_sske) },
129 	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
130 	{ "instruction_essa", VCPU_STAT(instruction_essa) },
131 	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
132 	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
133 	{ "instruction_tb", VCPU_STAT(instruction_tb) },
134 	{ "instruction_tpi", VCPU_STAT(instruction_tpi) },
135 	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
136 	{ "instruction_tsch", VCPU_STAT(instruction_tsch) },
137 	{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
138 	{ "instruction_sie", VCPU_STAT(instruction_sie) },
139 	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
140 	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
141 	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
142 	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
143 	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
144 	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
145 	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
146 	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
147 	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
148 	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
149 	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
150 	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
151 	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
152 	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
153 	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
154 	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
155 	{ "instruction_diag_10", VCPU_STAT(diagnose_10) },
156 	{ "instruction_diag_44", VCPU_STAT(diagnose_44) },
157 	{ "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
158 	{ "instruction_diag_258", VCPU_STAT(diagnose_258) },
159 	{ "instruction_diag_308", VCPU_STAT(diagnose_308) },
160 	{ "instruction_diag_500", VCPU_STAT(diagnose_500) },
161 	{ "instruction_diag_other", VCPU_STAT(diagnose_other) },
162 	{ NULL }
163 };
164 
165 struct kvm_s390_tod_clock_ext {
166 	__u8 epoch_idx;
167 	__u64 tod;
168 	__u8 reserved[7];
169 } __packed;
170 
171 /* allow nested virtualization in KVM (if enabled by user space) */
172 static int nested;
173 module_param(nested, int, S_IRUGO);
174 MODULE_PARM_DESC(nested, "Nested virtualization support");
175 
176 /* allow 1m huge page guest backing, if !nested */
177 static int hpage;
178 module_param(hpage, int, 0444);
179 MODULE_PARM_DESC(hpage, "1m huge page backing support");
180 
181 /*
182  * For now we handle at most 16 double words as this is what the s390 base
183  * kernel handles and stores in the prefix page. If we ever need to go beyond
184  * this, this requires changes to code, but the external uapi can stay.
185  */
186 #define SIZE_INTERNAL 16
187 
188 /*
189  * Base feature mask that defines default mask for facilities. Consists of the
190  * defines in FACILITIES_KVM and the non-hypervisor managed bits.
191  */
192 static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
193 /*
194  * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
195  * and defines the facilities that can be enabled via a cpu model.
196  */
197 static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
198 
199 static unsigned long kvm_s390_fac_size(void)
200 {
201 	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
202 	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
203 	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
204 		sizeof(S390_lowcore.stfle_fac_list));
205 
206 	return SIZE_INTERNAL;
207 }
208 
209 /* available cpu features supported by kvm */
210 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
211 /* available subfunctions indicated via query / "test bit" */
212 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
213 
214 static struct gmap_notifier gmap_notifier;
215 static struct gmap_notifier vsie_gmap_notifier;
216 debug_info_t *kvm_s390_dbf;
217 
218 /* Section: not file related */
219 int kvm_arch_hardware_enable(void)
220 {
221 	/* every s390 is virtualization enabled ;-) */
222 	return 0;
223 }
224 
225 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
226 			      unsigned long end);
227 
228 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
229 {
230 	u8 delta_idx = 0;
231 
232 	/*
233 	 * The TOD jumps by delta, we have to compensate this by adding
234 	 * -delta to the epoch.
235 	 */
236 	delta = -delta;
237 
238 	/* sign-extension - we're adding to signed values below */
239 	if ((s64)delta < 0)
240 		delta_idx = -1;
241 
242 	scb->epoch += delta;
243 	if (scb->ecd & ECD_MEF) {
244 		scb->epdx += delta_idx;
245 		if (scb->epoch < delta)
246 			scb->epdx += 1;
247 	}
248 }
249 
250 /*
251  * This callback is executed during stop_machine(). All CPUs are therefore
252  * temporarily stopped. In order not to change guest behavior, we have to
253  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
254  * so a CPU won't be stopped while calculating with the epoch.
255  */
256 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
257 			  void *v)
258 {
259 	struct kvm *kvm;
260 	struct kvm_vcpu *vcpu;
261 	int i;
262 	unsigned long long *delta = v;
263 
264 	list_for_each_entry(kvm, &vm_list, vm_list) {
265 		kvm_for_each_vcpu(i, vcpu, kvm) {
266 			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
267 			if (i == 0) {
268 				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
269 				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
270 			}
271 			if (vcpu->arch.cputm_enabled)
272 				vcpu->arch.cputm_start += *delta;
273 			if (vcpu->arch.vsie_block)
274 				kvm_clock_sync_scb(vcpu->arch.vsie_block,
275 						   *delta);
276 		}
277 	}
278 	return NOTIFY_OK;
279 }
280 
281 static struct notifier_block kvm_clock_notifier = {
282 	.notifier_call = kvm_clock_sync,
283 };
284 
285 int kvm_arch_hardware_setup(void)
286 {
287 	gmap_notifier.notifier_call = kvm_gmap_notifier;
288 	gmap_register_pte_notifier(&gmap_notifier);
289 	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
290 	gmap_register_pte_notifier(&vsie_gmap_notifier);
291 	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
292 				       &kvm_clock_notifier);
293 	return 0;
294 }
295 
296 void kvm_arch_hardware_unsetup(void)
297 {
298 	gmap_unregister_pte_notifier(&gmap_notifier);
299 	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
300 	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
301 					 &kvm_clock_notifier);
302 }
303 
304 static void allow_cpu_feat(unsigned long nr)
305 {
306 	set_bit_inv(nr, kvm_s390_available_cpu_feat);
307 }
308 
309 static inline int plo_test_bit(unsigned char nr)
310 {
311 	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
312 	int cc;
313 
314 	asm volatile(
315 		/* Parameter registers are ignored for "test bit" */
316 		"	plo	0,0,0,0(0)\n"
317 		"	ipm	%0\n"
318 		"	srl	%0,28\n"
319 		: "=d" (cc)
320 		: "d" (r0)
321 		: "cc");
322 	return cc == 0;
323 }
324 
325 static void kvm_s390_cpu_feat_init(void)
326 {
327 	int i;
328 
329 	for (i = 0; i < 256; ++i) {
330 		if (plo_test_bit(i))
331 			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
332 	}
333 
334 	if (test_facility(28)) /* TOD-clock steering */
335 		ptff(kvm_s390_available_subfunc.ptff,
336 		     sizeof(kvm_s390_available_subfunc.ptff),
337 		     PTFF_QAF);
338 
339 	if (test_facility(17)) { /* MSA */
340 		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
341 			      kvm_s390_available_subfunc.kmac);
342 		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
343 			      kvm_s390_available_subfunc.kmc);
344 		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
345 			      kvm_s390_available_subfunc.km);
346 		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
347 			      kvm_s390_available_subfunc.kimd);
348 		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
349 			      kvm_s390_available_subfunc.klmd);
350 	}
351 	if (test_facility(76)) /* MSA3 */
352 		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
353 			      kvm_s390_available_subfunc.pckmo);
354 	if (test_facility(77)) { /* MSA4 */
355 		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
356 			      kvm_s390_available_subfunc.kmctr);
357 		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
358 			      kvm_s390_available_subfunc.kmf);
359 		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
360 			      kvm_s390_available_subfunc.kmo);
361 		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
362 			      kvm_s390_available_subfunc.pcc);
363 	}
364 	if (test_facility(57)) /* MSA5 */
365 		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
366 			      kvm_s390_available_subfunc.ppno);
367 
368 	if (test_facility(146)) /* MSA8 */
369 		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
370 			      kvm_s390_available_subfunc.kma);
371 
372 	if (MACHINE_HAS_ESOP)
373 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
374 	/*
375 	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
376 	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
377 	 */
378 	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
379 	    !test_facility(3) || !nested)
380 		return;
381 	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
382 	if (sclp.has_64bscao)
383 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
384 	if (sclp.has_siif)
385 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
386 	if (sclp.has_gpere)
387 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
388 	if (sclp.has_gsls)
389 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
390 	if (sclp.has_ib)
391 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
392 	if (sclp.has_cei)
393 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
394 	if (sclp.has_ibs)
395 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
396 	if (sclp.has_kss)
397 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
398 	/*
399 	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
400 	 * all skey handling functions read/set the skey from the PGSTE
401 	 * instead of the real storage key.
402 	 *
403 	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
404 	 * pages being detected as preserved although they are resident.
405 	 *
406 	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
407 	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
408 	 *
409 	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
410 	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
411 	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
412 	 *
413 	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
414 	 * cannot easily shadow the SCA because of the ipte lock.
415 	 */
416 }
417 
418 int kvm_arch_init(void *opaque)
419 {
420 	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
421 	if (!kvm_s390_dbf)
422 		return -ENOMEM;
423 
424 	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
425 		debug_unregister(kvm_s390_dbf);
426 		return -ENOMEM;
427 	}
428 
429 	kvm_s390_cpu_feat_init();
430 
431 	/* Register floating interrupt controller interface. */
432 	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
433 }
434 
435 void kvm_arch_exit(void)
436 {
437 	debug_unregister(kvm_s390_dbf);
438 }
439 
440 /* Section: device related */
441 long kvm_arch_dev_ioctl(struct file *filp,
442 			unsigned int ioctl, unsigned long arg)
443 {
444 	if (ioctl == KVM_S390_ENABLE_SIE)
445 		return s390_enable_sie();
446 	return -EINVAL;
447 }
448 
449 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
450 {
451 	int r;
452 
453 	switch (ext) {
454 	case KVM_CAP_S390_PSW:
455 	case KVM_CAP_S390_GMAP:
456 	case KVM_CAP_SYNC_MMU:
457 #ifdef CONFIG_KVM_S390_UCONTROL
458 	case KVM_CAP_S390_UCONTROL:
459 #endif
460 	case KVM_CAP_ASYNC_PF:
461 	case KVM_CAP_SYNC_REGS:
462 	case KVM_CAP_ONE_REG:
463 	case KVM_CAP_ENABLE_CAP:
464 	case KVM_CAP_S390_CSS_SUPPORT:
465 	case KVM_CAP_IOEVENTFD:
466 	case KVM_CAP_DEVICE_CTRL:
467 	case KVM_CAP_ENABLE_CAP_VM:
468 	case KVM_CAP_S390_IRQCHIP:
469 	case KVM_CAP_VM_ATTRIBUTES:
470 	case KVM_CAP_MP_STATE:
471 	case KVM_CAP_IMMEDIATE_EXIT:
472 	case KVM_CAP_S390_INJECT_IRQ:
473 	case KVM_CAP_S390_USER_SIGP:
474 	case KVM_CAP_S390_USER_STSI:
475 	case KVM_CAP_S390_SKEYS:
476 	case KVM_CAP_S390_IRQ_STATE:
477 	case KVM_CAP_S390_USER_INSTR0:
478 	case KVM_CAP_S390_CMMA_MIGRATION:
479 	case KVM_CAP_S390_AIS:
480 	case KVM_CAP_S390_AIS_MIGRATION:
481 		r = 1;
482 		break;
483 	case KVM_CAP_S390_HPAGE_1M:
484 		r = 0;
485 		if (hpage && !kvm_is_ucontrol(kvm))
486 			r = 1;
487 		break;
488 	case KVM_CAP_S390_MEM_OP:
489 		r = MEM_OP_MAX_SIZE;
490 		break;
491 	case KVM_CAP_NR_VCPUS:
492 	case KVM_CAP_MAX_VCPUS:
493 		r = KVM_S390_BSCA_CPU_SLOTS;
494 		if (!kvm_s390_use_sca_entries())
495 			r = KVM_MAX_VCPUS;
496 		else if (sclp.has_esca && sclp.has_64bscao)
497 			r = KVM_S390_ESCA_CPU_SLOTS;
498 		break;
499 	case KVM_CAP_NR_MEMSLOTS:
500 		r = KVM_USER_MEM_SLOTS;
501 		break;
502 	case KVM_CAP_S390_COW:
503 		r = MACHINE_HAS_ESOP;
504 		break;
505 	case KVM_CAP_S390_VECTOR_REGISTERS:
506 		r = MACHINE_HAS_VX;
507 		break;
508 	case KVM_CAP_S390_RI:
509 		r = test_facility(64);
510 		break;
511 	case KVM_CAP_S390_GS:
512 		r = test_facility(133);
513 		break;
514 	case KVM_CAP_S390_BPB:
515 		r = test_facility(82);
516 		break;
517 	default:
518 		r = 0;
519 	}
520 	return r;
521 }
522 
523 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
524 				    struct kvm_memory_slot *memslot)
525 {
526 	int i;
527 	gfn_t cur_gfn, last_gfn;
528 	unsigned long gaddr, vmaddr;
529 	struct gmap *gmap = kvm->arch.gmap;
530 	DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
531 
532 	/* Loop over all guest segments */
533 	cur_gfn = memslot->base_gfn;
534 	last_gfn = memslot->base_gfn + memslot->npages;
535 	for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
536 		gaddr = gfn_to_gpa(cur_gfn);
537 		vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
538 		if (kvm_is_error_hva(vmaddr))
539 			continue;
540 
541 		bitmap_zero(bitmap, _PAGE_ENTRIES);
542 		gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
543 		for (i = 0; i < _PAGE_ENTRIES; i++) {
544 			if (test_bit(i, bitmap))
545 				mark_page_dirty(kvm, cur_gfn + i);
546 		}
547 
548 		if (fatal_signal_pending(current))
549 			return;
550 		cond_resched();
551 	}
552 }
553 
554 /* Section: vm related */
555 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
556 
557 /*
558  * Get (and clear) the dirty memory log for a memory slot.
559  */
560 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
561 			       struct kvm_dirty_log *log)
562 {
563 	int r;
564 	unsigned long n;
565 	struct kvm_memslots *slots;
566 	struct kvm_memory_slot *memslot;
567 	int is_dirty = 0;
568 
569 	if (kvm_is_ucontrol(kvm))
570 		return -EINVAL;
571 
572 	mutex_lock(&kvm->slots_lock);
573 
574 	r = -EINVAL;
575 	if (log->slot >= KVM_USER_MEM_SLOTS)
576 		goto out;
577 
578 	slots = kvm_memslots(kvm);
579 	memslot = id_to_memslot(slots, log->slot);
580 	r = -ENOENT;
581 	if (!memslot->dirty_bitmap)
582 		goto out;
583 
584 	kvm_s390_sync_dirty_log(kvm, memslot);
585 	r = kvm_get_dirty_log(kvm, log, &is_dirty);
586 	if (r)
587 		goto out;
588 
589 	/* Clear the dirty log */
590 	if (is_dirty) {
591 		n = kvm_dirty_bitmap_bytes(memslot);
592 		memset(memslot->dirty_bitmap, 0, n);
593 	}
594 	r = 0;
595 out:
596 	mutex_unlock(&kvm->slots_lock);
597 	return r;
598 }
599 
600 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
601 {
602 	unsigned int i;
603 	struct kvm_vcpu *vcpu;
604 
605 	kvm_for_each_vcpu(i, vcpu, kvm) {
606 		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
607 	}
608 }
609 
610 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
611 {
612 	int r;
613 
614 	if (cap->flags)
615 		return -EINVAL;
616 
617 	switch (cap->cap) {
618 	case KVM_CAP_S390_IRQCHIP:
619 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
620 		kvm->arch.use_irqchip = 1;
621 		r = 0;
622 		break;
623 	case KVM_CAP_S390_USER_SIGP:
624 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
625 		kvm->arch.user_sigp = 1;
626 		r = 0;
627 		break;
628 	case KVM_CAP_S390_VECTOR_REGISTERS:
629 		mutex_lock(&kvm->lock);
630 		if (kvm->created_vcpus) {
631 			r = -EBUSY;
632 		} else if (MACHINE_HAS_VX) {
633 			set_kvm_facility(kvm->arch.model.fac_mask, 129);
634 			set_kvm_facility(kvm->arch.model.fac_list, 129);
635 			if (test_facility(134)) {
636 				set_kvm_facility(kvm->arch.model.fac_mask, 134);
637 				set_kvm_facility(kvm->arch.model.fac_list, 134);
638 			}
639 			if (test_facility(135)) {
640 				set_kvm_facility(kvm->arch.model.fac_mask, 135);
641 				set_kvm_facility(kvm->arch.model.fac_list, 135);
642 			}
643 			r = 0;
644 		} else
645 			r = -EINVAL;
646 		mutex_unlock(&kvm->lock);
647 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
648 			 r ? "(not available)" : "(success)");
649 		break;
650 	case KVM_CAP_S390_RI:
651 		r = -EINVAL;
652 		mutex_lock(&kvm->lock);
653 		if (kvm->created_vcpus) {
654 			r = -EBUSY;
655 		} else if (test_facility(64)) {
656 			set_kvm_facility(kvm->arch.model.fac_mask, 64);
657 			set_kvm_facility(kvm->arch.model.fac_list, 64);
658 			r = 0;
659 		}
660 		mutex_unlock(&kvm->lock);
661 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
662 			 r ? "(not available)" : "(success)");
663 		break;
664 	case KVM_CAP_S390_AIS:
665 		mutex_lock(&kvm->lock);
666 		if (kvm->created_vcpus) {
667 			r = -EBUSY;
668 		} else {
669 			set_kvm_facility(kvm->arch.model.fac_mask, 72);
670 			set_kvm_facility(kvm->arch.model.fac_list, 72);
671 			r = 0;
672 		}
673 		mutex_unlock(&kvm->lock);
674 		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
675 			 r ? "(not available)" : "(success)");
676 		break;
677 	case KVM_CAP_S390_GS:
678 		r = -EINVAL;
679 		mutex_lock(&kvm->lock);
680 		if (kvm->created_vcpus) {
681 			r = -EBUSY;
682 		} else if (test_facility(133)) {
683 			set_kvm_facility(kvm->arch.model.fac_mask, 133);
684 			set_kvm_facility(kvm->arch.model.fac_list, 133);
685 			r = 0;
686 		}
687 		mutex_unlock(&kvm->lock);
688 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
689 			 r ? "(not available)" : "(success)");
690 		break;
691 	case KVM_CAP_S390_HPAGE_1M:
692 		mutex_lock(&kvm->lock);
693 		if (kvm->created_vcpus)
694 			r = -EBUSY;
695 		else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
696 			r = -EINVAL;
697 		else {
698 			r = 0;
699 			down_write(&kvm->mm->mmap_sem);
700 			kvm->mm->context.allow_gmap_hpage_1m = 1;
701 			up_write(&kvm->mm->mmap_sem);
702 			/*
703 			 * We might have to create fake 4k page
704 			 * tables. To avoid that the hardware works on
705 			 * stale PGSTEs, we emulate these instructions.
706 			 */
707 			kvm->arch.use_skf = 0;
708 			kvm->arch.use_pfmfi = 0;
709 		}
710 		mutex_unlock(&kvm->lock);
711 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
712 			 r ? "(not available)" : "(success)");
713 		break;
714 	case KVM_CAP_S390_USER_STSI:
715 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
716 		kvm->arch.user_stsi = 1;
717 		r = 0;
718 		break;
719 	case KVM_CAP_S390_USER_INSTR0:
720 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
721 		kvm->arch.user_instr0 = 1;
722 		icpt_operexc_on_all_vcpus(kvm);
723 		r = 0;
724 		break;
725 	default:
726 		r = -EINVAL;
727 		break;
728 	}
729 	return r;
730 }
731 
732 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
733 {
734 	int ret;
735 
736 	switch (attr->attr) {
737 	case KVM_S390_VM_MEM_LIMIT_SIZE:
738 		ret = 0;
739 		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
740 			 kvm->arch.mem_limit);
741 		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
742 			ret = -EFAULT;
743 		break;
744 	default:
745 		ret = -ENXIO;
746 		break;
747 	}
748 	return ret;
749 }
750 
751 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
752 {
753 	int ret;
754 	unsigned int idx;
755 	switch (attr->attr) {
756 	case KVM_S390_VM_MEM_ENABLE_CMMA:
757 		ret = -ENXIO;
758 		if (!sclp.has_cmma)
759 			break;
760 
761 		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
762 		mutex_lock(&kvm->lock);
763 		if (kvm->created_vcpus)
764 			ret = -EBUSY;
765 		else if (kvm->mm->context.allow_gmap_hpage_1m)
766 			ret = -EINVAL;
767 		else {
768 			kvm->arch.use_cmma = 1;
769 			/* Not compatible with cmma. */
770 			kvm->arch.use_pfmfi = 0;
771 			ret = 0;
772 		}
773 		mutex_unlock(&kvm->lock);
774 		break;
775 	case KVM_S390_VM_MEM_CLR_CMMA:
776 		ret = -ENXIO;
777 		if (!sclp.has_cmma)
778 			break;
779 		ret = -EINVAL;
780 		if (!kvm->arch.use_cmma)
781 			break;
782 
783 		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
784 		mutex_lock(&kvm->lock);
785 		idx = srcu_read_lock(&kvm->srcu);
786 		s390_reset_cmma(kvm->arch.gmap->mm);
787 		srcu_read_unlock(&kvm->srcu, idx);
788 		mutex_unlock(&kvm->lock);
789 		ret = 0;
790 		break;
791 	case KVM_S390_VM_MEM_LIMIT_SIZE: {
792 		unsigned long new_limit;
793 
794 		if (kvm_is_ucontrol(kvm))
795 			return -EINVAL;
796 
797 		if (get_user(new_limit, (u64 __user *)attr->addr))
798 			return -EFAULT;
799 
800 		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
801 		    new_limit > kvm->arch.mem_limit)
802 			return -E2BIG;
803 
804 		if (!new_limit)
805 			return -EINVAL;
806 
807 		/* gmap_create takes last usable address */
808 		if (new_limit != KVM_S390_NO_MEM_LIMIT)
809 			new_limit -= 1;
810 
811 		ret = -EBUSY;
812 		mutex_lock(&kvm->lock);
813 		if (!kvm->created_vcpus) {
814 			/* gmap_create will round the limit up */
815 			struct gmap *new = gmap_create(current->mm, new_limit);
816 
817 			if (!new) {
818 				ret = -ENOMEM;
819 			} else {
820 				gmap_remove(kvm->arch.gmap);
821 				new->private = kvm;
822 				kvm->arch.gmap = new;
823 				ret = 0;
824 			}
825 		}
826 		mutex_unlock(&kvm->lock);
827 		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
828 		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
829 			 (void *) kvm->arch.gmap->asce);
830 		break;
831 	}
832 	default:
833 		ret = -ENXIO;
834 		break;
835 	}
836 	return ret;
837 }
838 
839 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
840 
841 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
842 {
843 	struct kvm_vcpu *vcpu;
844 	int i;
845 
846 	kvm_s390_vcpu_block_all(kvm);
847 
848 	kvm_for_each_vcpu(i, vcpu, kvm) {
849 		kvm_s390_vcpu_crypto_setup(vcpu);
850 		/* recreate the shadow crycb by leaving the VSIE handler */
851 		kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
852 	}
853 
854 	kvm_s390_vcpu_unblock_all(kvm);
855 }
856 
857 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
858 {
859 	mutex_lock(&kvm->lock);
860 	switch (attr->attr) {
861 	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
862 		if (!test_kvm_facility(kvm, 76)) {
863 			mutex_unlock(&kvm->lock);
864 			return -EINVAL;
865 		}
866 		get_random_bytes(
867 			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
868 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
869 		kvm->arch.crypto.aes_kw = 1;
870 		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
871 		break;
872 	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
873 		if (!test_kvm_facility(kvm, 76)) {
874 			mutex_unlock(&kvm->lock);
875 			return -EINVAL;
876 		}
877 		get_random_bytes(
878 			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
879 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
880 		kvm->arch.crypto.dea_kw = 1;
881 		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
882 		break;
883 	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
884 		if (!test_kvm_facility(kvm, 76)) {
885 			mutex_unlock(&kvm->lock);
886 			return -EINVAL;
887 		}
888 		kvm->arch.crypto.aes_kw = 0;
889 		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
890 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
891 		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
892 		break;
893 	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
894 		if (!test_kvm_facility(kvm, 76)) {
895 			mutex_unlock(&kvm->lock);
896 			return -EINVAL;
897 		}
898 		kvm->arch.crypto.dea_kw = 0;
899 		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
900 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
901 		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
902 		break;
903 	case KVM_S390_VM_CRYPTO_ENABLE_APIE:
904 		if (!ap_instructions_available()) {
905 			mutex_unlock(&kvm->lock);
906 			return -EOPNOTSUPP;
907 		}
908 		kvm->arch.crypto.apie = 1;
909 		break;
910 	case KVM_S390_VM_CRYPTO_DISABLE_APIE:
911 		if (!ap_instructions_available()) {
912 			mutex_unlock(&kvm->lock);
913 			return -EOPNOTSUPP;
914 		}
915 		kvm->arch.crypto.apie = 0;
916 		break;
917 	default:
918 		mutex_unlock(&kvm->lock);
919 		return -ENXIO;
920 	}
921 
922 	kvm_s390_vcpu_crypto_reset_all(kvm);
923 	mutex_unlock(&kvm->lock);
924 	return 0;
925 }
926 
927 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
928 {
929 	int cx;
930 	struct kvm_vcpu *vcpu;
931 
932 	kvm_for_each_vcpu(cx, vcpu, kvm)
933 		kvm_s390_sync_request(req, vcpu);
934 }
935 
936 /*
937  * Must be called with kvm->srcu held to avoid races on memslots, and with
938  * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
939  */
940 static int kvm_s390_vm_start_migration(struct kvm *kvm)
941 {
942 	struct kvm_memory_slot *ms;
943 	struct kvm_memslots *slots;
944 	unsigned long ram_pages = 0;
945 	int slotnr;
946 
947 	/* migration mode already enabled */
948 	if (kvm->arch.migration_mode)
949 		return 0;
950 	slots = kvm_memslots(kvm);
951 	if (!slots || !slots->used_slots)
952 		return -EINVAL;
953 
954 	if (!kvm->arch.use_cmma) {
955 		kvm->arch.migration_mode = 1;
956 		return 0;
957 	}
958 	/* mark all the pages in active slots as dirty */
959 	for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
960 		ms = slots->memslots + slotnr;
961 		/*
962 		 * The second half of the bitmap is only used on x86,
963 		 * and would be wasted otherwise, so we put it to good
964 		 * use here to keep track of the state of the storage
965 		 * attributes.
966 		 */
967 		memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
968 		ram_pages += ms->npages;
969 	}
970 	atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
971 	kvm->arch.migration_mode = 1;
972 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
973 	return 0;
974 }
975 
976 /*
977  * Must be called with kvm->slots_lock to avoid races with ourselves and
978  * kvm_s390_vm_start_migration.
979  */
980 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
981 {
982 	/* migration mode already disabled */
983 	if (!kvm->arch.migration_mode)
984 		return 0;
985 	kvm->arch.migration_mode = 0;
986 	if (kvm->arch.use_cmma)
987 		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
988 	return 0;
989 }
990 
991 static int kvm_s390_vm_set_migration(struct kvm *kvm,
992 				     struct kvm_device_attr *attr)
993 {
994 	int res = -ENXIO;
995 
996 	mutex_lock(&kvm->slots_lock);
997 	switch (attr->attr) {
998 	case KVM_S390_VM_MIGRATION_START:
999 		res = kvm_s390_vm_start_migration(kvm);
1000 		break;
1001 	case KVM_S390_VM_MIGRATION_STOP:
1002 		res = kvm_s390_vm_stop_migration(kvm);
1003 		break;
1004 	default:
1005 		break;
1006 	}
1007 	mutex_unlock(&kvm->slots_lock);
1008 
1009 	return res;
1010 }
1011 
1012 static int kvm_s390_vm_get_migration(struct kvm *kvm,
1013 				     struct kvm_device_attr *attr)
1014 {
1015 	u64 mig = kvm->arch.migration_mode;
1016 
1017 	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1018 		return -ENXIO;
1019 
1020 	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
1021 		return -EFAULT;
1022 	return 0;
1023 }
1024 
1025 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1026 {
1027 	struct kvm_s390_vm_tod_clock gtod;
1028 
1029 	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1030 		return -EFAULT;
1031 
1032 	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1033 		return -EINVAL;
1034 	kvm_s390_set_tod_clock(kvm, &gtod);
1035 
1036 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1037 		gtod.epoch_idx, gtod.tod);
1038 
1039 	return 0;
1040 }
1041 
1042 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1043 {
1044 	u8 gtod_high;
1045 
1046 	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1047 					   sizeof(gtod_high)))
1048 		return -EFAULT;
1049 
1050 	if (gtod_high != 0)
1051 		return -EINVAL;
1052 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1053 
1054 	return 0;
1055 }
1056 
1057 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1058 {
1059 	struct kvm_s390_vm_tod_clock gtod = { 0 };
1060 
1061 	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1062 			   sizeof(gtod.tod)))
1063 		return -EFAULT;
1064 
1065 	kvm_s390_set_tod_clock(kvm, &gtod);
1066 	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1067 	return 0;
1068 }
1069 
1070 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1071 {
1072 	int ret;
1073 
1074 	if (attr->flags)
1075 		return -EINVAL;
1076 
1077 	switch (attr->attr) {
1078 	case KVM_S390_VM_TOD_EXT:
1079 		ret = kvm_s390_set_tod_ext(kvm, attr);
1080 		break;
1081 	case KVM_S390_VM_TOD_HIGH:
1082 		ret = kvm_s390_set_tod_high(kvm, attr);
1083 		break;
1084 	case KVM_S390_VM_TOD_LOW:
1085 		ret = kvm_s390_set_tod_low(kvm, attr);
1086 		break;
1087 	default:
1088 		ret = -ENXIO;
1089 		break;
1090 	}
1091 	return ret;
1092 }
1093 
1094 static void kvm_s390_get_tod_clock(struct kvm *kvm,
1095 				   struct kvm_s390_vm_tod_clock *gtod)
1096 {
1097 	struct kvm_s390_tod_clock_ext htod;
1098 
1099 	preempt_disable();
1100 
1101 	get_tod_clock_ext((char *)&htod);
1102 
1103 	gtod->tod = htod.tod + kvm->arch.epoch;
1104 	gtod->epoch_idx = 0;
1105 	if (test_kvm_facility(kvm, 139)) {
1106 		gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
1107 		if (gtod->tod < htod.tod)
1108 			gtod->epoch_idx += 1;
1109 	}
1110 
1111 	preempt_enable();
1112 }
1113 
1114 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1115 {
1116 	struct kvm_s390_vm_tod_clock gtod;
1117 
1118 	memset(&gtod, 0, sizeof(gtod));
1119 	kvm_s390_get_tod_clock(kvm, &gtod);
1120 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1121 		return -EFAULT;
1122 
1123 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1124 		gtod.epoch_idx, gtod.tod);
1125 	return 0;
1126 }
1127 
1128 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1129 {
1130 	u8 gtod_high = 0;
1131 
1132 	if (copy_to_user((void __user *)attr->addr, &gtod_high,
1133 					 sizeof(gtod_high)))
1134 		return -EFAULT;
1135 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1136 
1137 	return 0;
1138 }
1139 
1140 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1141 {
1142 	u64 gtod;
1143 
1144 	gtod = kvm_s390_get_tod_clock_fast(kvm);
1145 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1146 		return -EFAULT;
1147 	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1148 
1149 	return 0;
1150 }
1151 
1152 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1153 {
1154 	int ret;
1155 
1156 	if (attr->flags)
1157 		return -EINVAL;
1158 
1159 	switch (attr->attr) {
1160 	case KVM_S390_VM_TOD_EXT:
1161 		ret = kvm_s390_get_tod_ext(kvm, attr);
1162 		break;
1163 	case KVM_S390_VM_TOD_HIGH:
1164 		ret = kvm_s390_get_tod_high(kvm, attr);
1165 		break;
1166 	case KVM_S390_VM_TOD_LOW:
1167 		ret = kvm_s390_get_tod_low(kvm, attr);
1168 		break;
1169 	default:
1170 		ret = -ENXIO;
1171 		break;
1172 	}
1173 	return ret;
1174 }
1175 
1176 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1177 {
1178 	struct kvm_s390_vm_cpu_processor *proc;
1179 	u16 lowest_ibc, unblocked_ibc;
1180 	int ret = 0;
1181 
1182 	mutex_lock(&kvm->lock);
1183 	if (kvm->created_vcpus) {
1184 		ret = -EBUSY;
1185 		goto out;
1186 	}
1187 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1188 	if (!proc) {
1189 		ret = -ENOMEM;
1190 		goto out;
1191 	}
1192 	if (!copy_from_user(proc, (void __user *)attr->addr,
1193 			    sizeof(*proc))) {
1194 		kvm->arch.model.cpuid = proc->cpuid;
1195 		lowest_ibc = sclp.ibc >> 16 & 0xfff;
1196 		unblocked_ibc = sclp.ibc & 0xfff;
1197 		if (lowest_ibc && proc->ibc) {
1198 			if (proc->ibc > unblocked_ibc)
1199 				kvm->arch.model.ibc = unblocked_ibc;
1200 			else if (proc->ibc < lowest_ibc)
1201 				kvm->arch.model.ibc = lowest_ibc;
1202 			else
1203 				kvm->arch.model.ibc = proc->ibc;
1204 		}
1205 		memcpy(kvm->arch.model.fac_list, proc->fac_list,
1206 		       S390_ARCH_FAC_LIST_SIZE_BYTE);
1207 		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1208 			 kvm->arch.model.ibc,
1209 			 kvm->arch.model.cpuid);
1210 		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1211 			 kvm->arch.model.fac_list[0],
1212 			 kvm->arch.model.fac_list[1],
1213 			 kvm->arch.model.fac_list[2]);
1214 	} else
1215 		ret = -EFAULT;
1216 	kfree(proc);
1217 out:
1218 	mutex_unlock(&kvm->lock);
1219 	return ret;
1220 }
1221 
1222 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1223 				       struct kvm_device_attr *attr)
1224 {
1225 	struct kvm_s390_vm_cpu_feat data;
1226 
1227 	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1228 		return -EFAULT;
1229 	if (!bitmap_subset((unsigned long *) data.feat,
1230 			   kvm_s390_available_cpu_feat,
1231 			   KVM_S390_VM_CPU_FEAT_NR_BITS))
1232 		return -EINVAL;
1233 
1234 	mutex_lock(&kvm->lock);
1235 	if (kvm->created_vcpus) {
1236 		mutex_unlock(&kvm->lock);
1237 		return -EBUSY;
1238 	}
1239 	bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1240 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1241 	mutex_unlock(&kvm->lock);
1242 	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1243 			 data.feat[0],
1244 			 data.feat[1],
1245 			 data.feat[2]);
1246 	return 0;
1247 }
1248 
1249 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1250 					  struct kvm_device_attr *attr)
1251 {
1252 	/*
1253 	 * Once supported by kernel + hw, we have to store the subfunctions
1254 	 * in kvm->arch and remember that user space configured them.
1255 	 */
1256 	return -ENXIO;
1257 }
1258 
1259 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1260 {
1261 	int ret = -ENXIO;
1262 
1263 	switch (attr->attr) {
1264 	case KVM_S390_VM_CPU_PROCESSOR:
1265 		ret = kvm_s390_set_processor(kvm, attr);
1266 		break;
1267 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1268 		ret = kvm_s390_set_processor_feat(kvm, attr);
1269 		break;
1270 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1271 		ret = kvm_s390_set_processor_subfunc(kvm, attr);
1272 		break;
1273 	}
1274 	return ret;
1275 }
1276 
1277 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1278 {
1279 	struct kvm_s390_vm_cpu_processor *proc;
1280 	int ret = 0;
1281 
1282 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1283 	if (!proc) {
1284 		ret = -ENOMEM;
1285 		goto out;
1286 	}
1287 	proc->cpuid = kvm->arch.model.cpuid;
1288 	proc->ibc = kvm->arch.model.ibc;
1289 	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1290 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1291 	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1292 		 kvm->arch.model.ibc,
1293 		 kvm->arch.model.cpuid);
1294 	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1295 		 kvm->arch.model.fac_list[0],
1296 		 kvm->arch.model.fac_list[1],
1297 		 kvm->arch.model.fac_list[2]);
1298 	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1299 		ret = -EFAULT;
1300 	kfree(proc);
1301 out:
1302 	return ret;
1303 }
1304 
1305 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1306 {
1307 	struct kvm_s390_vm_cpu_machine *mach;
1308 	int ret = 0;
1309 
1310 	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1311 	if (!mach) {
1312 		ret = -ENOMEM;
1313 		goto out;
1314 	}
1315 	get_cpu_id((struct cpuid *) &mach->cpuid);
1316 	mach->ibc = sclp.ibc;
1317 	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1318 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1319 	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1320 	       sizeof(S390_lowcore.stfle_fac_list));
1321 	VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1322 		 kvm->arch.model.ibc,
1323 		 kvm->arch.model.cpuid);
1324 	VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1325 		 mach->fac_mask[0],
1326 		 mach->fac_mask[1],
1327 		 mach->fac_mask[2]);
1328 	VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1329 		 mach->fac_list[0],
1330 		 mach->fac_list[1],
1331 		 mach->fac_list[2]);
1332 	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1333 		ret = -EFAULT;
1334 	kfree(mach);
1335 out:
1336 	return ret;
1337 }
1338 
1339 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1340 				       struct kvm_device_attr *attr)
1341 {
1342 	struct kvm_s390_vm_cpu_feat data;
1343 
1344 	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1345 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1346 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1347 		return -EFAULT;
1348 	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1349 			 data.feat[0],
1350 			 data.feat[1],
1351 			 data.feat[2]);
1352 	return 0;
1353 }
1354 
1355 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1356 				     struct kvm_device_attr *attr)
1357 {
1358 	struct kvm_s390_vm_cpu_feat data;
1359 
1360 	bitmap_copy((unsigned long *) data.feat,
1361 		    kvm_s390_available_cpu_feat,
1362 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1363 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1364 		return -EFAULT;
1365 	VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1366 			 data.feat[0],
1367 			 data.feat[1],
1368 			 data.feat[2]);
1369 	return 0;
1370 }
1371 
1372 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1373 					  struct kvm_device_attr *attr)
1374 {
1375 	/*
1376 	 * Once we can actually configure subfunctions (kernel + hw support),
1377 	 * we have to check if they were already set by user space, if so copy
1378 	 * them from kvm->arch.
1379 	 */
1380 	return -ENXIO;
1381 }
1382 
1383 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1384 					struct kvm_device_attr *attr)
1385 {
1386 	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1387 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1388 		return -EFAULT;
1389 	return 0;
1390 }
1391 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1392 {
1393 	int ret = -ENXIO;
1394 
1395 	switch (attr->attr) {
1396 	case KVM_S390_VM_CPU_PROCESSOR:
1397 		ret = kvm_s390_get_processor(kvm, attr);
1398 		break;
1399 	case KVM_S390_VM_CPU_MACHINE:
1400 		ret = kvm_s390_get_machine(kvm, attr);
1401 		break;
1402 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1403 		ret = kvm_s390_get_processor_feat(kvm, attr);
1404 		break;
1405 	case KVM_S390_VM_CPU_MACHINE_FEAT:
1406 		ret = kvm_s390_get_machine_feat(kvm, attr);
1407 		break;
1408 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1409 		ret = kvm_s390_get_processor_subfunc(kvm, attr);
1410 		break;
1411 	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1412 		ret = kvm_s390_get_machine_subfunc(kvm, attr);
1413 		break;
1414 	}
1415 	return ret;
1416 }
1417 
1418 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1419 {
1420 	int ret;
1421 
1422 	switch (attr->group) {
1423 	case KVM_S390_VM_MEM_CTRL:
1424 		ret = kvm_s390_set_mem_control(kvm, attr);
1425 		break;
1426 	case KVM_S390_VM_TOD:
1427 		ret = kvm_s390_set_tod(kvm, attr);
1428 		break;
1429 	case KVM_S390_VM_CPU_MODEL:
1430 		ret = kvm_s390_set_cpu_model(kvm, attr);
1431 		break;
1432 	case KVM_S390_VM_CRYPTO:
1433 		ret = kvm_s390_vm_set_crypto(kvm, attr);
1434 		break;
1435 	case KVM_S390_VM_MIGRATION:
1436 		ret = kvm_s390_vm_set_migration(kvm, attr);
1437 		break;
1438 	default:
1439 		ret = -ENXIO;
1440 		break;
1441 	}
1442 
1443 	return ret;
1444 }
1445 
1446 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1447 {
1448 	int ret;
1449 
1450 	switch (attr->group) {
1451 	case KVM_S390_VM_MEM_CTRL:
1452 		ret = kvm_s390_get_mem_control(kvm, attr);
1453 		break;
1454 	case KVM_S390_VM_TOD:
1455 		ret = kvm_s390_get_tod(kvm, attr);
1456 		break;
1457 	case KVM_S390_VM_CPU_MODEL:
1458 		ret = kvm_s390_get_cpu_model(kvm, attr);
1459 		break;
1460 	case KVM_S390_VM_MIGRATION:
1461 		ret = kvm_s390_vm_get_migration(kvm, attr);
1462 		break;
1463 	default:
1464 		ret = -ENXIO;
1465 		break;
1466 	}
1467 
1468 	return ret;
1469 }
1470 
1471 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1472 {
1473 	int ret;
1474 
1475 	switch (attr->group) {
1476 	case KVM_S390_VM_MEM_CTRL:
1477 		switch (attr->attr) {
1478 		case KVM_S390_VM_MEM_ENABLE_CMMA:
1479 		case KVM_S390_VM_MEM_CLR_CMMA:
1480 			ret = sclp.has_cmma ? 0 : -ENXIO;
1481 			break;
1482 		case KVM_S390_VM_MEM_LIMIT_SIZE:
1483 			ret = 0;
1484 			break;
1485 		default:
1486 			ret = -ENXIO;
1487 			break;
1488 		}
1489 		break;
1490 	case KVM_S390_VM_TOD:
1491 		switch (attr->attr) {
1492 		case KVM_S390_VM_TOD_LOW:
1493 		case KVM_S390_VM_TOD_HIGH:
1494 			ret = 0;
1495 			break;
1496 		default:
1497 			ret = -ENXIO;
1498 			break;
1499 		}
1500 		break;
1501 	case KVM_S390_VM_CPU_MODEL:
1502 		switch (attr->attr) {
1503 		case KVM_S390_VM_CPU_PROCESSOR:
1504 		case KVM_S390_VM_CPU_MACHINE:
1505 		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1506 		case KVM_S390_VM_CPU_MACHINE_FEAT:
1507 		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1508 			ret = 0;
1509 			break;
1510 		/* configuring subfunctions is not supported yet */
1511 		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1512 		default:
1513 			ret = -ENXIO;
1514 			break;
1515 		}
1516 		break;
1517 	case KVM_S390_VM_CRYPTO:
1518 		switch (attr->attr) {
1519 		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1520 		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1521 		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1522 		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1523 			ret = 0;
1524 			break;
1525 		case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1526 		case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1527 			ret = ap_instructions_available() ? 0 : -ENXIO;
1528 			break;
1529 		default:
1530 			ret = -ENXIO;
1531 			break;
1532 		}
1533 		break;
1534 	case KVM_S390_VM_MIGRATION:
1535 		ret = 0;
1536 		break;
1537 	default:
1538 		ret = -ENXIO;
1539 		break;
1540 	}
1541 
1542 	return ret;
1543 }
1544 
1545 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1546 {
1547 	uint8_t *keys;
1548 	uint64_t hva;
1549 	int srcu_idx, i, r = 0;
1550 
1551 	if (args->flags != 0)
1552 		return -EINVAL;
1553 
1554 	/* Is this guest using storage keys? */
1555 	if (!mm_uses_skeys(current->mm))
1556 		return KVM_S390_GET_SKEYS_NONE;
1557 
1558 	/* Enforce sane limit on memory allocation */
1559 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1560 		return -EINVAL;
1561 
1562 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1563 	if (!keys)
1564 		return -ENOMEM;
1565 
1566 	down_read(&current->mm->mmap_sem);
1567 	srcu_idx = srcu_read_lock(&kvm->srcu);
1568 	for (i = 0; i < args->count; i++) {
1569 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1570 		if (kvm_is_error_hva(hva)) {
1571 			r = -EFAULT;
1572 			break;
1573 		}
1574 
1575 		r = get_guest_storage_key(current->mm, hva, &keys[i]);
1576 		if (r)
1577 			break;
1578 	}
1579 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1580 	up_read(&current->mm->mmap_sem);
1581 
1582 	if (!r) {
1583 		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1584 				 sizeof(uint8_t) * args->count);
1585 		if (r)
1586 			r = -EFAULT;
1587 	}
1588 
1589 	kvfree(keys);
1590 	return r;
1591 }
1592 
1593 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1594 {
1595 	uint8_t *keys;
1596 	uint64_t hva;
1597 	int srcu_idx, i, r = 0;
1598 	bool unlocked;
1599 
1600 	if (args->flags != 0)
1601 		return -EINVAL;
1602 
1603 	/* Enforce sane limit on memory allocation */
1604 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1605 		return -EINVAL;
1606 
1607 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1608 	if (!keys)
1609 		return -ENOMEM;
1610 
1611 	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1612 			   sizeof(uint8_t) * args->count);
1613 	if (r) {
1614 		r = -EFAULT;
1615 		goto out;
1616 	}
1617 
1618 	/* Enable storage key handling for the guest */
1619 	r = s390_enable_skey();
1620 	if (r)
1621 		goto out;
1622 
1623 	i = 0;
1624 	down_read(&current->mm->mmap_sem);
1625 	srcu_idx = srcu_read_lock(&kvm->srcu);
1626         while (i < args->count) {
1627 		unlocked = false;
1628 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1629 		if (kvm_is_error_hva(hva)) {
1630 			r = -EFAULT;
1631 			break;
1632 		}
1633 
1634 		/* Lowest order bit is reserved */
1635 		if (keys[i] & 0x01) {
1636 			r = -EINVAL;
1637 			break;
1638 		}
1639 
1640 		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1641 		if (r) {
1642 			r = fixup_user_fault(current, current->mm, hva,
1643 					     FAULT_FLAG_WRITE, &unlocked);
1644 			if (r)
1645 				break;
1646 		}
1647 		if (!r)
1648 			i++;
1649 	}
1650 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1651 	up_read(&current->mm->mmap_sem);
1652 out:
1653 	kvfree(keys);
1654 	return r;
1655 }
1656 
1657 /*
1658  * Base address and length must be sent at the start of each block, therefore
1659  * it's cheaper to send some clean data, as long as it's less than the size of
1660  * two longs.
1661  */
1662 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1663 /* for consistency */
1664 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1665 
1666 /*
1667  * Similar to gfn_to_memslot, but returns the index of a memslot also when the
1668  * address falls in a hole. In that case the index of one of the memslots
1669  * bordering the hole is returned.
1670  */
1671 static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1672 {
1673 	int start = 0, end = slots->used_slots;
1674 	int slot = atomic_read(&slots->lru_slot);
1675 	struct kvm_memory_slot *memslots = slots->memslots;
1676 
1677 	if (gfn >= memslots[slot].base_gfn &&
1678 	    gfn < memslots[slot].base_gfn + memslots[slot].npages)
1679 		return slot;
1680 
1681 	while (start < end) {
1682 		slot = start + (end - start) / 2;
1683 
1684 		if (gfn >= memslots[slot].base_gfn)
1685 			end = slot;
1686 		else
1687 			start = slot + 1;
1688 	}
1689 
1690 	if (gfn >= memslots[start].base_gfn &&
1691 	    gfn < memslots[start].base_gfn + memslots[start].npages) {
1692 		atomic_set(&slots->lru_slot, start);
1693 	}
1694 
1695 	return start;
1696 }
1697 
1698 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1699 			      u8 *res, unsigned long bufsize)
1700 {
1701 	unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1702 
1703 	args->count = 0;
1704 	while (args->count < bufsize) {
1705 		hva = gfn_to_hva(kvm, cur_gfn);
1706 		/*
1707 		 * We return an error if the first value was invalid, but we
1708 		 * return successfully if at least one value was copied.
1709 		 */
1710 		if (kvm_is_error_hva(hva))
1711 			return args->count ? 0 : -EFAULT;
1712 		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1713 			pgstev = 0;
1714 		res[args->count++] = (pgstev >> 24) & 0x43;
1715 		cur_gfn++;
1716 	}
1717 
1718 	return 0;
1719 }
1720 
1721 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
1722 					      unsigned long cur_gfn)
1723 {
1724 	int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
1725 	struct kvm_memory_slot *ms = slots->memslots + slotidx;
1726 	unsigned long ofs = cur_gfn - ms->base_gfn;
1727 
1728 	if (ms->base_gfn + ms->npages <= cur_gfn) {
1729 		slotidx--;
1730 		/* If we are above the highest slot, wrap around */
1731 		if (slotidx < 0)
1732 			slotidx = slots->used_slots - 1;
1733 
1734 		ms = slots->memslots + slotidx;
1735 		ofs = 0;
1736 	}
1737 	ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
1738 	while ((slotidx > 0) && (ofs >= ms->npages)) {
1739 		slotidx--;
1740 		ms = slots->memslots + slotidx;
1741 		ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
1742 	}
1743 	return ms->base_gfn + ofs;
1744 }
1745 
1746 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1747 			     u8 *res, unsigned long bufsize)
1748 {
1749 	unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
1750 	struct kvm_memslots *slots = kvm_memslots(kvm);
1751 	struct kvm_memory_slot *ms;
1752 
1753 	cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
1754 	ms = gfn_to_memslot(kvm, cur_gfn);
1755 	args->count = 0;
1756 	args->start_gfn = cur_gfn;
1757 	if (!ms)
1758 		return 0;
1759 	next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
1760 	mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
1761 
1762 	while (args->count < bufsize) {
1763 		hva = gfn_to_hva(kvm, cur_gfn);
1764 		if (kvm_is_error_hva(hva))
1765 			return 0;
1766 		/* Decrement only if we actually flipped the bit to 0 */
1767 		if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
1768 			atomic64_dec(&kvm->arch.cmma_dirty_pages);
1769 		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1770 			pgstev = 0;
1771 		/* Save the value */
1772 		res[args->count++] = (pgstev >> 24) & 0x43;
1773 		/* If the next bit is too far away, stop. */
1774 		if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
1775 			return 0;
1776 		/* If we reached the previous "next", find the next one */
1777 		if (cur_gfn == next_gfn)
1778 			next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
1779 		/* Reached the end of memory or of the buffer, stop */
1780 		if ((next_gfn >= mem_end) ||
1781 		    (next_gfn - args->start_gfn >= bufsize))
1782 			return 0;
1783 		cur_gfn++;
1784 		/* Reached the end of the current memslot, take the next one. */
1785 		if (cur_gfn - ms->base_gfn >= ms->npages) {
1786 			ms = gfn_to_memslot(kvm, cur_gfn);
1787 			if (!ms)
1788 				return 0;
1789 		}
1790 	}
1791 	return 0;
1792 }
1793 
1794 /*
1795  * This function searches for the next page with dirty CMMA attributes, and
1796  * saves the attributes in the buffer up to either the end of the buffer or
1797  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
1798  * no trailing clean bytes are saved.
1799  * In case no dirty bits were found, or if CMMA was not enabled or used, the
1800  * output buffer will indicate 0 as length.
1801  */
1802 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
1803 				  struct kvm_s390_cmma_log *args)
1804 {
1805 	unsigned long bufsize;
1806 	int srcu_idx, peek, ret;
1807 	u8 *values;
1808 
1809 	if (!kvm->arch.use_cmma)
1810 		return -ENXIO;
1811 	/* Invalid/unsupported flags were specified */
1812 	if (args->flags & ~KVM_S390_CMMA_PEEK)
1813 		return -EINVAL;
1814 	/* Migration mode query, and we are not doing a migration */
1815 	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
1816 	if (!peek && !kvm->arch.migration_mode)
1817 		return -EINVAL;
1818 	/* CMMA is disabled or was not used, or the buffer has length zero */
1819 	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
1820 	if (!bufsize || !kvm->mm->context.uses_cmm) {
1821 		memset(args, 0, sizeof(*args));
1822 		return 0;
1823 	}
1824 	/* We are not peeking, and there are no dirty pages */
1825 	if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
1826 		memset(args, 0, sizeof(*args));
1827 		return 0;
1828 	}
1829 
1830 	values = vmalloc(bufsize);
1831 	if (!values)
1832 		return -ENOMEM;
1833 
1834 	down_read(&kvm->mm->mmap_sem);
1835 	srcu_idx = srcu_read_lock(&kvm->srcu);
1836 	if (peek)
1837 		ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
1838 	else
1839 		ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
1840 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1841 	up_read(&kvm->mm->mmap_sem);
1842 
1843 	if (kvm->arch.migration_mode)
1844 		args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
1845 	else
1846 		args->remaining = 0;
1847 
1848 	if (copy_to_user((void __user *)args->values, values, args->count))
1849 		ret = -EFAULT;
1850 
1851 	vfree(values);
1852 	return ret;
1853 }
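/*
 * A minimal sketch of how user space might drive this interface; vm_fd is
 * assumed to be the VM file descriptor, migration mode is assumed to have
 * been enabled beforehand (otherwise KVM_S390_CMMA_PEEK must be used), the
 * buffer size is an arbitrary choice and error handling is omitted. After
 * each call, log.count values have been stored for the frames starting at
 * log.start_gfn and log.remaining dirty pages are still pending:
 *
 *	struct kvm_s390_cmma_log log = {};
 *	__u8 buf[4096];
 *
 *	log.start_gfn = 0;
 *	log.count = sizeof(buf);
 *	log.flags = 0;
 *	log.values = (__u64)(unsigned long)buf;
 *	do {
 *		if (ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log) < 0)
 *			break;
 *		log.start_gfn += log.count;
 *		log.count = sizeof(buf);
 *	} while (log.remaining);
 */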
1854 
1855 /*
1856  * This function sets the CMMA attributes for the given pages. If the input
1857  * buffer has zero length, no action is taken, otherwise the attributes are
1858  * set and the mm->context.uses_cmm flag is set.
1859  */
1860 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
1861 				  const struct kvm_s390_cmma_log *args)
1862 {
1863 	unsigned long hva, mask, pgstev, i;
1864 	uint8_t *bits;
1865 	int srcu_idx, r = 0;
1866 
1867 	mask = args->mask;
1868 
1869 	if (!kvm->arch.use_cmma)
1870 		return -ENXIO;
1871 	/* invalid/unsupported flags */
1872 	if (args->flags != 0)
1873 		return -EINVAL;
1874 	/* Enforce sane limit on memory allocation */
1875 	if (args->count > KVM_S390_CMMA_SIZE_MAX)
1876 		return -EINVAL;
1877 	/* Nothing to do */
1878 	if (args->count == 0)
1879 		return 0;
1880 
1881 	bits = vmalloc(array_size(sizeof(*bits), args->count));
1882 	if (!bits)
1883 		return -ENOMEM;
1884 
1885 	r = copy_from_user(bits, (void __user *)args->values, args->count);
1886 	if (r) {
1887 		r = -EFAULT;
1888 		goto out;
1889 	}
1890 
1891 	down_read(&kvm->mm->mmap_sem);
1892 	srcu_idx = srcu_read_lock(&kvm->srcu);
1893 	for (i = 0; i < args->count; i++) {
1894 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1895 		if (kvm_is_error_hva(hva)) {
1896 			r = -EFAULT;
1897 			break;
1898 		}
1899 
1900 		pgstev = bits[i];
1901 		pgstev = pgstev << 24;
1902 		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
1903 		set_pgste_bits(kvm->mm, hva, mask, pgstev);
1904 	}
1905 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1906 	up_read(&kvm->mm->mmap_sem);
1907 
1908 	if (!kvm->mm->context.uses_cmm) {
1909 		down_write(&kvm->mm->mmap_sem);
1910 		kvm->mm->context.uses_cmm = 1;
1911 		up_write(&kvm->mm->mmap_sem);
1912 	}
1913 out:
1914 	vfree(bits);
1915 	return r;
1916 }
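/*
 * On the destination side, a sketch of the matching restore simply feeds
 * the transferred values back; vm_fd, buf and log are as in the sketch
 * above, and the all-ones mask applies every attribute bit:
 *
 *	log.flags = 0;
 *	log.mask = ~0ULL;
 *	ioctl(vm_fd, KVM_S390_SET_CMMA_BITS, &log);
 */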
1917 
1918 long kvm_arch_vm_ioctl(struct file *filp,
1919 		       unsigned int ioctl, unsigned long arg)
1920 {
1921 	struct kvm *kvm = filp->private_data;
1922 	void __user *argp = (void __user *)arg;
1923 	struct kvm_device_attr attr;
1924 	int r;
1925 
1926 	switch (ioctl) {
1927 	case KVM_S390_INTERRUPT: {
1928 		struct kvm_s390_interrupt s390int;
1929 
1930 		r = -EFAULT;
1931 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
1932 			break;
1933 		r = kvm_s390_inject_vm(kvm, &s390int);
1934 		break;
1935 	}
1936 	case KVM_ENABLE_CAP: {
1937 		struct kvm_enable_cap cap;
1938 		r = -EFAULT;
1939 		if (copy_from_user(&cap, argp, sizeof(cap)))
1940 			break;
1941 		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1942 		break;
1943 	}
1944 	case KVM_CREATE_IRQCHIP: {
1945 		struct kvm_irq_routing_entry routing;
1946 
1947 		r = -EINVAL;
1948 		if (kvm->arch.use_irqchip) {
1949 			/* Set up dummy routing. */
1950 			memset(&routing, 0, sizeof(routing));
1951 			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1952 		}
1953 		break;
1954 	}
1955 	case KVM_SET_DEVICE_ATTR: {
1956 		r = -EFAULT;
1957 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1958 			break;
1959 		r = kvm_s390_vm_set_attr(kvm, &attr);
1960 		break;
1961 	}
1962 	case KVM_GET_DEVICE_ATTR: {
1963 		r = -EFAULT;
1964 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1965 			break;
1966 		r = kvm_s390_vm_get_attr(kvm, &attr);
1967 		break;
1968 	}
1969 	case KVM_HAS_DEVICE_ATTR: {
1970 		r = -EFAULT;
1971 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1972 			break;
1973 		r = kvm_s390_vm_has_attr(kvm, &attr);
1974 		break;
1975 	}
1976 	case KVM_S390_GET_SKEYS: {
1977 		struct kvm_s390_skeys args;
1978 
1979 		r = -EFAULT;
1980 		if (copy_from_user(&args, argp,
1981 				   sizeof(struct kvm_s390_skeys)))
1982 			break;
1983 		r = kvm_s390_get_skeys(kvm, &args);
1984 		break;
1985 	}
1986 	case KVM_S390_SET_SKEYS: {
1987 		struct kvm_s390_skeys args;
1988 
1989 		r = -EFAULT;
1990 		if (copy_from_user(&args, argp,
1991 				   sizeof(struct kvm_s390_skeys)))
1992 			break;
1993 		r = kvm_s390_set_skeys(kvm, &args);
1994 		break;
1995 	}
1996 	case KVM_S390_GET_CMMA_BITS: {
1997 		struct kvm_s390_cmma_log args;
1998 
1999 		r = -EFAULT;
2000 		if (copy_from_user(&args, argp, sizeof(args)))
2001 			break;
2002 		mutex_lock(&kvm->slots_lock);
2003 		r = kvm_s390_get_cmma_bits(kvm, &args);
2004 		mutex_unlock(&kvm->slots_lock);
2005 		if (!r) {
2006 			r = copy_to_user(argp, &args, sizeof(args));
2007 			if (r)
2008 				r = -EFAULT;
2009 		}
2010 		break;
2011 	}
2012 	case KVM_S390_SET_CMMA_BITS: {
2013 		struct kvm_s390_cmma_log args;
2014 
2015 		r = -EFAULT;
2016 		if (copy_from_user(&args, argp, sizeof(args)))
2017 			break;
2018 		mutex_lock(&kvm->slots_lock);
2019 		r = kvm_s390_set_cmma_bits(kvm, &args);
2020 		mutex_unlock(&kvm->slots_lock);
2021 		break;
2022 	}
2023 	default:
2024 		r = -ENOTTY;
2025 	}
2026 
2027 	return r;
2028 }
2029 
2030 static int kvm_s390_apxa_installed(void)
2031 {
2032 	struct ap_config_info info;
2033 
2034 	if (ap_instructions_available()) {
2035 		if (ap_qci(&info) == 0)
2036 			return info.apxa;
2037 	}
2038 
2039 	return 0;
2040 }
2041 
2042 /*
2043  * The format of the crypto control block (CRYCB) is specified in the 3 low
2044  * order bits of the CRYCB designation (CRYCBD) field as follows:
2045  * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2046  *	     AP extended addressing (APXA) facility is installed.
2047  * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2048  * Format 2: Both the APXA and MSAX3 facilities are installed.
2049  */
2050 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2051 {
2052 	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2053 
2054 	/* Clear the CRYCB format bits - i.e., set format 0 by default */
2055 	kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2056 
2057 	/* Check whether MSAX3 is installed */
2058 	if (!test_kvm_facility(kvm, 76))
2059 		return;
2060 
2061 	if (kvm_s390_apxa_installed())
2062 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2063 	else
2064 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2065 }
2066 
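/*
 * Install the AP matrix masks in the guest's CRYCB: @apm, @aqm and @adm
 * designate the adapters, usage domains and control domains the guest may
 * use. All vcpus are blocked while the masks are copied, and a VSIE
 * restart request is broadcast so that the shadow CRYCBs are rebuilt.
 */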
2067 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2068 			       unsigned long *aqm, unsigned long *adm)
2069 {
2070 	struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2071 
2072 	mutex_lock(&kvm->lock);
2073 	kvm_s390_vcpu_block_all(kvm);
2074 
2075 	switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2076 	case CRYCB_FORMAT2: /* APCB1 uses 256 bits */
2077 		memcpy(crycb->apcb1.apm, apm, 32);
2078 		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2079 			 apm[0], apm[1], apm[2], apm[3]);
2080 		memcpy(crycb->apcb1.aqm, aqm, 32);
2081 		VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2082 			 aqm[0], aqm[1], aqm[2], aqm[3]);
2083 		memcpy(crycb->apcb1.adm, adm, 32);
2084 		VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2085 			 adm[0], adm[1], adm[2], adm[3]);
2086 		break;
2087 	case CRYCB_FORMAT1:
2088 	case CRYCB_FORMAT0: /* Fall through - both use APCB0 */
2089 		memcpy(crycb->apcb0.apm, apm, 8);
2090 		memcpy(crycb->apcb0.aqm, aqm, 2);
2091 		memcpy(crycb->apcb0.adm, adm, 2);
2092 		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2093 			 apm[0], *((unsigned short *)aqm),
2094 			 *((unsigned short *)adm));
2095 		break;
2096 	default:	/* Cannot happen */
2097 		break;
2098 	}
2099 
2100 	/* recreate the shadow crycb for each vcpu */
2101 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2102 	kvm_s390_vcpu_unblock_all(kvm);
2103 	mutex_unlock(&kvm->lock);
2104 }
2105 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
2106 
2107 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2108 {
2109 	mutex_lock(&kvm->lock);
2110 	kvm_s390_vcpu_block_all(kvm);
2111 
2112 	memset(&kvm->arch.crypto.crycb->apcb0, 0,
2113 	       sizeof(kvm->arch.crypto.crycb->apcb0));
2114 	memset(&kvm->arch.crypto.crycb->apcb1, 0,
2115 	       sizeof(kvm->arch.crypto.crycb->apcb1));
2116 
2117 	VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2118 	/* recreate the shadow crycb for each vcpu */
2119 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2120 	kvm_s390_vcpu_unblock_all(kvm);
2121 	mutex_unlock(&kvm->lock);
2122 }
2123 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2124 
2125 static u64 kvm_s390_get_initial_cpuid(void)
2126 {
2127 	struct cpuid cpuid;
2128 
2129 	get_cpu_id(&cpuid);
2130 	cpuid.version = 0xff;
2131 	return *((u64 *) &cpuid);
2132 }
2133 
2134 static void kvm_s390_crypto_init(struct kvm *kvm)
2135 {
2136 	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2137 	kvm_s390_set_crycb_format(kvm);
2138 
2139 	if (!test_kvm_facility(kvm, 76))
2140 		return;
2141 
2142 	/* Enable AES/DEA protected key functions by default */
2143 	kvm->arch.crypto.aes_kw = 1;
2144 	kvm->arch.crypto.dea_kw = 1;
2145 	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2146 			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2147 	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2148 			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2149 }
2150 
2151 static void sca_dispose(struct kvm *kvm)
2152 {
2153 	if (kvm->arch.use_esca)
2154 		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2155 	else
2156 		free_page((unsigned long)(kvm->arch.sca));
2157 	kvm->arch.sca = NULL;
2158 }
2159 
2160 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2161 {
2162 	gfp_t alloc_flags = GFP_KERNEL;
2163 	int i, rc;
2164 	char debug_name[16];
2165 	static unsigned long sca_offset;
2166 
2167 	rc = -EINVAL;
2168 #ifdef CONFIG_KVM_S390_UCONTROL
2169 	if (type & ~KVM_VM_S390_UCONTROL)
2170 		goto out_err;
2171 	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2172 		goto out_err;
2173 #else
2174 	if (type)
2175 		goto out_err;
2176 #endif
2177 
2178 	rc = s390_enable_sie();
2179 	if (rc)
2180 		goto out_err;
2181 
2182 	rc = -ENOMEM;
2183 
2184 	if (!sclp.has_64bscao)
2185 		alloc_flags |= GFP_DMA;
2186 	rwlock_init(&kvm->arch.sca_lock);
2187 	/* start with basic SCA */
2188 	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2189 	if (!kvm->arch.sca)
2190 		goto out_err;
2191 	spin_lock(&kvm_lock);
2192 	sca_offset += 16;
2193 	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2194 		sca_offset = 0;
2195 	kvm->arch.sca = (struct bsca_block *)
2196 			((char *) kvm->arch.sca + sca_offset);
2197 	spin_unlock(&kvm_lock);
2198 
2199 	sprintf(debug_name, "kvm-%u", current->pid);
2200 
2201 	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2202 	if (!kvm->arch.dbf)
2203 		goto out_err;
2204 
2205 	BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2206 	kvm->arch.sie_page2 =
2207 	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
2208 	if (!kvm->arch.sie_page2)
2209 		goto out_err;
2210 
2211 	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2212 
2213 	for (i = 0; i < kvm_s390_fac_size(); i++) {
2214 		kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
2215 					      (kvm_s390_fac_base[i] |
2216 					       kvm_s390_fac_ext[i]);
2217 		kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
2218 					      kvm_s390_fac_base[i];
2219 	}
2220 
2221 	/* we are always in czam mode - even on pre z14 machines */
2222 	set_kvm_facility(kvm->arch.model.fac_mask, 138);
2223 	set_kvm_facility(kvm->arch.model.fac_list, 138);
2224 	/* we emulate STHYI in kvm */
2225 	set_kvm_facility(kvm->arch.model.fac_mask, 74);
2226 	set_kvm_facility(kvm->arch.model.fac_list, 74);
2227 	if (MACHINE_HAS_TLB_GUEST) {
2228 		set_kvm_facility(kvm->arch.model.fac_mask, 147);
2229 		set_kvm_facility(kvm->arch.model.fac_list, 147);
2230 	}
2231 
2232 	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2233 	kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2234 
2235 	kvm_s390_crypto_init(kvm);
2236 
2237 	mutex_init(&kvm->arch.float_int.ais_lock);
2238 	spin_lock_init(&kvm->arch.float_int.lock);
2239 	for (i = 0; i < FIRQ_LIST_COUNT; i++)
2240 		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2241 	init_waitqueue_head(&kvm->arch.ipte_wq);
2242 	mutex_init(&kvm->arch.ipte_mutex);
2243 
2244 	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2245 	VM_EVENT(kvm, 3, "vm created with type %lu", type);
2246 
2247 	if (type & KVM_VM_S390_UCONTROL) {
2248 		kvm->arch.gmap = NULL;
2249 		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2250 	} else {
2251 		if (sclp.hamax == U64_MAX)
2252 			kvm->arch.mem_limit = TASK_SIZE_MAX;
2253 		else
2254 			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2255 						    sclp.hamax + 1);
2256 		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2257 		if (!kvm->arch.gmap)
2258 			goto out_err;
2259 		kvm->arch.gmap->private = kvm;
2260 		kvm->arch.gmap->pfault_enabled = 0;
2261 	}
2262 
2263 	kvm->arch.use_pfmfi = sclp.has_pfmfi;
2264 	kvm->arch.use_skf = sclp.has_skey;
2265 	spin_lock_init(&kvm->arch.start_stop_lock);
2266 	kvm_s390_vsie_init(kvm);
2267 	kvm_s390_gisa_init(kvm);
2268 	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2269 
2270 	return 0;
2271 out_err:
2272 	free_page((unsigned long)kvm->arch.sie_page2);
2273 	debug_unregister(kvm->arch.dbf);
2274 	sca_dispose(kvm);
2275 	KVM_EVENT(3, "creation of vm failed: %d", rc);
2276 	return rc;
2277 }
2278 
2279 bool kvm_arch_has_vcpu_debugfs(void)
2280 {
2281 	return false;
2282 }
2283 
2284 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
2285 {
2286 	return 0;
2287 }
2288 
2289 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2290 {
2291 	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2292 	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2293 	kvm_s390_clear_local_irqs(vcpu);
2294 	kvm_clear_async_pf_completion_queue(vcpu);
2295 	if (!kvm_is_ucontrol(vcpu->kvm))
2296 		sca_del_vcpu(vcpu);
2297 
2298 	if (kvm_is_ucontrol(vcpu->kvm))
2299 		gmap_remove(vcpu->arch.gmap);
2300 
2301 	if (vcpu->kvm->arch.use_cmma)
2302 		kvm_s390_vcpu_unsetup_cmma(vcpu);
2303 	free_page((unsigned long)(vcpu->arch.sie_block));
2304 
2305 	kvm_vcpu_uninit(vcpu);
2306 	kmem_cache_free(kvm_vcpu_cache, vcpu);
2307 }
2308 
2309 static void kvm_free_vcpus(struct kvm *kvm)
2310 {
2311 	unsigned int i;
2312 	struct kvm_vcpu *vcpu;
2313 
2314 	kvm_for_each_vcpu(i, vcpu, kvm)
2315 		kvm_arch_vcpu_destroy(vcpu);
2316 
2317 	mutex_lock(&kvm->lock);
2318 	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2319 		kvm->vcpus[i] = NULL;
2320 
2321 	atomic_set(&kvm->online_vcpus, 0);
2322 	mutex_unlock(&kvm->lock);
2323 }
2324 
2325 void kvm_arch_destroy_vm(struct kvm *kvm)
2326 {
2327 	kvm_free_vcpus(kvm);
2328 	sca_dispose(kvm);
2329 	debug_unregister(kvm->arch.dbf);
2330 	kvm_s390_gisa_destroy(kvm);
2331 	free_page((unsigned long)kvm->arch.sie_page2);
2332 	if (!kvm_is_ucontrol(kvm))
2333 		gmap_remove(kvm->arch.gmap);
2334 	kvm_s390_destroy_adapters(kvm);
2335 	kvm_s390_clear_float_irqs(kvm);
2336 	kvm_s390_vsie_destroy(kvm);
2337 	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2338 }
2339 
2340 /* Section: vcpu related */
2341 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2342 {
2343 	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2344 	if (!vcpu->arch.gmap)
2345 		return -ENOMEM;
2346 	vcpu->arch.gmap->private = vcpu->kvm;
2347 
2348 	return 0;
2349 }
2350 
2351 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2352 {
2353 	if (!kvm_s390_use_sca_entries())
2354 		return;
2355 	read_lock(&vcpu->kvm->arch.sca_lock);
2356 	if (vcpu->kvm->arch.use_esca) {
2357 		struct esca_block *sca = vcpu->kvm->arch.sca;
2358 
2359 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2360 		sca->cpu[vcpu->vcpu_id].sda = 0;
2361 	} else {
2362 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2363 
2364 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2365 		sca->cpu[vcpu->vcpu_id].sda = 0;
2366 	}
2367 	read_unlock(&vcpu->kvm->arch.sca_lock);
2368 }
2369 
2370 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2371 {
2372 	if (!kvm_s390_use_sca_entries()) {
2373 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2374 
2375 		/* we still need the basic sca for the ipte control */
2376 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2377 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2378 		return;
2379 	}
2380 	read_lock(&vcpu->kvm->arch.sca_lock);
2381 	if (vcpu->kvm->arch.use_esca) {
2382 		struct esca_block *sca = vcpu->kvm->arch.sca;
2383 
2384 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2385 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2386 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2387 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2388 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2389 	} else {
2390 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2391 
2392 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2393 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2394 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2395 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2396 	}
2397 	read_unlock(&vcpu->kvm->arch.sca_lock);
2398 }
2399 
2400 /* Basic SCA to Extended SCA data copy routines */
2401 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2402 {
2403 	d->sda = s->sda;
2404 	d->sigp_ctrl.c = s->sigp_ctrl.c;
2405 	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2406 }
2407 
2408 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2409 {
2410 	int i;
2411 
2412 	d->ipte_control = s->ipte_control;
2413 	d->mcn[0] = s->mcn;
2414 	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2415 		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2416 }
2417 
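/*
 * Replace the basic SCA with an extended SCA: allocate the new block, copy
 * the existing entries while all vcpus are blocked and the sca_lock is held
 * for writing, point every SIE block at the new origin and free the old
 * basic SCA afterwards.
 */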
2418 static int sca_switch_to_extended(struct kvm *kvm)
2419 {
2420 	struct bsca_block *old_sca = kvm->arch.sca;
2421 	struct esca_block *new_sca;
2422 	struct kvm_vcpu *vcpu;
2423 	unsigned int vcpu_idx;
2424 	u32 scaol, scaoh;
2425 
2426 	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2427 	if (!new_sca)
2428 		return -ENOMEM;
2429 
2430 	scaoh = (u32)((u64)(new_sca) >> 32);
2431 	scaol = (u32)(u64)(new_sca) & ~0x3fU;
2432 
2433 	kvm_s390_vcpu_block_all(kvm);
2434 	write_lock(&kvm->arch.sca_lock);
2435 
2436 	sca_copy_b_to_e(new_sca, old_sca);
2437 
2438 	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2439 		vcpu->arch.sie_block->scaoh = scaoh;
2440 		vcpu->arch.sie_block->scaol = scaol;
2441 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2442 	}
2443 	kvm->arch.sca = new_sca;
2444 	kvm->arch.use_esca = 1;
2445 
2446 	write_unlock(&kvm->arch.sca_lock);
2447 	kvm_s390_vcpu_unblock_all(kvm);
2448 
2449 	free_page((unsigned long)old_sca);
2450 
2451 	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2452 		 old_sca, kvm->arch.sca);
2453 	return 0;
2454 }
2455 
2456 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2457 {
2458 	int rc;
2459 
2460 	if (!kvm_s390_use_sca_entries()) {
2461 		if (id < KVM_MAX_VCPUS)
2462 			return true;
2463 		return false;
2464 	}
2465 	if (id < KVM_S390_BSCA_CPU_SLOTS)
2466 		return true;
2467 	if (!sclp.has_esca || !sclp.has_64bscao)
2468 		return false;
2469 
2470 	mutex_lock(&kvm->lock);
2471 	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2472 	mutex_unlock(&kvm->lock);
2473 
2474 	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2475 }
2476 
2477 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2478 {
2479 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2480 	kvm_clear_async_pf_completion_queue(vcpu);
2481 	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2482 				    KVM_SYNC_GPRS |
2483 				    KVM_SYNC_ACRS |
2484 				    KVM_SYNC_CRS |
2485 				    KVM_SYNC_ARCH0 |
2486 				    KVM_SYNC_PFAULT;
2487 	kvm_s390_set_prefix(vcpu, 0);
2488 	if (test_kvm_facility(vcpu->kvm, 64))
2489 		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2490 	if (test_kvm_facility(vcpu->kvm, 82))
2491 		vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
2492 	if (test_kvm_facility(vcpu->kvm, 133))
2493 		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2494 	if (test_kvm_facility(vcpu->kvm, 156))
2495 		vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
2496 	/* fprs can be synchronized via vrs, even if the guest has no vx. With
2497 	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2498 	 */
2499 	if (MACHINE_HAS_VX)
2500 		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2501 	else
2502 		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2503 
2504 	if (kvm_is_ucontrol(vcpu->kvm))
2505 		return __kvm_ucontrol_vcpu_init(vcpu);
2506 
2507 	return 0;
2508 }
2509 
2510 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2511 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2512 {
2513 	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2514 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2515 	vcpu->arch.cputm_start = get_tod_clock_fast();
2516 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2517 }
2518 
2519 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2520 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2521 {
2522 	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2523 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2524 	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2525 	vcpu->arch.cputm_start = 0;
2526 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2527 }
2528 
2529 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2530 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2531 {
2532 	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2533 	vcpu->arch.cputm_enabled = true;
2534 	__start_cpu_timer_accounting(vcpu);
2535 }
2536 
2537 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2538 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2539 {
2540 	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2541 	__stop_cpu_timer_accounting(vcpu);
2542 	vcpu->arch.cputm_enabled = false;
2543 }
2544 
2545 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2546 {
2547 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2548 	__enable_cpu_timer_accounting(vcpu);
2549 	preempt_enable();
2550 }
2551 
2552 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2553 {
2554 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2555 	__disable_cpu_timer_accounting(vcpu);
2556 	preempt_enable();
2557 }
2558 
2559 /* set the cpu timer - may only be called from the VCPU thread itself */
2560 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2561 {
2562 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2563 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2564 	if (vcpu->arch.cputm_enabled)
2565 		vcpu->arch.cputm_start = get_tod_clock_fast();
2566 	vcpu->arch.sie_block->cputm = cputm;
2567 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2568 	preempt_enable();
2569 }
2570 
2571 /* update and get the cpu timer - can also be called from other VCPU threads */
2572 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2573 {
2574 	unsigned int seq;
2575 	__u64 value;
2576 
2577 	if (unlikely(!vcpu->arch.cputm_enabled))
2578 		return vcpu->arch.sie_block->cputm;
2579 
2580 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2581 	do {
2582 		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2583 		/*
2584 		 * If the writer would ever execute a read in the critical
2585 		 * section, e.g. in irq context, we have a deadlock.
2586 		 */
2587 		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2588 		value = vcpu->arch.sie_block->cputm;
2589 		/* if cputm_start is 0, accounting is being started/stopped */
2590 		if (likely(vcpu->arch.cputm_start))
2591 			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2592 	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2593 	preempt_enable();
2594 	return value;
2595 }
2596 
2597 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2598 {
2599 
2600 	gmap_enable(vcpu->arch.enabled_gmap);
2601 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
2602 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2603 		__start_cpu_timer_accounting(vcpu);
2604 	vcpu->cpu = cpu;
2605 }
2606 
2607 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2608 {
2609 	vcpu->cpu = -1;
2610 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2611 		__stop_cpu_timer_accounting(vcpu);
2612 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
2613 	vcpu->arch.enabled_gmap = gmap_get_enabled();
2614 	gmap_disable(vcpu->arch.enabled_gmap);
2615 
2616 }
2617 
2618 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2619 {
2620 	/* this equals initial cpu reset in pop, but we don't switch to ESA */
2621 	vcpu->arch.sie_block->gpsw.mask = 0UL;
2622 	vcpu->arch.sie_block->gpsw.addr = 0UL;
2623 	kvm_s390_set_prefix(vcpu, 0);
2624 	kvm_s390_set_cpu_timer(vcpu, 0);
2625 	vcpu->arch.sie_block->ckc       = 0UL;
2626 	vcpu->arch.sie_block->todpr     = 0;
2627 	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2628 	vcpu->arch.sie_block->gcr[0]  = CR0_UNUSED_56 |
2629 					CR0_INTERRUPT_KEY_SUBMASK |
2630 					CR0_MEASUREMENT_ALERT_SUBMASK;
2631 	vcpu->arch.sie_block->gcr[14] = CR14_UNUSED_32 |
2632 					CR14_UNUSED_33 |
2633 					CR14_EXTERNAL_DAMAGE_SUBMASK;
2634 	/* make sure the new fpc will be lazily loaded */
2635 	save_fpu_regs();
2636 	current->thread.fpu.fpc = 0;
2637 	vcpu->arch.sie_block->gbea = 1;
2638 	vcpu->arch.sie_block->pp = 0;
2639 	vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
2640 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2641 	kvm_clear_async_pf_completion_queue(vcpu);
2642 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2643 		kvm_s390_vcpu_stop(vcpu);
2644 	kvm_s390_clear_local_irqs(vcpu);
2645 }
2646 
2647 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2648 {
2649 	mutex_lock(&vcpu->kvm->lock);
2650 	preempt_disable();
2651 	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2652 	vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
2653 	preempt_enable();
2654 	mutex_unlock(&vcpu->kvm->lock);
2655 	if (!kvm_is_ucontrol(vcpu->kvm)) {
2656 		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2657 		sca_add_vcpu(vcpu);
2658 	}
2659 	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2660 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2661 	/* make vcpu_load load the right gmap on the first trigger */
2662 	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2663 }
2664 
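/*
 * Wire up the per-vcpu crypto state from the VM-wide settings: point the
 * SIE block at the CRYCB and enable AP instruction interpretation and/or
 * the AES/DEA protected-key functions as configured for the VM.
 */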
2665 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2666 {
2667 	/*
2668 	 * If the AP instructions are not being interpreted and the MSAX3
2669 	 * facility is not configured for the guest, there is nothing to set up.
2670 	 */
2671 	if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
2672 		return;
2673 
2674 	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2675 	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2676 	vcpu->arch.sie_block->eca &= ~ECA_APIE;
2677 
2678 	if (vcpu->kvm->arch.crypto.apie)
2679 		vcpu->arch.sie_block->eca |= ECA_APIE;
2680 
2681 	/* Set up protected key support */
2682 	if (vcpu->kvm->arch.crypto.aes_kw)
2683 		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2684 	if (vcpu->kvm->arch.crypto.dea_kw)
2685 		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2686 }
2687 
2688 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2689 {
2690 	free_page(vcpu->arch.sie_block->cbrlo);
2691 	vcpu->arch.sie_block->cbrlo = 0;
2692 }
2693 
2694 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2695 {
2696 	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2697 	if (!vcpu->arch.sie_block->cbrlo)
2698 		return -ENOMEM;
2699 	return 0;
2700 }
2701 
2702 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2703 {
2704 	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2705 
2706 	vcpu->arch.sie_block->ibc = model->ibc;
2707 	if (test_kvm_facility(vcpu->kvm, 7))
2708 		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2709 }
2710 
2711 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2712 {
2713 	int rc = 0;
2714 
2715 	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2716 						    CPUSTAT_SM |
2717 						    CPUSTAT_STOPPED);
2718 
2719 	if (test_kvm_facility(vcpu->kvm, 78))
2720 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
2721 	else if (test_kvm_facility(vcpu->kvm, 8))
2722 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
2723 
2724 	kvm_s390_vcpu_setup_model(vcpu);
2725 
2726 	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2727 	if (MACHINE_HAS_ESOP)
2728 		vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2729 	if (test_kvm_facility(vcpu->kvm, 9))
2730 		vcpu->arch.sie_block->ecb |= ECB_SRSI;
2731 	if (test_kvm_facility(vcpu->kvm, 73))
2732 		vcpu->arch.sie_block->ecb |= ECB_TE;
2733 
2734 	if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
2735 		vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2736 	if (test_kvm_facility(vcpu->kvm, 130))
2737 		vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2738 	vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2739 	if (sclp.has_cei)
2740 		vcpu->arch.sie_block->eca |= ECA_CEI;
2741 	if (sclp.has_ib)
2742 		vcpu->arch.sie_block->eca |= ECA_IB;
2743 	if (sclp.has_siif)
2744 		vcpu->arch.sie_block->eca |= ECA_SII;
2745 	if (sclp.has_sigpif)
2746 		vcpu->arch.sie_block->eca |= ECA_SIGPI;
2747 	if (test_kvm_facility(vcpu->kvm, 129)) {
2748 		vcpu->arch.sie_block->eca |= ECA_VX;
2749 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2750 	}
2751 	if (test_kvm_facility(vcpu->kvm, 139))
2752 		vcpu->arch.sie_block->ecd |= ECD_MEF;
2753 	if (test_kvm_facility(vcpu->kvm, 156))
2754 		vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
2755 	if (vcpu->arch.sie_block->gd) {
2756 		vcpu->arch.sie_block->eca |= ECA_AIV;
2757 		VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
2758 			   vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
2759 	}
2760 	vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2761 					| SDNXC;
2762 	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2763 
2764 	if (sclp.has_kss)
2765 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
2766 	else
2767 		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2768 
2769 	if (vcpu->kvm->arch.use_cmma) {
2770 		rc = kvm_s390_vcpu_setup_cmma(vcpu);
2771 		if (rc)
2772 			return rc;
2773 	}
2774 	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2775 	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2776 
2777 	vcpu->arch.sie_block->hpid = HPID_KVM;
2778 
2779 	kvm_s390_vcpu_crypto_setup(vcpu);
2780 
2781 	return rc;
2782 }
2783 
2784 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2785 				      unsigned int id)
2786 {
2787 	struct kvm_vcpu *vcpu;
2788 	struct sie_page *sie_page;
2789 	int rc = -EINVAL;
2790 
2791 	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2792 		goto out;
2793 
2794 	rc = -ENOMEM;
2795 
2796 	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2797 	if (!vcpu)
2798 		goto out;
2799 
2800 	BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
2801 	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2802 	if (!sie_page)
2803 		goto out_free_cpu;
2804 
2805 	vcpu->arch.sie_block = &sie_page->sie_block;
2806 	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2807 
2808 	/* the real guest size will always be smaller than msl */
2809 	vcpu->arch.sie_block->mso = 0;
2810 	vcpu->arch.sie_block->msl = sclp.hamax;
2811 
2812 	vcpu->arch.sie_block->icpua = id;
2813 	spin_lock_init(&vcpu->arch.local_int.lock);
2814 	vcpu->arch.sie_block->gd = (u32)(u64)kvm->arch.gisa;
2815 	if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
2816 		vcpu->arch.sie_block->gd |= GISA_FORMAT1;
2817 	seqcount_init(&vcpu->arch.cputm_seqcount);
2818 
2819 	rc = kvm_vcpu_init(vcpu, kvm, id);
2820 	if (rc)
2821 		goto out_free_sie_block;
2822 	VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2823 		 vcpu->arch.sie_block);
2824 	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2825 
2826 	return vcpu;
2827 out_free_sie_block:
2828 	free_page((unsigned long)(vcpu->arch.sie_block));
2829 out_free_cpu:
2830 	kmem_cache_free(kvm_vcpu_cache, vcpu);
2831 out:
2832 	return ERR_PTR(rc);
2833 }
2834 
2835 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
2836 {
2837 	return kvm_s390_vcpu_has_irq(vcpu, 0);
2838 }
2839 
2840 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
2841 {
2842 	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
2843 }
2844 
2845 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2846 {
2847 	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2848 	exit_sie(vcpu);
2849 }
2850 
2851 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2852 {
2853 	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2854 }
2855 
2856 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2857 {
2858 	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2859 	exit_sie(vcpu);
2860 }
2861 
2862 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
2863 {
2864 	return atomic_read(&vcpu->arch.sie_block->prog20) &
2865 	       (PROG_BLOCK_SIE | PROG_REQUEST);
2866 }
2867 
2868 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2869 {
2870 	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2871 }
2872 
2873 /*
2874  * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
2875  * If the CPU is not running (e.g. waiting as idle), it returns immediately.
2876  */
2877 void exit_sie(struct kvm_vcpu *vcpu)
2878 {
2879 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
2880 	kvm_s390_vsie_kick(vcpu);
2881 	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2882 		cpu_relax();
2883 }
2884 
2885 /* Kick a guest cpu out of SIE to process a request synchronously */
2886 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
2887 {
2888 	kvm_make_request(req, vcpu);
2889 	kvm_s390_vcpu_request(vcpu);
2890 }
2891 
2892 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2893 			      unsigned long end)
2894 {
2895 	struct kvm *kvm = gmap->private;
2896 	struct kvm_vcpu *vcpu;
2897 	unsigned long prefix;
2898 	int i;
2899 
2900 	if (gmap_is_shadow(gmap))
2901 		return;
2902 	if (start >= 1UL << 31)
2903 		/* We are only interested in prefix pages */
2904 		return;
2905 	kvm_for_each_vcpu(i, vcpu, kvm) {
2906 		/* match against both prefix pages */
2907 		prefix = kvm_s390_get_prefix(vcpu);
2908 		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2909 			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2910 				   start, end);
2911 			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2912 		}
2913 	}
2914 }
2915 
2916 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2917 {
2918 	/* kvm common code refers to this, but never calls it */
2919 	BUG();
2920 	return 0;
2921 }
2922 
2923 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2924 					   struct kvm_one_reg *reg)
2925 {
2926 	int r = -EINVAL;
2927 
2928 	switch (reg->id) {
2929 	case KVM_REG_S390_TODPR:
2930 		r = put_user(vcpu->arch.sie_block->todpr,
2931 			     (u32 __user *)reg->addr);
2932 		break;
2933 	case KVM_REG_S390_EPOCHDIFF:
2934 		r = put_user(vcpu->arch.sie_block->epoch,
2935 			     (u64 __user *)reg->addr);
2936 		break;
2937 	case KVM_REG_S390_CPU_TIMER:
2938 		r = put_user(kvm_s390_get_cpu_timer(vcpu),
2939 			     (u64 __user *)reg->addr);
2940 		break;
2941 	case KVM_REG_S390_CLOCK_COMP:
2942 		r = put_user(vcpu->arch.sie_block->ckc,
2943 			     (u64 __user *)reg->addr);
2944 		break;
2945 	case KVM_REG_S390_PFTOKEN:
2946 		r = put_user(vcpu->arch.pfault_token,
2947 			     (u64 __user *)reg->addr);
2948 		break;
2949 	case KVM_REG_S390_PFCOMPARE:
2950 		r = put_user(vcpu->arch.pfault_compare,
2951 			     (u64 __user *)reg->addr);
2952 		break;
2953 	case KVM_REG_S390_PFSELECT:
2954 		r = put_user(vcpu->arch.pfault_select,
2955 			     (u64 __user *)reg->addr);
2956 		break;
2957 	case KVM_REG_S390_PP:
2958 		r = put_user(vcpu->arch.sie_block->pp,
2959 			     (u64 __user *)reg->addr);
2960 		break;
2961 	case KVM_REG_S390_GBEA:
2962 		r = put_user(vcpu->arch.sie_block->gbea,
2963 			     (u64 __user *)reg->addr);
2964 		break;
2965 	default:
2966 		break;
2967 	}
2968 
2969 	return r;
2970 }
2971 
2972 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2973 					   struct kvm_one_reg *reg)
2974 {
2975 	int r = -EINVAL;
2976 	__u64 val;
2977 
2978 	switch (reg->id) {
2979 	case KVM_REG_S390_TODPR:
2980 		r = get_user(vcpu->arch.sie_block->todpr,
2981 			     (u32 __user *)reg->addr);
2982 		break;
2983 	case KVM_REG_S390_EPOCHDIFF:
2984 		r = get_user(vcpu->arch.sie_block->epoch,
2985 			     (u64 __user *)reg->addr);
2986 		break;
2987 	case KVM_REG_S390_CPU_TIMER:
2988 		r = get_user(val, (u64 __user *)reg->addr);
2989 		if (!r)
2990 			kvm_s390_set_cpu_timer(vcpu, val);
2991 		break;
2992 	case KVM_REG_S390_CLOCK_COMP:
2993 		r = get_user(vcpu->arch.sie_block->ckc,
2994 			     (u64 __user *)reg->addr);
2995 		break;
2996 	case KVM_REG_S390_PFTOKEN:
2997 		r = get_user(vcpu->arch.pfault_token,
2998 			     (u64 __user *)reg->addr);
2999 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3000 			kvm_clear_async_pf_completion_queue(vcpu);
3001 		break;
3002 	case KVM_REG_S390_PFCOMPARE:
3003 		r = get_user(vcpu->arch.pfault_compare,
3004 			     (u64 __user *)reg->addr);
3005 		break;
3006 	case KVM_REG_S390_PFSELECT:
3007 		r = get_user(vcpu->arch.pfault_select,
3008 			     (u64 __user *)reg->addr);
3009 		break;
3010 	case KVM_REG_S390_PP:
3011 		r = get_user(vcpu->arch.sie_block->pp,
3012 			     (u64 __user *)reg->addr);
3013 		break;
3014 	case KVM_REG_S390_GBEA:
3015 		r = get_user(vcpu->arch.sie_block->gbea,
3016 			     (u64 __user *)reg->addr);
3017 		break;
3018 	default:
3019 		break;
3020 	}
3021 
3022 	return r;
3023 }
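/*
 * A minimal sketch of accessing one of these registers from user space via
 * the generic one-reg interface; vcpu_fd is assumed to be a vcpu file
 * descriptor and error handling is omitted:
 *
 *	__u64 cputm;
 *	struct kvm_one_reg reg = {
 *		.id   = KVM_REG_S390_CPU_TIMER,
 *		.addr = (__u64)(unsigned long)&cputm,
 *	};
 *
 *	ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
 */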
3024 
3025 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3026 {
3027 	kvm_s390_vcpu_initial_reset(vcpu);
3028 	return 0;
3029 }
3030 
3031 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3032 {
3033 	vcpu_load(vcpu);
3034 	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3035 	vcpu_put(vcpu);
3036 	return 0;
3037 }
3038 
3039 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3040 {
3041 	vcpu_load(vcpu);
3042 	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3043 	vcpu_put(vcpu);
3044 	return 0;
3045 }
3046 
3047 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3048 				  struct kvm_sregs *sregs)
3049 {
3050 	vcpu_load(vcpu);
3051 
3052 	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3053 	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3054 
3055 	vcpu_put(vcpu);
3056 	return 0;
3057 }
3058 
3059 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3060 				  struct kvm_sregs *sregs)
3061 {
3062 	vcpu_load(vcpu);
3063 
3064 	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3065 	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3066 
3067 	vcpu_put(vcpu);
3068 	return 0;
3069 }
3070 
3071 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3072 {
3073 	int ret = 0;
3074 
3075 	vcpu_load(vcpu);
3076 
3077 	if (test_fp_ctl(fpu->fpc)) {
3078 		ret = -EINVAL;
3079 		goto out;
3080 	}
3081 	vcpu->run->s.regs.fpc = fpu->fpc;
3082 	if (MACHINE_HAS_VX)
3083 		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3084 				 (freg_t *) fpu->fprs);
3085 	else
3086 		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3087 
3088 out:
3089 	vcpu_put(vcpu);
3090 	return ret;
3091 }
3092 
3093 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3094 {
3095 	vcpu_load(vcpu);
3096 
3097 	/* make sure we have the latest values */
3098 	save_fpu_regs();
3099 	if (MACHINE_HAS_VX)
3100 		convert_vx_to_fp((freg_t *) fpu->fprs,
3101 				 (__vector128 *) vcpu->run->s.regs.vrs);
3102 	else
3103 		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3104 	fpu->fpc = vcpu->run->s.regs.fpc;
3105 
3106 	vcpu_put(vcpu);
3107 	return 0;
3108 }
3109 
3110 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3111 {
3112 	int rc = 0;
3113 
3114 	if (!is_vcpu_stopped(vcpu))
3115 		rc = -EBUSY;
3116 	else {
3117 		vcpu->run->psw_mask = psw.mask;
3118 		vcpu->run->psw_addr = psw.addr;
3119 	}
3120 	return rc;
3121 }
3122 
3123 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3124 				  struct kvm_translation *tr)
3125 {
3126 	return -EINVAL; /* not implemented yet */
3127 }
3128 
3129 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3130 			      KVM_GUESTDBG_USE_HW_BP | \
3131 			      KVM_GUESTDBG_ENABLE)
3132 
3133 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3134 					struct kvm_guest_debug *dbg)
3135 {
3136 	int rc = 0;
3137 
3138 	vcpu_load(vcpu);
3139 
3140 	vcpu->guest_debug = 0;
3141 	kvm_s390_clear_bp_data(vcpu);
3142 
3143 	if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3144 		rc = -EINVAL;
3145 		goto out;
3146 	}
3147 	if (!sclp.has_gpere) {
3148 		rc = -EINVAL;
3149 		goto out;
3150 	}
3151 
3152 	if (dbg->control & KVM_GUESTDBG_ENABLE) {
3153 		vcpu->guest_debug = dbg->control;
3154 		/* enforce guest PER */
3155 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3156 
3157 		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3158 			rc = kvm_s390_import_bp_data(vcpu, dbg);
3159 	} else {
3160 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3161 		vcpu->arch.guestdbg.last_bp = 0;
3162 	}
3163 
3164 	if (rc) {
3165 		vcpu->guest_debug = 0;
3166 		kvm_s390_clear_bp_data(vcpu);
3167 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3168 	}
3169 
3170 out:
3171 	vcpu_put(vcpu);
3172 	return rc;
3173 }
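/*
 * A minimal sketch of enabling single-stepping from user space; vcpu_fd is
 * assumed to be a vcpu file descriptor and error handling is omitted:
 *
 *	struct kvm_guest_debug dbg = {
 *		.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP,
 *	};
 *
 *	ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
 */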
3174 
3175 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3176 				    struct kvm_mp_state *mp_state)
3177 {
3178 	int ret;
3179 
3180 	vcpu_load(vcpu);
3181 
3182 	/* CHECK_STOP and LOAD are not supported yet */
3183 	ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3184 				      KVM_MP_STATE_OPERATING;
3185 
3186 	vcpu_put(vcpu);
3187 	return ret;
3188 }
3189 
3190 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3191 				    struct kvm_mp_state *mp_state)
3192 {
3193 	int rc = 0;
3194 
3195 	vcpu_load(vcpu);
3196 
3197 	/* user space knows about this interface - let it control the state */
3198 	vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3199 
3200 	switch (mp_state->mp_state) {
3201 	case KVM_MP_STATE_STOPPED:
3202 		kvm_s390_vcpu_stop(vcpu);
3203 		break;
3204 	case KVM_MP_STATE_OPERATING:
3205 		kvm_s390_vcpu_start(vcpu);
3206 		break;
3207 	case KVM_MP_STATE_LOAD:
3208 	case KVM_MP_STATE_CHECK_STOP:
3209 		/* fall through - CHECK_STOP and LOAD are not supported yet */
3210 	default:
3211 		rc = -ENXIO;
3212 	}
3213 
3214 	vcpu_put(vcpu);
3215 	return rc;
3216 }
3217 
3218 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3219 {
3220 	return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3221 }
3222 
3223 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3224 {
3225 retry:
3226 	kvm_s390_vcpu_request_handled(vcpu);
3227 	if (!kvm_request_pending(vcpu))
3228 		return 0;
3229 	/*
3230 	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
3231 	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3232 	 * This ensures that the ipte instruction for this request has
3233 	 * already finished. We might race against a second unmapper that
3234 	 * wants to set the blocking bit. Let's just retry the request loop.
3235 	 */
3236 	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3237 		int rc;
3238 		rc = gmap_mprotect_notify(vcpu->arch.gmap,
3239 					  kvm_s390_get_prefix(vcpu),
3240 					  PAGE_SIZE * 2, PROT_WRITE);
3241 		if (rc) {
3242 			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3243 			return rc;
3244 		}
3245 		goto retry;
3246 	}
3247 
3248 	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3249 		vcpu->arch.sie_block->ihcpu = 0xffff;
3250 		goto retry;
3251 	}
3252 
3253 	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3254 		if (!ibs_enabled(vcpu)) {
3255 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3256 			kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3257 		}
3258 		goto retry;
3259 	}
3260 
3261 	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3262 		if (ibs_enabled(vcpu)) {
3263 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3264 			kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3265 		}
3266 		goto retry;
3267 	}
3268 
3269 	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3270 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3271 		goto retry;
3272 	}
3273 
3274 	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3275 		/*
3276 		 * Disable CMM virtualization; we will emulate the ESSA
3277 		 * instruction manually, in order to provide the additional
3278 		 * functionality needed for live migration.
3279 		 */
3280 		vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3281 		goto retry;
3282 	}
3283 
3284 	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3285 		/*
3286 		 * Re-enable CMM virtualization if CMMA is available and
3287 		 * CMM has been used.
3288 		 */
3289 		if ((vcpu->kvm->arch.use_cmma) &&
3290 		    (vcpu->kvm->mm->context.uses_cmm))
3291 			vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3292 		goto retry;
3293 	}
3294 
3295 	/* nothing to do, just clear the request */
3296 	kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3297 	/* we left the vsie handler, nothing to do, just clear the request */
3298 	kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
3299 
3300 	return 0;
3301 }
3302 
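/*
 * Set the guest TOD clock for the whole VM: the guest epoch is the
 * (wrapping) difference between the requested guest TOD and the host TOD.
 * With the multiple-epoch facility (139) the epoch index is adjusted as
 * well, including the borrow when the 64-bit subtraction wraps around.
 */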
3303 void kvm_s390_set_tod_clock(struct kvm *kvm,
3304 			    const struct kvm_s390_vm_tod_clock *gtod)
3305 {
3306 	struct kvm_vcpu *vcpu;
3307 	struct kvm_s390_tod_clock_ext htod;
3308 	int i;
3309 
3310 	mutex_lock(&kvm->lock);
3311 	preempt_disable();
3312 
3313 	get_tod_clock_ext((char *)&htod);
3314 
3315 	kvm->arch.epoch = gtod->tod - htod.tod;
3316 	kvm->arch.epdx = 0;
3317 	if (test_kvm_facility(kvm, 139)) {
3318 		kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
3319 		if (kvm->arch.epoch > gtod->tod)
3320 			kvm->arch.epdx -= 1;
3321 	}
3322 
3323 	kvm_s390_vcpu_block_all(kvm);
3324 	kvm_for_each_vcpu(i, vcpu, kvm) {
3325 		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3326 		vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
3327 	}
3328 
3329 	kvm_s390_vcpu_unblock_all(kvm);
3330 	preempt_enable();
3331 	mutex_unlock(&kvm->lock);
3332 }
3333 
3334 /**
3335  * kvm_arch_fault_in_page - fault-in guest page if necessary
3336  * @vcpu: The corresponding virtual cpu
3337  * @gpa: Guest physical address
3338  * @writable: Whether the page should be writable or not
3339  *
3340  * Make sure that a guest page has been faulted-in on the host.
3341  *
3342  * Return: Zero on success, negative error code otherwise.
3343  */
3344 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3345 {
3346 	return gmap_fault(vcpu->arch.gmap, gpa,
3347 			  writable ? FAULT_FLAG_WRITE : 0);
3348 }
3349 
3350 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3351 				      unsigned long token)
3352 {
3353 	struct kvm_s390_interrupt inti;
3354 	struct kvm_s390_irq irq;
3355 
3356 	if (start_token) {
3357 		irq.u.ext.ext_params2 = token;
3358 		irq.type = KVM_S390_INT_PFAULT_INIT;
3359 		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3360 	} else {
3361 		inti.type = KVM_S390_INT_PFAULT_DONE;
3362 		inti.parm64 = token;
3363 		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3364 	}
3365 }
3366 
3367 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3368 				     struct kvm_async_pf *work)
3369 {
3370 	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3371 	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3372 }
3373 
3374 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3375 				 struct kvm_async_pf *work)
3376 {
3377 	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3378 	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3379 }
3380 
3381 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3382 			       struct kvm_async_pf *work)
3383 {
3384 	/* s390 will always inject the page directly */
3385 }
3386 
3387 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3388 {
3389 	/*
3390 	 * s390 will always inject the page directly,
3391 	 * but we still want check_async_completion to clean up
3392 	 */
3393 	return true;
3394 }
3395 
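/*
 * Try to set up an async pfault for the current guest fault. This only
 * happens if the guest has armed the pfault mechanism (valid token,
 * matching PSW mask bits, external interrupts and the service-signal
 * subclass enabled), pfault handling is enabled for the gmap and no
 * interrupt is already pending. A zero return means the caller has to
 * resolve the fault synchronously.
 */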
3396 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3397 {
3398 	hva_t hva;
3399 	struct kvm_arch_async_pf arch;
3400 	int rc;
3401 
3402 	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3403 		return 0;
3404 	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3405 	    vcpu->arch.pfault_compare)
3406 		return 0;
3407 	if (psw_extint_disabled(vcpu))
3408 		return 0;
3409 	if (kvm_s390_vcpu_has_irq(vcpu, 0))
3410 		return 0;
3411 	if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
3412 		return 0;
3413 	if (!vcpu->arch.gmap->pfault_enabled)
3414 		return 0;
3415 
3416 	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3417 	hva += current->thread.gmap_addr & ~PAGE_MASK;
3418 	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3419 		return 0;
3420 
3421 	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3422 	return rc;
3423 }
3424 
3425 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3426 {
3427 	int rc, cpuflags;
3428 
3429 	/*
3430 	 * On s390, notifications for arriving pages are delivered directly
3431 	 * to the guest, but the housekeeping for completed pfaults is
3432 	 * handled outside the worker.
3433 	 */
3434 	kvm_check_async_pf_completion(vcpu);
3435 
3436 	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3437 	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3438 
3439 	if (need_resched())
3440 		schedule();
3441 
3442 	if (test_cpu_flag(CIF_MCCK_PENDING))
3443 		s390_handle_mcck();
3444 
3445 	if (!kvm_is_ucontrol(vcpu->kvm)) {
3446 		rc = kvm_s390_deliver_pending_interrupts(vcpu);
3447 		if (rc)
3448 			return rc;
3449 	}
3450 
3451 	rc = kvm_s390_handle_requests(vcpu);
3452 	if (rc)
3453 		return rc;
3454 
3455 	if (guestdbg_enabled(vcpu)) {
3456 		kvm_s390_backup_guest_per_regs(vcpu);
3457 		kvm_s390_patch_guest_per_regs(vcpu);
3458 	}
3459 
3460 	vcpu->arch.sie_block->icptcode = 0;
3461 	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3462 	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3463 	trace_kvm_s390_sie_enter(vcpu, cpuflags);
3464 
3465 	return 0;
3466 }
3467 
3468 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3469 {
3470 	struct kvm_s390_pgm_info pgm_info = {
3471 		.code = PGM_ADDRESSING,
3472 	};
3473 	u8 opcode, ilen;
3474 	int rc;
3475 
3476 	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3477 	trace_kvm_s390_sie_fault(vcpu);
3478 
3479 	/*
3480 	 * We want to inject an addressing exception, which is defined as a
3481 	 * suppressing or terminating exception. However, since we came here
3482 	 * by a DAT access exception, the PSW still points to the faulting
3483 	 * instruction since DAT exceptions are nullifying. So we've got
3484 	 * to look up the current opcode to get the length of the instruction
3485 	 * to be able to forward the PSW.
3486 	 */
3487 	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3488 	ilen = insn_length(opcode);
3489 	if (rc < 0) {
3490 		return rc;
3491 	} else if (rc) {
3492 		/* Instruction-Fetching Exceptions - we can't detect the ilen.
3493 		 * Forward by arbitrary ilc, injection will take care of
3494 		 * nullification if necessary.
3495 		 */
3496 		pgm_info = vcpu->arch.pgm;
3497 		ilen = 4;
3498 	}
3499 	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3500 	kvm_s390_forward_psw(vcpu, ilen);
3501 	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3502 }
3503 
3504 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3505 {
3506 	struct mcck_volatile_info *mcck_info;
3507 	struct sie_page *sie_page;
3508 
3509 	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3510 		   vcpu->arch.sie_block->icptcode);
3511 	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3512 
3513 	if (guestdbg_enabled(vcpu))
3514 		kvm_s390_restore_guest_per_regs(vcpu);
3515 
3516 	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3517 	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3518 
3519 	if (exit_reason == -EINTR) {
3520 		VCPU_EVENT(vcpu, 3, "%s", "machine check");
3521 		sie_page = container_of(vcpu->arch.sie_block,
3522 					struct sie_page, sie_block);
3523 		mcck_info = &sie_page->mcck_info;
3524 		kvm_s390_reinject_machine_check(vcpu, mcck_info);
3525 		return 0;
3526 	}
3527 
3528 	if (vcpu->arch.sie_block->icptcode > 0) {
3529 		int rc = kvm_handle_sie_intercept(vcpu);
3530 
3531 		if (rc != -EOPNOTSUPP)
3532 			return rc;
3533 		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3534 		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3535 		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3536 		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3537 		return -EREMOTE;
3538 	} else if (exit_reason != -EFAULT) {
3539 		vcpu->stat.exit_null++;
3540 		return 0;
3541 	} else if (kvm_is_ucontrol(vcpu->kvm)) {
3542 		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3543 		vcpu->run->s390_ucontrol.trans_exc_code =
3544 						current->thread.gmap_addr;
3545 		vcpu->run->s390_ucontrol.pgm_code = 0x10;
3546 		return -EREMOTE;
3547 	} else if (current->thread.gmap_pfault) {
3548 		trace_kvm_s390_major_guest_pfault(vcpu);
3549 		current->thread.gmap_pfault = 0;
3550 		if (kvm_arch_setup_async_pf(vcpu))
3551 			return 0;
3552 		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3553 	}
3554 	return vcpu_post_run_fault_in_sie(vcpu);
3555 }
3556 
3557 static int __vcpu_run(struct kvm_vcpu *vcpu)
3558 {
3559 	int rc, exit_reason;
3560 
3561 	/*
3562 	 * We try to hold kvm->srcu during most of vcpu_run (except when
3563 	 * running the guest), so that memslots (and other stuff) are protected
3564 	 */
3565 	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3566 
3567 	do {
3568 		rc = vcpu_pre_run(vcpu);
3569 		if (rc)
3570 			break;
3571 
3572 		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3573 		/*
3574 		 * As PF_VCPU will be used in the fault handler, there must be
3575 		 * no uaccess between guest_enter and guest_exit.
3576 		 */
3577 		local_irq_disable();
3578 		guest_enter_irqoff();
3579 		__disable_cpu_timer_accounting(vcpu);
3580 		local_irq_enable();
3581 		exit_reason = sie64a(vcpu->arch.sie_block,
3582 				     vcpu->run->s.regs.gprs);
3583 		local_irq_disable();
3584 		__enable_cpu_timer_accounting(vcpu);
3585 		guest_exit_irqoff();
3586 		local_irq_enable();
3587 		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3588 
3589 		rc = vcpu_post_run(vcpu, exit_reason);
3590 	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3591 
3592 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3593 	return rc;
3594 }
3595 
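/*
 * Pull the register state that userspace may have changed (psw plus the
 * kvm_dirty_regs sections of kvm_run) into the SIE control block and switch
 * the current thread's access, FPU/vector and guarded-storage registers over
 * to the guest copies for the upcoming SIE entry.
 */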
3596 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3597 {
3598 	struct runtime_instr_cb *riccb;
3599 	struct gs_cb *gscb;
3600 
3601 	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3602 	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3603 	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3604 	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3605 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3606 		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3607 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3608 		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3609 		/* some control register changes require a tlb flush */
3610 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3611 	}
3612 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3613 		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3614 		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3615 		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3616 		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3617 		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3618 	}
3619 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3620 		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3621 		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3622 		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3623 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3624 			kvm_clear_async_pf_completion_queue(vcpu);
3625 	}
3626 	/*
3627 	 * If userspace sets the riccb (e.g. after migration) to a valid state,
3628 	 * we should enable RI here instead of doing the lazy enablement.
3629 	 */
3630 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3631 	    test_kvm_facility(vcpu->kvm, 64) &&
3632 	    riccb->v &&
3633 	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3634 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3635 		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3636 	}
3637 	/*
3638 	 * If userspace sets the gscb (e.g. after migration) to non-zero,
3639 	 * we should enable GS here instead of doing the lazy enablement.
3640 	 */
3641 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3642 	    test_kvm_facility(vcpu->kvm, 133) &&
3643 	    gscb->gssm &&
3644 	    !vcpu->arch.gs_enabled) {
3645 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3646 		vcpu->arch.sie_block->ecb |= ECB_GS;
3647 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3648 		vcpu->arch.gs_enabled = 1;
3649 	}
3650 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
3651 	    test_kvm_facility(vcpu->kvm, 82)) {
3652 		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3653 		vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
3654 	}
3655 	save_access_regs(vcpu->arch.host_acrs);
3656 	restore_access_regs(vcpu->run->s.regs.acrs);
3657 	/* save host (userspace) fprs/vrs */
3658 	save_fpu_regs();
3659 	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3660 	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
3661 	if (MACHINE_HAS_VX)
3662 		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3663 	else
3664 		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3665 	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3666 	if (test_fp_ctl(current->thread.fpu.fpc))
3667 		/* User space provided an invalid FPC, let's clear it */
3668 		current->thread.fpu.fpc = 0;
3669 	if (MACHINE_HAS_GS) {
3670 		preempt_disable();
3671 		__ctl_set_bit(2, 4);
3672 		if (current->thread.gs_cb) {
3673 			vcpu->arch.host_gscb = current->thread.gs_cb;
3674 			save_gs_cb(vcpu->arch.host_gscb);
3675 		}
3676 		if (vcpu->arch.gs_enabled) {
3677 			current->thread.gs_cb = (struct gs_cb *)
3678 						&vcpu->run->s.regs.gscb;
3679 			restore_gs_cb(current->thread.gs_cb);
3680 		}
3681 		preempt_enable();
3682 	}
3683 	/* SIE will load etoken directly from SDNX and therefore kvm_run */
3684 
3685 	kvm_run->kvm_dirty_regs = 0;
3686 }
3687 
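/*
 * Counterpart to sync_regs(): copy the guest state back into kvm_run and
 * restore the host's access, FPU/vector and guarded-storage register context
 * before returning to userspace.
 */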
3688 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3689 {
3690 	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3691 	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3692 	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3693 	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3694 	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3695 	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3696 	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3697 	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3698 	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3699 	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3700 	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3701 	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3702 	kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
3703 	save_access_regs(vcpu->run->s.regs.acrs);
3704 	restore_access_regs(vcpu->arch.host_acrs);
3705 	/* Save guest register state */
3706 	save_fpu_regs();
3707 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3708 	/* Restore will be done lazily at return */
3709 	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3710 	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3711 	if (MACHINE_HAS_GS) {
3712 		__ctl_set_bit(2, 4);
3713 		if (vcpu->arch.gs_enabled)
3714 			save_gs_cb(current->thread.gs_cb);
3715 		preempt_disable();
3716 		current->thread.gs_cb = vcpu->arch.host_gscb;
3717 		restore_gs_cb(vcpu->arch.host_gscb);
3718 		preempt_enable();
3719 		if (!vcpu->arch.host_gscb)
3720 			__ctl_clear_bit(2, 4);
3721 		vcpu->arch.host_gscb = NULL;
3722 	}
3723 	/* SIE will save etoken directly into SDNX and therefore kvm_run */
3724 }
3725 
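/*
 * The KVM_RUN ioctl: auto-start the vcpu unless userspace controls the cpu
 * state, sync registers in, run until an exit that needs userspace, then
 * store registers back.  -EREMOTE from the run loop only means that kvm_run
 * has been prepared, so it is turned into a successful return.
 */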
3726 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3727 {
3728 	int rc;
3729 
3730 	if (kvm_run->immediate_exit)
3731 		return -EINTR;
3732 
3733 	vcpu_load(vcpu);
3734 
3735 	if (guestdbg_exit_pending(vcpu)) {
3736 		kvm_s390_prepare_debug_exit(vcpu);
3737 		rc = 0;
3738 		goto out;
3739 	}
3740 
3741 	kvm_sigset_activate(vcpu);
3742 
3743 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
3744 		kvm_s390_vcpu_start(vcpu);
3745 	} else if (is_vcpu_stopped(vcpu)) {
3746 		pr_err_ratelimited("can't run stopped vcpu %d\n",
3747 				   vcpu->vcpu_id);
3748 		rc = -EINVAL;
3749 		goto out;
3750 	}
3751 
3752 	sync_regs(vcpu, kvm_run);
3753 	enable_cpu_timer_accounting(vcpu);
3754 
3755 	might_fault();
3756 	rc = __vcpu_run(vcpu);
3757 
3758 	if (signal_pending(current) && !rc) {
3759 		kvm_run->exit_reason = KVM_EXIT_INTR;
3760 		rc = -EINTR;
3761 	}
3762 
3763 	if (guestdbg_exit_pending(vcpu) && !rc)  {
3764 		kvm_s390_prepare_debug_exit(vcpu);
3765 		rc = 0;
3766 	}
3767 
3768 	if (rc == -EREMOTE) {
3769 		/* userspace support is needed, kvm_run has been prepared */
3770 		rc = 0;
3771 	}
3772 
3773 	disable_cpu_timer_accounting(vcpu);
3774 	store_regs(vcpu, kvm_run);
3775 
3776 	kvm_sigset_deactivate(vcpu);
3777 
3778 	vcpu->stat.exit_userspace++;
3779 out:
3780 	vcpu_put(vcpu);
3781 	return rc;
3782 }
3783 
3784 /*
3785  * store status at address
3786  * we have two special cases:
3787  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
3788  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
3789  */
3790 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
3791 {
3792 	unsigned char archmode = 1;
3793 	freg_t fprs[NUM_FPRS];
3794 	unsigned int px;
3795 	u64 clkcomp, cputm;
3796 	int rc;
3797 
3798 	px = kvm_s390_get_prefix(vcpu);
3799 	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
3800 		if (write_guest_abs(vcpu, 163, &archmode, 1))
3801 			return -EFAULT;
3802 		gpa = 0;
3803 	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
3804 		if (write_guest_real(vcpu, 163, &archmode, 1))
3805 			return -EFAULT;
3806 		gpa = px;
3807 	} else
3808 		gpa -= __LC_FPREGS_SAVE_AREA;
3809 
3810 	/* manually convert vector registers if necessary */
3811 	if (MACHINE_HAS_VX) {
3812 		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
3813 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3814 				     fprs, 128);
3815 	} else {
3816 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3817 				     vcpu->run->s.regs.fprs, 128);
3818 	}
3819 	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
3820 			      vcpu->run->s.regs.gprs, 128);
3821 	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
3822 			      &vcpu->arch.sie_block->gpsw, 16);
3823 	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
3824 			      &px, 4);
3825 	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
3826 			      &vcpu->run->s.regs.fpc, 4);
3827 	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
3828 			      &vcpu->arch.sie_block->todpr, 4);
3829 	cputm = kvm_s390_get_cpu_timer(vcpu);
3830 	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
3831 			      &cputm, 8);
3832 	clkcomp = vcpu->arch.sie_block->ckc >> 8;
3833 	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
3834 			      &clkcomp, 8);
3835 	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
3836 			      &vcpu->run->s.regs.acrs, 64);
3837 	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
3838 			      &vcpu->arch.sie_block->gcr, 128);
3839 	return rc ? -EFAULT : 0;
3840 }
3841 
3842 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
3843 {
3844 	/*
3845 	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
3846 	 * switch in the run ioctl. Let's update our copies before we save
3847 	 * them into the save area.
3848 	 */
3849 	save_fpu_regs();
3850 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3851 	save_access_regs(vcpu->run->s.regs.acrs);
3852 
3853 	return kvm_s390_store_status_unloaded(vcpu, addr);
3854 }
3855 
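/*
 * The IBS facility is toggled as an optimization for the single started vcpu
 * case: it is enabled while exactly one vcpu is started and disabled on all
 * vcpus as soon as a second one comes online (see kvm_s390_vcpu_start/stop
 * below).
 */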
3856 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3857 {
3858 	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
3859 	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
3860 }
3861 
3862 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
3863 {
3864 	unsigned int i;
3865 	struct kvm_vcpu *vcpu;
3866 
3867 	kvm_for_each_vcpu(i, vcpu, kvm) {
3868 		__disable_ibs_on_vcpu(vcpu);
3869 	}
3870 }
3871 
3872 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3873 {
3874 	if (!sclp.has_ibs)
3875 		return;
3876 	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
3877 	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
3878 }
3879 
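/*
 * Move the vcpu out of the STOPPED state so it can enter SIE again.  The
 * per-VM start_stop_lock serializes this against concurrent start/stop and
 * the IBS handling described above.
 */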
3880 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
3881 {
3882 	int i, online_vcpus, started_vcpus = 0;
3883 
3884 	if (!is_vcpu_stopped(vcpu))
3885 		return;
3886 
3887 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
3888 	/* Only one cpu at a time may enter/leave the STOPPED state. */
3889 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
3890 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3891 
3892 	for (i = 0; i < online_vcpus; i++) {
3893 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
3894 			started_vcpus++;
3895 	}
3896 
3897 	if (started_vcpus == 0) {
3898 		/* we're the only active VCPU -> speed it up */
3899 		__enable_ibs_on_vcpu(vcpu);
3900 	} else if (started_vcpus == 1) {
3901 		/*
3902 		 * As we are starting a second VCPU, we have to disable
3903 		 * the IBS facility on all VCPUs to remove potentially
3904 		 * outstanding ENABLE requests.
3905 		 */
3906 		__disable_ibs_on_all_vcpus(vcpu->kvm);
3907 	}
3908 
3909 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
3910 	/*
3911 	 * Another VCPU might have used IBS while we were offline.
3912 	 * Let's play safe and flush the VCPU at startup.
3913 	 */
3914 	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3915 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3916 	return;
3917 }
3918 
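/*
 * Counterpart to kvm_s390_vcpu_start(): mark the vcpu STOPPED, clear any
 * pending SIGP STOP request and re-enable IBS if exactly one started vcpu
 * remains.
 */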
3919 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
3920 {
3921 	int i, online_vcpus, started_vcpus = 0;
3922 	struct kvm_vcpu *started_vcpu = NULL;
3923 
3924 	if (is_vcpu_stopped(vcpu))
3925 		return;
3926 
3927 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
3928 	/* Only one cpu at a time may enter/leave the STOPPED state. */
3929 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
3930 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3931 
3932 	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
3933 	kvm_s390_clear_stop_irq(vcpu);
3934 
3935 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
3936 	__disable_ibs_on_vcpu(vcpu);
3937 
3938 	for (i = 0; i < online_vcpus; i++) {
3939 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
3940 			started_vcpus++;
3941 			started_vcpu = vcpu->kvm->vcpus[i];
3942 		}
3943 	}
3944 
3945 	if (started_vcpus == 1) {
3946 		/*
3947 		 * As we only have one VCPU left, we want to enable the
3948 		 * IBS facility for that VCPU to speed it up.
3949 		 */
3950 		__enable_ibs_on_vcpu(started_vcpu);
3951 	}
3952 
3953 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3954 	return;
3955 }
3956 
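/*
 * Per-vcpu KVM_ENABLE_CAP: flags must be zero and only
 * KVM_CAP_S390_CSS_SUPPORT can be enabled here (it flips a VM-wide flag).
 */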
3957 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3958 				     struct kvm_enable_cap *cap)
3959 {
3960 	int r;
3961 
3962 	if (cap->flags)
3963 		return -EINVAL;
3964 
3965 	switch (cap->cap) {
3966 	case KVM_CAP_S390_CSS_SUPPORT:
3967 		if (!vcpu->kvm->arch.css_support) {
3968 			vcpu->kvm->arch.css_support = 1;
3969 			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
3970 			trace_kvm_s390_enable_css(vcpu->kvm);
3971 		}
3972 		r = 0;
3973 		break;
3974 	default:
3975 		r = -EINVAL;
3976 		break;
3977 	}
3978 	return r;
3979 }
3980 
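/*
 * Backend of the KVM_S390_MEM_OP vcpu ioctl: read or write guest logical
 * memory through a bounce buffer, or merely check accessibility when
 * KVM_S390_MEMOP_F_CHECK_ONLY is set.  A positive return value reports a
 * guest access exception, which is additionally injected into the guest if
 * KVM_S390_MEMOP_F_INJECT_EXCEPTION is set.
 *
 * Roughly, userspace drives this as in the following sketch (illustrative
 * only, variable names and error handling are placeholders):
 *
 *	struct kvm_s390_mem_op mop = {
 *		.gaddr = guest_addr,
 *		.buf   = (__u64)(unsigned long)local_buf,
 *		.size  = len,
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *		.ar    = 0,
 *	};
 *	ret = ioctl(vcpu_fd, KVM_S390_MEM_OP, &mop);
 */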
3981 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3982 				  struct kvm_s390_mem_op *mop)
3983 {
3984 	void __user *uaddr = (void __user *)mop->buf;
3985 	void *tmpbuf = NULL;
3986 	int r, srcu_idx;
3987 	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3988 				    | KVM_S390_MEMOP_F_CHECK_ONLY;
3989 
3990 	if (mop->flags & ~supported_flags)
3991 		return -EINVAL;
3992 
3993 	if (mop->size > MEM_OP_MAX_SIZE)
3994 		return -E2BIG;
3995 
3996 	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3997 		tmpbuf = vmalloc(mop->size);
3998 		if (!tmpbuf)
3999 			return -ENOMEM;
4000 	}
4001 
4002 	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4003 
4004 	switch (mop->op) {
4005 	case KVM_S390_MEMOP_LOGICAL_READ:
4006 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4007 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4008 					    mop->size, GACC_FETCH);
4009 			break;
4010 		}
4011 		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4012 		if (r == 0) {
4013 			if (copy_to_user(uaddr, tmpbuf, mop->size))
4014 				r = -EFAULT;
4015 		}
4016 		break;
4017 	case KVM_S390_MEMOP_LOGICAL_WRITE:
4018 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4019 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4020 					    mop->size, GACC_STORE);
4021 			break;
4022 		}
4023 		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4024 			r = -EFAULT;
4025 			break;
4026 		}
4027 		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4028 		break;
4029 	default:
4030 		r = -EINVAL;
4031 	}
4032 
4033 	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
4034 
4035 	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4036 		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
4037 
4038 	vfree(tmpbuf);
4039 	return r;
4040 }
4041 
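/*
 * vcpu ioctls that are handled without taking the vcpu mutex first:
 * interrupt injection via KVM_S390_IRQ and the legacy KVM_S390_INTERRUPT.
 * -ENOIOCTLCMD makes the generic code fall back to kvm_arch_vcpu_ioctl().
 */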
4042 long kvm_arch_vcpu_async_ioctl(struct file *filp,
4043 			       unsigned int ioctl, unsigned long arg)
4044 {
4045 	struct kvm_vcpu *vcpu = filp->private_data;
4046 	void __user *argp = (void __user *)arg;
4047 
4048 	switch (ioctl) {
4049 	case KVM_S390_IRQ: {
4050 		struct kvm_s390_irq s390irq;
4051 
4052 		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4053 			return -EFAULT;
4054 		return kvm_s390_inject_vcpu(vcpu, &s390irq);
4055 	}
4056 	case KVM_S390_INTERRUPT: {
4057 		struct kvm_s390_interrupt s390int;
4058 		struct kvm_s390_irq s390irq;
4059 
4060 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
4061 			return -EFAULT;
4062 		if (s390int_to_s390irq(&s390int, &s390irq))
4063 			return -EINVAL;
4064 		return kvm_s390_inject_vcpu(vcpu, &s390irq);
4065 	}
4066 	}
4067 	return -ENOIOCTLCMD;
4068 }
4069 
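/*
 * The remaining vcpu ioctls, dispatched with the vcpu loaded: store status,
 * initial PSW and reset, one-reg access, ucontrol mappings, explicit faults,
 * capability enablement, memory operations and irq state save/restore.
 */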
4070 long kvm_arch_vcpu_ioctl(struct file *filp,
4071 			 unsigned int ioctl, unsigned long arg)
4072 {
4073 	struct kvm_vcpu *vcpu = filp->private_data;
4074 	void __user *argp = (void __user *)arg;
4075 	int idx;
4076 	long r;
4077 
4078 	vcpu_load(vcpu);
4079 
4080 	switch (ioctl) {
4081 	case KVM_S390_STORE_STATUS:
4082 		idx = srcu_read_lock(&vcpu->kvm->srcu);
4083 		r = kvm_s390_vcpu_store_status(vcpu, arg);
4084 		srcu_read_unlock(&vcpu->kvm->srcu, idx);
4085 		break;
4086 	case KVM_S390_SET_INITIAL_PSW: {
4087 		psw_t psw;
4088 
4089 		r = -EFAULT;
4090 		if (copy_from_user(&psw, argp, sizeof(psw)))
4091 			break;
4092 		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4093 		break;
4094 	}
4095 	case KVM_S390_INITIAL_RESET:
4096 		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4097 		break;
4098 	case KVM_SET_ONE_REG:
4099 	case KVM_GET_ONE_REG: {
4100 		struct kvm_one_reg reg;
4101 		r = -EFAULT;
4102 		if (copy_from_user(&reg, argp, sizeof(reg)))
4103 			break;
4104 		if (ioctl == KVM_SET_ONE_REG)
4105 			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4106 		else
4107 			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4108 		break;
4109 	}
4110 #ifdef CONFIG_KVM_S390_UCONTROL
4111 	case KVM_S390_UCAS_MAP: {
4112 		struct kvm_s390_ucas_mapping ucasmap;
4113 
4114 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4115 			r = -EFAULT;
4116 			break;
4117 		}
4118 
4119 		if (!kvm_is_ucontrol(vcpu->kvm)) {
4120 			r = -EINVAL;
4121 			break;
4122 		}
4123 
4124 		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4125 				     ucasmap.vcpu_addr, ucasmap.length);
4126 		break;
4127 	}
4128 	case KVM_S390_UCAS_UNMAP: {
4129 		struct kvm_s390_ucas_mapping ucasmap;
4130 
4131 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4132 			r = -EFAULT;
4133 			break;
4134 		}
4135 
4136 		if (!kvm_is_ucontrol(vcpu->kvm)) {
4137 			r = -EINVAL;
4138 			break;
4139 		}
4140 
4141 		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
4142 			ucasmap.length);
4143 		break;
4144 	}
4145 #endif
4146 	case KVM_S390_VCPU_FAULT: {
4147 		r = gmap_fault(vcpu->arch.gmap, arg, 0);
4148 		break;
4149 	}
4150 	case KVM_ENABLE_CAP:
4151 	{
4152 		struct kvm_enable_cap cap;
4153 		r = -EFAULT;
4154 		if (copy_from_user(&cap, argp, sizeof(cap)))
4155 			break;
4156 		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4157 		break;
4158 	}
4159 	case KVM_S390_MEM_OP: {
4160 		struct kvm_s390_mem_op mem_op;
4161 
4162 		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4163 			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
4164 		else
4165 			r = -EFAULT;
4166 		break;
4167 	}
4168 	case KVM_S390_SET_IRQ_STATE: {
4169 		struct kvm_s390_irq_state irq_state;
4170 
4171 		r = -EFAULT;
4172 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4173 			break;
4174 		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4175 		    irq_state.len == 0 ||
4176 		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4177 			r = -EINVAL;
4178 			break;
4179 		}
4180 		/* do not use irq_state.flags, it will break old QEMUs */
4181 		r = kvm_s390_set_irq_state(vcpu,
4182 					   (void __user *) irq_state.buf,
4183 					   irq_state.len);
4184 		break;
4185 	}
4186 	case KVM_S390_GET_IRQ_STATE: {
4187 		struct kvm_s390_irq_state irq_state;
4188 
4189 		r = -EFAULT;
4190 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4191 			break;
4192 		if (irq_state.len == 0) {
4193 			r = -EINVAL;
4194 			break;
4195 		}
4196 		/* do not use irq_state.flags, it will break old QEMUs */
4197 		r = kvm_s390_get_irq_state(vcpu,
4198 					   (__u8 __user *)  irq_state.buf,
4199 					   irq_state.len);
4200 		break;
4201 	}
4202 	default:
4203 		r = -ENOTTY;
4204 	}
4205 
4206 	vcpu_put(vcpu);
4207 	return r;
4208 }
4209 
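/*
 * mmap() backend for the vcpu fd: ucontrol VMs may map the SIE control block
 * at KVM_S390_SIE_PAGE_OFFSET, everything else signals SIGBUS.
 */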
4210 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
4211 {
4212 #ifdef CONFIG_KVM_S390_UCONTROL
4213 	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
4214 		 && (kvm_is_ucontrol(vcpu->kvm))) {
4215 		vmf->page = virt_to_page(vcpu->arch.sie_block);
4216 		get_page(vmf->page);
4217 		return 0;
4218 	}
4219 #endif
4220 	return VM_FAULT_SIGBUS;
4221 }
4222 
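/* No architecture specific memslot data needs to be allocated on s390. */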
4223 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
4224 			    unsigned long npages)
4225 {
4226 	return 0;
4227 }
4228 
4229 /* Section: memory related */
4230 int kvm_arch_prepare_memory_region(struct kvm *kvm,
4231 				   struct kvm_memory_slot *memslot,
4232 				   const struct kvm_userspace_memory_region *mem,
4233 				   enum kvm_mr_change change)
4234 {
4235 	/* A few sanity checks. Memory slots have to start and end at a
4236 	   segment boundary (1MB). The memory in userland may be fragmented
4237 	   into various different vmas. It is okay to mmap() and munmap()
4238 	   stuff in this slot after doing this call at any time. */
4239 
4240 	if (mem->userspace_addr & 0xffffful)
4241 		return -EINVAL;
4242 
4243 	if (mem->memory_size & 0xffffful)
4244 		return -EINVAL;
4245 
4246 	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
4247 		return -EINVAL;
4248 
4249 	return 0;
4250 }
4251 
4252 void kvm_arch_commit_memory_region(struct kvm *kvm,
4253 				const struct kvm_userspace_memory_region *mem,
4254 				const struct kvm_memory_slot *old,
4255 				const struct kvm_memory_slot *new,
4256 				enum kvm_mr_change change)
4257 {
4258 	int rc;
4259 
4260 	/* If the basics of the memslot do not change, we do not want
4261 	 * to update the gmap. Every update causes several unnecessary
4262 	 * segment translation exceptions. This is usually handled just
4263 	 * fine by the normal fault handler + gmap, but it will also
4264 	 * cause faults on the prefix page of running guest CPUs.
4265 	 */
4266 	if (old->userspace_addr == mem->userspace_addr &&
4267 	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
4268 	    old->npages * PAGE_SIZE == mem->memory_size)
4269 		return;
4270 
4271 	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
4272 		mem->guest_phys_addr, mem->memory_size);
4273 	if (rc)
4274 		pr_warn("failed to commit memory region\n");
4275 	return;
4276 }
4277 
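/*
 * Mask of the facility bits in facility-list doubleword i that may be used
 * without hypervisor support; how many 16-bit chunks survive is encoded in
 * the corresponding two-bit field of the SCLP-reported hmfai value.
 */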
4278 static inline unsigned long nonhyp_mask(int i)
4279 {
4280 	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
4281 
4282 	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
4283 }
4284 
4285 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
4286 {
4287 	vcpu->valid_wakeup = false;
4288 }
4289 
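/*
 * Module init: refuse to load when SIE is not available (sclp.has_sief2) or
 * when the mutually exclusive nested and hpage parameters are both set, seed
 * kvm_s390_fac_base from the host facility list, then register with KVM.
 */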
4290 static int __init kvm_s390_init(void)
4291 {
4292 	int i;
4293 
4294 	if (!sclp.has_sief2) {
4295 		pr_info("SIE not available\n");
4296 		return -ENODEV;
4297 	}
4298 
4299 	if (nested && hpage) {
4300 		pr_info("nested (vSIE) and hpage (huge page backing) can currently not be activated concurrently\n");
4301 		return -EINVAL;
4302 	}
4303 
4304 	for (i = 0; i < 16; i++)
4305 		kvm_s390_fac_base[i] |=
4306 			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
4307 
4308 	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
4309 }
4310 
4311 static void __exit kvm_s390_exit(void)
4312 {
4313 	kvm_exit();
4314 }
4315 
4316 module_init(kvm_s390_init);
4317 module_exit(kvm_s390_exit);
4318 
4319 /*
4320  * Enable autoloading of the kvm module.
4321  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
4322  * since x86 takes a different approach.
4323  */
4324 #include <linux/miscdevice.h>
4325 MODULE_ALIAS_MISCDEV(KVM_MINOR);
4326 MODULE_ALIAS("devname:kvm");
4327