xref: /openbmc/linux/arch/s390/kvm/kvm-s390.c (revision 03638e62)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * hosting IBM Z kernel virtual machines (s390x)
4  *
5  * Copyright IBM Corp. 2008, 2018
6  *
7  *    Author(s): Carsten Otte <cotte@de.ibm.com>
8  *               Christian Borntraeger <borntraeger@de.ibm.com>
9  *               Heiko Carstens <heiko.carstens@de.ibm.com>
10  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
11  *               Jason J. Herne <jjherne@us.ibm.com>
12  */
13 
14 #define KMSG_COMPONENT "kvm-s390"
15 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
16 
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/moduleparam.h>
27 #include <linux/random.h>
28 #include <linux/slab.h>
29 #include <linux/timer.h>
30 #include <linux/vmalloc.h>
31 #include <linux/bitmap.h>
32 #include <linux/sched/signal.h>
33 #include <linux/string.h>
34 
35 #include <asm/asm-offsets.h>
36 #include <asm/lowcore.h>
37 #include <asm/stp.h>
38 #include <asm/pgtable.h>
39 #include <asm/gmap.h>
40 #include <asm/nmi.h>
41 #include <asm/switch_to.h>
42 #include <asm/isc.h>
43 #include <asm/sclp.h>
44 #include <asm/cpacf.h>
45 #include <asm/timex.h>
46 #include <asm/ap.h>
47 #include "kvm-s390.h"
48 #include "gaccess.h"
49 
50 #define CREATE_TRACE_POINTS
51 #include "trace.h"
52 #include "trace-s390.h"
53 
#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
/* NOTE(review): presumably the number of per-vcpu local interrupt slots - confirm */
#define LOCAL_IRQS 32
/* Bytes needed to hold (KVM_MAX_VCPUS + LOCAL_IRQS) struct kvm_s390_irq entries */
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))
58 
/*
 * Each macro expands to TWO initializers: the offset of stat.x inside
 * struct kvm_vcpu (VCPU_STAT) or struct kvm (VM_STAT), followed by the
 * matching KVM_STAT_VCPU / KVM_STAT_VM kind.
 */
#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM

/*
 * Table of named statistics counters. Every entry pairs a name with the
 * counter selected through VCPU_STAT()/VM_STAT(); the list is terminated
 * by a { NULL } entry.
 */
struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "userspace_handled", VCPU_STAT(exit_userspace) },
	{ "exit_null", VCPU_STAT(exit_null) },
	{ "exit_validity", VCPU_STAT(exit_validity) },
	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
	{ "exit_external_request", VCPU_STAT(exit_external_request) },
	{ "exit_io_request", VCPU_STAT(exit_io_request) },
	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
	{ "exit_instruction", VCPU_STAT(exit_instruction) },
	{ "exit_pei", VCPU_STAT(exit_pei) },
	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
	{ "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
	{ "halt_no_poll_steal", VCPU_STAT(halt_no_poll_steal) },
	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
	{ "deliver_ckc", VCPU_STAT(deliver_ckc) },
	{ "deliver_cputm", VCPU_STAT(deliver_cputm) },
	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
	{ "deliver_virtio", VCPU_STAT(deliver_virtio) },
	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
	{ "deliver_program", VCPU_STAT(deliver_program) },
	{ "deliver_io", VCPU_STAT(deliver_io) },
	{ "deliver_machine_check", VCPU_STAT(deliver_machine_check) },
	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
	{ "inject_ckc", VCPU_STAT(inject_ckc) },
	{ "inject_cputm", VCPU_STAT(inject_cputm) },
	{ "inject_external_call", VCPU_STAT(inject_external_call) },
	{ "inject_float_mchk", VM_STAT(inject_float_mchk) },
	{ "inject_emergency_signal", VCPU_STAT(inject_emergency_signal) },
	{ "inject_io", VM_STAT(inject_io) },
	{ "inject_mchk", VCPU_STAT(inject_mchk) },
	{ "inject_pfault_done", VM_STAT(inject_pfault_done) },
	{ "inject_program", VCPU_STAT(inject_program) },
	{ "inject_restart", VCPU_STAT(inject_restart) },
	{ "inject_service_signal", VM_STAT(inject_service_signal) },
	{ "inject_set_prefix", VCPU_STAT(inject_set_prefix) },
	{ "inject_stop_signal", VCPU_STAT(inject_stop_signal) },
	{ "inject_pfault_init", VCPU_STAT(inject_pfault_init) },
	{ "inject_virtio", VM_STAT(inject_virtio) },
	{ "instruction_epsw", VCPU_STAT(instruction_epsw) },
	{ "instruction_gs", VCPU_STAT(instruction_gs) },
	{ "instruction_io_other", VCPU_STAT(instruction_io_other) },
	{ "instruction_lpsw", VCPU_STAT(instruction_lpsw) },
	{ "instruction_lpswe", VCPU_STAT(instruction_lpswe) },
	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
	{ "instruction_ptff", VCPU_STAT(instruction_ptff) },
	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
	{ "instruction_sck", VCPU_STAT(instruction_sck) },
	{ "instruction_sckpf", VCPU_STAT(instruction_sckpf) },
	{ "instruction_spx", VCPU_STAT(instruction_spx) },
	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
	{ "instruction_stap", VCPU_STAT(instruction_stap) },
	{ "instruction_iske", VCPU_STAT(instruction_iske) },
	{ "instruction_ri", VCPU_STAT(instruction_ri) },
	{ "instruction_rrbe", VCPU_STAT(instruction_rrbe) },
	{ "instruction_sske", VCPU_STAT(instruction_sske) },
	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
	{ "instruction_essa", VCPU_STAT(instruction_essa) },
	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
	{ "instruction_tb", VCPU_STAT(instruction_tb) },
	{ "instruction_tpi", VCPU_STAT(instruction_tpi) },
	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
	{ "instruction_tsch", VCPU_STAT(instruction_tsch) },
	{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
	{ "instruction_sie", VCPU_STAT(instruction_sie) },
	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
	{ "instruction_diag_10", VCPU_STAT(diagnose_10) },
	{ "instruction_diag_44", VCPU_STAT(diagnose_44) },
	{ "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
	{ "instruction_diag_258", VCPU_STAT(diagnose_258) },
	{ "instruction_diag_308", VCPU_STAT(diagnose_308) },
	{ "instruction_diag_500", VCPU_STAT(diagnose_500) },
	{ "instruction_diag_other", VCPU_STAT(diagnose_other) },
	{ NULL }	/* end-of-table marker */
};
164 
/*
 * Packed 16-byte layout: a one-byte epoch index, the 64-bit TOD value and
 * seven reserved bytes.
 * NOTE(review): presumably mirrors the buffer written by an extended
 * store-clock operation - confirm against the users of this struct.
 */
struct kvm_s390_tod_clock_ext {
	__u8 epoch_idx;
	__u64 tod;
	__u8 reserved[7];
} __packed;
170 
171 /* allow nested virtualization in KVM (if enabled by user space) */
172 static int nested;
173 module_param(nested, int, S_IRUGO);
174 MODULE_PARM_DESC(nested, "Nested virtualization support");
175 
176 /* allow 1m huge page guest backing, if !nested */
177 static int hpage;
178 module_param(hpage, int, 0444);
179 MODULE_PARM_DESC(hpage, "1m huge page backing support");
180 
181 /* maximum percentage of steal time for polling.  >100 is treated like 100 */
182 static u8 halt_poll_max_steal = 10;
183 module_param(halt_poll_max_steal, byte, 0644);
184 MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
185 
/*
 * For now we handle at most 16 double words, as this is what the s390 base
 * kernel handles and stores in the prefix page. Going beyond this requires
 * code changes, but the external uapi can stay as it is.
 */
#define SIZE_INTERNAL 16

/*
 * Base feature mask that defines the default mask for facilities. Consists
 * of the defines in FACILITIES_KVM and the non-hypervisor managed bits.
 */
static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
/*
 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
 * and defines the facilities that can be enabled via a cpu model.
 */
static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
203 
204 static unsigned long kvm_s390_fac_size(void)
205 {
206 	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
207 	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
208 	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
209 		sizeof(S390_lowcore.stfle_fac_list));
210 
211 	return SIZE_INTERNAL;
212 }
213 
/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

/* pte notifiers, wired up and registered in kvm_arch_hardware_setup() */
static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
/* debug feature handle, registered as "kvm-trace" in kvm_arch_init() */
debug_info_t *kvm_s390_dbf;
222 
223 /* Section: not file related */
/*
 * Hardware enable hook. There is nothing to switch on here - every s390 is
 * virtualization enabled ;-) - so this always reports success.
 *
 * Returns 0.
 */
int kvm_arch_hardware_enable(void)
{
	return 0;
}
229 
/*
 * Processor compatibility hook. No check is performed; success is
 * reported unconditionally.
 *
 * Returns 0.
 */
int kvm_arch_check_processor_compat(void)
{
	return 0;
}
234 
235 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
236 			      unsigned long end);
237 
238 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
239 {
240 	u8 delta_idx = 0;
241 
242 	/*
243 	 * The TOD jumps by delta, we have to compensate this by adding
244 	 * -delta to the epoch.
245 	 */
246 	delta = -delta;
247 
248 	/* sign-extension - we're adding to signed values below */
249 	if ((s64)delta < 0)
250 		delta_idx = -1;
251 
252 	scb->epoch += delta;
253 	if (scb->ecd & ECD_MEF) {
254 		scb->epdx += delta_idx;
255 		if (scb->epoch < delta)
256 			scb->epdx += 1;
257 	}
258 }
259 
260 /*
261  * This callback is executed during stop_machine(). All CPUs are therefore
262  * temporarily stopped. In order not to change guest behavior, we have to
263  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
264  * so a CPU won't be stopped while calculating with the epoch.
265  */
266 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
267 			  void *v)
268 {
269 	struct kvm *kvm;
270 	struct kvm_vcpu *vcpu;
271 	int i;
272 	unsigned long long *delta = v;
273 
274 	list_for_each_entry(kvm, &vm_list, vm_list) {
275 		kvm_for_each_vcpu(i, vcpu, kvm) {
276 			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
277 			if (i == 0) {
278 				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
279 				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
280 			}
281 			if (vcpu->arch.cputm_enabled)
282 				vcpu->arch.cputm_start += *delta;
283 			if (vcpu->arch.vsie_block)
284 				kvm_clock_sync_scb(vcpu->arch.vsie_block,
285 						   *delta);
286 		}
287 	}
288 	return NOTIFY_OK;
289 }
290 
/*
 * Notifier block invoking kvm_clock_sync(); hooked into
 * s390_epoch_delta_notifier by kvm_arch_hardware_setup().
 */
static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};
294 
295 int kvm_arch_hardware_setup(void)
296 {
297 	gmap_notifier.notifier_call = kvm_gmap_notifier;
298 	gmap_register_pte_notifier(&gmap_notifier);
299 	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
300 	gmap_register_pte_notifier(&vsie_gmap_notifier);
301 	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
302 				       &kvm_clock_notifier);
303 	return 0;
304 }
305 
/*
 * Counterpart of kvm_arch_hardware_setup(): unregister both gmap pte
 * notifiers and drop the epoch delta notifier.
 */
void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_pte_notifier(&gmap_notifier);
	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);
}
313 
/* Mark cpu feature @nr as available in kvm_s390_available_cpu_feat. */
static void allow_cpu_feat(unsigned long nr)
{
	/* the feature bitmap is addressed via the inverted bit helper */
	set_bit_inv(nr, kvm_s390_available_cpu_feat);
}
318 
/*
 * Probe PLO function code @nr using the instruction's "test bit" form.
 *
 * Returns 1 if the resulting condition code is 0, otherwise 0.
 */
static inline int plo_test_bit(unsigned char nr)
{
	/* general register 0: function code with 0x100 or'ed in for "test bit" */
	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
	int cc;

	asm volatile(
		/* Parameter registers are ignored for "test bit" */
		"	plo	0,0,0,0(0)\n"
		/* move the condition code into cc */
		"	ipm	%0\n"
		"	srl	%0,28\n"
		: "=d" (cc)
		: "d" (r0)
		: "cc");
	return cc == 0;
}
334 
335 static inline void __insn32_query(unsigned int opcode, u8 query[32])
336 {
337 	register unsigned long r0 asm("0") = 0;	/* query function */
338 	register unsigned long r1 asm("1") = (unsigned long) query;
339 
340 	asm volatile(
341 		/* Parameter regs are ignored */
342 		"	.insn	rrf,%[opc] << 16,2,4,6,0\n"
343 		: "=m" (*query)
344 		: "d" (r0), "a" (r1), [opc] "i" (opcode)
345 		: "cc");
346 }
347 
/* opcodes handed to __insn32_query() by kvm_s390_cpu_feat_init() */
#define INSN_SORTL 0xb938
#define INSN_DFLTCC 0xb939
350 
351 static void kvm_s390_cpu_feat_init(void)
352 {
353 	int i;
354 
355 	for (i = 0; i < 256; ++i) {
356 		if (plo_test_bit(i))
357 			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
358 	}
359 
360 	if (test_facility(28)) /* TOD-clock steering */
361 		ptff(kvm_s390_available_subfunc.ptff,
362 		     sizeof(kvm_s390_available_subfunc.ptff),
363 		     PTFF_QAF);
364 
365 	if (test_facility(17)) { /* MSA */
366 		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
367 			      kvm_s390_available_subfunc.kmac);
368 		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
369 			      kvm_s390_available_subfunc.kmc);
370 		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
371 			      kvm_s390_available_subfunc.km);
372 		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
373 			      kvm_s390_available_subfunc.kimd);
374 		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
375 			      kvm_s390_available_subfunc.klmd);
376 	}
377 	if (test_facility(76)) /* MSA3 */
378 		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
379 			      kvm_s390_available_subfunc.pckmo);
380 	if (test_facility(77)) { /* MSA4 */
381 		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
382 			      kvm_s390_available_subfunc.kmctr);
383 		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
384 			      kvm_s390_available_subfunc.kmf);
385 		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
386 			      kvm_s390_available_subfunc.kmo);
387 		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
388 			      kvm_s390_available_subfunc.pcc);
389 	}
390 	if (test_facility(57)) /* MSA5 */
391 		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
392 			      kvm_s390_available_subfunc.ppno);
393 
394 	if (test_facility(146)) /* MSA8 */
395 		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
396 			      kvm_s390_available_subfunc.kma);
397 
398 	if (test_facility(155)) /* MSA9 */
399 		__cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
400 			      kvm_s390_available_subfunc.kdsa);
401 
402 	if (test_facility(150)) /* SORTL */
403 		__insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
404 
405 	if (test_facility(151)) /* DFLTCC */
406 		__insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
407 
408 	if (MACHINE_HAS_ESOP)
409 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
410 	/*
411 	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
412 	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
413 	 */
414 	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
415 	    !test_facility(3) || !nested)
416 		return;
417 	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
418 	if (sclp.has_64bscao)
419 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
420 	if (sclp.has_siif)
421 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
422 	if (sclp.has_gpere)
423 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
424 	if (sclp.has_gsls)
425 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
426 	if (sclp.has_ib)
427 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
428 	if (sclp.has_cei)
429 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
430 	if (sclp.has_ibs)
431 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
432 	if (sclp.has_kss)
433 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
434 	/*
435 	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
436 	 * all skey handling functions read/set the skey from the PGSTE
437 	 * instead of the real storage key.
438 	 *
439 	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
440 	 * pages being detected as preserved although they are resident.
441 	 *
442 	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
443 	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
444 	 *
445 	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
446 	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
447 	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
448 	 *
449 	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
450 	 * cannot easily shadow the SCA because of the ipte lock.
451 	 */
452 }
453 
454 int kvm_arch_init(void *opaque)
455 {
456 	int rc;
457 
458 	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
459 	if (!kvm_s390_dbf)
460 		return -ENOMEM;
461 
462 	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
463 		rc = -ENOMEM;
464 		goto out_debug_unreg;
465 	}
466 
467 	kvm_s390_cpu_feat_init();
468 
469 	/* Register floating interrupt controller interface. */
470 	rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
471 	if (rc) {
472 		pr_err("A FLIC registration call failed with rc=%d\n", rc);
473 		goto out_debug_unreg;
474 	}
475 
476 	rc = kvm_s390_gib_init(GAL_ISC);
477 	if (rc)
478 		goto out_gib_destroy;
479 
480 	return 0;
481 
482 out_gib_destroy:
483 	kvm_s390_gib_destroy();
484 out_debug_unreg:
485 	debug_unregister(kvm_s390_dbf);
486 	return rc;
487 }
488 
/*
 * Architecture exit: destroy the GIB and unregister the debug feature
 * that kvm_arch_init() registered.
 */
void kvm_arch_exit(void)
{
	kvm_s390_gib_destroy();
	debug_unregister(kvm_s390_dbf);
}
494 
495 /* Section: device related */
496 long kvm_arch_dev_ioctl(struct file *filp,
497 			unsigned int ioctl, unsigned long arg)
498 {
499 	if (ioctl == KVM_S390_ENABLE_SIE)
500 		return s390_enable_sie();
501 	return -EINVAL;
502 }
503 
504 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
505 {
506 	int r;
507 
508 	switch (ext) {
509 	case KVM_CAP_S390_PSW:
510 	case KVM_CAP_S390_GMAP:
511 	case KVM_CAP_SYNC_MMU:
512 #ifdef CONFIG_KVM_S390_UCONTROL
513 	case KVM_CAP_S390_UCONTROL:
514 #endif
515 	case KVM_CAP_ASYNC_PF:
516 	case KVM_CAP_SYNC_REGS:
517 	case KVM_CAP_ONE_REG:
518 	case KVM_CAP_ENABLE_CAP:
519 	case KVM_CAP_S390_CSS_SUPPORT:
520 	case KVM_CAP_IOEVENTFD:
521 	case KVM_CAP_DEVICE_CTRL:
522 	case KVM_CAP_S390_IRQCHIP:
523 	case KVM_CAP_VM_ATTRIBUTES:
524 	case KVM_CAP_MP_STATE:
525 	case KVM_CAP_IMMEDIATE_EXIT:
526 	case KVM_CAP_S390_INJECT_IRQ:
527 	case KVM_CAP_S390_USER_SIGP:
528 	case KVM_CAP_S390_USER_STSI:
529 	case KVM_CAP_S390_SKEYS:
530 	case KVM_CAP_S390_IRQ_STATE:
531 	case KVM_CAP_S390_USER_INSTR0:
532 	case KVM_CAP_S390_CMMA_MIGRATION:
533 	case KVM_CAP_S390_AIS:
534 	case KVM_CAP_S390_AIS_MIGRATION:
535 		r = 1;
536 		break;
537 	case KVM_CAP_S390_HPAGE_1M:
538 		r = 0;
539 		if (hpage && !kvm_is_ucontrol(kvm))
540 			r = 1;
541 		break;
542 	case KVM_CAP_S390_MEM_OP:
543 		r = MEM_OP_MAX_SIZE;
544 		break;
545 	case KVM_CAP_NR_VCPUS:
546 	case KVM_CAP_MAX_VCPUS:
547 	case KVM_CAP_MAX_VCPU_ID:
548 		r = KVM_S390_BSCA_CPU_SLOTS;
549 		if (!kvm_s390_use_sca_entries())
550 			r = KVM_MAX_VCPUS;
551 		else if (sclp.has_esca && sclp.has_64bscao)
552 			r = KVM_S390_ESCA_CPU_SLOTS;
553 		break;
554 	case KVM_CAP_S390_COW:
555 		r = MACHINE_HAS_ESOP;
556 		break;
557 	case KVM_CAP_S390_VECTOR_REGISTERS:
558 		r = MACHINE_HAS_VX;
559 		break;
560 	case KVM_CAP_S390_RI:
561 		r = test_facility(64);
562 		break;
563 	case KVM_CAP_S390_GS:
564 		r = test_facility(133);
565 		break;
566 	case KVM_CAP_S390_BPB:
567 		r = test_facility(82);
568 		break;
569 	default:
570 		r = 0;
571 	}
572 	return r;
573 }
574 
575 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
576 				    struct kvm_memory_slot *memslot)
577 {
578 	int i;
579 	gfn_t cur_gfn, last_gfn;
580 	unsigned long gaddr, vmaddr;
581 	struct gmap *gmap = kvm->arch.gmap;
582 	DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
583 
584 	/* Loop over all guest segments */
585 	cur_gfn = memslot->base_gfn;
586 	last_gfn = memslot->base_gfn + memslot->npages;
587 	for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
588 		gaddr = gfn_to_gpa(cur_gfn);
589 		vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
590 		if (kvm_is_error_hva(vmaddr))
591 			continue;
592 
593 		bitmap_zero(bitmap, _PAGE_ENTRIES);
594 		gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
595 		for (i = 0; i < _PAGE_ENTRIES; i++) {
596 			if (test_bit(i, bitmap))
597 				mark_page_dirty(kvm, cur_gfn + i);
598 		}
599 
600 		if (fatal_signal_pending(current))
601 			return;
602 		cond_resched();
603 	}
604 }
605 
606 /* Section: vm related */
607 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
608 
609 /*
610  * Get (and clear) the dirty memory log for a memory slot.
611  */
612 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
613 			       struct kvm_dirty_log *log)
614 {
615 	int r;
616 	unsigned long n;
617 	struct kvm_memslots *slots;
618 	struct kvm_memory_slot *memslot;
619 	int is_dirty = 0;
620 
621 	if (kvm_is_ucontrol(kvm))
622 		return -EINVAL;
623 
624 	mutex_lock(&kvm->slots_lock);
625 
626 	r = -EINVAL;
627 	if (log->slot >= KVM_USER_MEM_SLOTS)
628 		goto out;
629 
630 	slots = kvm_memslots(kvm);
631 	memslot = id_to_memslot(slots, log->slot);
632 	r = -ENOENT;
633 	if (!memslot->dirty_bitmap)
634 		goto out;
635 
636 	kvm_s390_sync_dirty_log(kvm, memslot);
637 	r = kvm_get_dirty_log(kvm, log, &is_dirty);
638 	if (r)
639 		goto out;
640 
641 	/* Clear the dirty log */
642 	if (is_dirty) {
643 		n = kvm_dirty_bitmap_bytes(memslot);
644 		memset(memslot->dirty_bitmap, 0, n);
645 	}
646 	r = 0;
647 out:
648 	mutex_unlock(&kvm->slots_lock);
649 	return r;
650 }
651 
652 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
653 {
654 	unsigned int i;
655 	struct kvm_vcpu *vcpu;
656 
657 	kvm_for_each_vcpu(i, vcpu, kvm) {
658 		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
659 	}
660 }
661 
662 int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
663 {
664 	int r;
665 
666 	if (cap->flags)
667 		return -EINVAL;
668 
669 	switch (cap->cap) {
670 	case KVM_CAP_S390_IRQCHIP:
671 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
672 		kvm->arch.use_irqchip = 1;
673 		r = 0;
674 		break;
675 	case KVM_CAP_S390_USER_SIGP:
676 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
677 		kvm->arch.user_sigp = 1;
678 		r = 0;
679 		break;
680 	case KVM_CAP_S390_VECTOR_REGISTERS:
681 		mutex_lock(&kvm->lock);
682 		if (kvm->created_vcpus) {
683 			r = -EBUSY;
684 		} else if (MACHINE_HAS_VX) {
685 			set_kvm_facility(kvm->arch.model.fac_mask, 129);
686 			set_kvm_facility(kvm->arch.model.fac_list, 129);
687 			if (test_facility(134)) {
688 				set_kvm_facility(kvm->arch.model.fac_mask, 134);
689 				set_kvm_facility(kvm->arch.model.fac_list, 134);
690 			}
691 			if (test_facility(135)) {
692 				set_kvm_facility(kvm->arch.model.fac_mask, 135);
693 				set_kvm_facility(kvm->arch.model.fac_list, 135);
694 			}
695 			if (test_facility(148)) {
696 				set_kvm_facility(kvm->arch.model.fac_mask, 148);
697 				set_kvm_facility(kvm->arch.model.fac_list, 148);
698 			}
699 			if (test_facility(152)) {
700 				set_kvm_facility(kvm->arch.model.fac_mask, 152);
701 				set_kvm_facility(kvm->arch.model.fac_list, 152);
702 			}
703 			r = 0;
704 		} else
705 			r = -EINVAL;
706 		mutex_unlock(&kvm->lock);
707 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
708 			 r ? "(not available)" : "(success)");
709 		break;
710 	case KVM_CAP_S390_RI:
711 		r = -EINVAL;
712 		mutex_lock(&kvm->lock);
713 		if (kvm->created_vcpus) {
714 			r = -EBUSY;
715 		} else if (test_facility(64)) {
716 			set_kvm_facility(kvm->arch.model.fac_mask, 64);
717 			set_kvm_facility(kvm->arch.model.fac_list, 64);
718 			r = 0;
719 		}
720 		mutex_unlock(&kvm->lock);
721 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
722 			 r ? "(not available)" : "(success)");
723 		break;
724 	case KVM_CAP_S390_AIS:
725 		mutex_lock(&kvm->lock);
726 		if (kvm->created_vcpus) {
727 			r = -EBUSY;
728 		} else {
729 			set_kvm_facility(kvm->arch.model.fac_mask, 72);
730 			set_kvm_facility(kvm->arch.model.fac_list, 72);
731 			r = 0;
732 		}
733 		mutex_unlock(&kvm->lock);
734 		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
735 			 r ? "(not available)" : "(success)");
736 		break;
737 	case KVM_CAP_S390_GS:
738 		r = -EINVAL;
739 		mutex_lock(&kvm->lock);
740 		if (kvm->created_vcpus) {
741 			r = -EBUSY;
742 		} else if (test_facility(133)) {
743 			set_kvm_facility(kvm->arch.model.fac_mask, 133);
744 			set_kvm_facility(kvm->arch.model.fac_list, 133);
745 			r = 0;
746 		}
747 		mutex_unlock(&kvm->lock);
748 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
749 			 r ? "(not available)" : "(success)");
750 		break;
751 	case KVM_CAP_S390_HPAGE_1M:
752 		mutex_lock(&kvm->lock);
753 		if (kvm->created_vcpus)
754 			r = -EBUSY;
755 		else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
756 			r = -EINVAL;
757 		else {
758 			r = 0;
759 			down_write(&kvm->mm->mmap_sem);
760 			kvm->mm->context.allow_gmap_hpage_1m = 1;
761 			up_write(&kvm->mm->mmap_sem);
762 			/*
763 			 * We might have to create fake 4k page
764 			 * tables. To avoid that the hardware works on
765 			 * stale PGSTEs, we emulate these instructions.
766 			 */
767 			kvm->arch.use_skf = 0;
768 			kvm->arch.use_pfmfi = 0;
769 		}
770 		mutex_unlock(&kvm->lock);
771 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
772 			 r ? "(not available)" : "(success)");
773 		break;
774 	case KVM_CAP_S390_USER_STSI:
775 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
776 		kvm->arch.user_stsi = 1;
777 		r = 0;
778 		break;
779 	case KVM_CAP_S390_USER_INSTR0:
780 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
781 		kvm->arch.user_instr0 = 1;
782 		icpt_operexc_on_all_vcpus(kvm);
783 		r = 0;
784 		break;
785 	default:
786 		r = -EINVAL;
787 		break;
788 	}
789 	return r;
790 }
791 
792 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
793 {
794 	int ret;
795 
796 	switch (attr->attr) {
797 	case KVM_S390_VM_MEM_LIMIT_SIZE:
798 		ret = 0;
799 		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
800 			 kvm->arch.mem_limit);
801 		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
802 			ret = -EFAULT;
803 		break;
804 	default:
805 		ret = -ENXIO;
806 		break;
807 	}
808 	return ret;
809 }
810 
811 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
812 {
813 	int ret;
814 	unsigned int idx;
815 	switch (attr->attr) {
816 	case KVM_S390_VM_MEM_ENABLE_CMMA:
817 		ret = -ENXIO;
818 		if (!sclp.has_cmma)
819 			break;
820 
821 		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
822 		mutex_lock(&kvm->lock);
823 		if (kvm->created_vcpus)
824 			ret = -EBUSY;
825 		else if (kvm->mm->context.allow_gmap_hpage_1m)
826 			ret = -EINVAL;
827 		else {
828 			kvm->arch.use_cmma = 1;
829 			/* Not compatible with cmma. */
830 			kvm->arch.use_pfmfi = 0;
831 			ret = 0;
832 		}
833 		mutex_unlock(&kvm->lock);
834 		break;
835 	case KVM_S390_VM_MEM_CLR_CMMA:
836 		ret = -ENXIO;
837 		if (!sclp.has_cmma)
838 			break;
839 		ret = -EINVAL;
840 		if (!kvm->arch.use_cmma)
841 			break;
842 
843 		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
844 		mutex_lock(&kvm->lock);
845 		idx = srcu_read_lock(&kvm->srcu);
846 		s390_reset_cmma(kvm->arch.gmap->mm);
847 		srcu_read_unlock(&kvm->srcu, idx);
848 		mutex_unlock(&kvm->lock);
849 		ret = 0;
850 		break;
851 	case KVM_S390_VM_MEM_LIMIT_SIZE: {
852 		unsigned long new_limit;
853 
854 		if (kvm_is_ucontrol(kvm))
855 			return -EINVAL;
856 
857 		if (get_user(new_limit, (u64 __user *)attr->addr))
858 			return -EFAULT;
859 
860 		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
861 		    new_limit > kvm->arch.mem_limit)
862 			return -E2BIG;
863 
864 		if (!new_limit)
865 			return -EINVAL;
866 
867 		/* gmap_create takes last usable address */
868 		if (new_limit != KVM_S390_NO_MEM_LIMIT)
869 			new_limit -= 1;
870 
871 		ret = -EBUSY;
872 		mutex_lock(&kvm->lock);
873 		if (!kvm->created_vcpus) {
874 			/* gmap_create will round the limit up */
875 			struct gmap *new = gmap_create(current->mm, new_limit);
876 
877 			if (!new) {
878 				ret = -ENOMEM;
879 			} else {
880 				gmap_remove(kvm->arch.gmap);
881 				new->private = kvm;
882 				kvm->arch.gmap = new;
883 				ret = 0;
884 			}
885 		}
886 		mutex_unlock(&kvm->lock);
887 		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
888 		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
889 			 (void *) kvm->arch.gmap->asce);
890 		break;
891 	}
892 	default:
893 		ret = -ENXIO;
894 		break;
895 	}
896 	return ret;
897 }
898 
899 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
900 
901 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
902 {
903 	struct kvm_vcpu *vcpu;
904 	int i;
905 
906 	kvm_s390_vcpu_block_all(kvm);
907 
908 	kvm_for_each_vcpu(i, vcpu, kvm) {
909 		kvm_s390_vcpu_crypto_setup(vcpu);
910 		/* recreate the shadow crycb by leaving the VSIE handler */
911 		kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
912 	}
913 
914 	kvm_s390_vcpu_unblock_all(kvm);
915 }
916 
917 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
918 {
919 	mutex_lock(&kvm->lock);
920 	switch (attr->attr) {
921 	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
922 		if (!test_kvm_facility(kvm, 76)) {
923 			mutex_unlock(&kvm->lock);
924 			return -EINVAL;
925 		}
926 		get_random_bytes(
927 			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
928 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
929 		kvm->arch.crypto.aes_kw = 1;
930 		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
931 		break;
932 	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
933 		if (!test_kvm_facility(kvm, 76)) {
934 			mutex_unlock(&kvm->lock);
935 			return -EINVAL;
936 		}
937 		get_random_bytes(
938 			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
939 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
940 		kvm->arch.crypto.dea_kw = 1;
941 		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
942 		break;
943 	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
944 		if (!test_kvm_facility(kvm, 76)) {
945 			mutex_unlock(&kvm->lock);
946 			return -EINVAL;
947 		}
948 		kvm->arch.crypto.aes_kw = 0;
949 		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
950 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
951 		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
952 		break;
953 	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
954 		if (!test_kvm_facility(kvm, 76)) {
955 			mutex_unlock(&kvm->lock);
956 			return -EINVAL;
957 		}
958 		kvm->arch.crypto.dea_kw = 0;
959 		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
960 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
961 		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
962 		break;
963 	case KVM_S390_VM_CRYPTO_ENABLE_APIE:
964 		if (!ap_instructions_available()) {
965 			mutex_unlock(&kvm->lock);
966 			return -EOPNOTSUPP;
967 		}
968 		kvm->arch.crypto.apie = 1;
969 		break;
970 	case KVM_S390_VM_CRYPTO_DISABLE_APIE:
971 		if (!ap_instructions_available()) {
972 			mutex_unlock(&kvm->lock);
973 			return -EOPNOTSUPP;
974 		}
975 		kvm->arch.crypto.apie = 0;
976 		break;
977 	default:
978 		mutex_unlock(&kvm->lock);
979 		return -ENXIO;
980 	}
981 
982 	kvm_s390_vcpu_crypto_reset_all(kvm);
983 	mutex_unlock(&kvm->lock);
984 	return 0;
985 }
986 
987 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
988 {
989 	int cx;
990 	struct kvm_vcpu *vcpu;
991 
992 	kvm_for_each_vcpu(cx, vcpu, kvm)
993 		kvm_s390_sync_request(req, vcpu);
994 }
995 
996 /*
997  * Must be called with kvm->srcu held to avoid races on memslots, and with
998  * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
999  */
1000 static int kvm_s390_vm_start_migration(struct kvm *kvm)
1001 {
1002 	struct kvm_memory_slot *ms;
1003 	struct kvm_memslots *slots;
1004 	unsigned long ram_pages = 0;
1005 	int slotnr;
1006 
1007 	/* migration mode already enabled */
1008 	if (kvm->arch.migration_mode)
1009 		return 0;
1010 	slots = kvm_memslots(kvm);
1011 	if (!slots || !slots->used_slots)
1012 		return -EINVAL;
1013 
1014 	if (!kvm->arch.use_cmma) {
1015 		kvm->arch.migration_mode = 1;
1016 		return 0;
1017 	}
1018 	/* mark all the pages in active slots as dirty */
1019 	for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
1020 		ms = slots->memslots + slotnr;
1021 		/*
1022 		 * The second half of the bitmap is only used on x86,
1023 		 * and would be wasted otherwise, so we put it to good
1024 		 * use here to keep track of the state of the storage
1025 		 * attributes.
1026 		 */
1027 		memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
1028 		ram_pages += ms->npages;
1029 	}
1030 	atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
1031 	kvm->arch.migration_mode = 1;
1032 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
1033 	return 0;
1034 }
1035 
1036 /*
1037  * Must be called with kvm->slots_lock to avoid races with ourselves and
1038  * kvm_s390_vm_start_migration.
1039  */
1040 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
1041 {
1042 	/* migration mode already disabled */
1043 	if (!kvm->arch.migration_mode)
1044 		return 0;
1045 	kvm->arch.migration_mode = 0;
1046 	if (kvm->arch.use_cmma)
1047 		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
1048 	return 0;
1049 }
1050 
1051 static int kvm_s390_vm_set_migration(struct kvm *kvm,
1052 				     struct kvm_device_attr *attr)
1053 {
1054 	int res = -ENXIO;
1055 
1056 	mutex_lock(&kvm->slots_lock);
1057 	switch (attr->attr) {
1058 	case KVM_S390_VM_MIGRATION_START:
1059 		res = kvm_s390_vm_start_migration(kvm);
1060 		break;
1061 	case KVM_S390_VM_MIGRATION_STOP:
1062 		res = kvm_s390_vm_stop_migration(kvm);
1063 		break;
1064 	default:
1065 		break;
1066 	}
1067 	mutex_unlock(&kvm->slots_lock);
1068 
1069 	return res;
1070 }
1071 
1072 static int kvm_s390_vm_get_migration(struct kvm *kvm,
1073 				     struct kvm_device_attr *attr)
1074 {
1075 	u64 mig = kvm->arch.migration_mode;
1076 
1077 	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1078 		return -ENXIO;
1079 
1080 	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
1081 		return -EFAULT;
1082 	return 0;
1083 }
1084 
1085 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1086 {
1087 	struct kvm_s390_vm_tod_clock gtod;
1088 
1089 	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1090 		return -EFAULT;
1091 
1092 	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1093 		return -EINVAL;
1094 	kvm_s390_set_tod_clock(kvm, &gtod);
1095 
1096 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1097 		gtod.epoch_idx, gtod.tod);
1098 
1099 	return 0;
1100 }
1101 
1102 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1103 {
1104 	u8 gtod_high;
1105 
1106 	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1107 					   sizeof(gtod_high)))
1108 		return -EFAULT;
1109 
1110 	if (gtod_high != 0)
1111 		return -EINVAL;
1112 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1113 
1114 	return 0;
1115 }
1116 
1117 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1118 {
1119 	struct kvm_s390_vm_tod_clock gtod = { 0 };
1120 
1121 	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1122 			   sizeof(gtod.tod)))
1123 		return -EFAULT;
1124 
1125 	kvm_s390_set_tod_clock(kvm, &gtod);
1126 	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1127 	return 0;
1128 }
1129 
1130 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1131 {
1132 	int ret;
1133 
1134 	if (attr->flags)
1135 		return -EINVAL;
1136 
1137 	switch (attr->attr) {
1138 	case KVM_S390_VM_TOD_EXT:
1139 		ret = kvm_s390_set_tod_ext(kvm, attr);
1140 		break;
1141 	case KVM_S390_VM_TOD_HIGH:
1142 		ret = kvm_s390_set_tod_high(kvm, attr);
1143 		break;
1144 	case KVM_S390_VM_TOD_LOW:
1145 		ret = kvm_s390_set_tod_low(kvm, attr);
1146 		break;
1147 	default:
1148 		ret = -ENXIO;
1149 		break;
1150 	}
1151 	return ret;
1152 }
1153 
1154 static void kvm_s390_get_tod_clock(struct kvm *kvm,
1155 				   struct kvm_s390_vm_tod_clock *gtod)
1156 {
1157 	struct kvm_s390_tod_clock_ext htod;
1158 
1159 	preempt_disable();
1160 
1161 	get_tod_clock_ext((char *)&htod);
1162 
1163 	gtod->tod = htod.tod + kvm->arch.epoch;
1164 	gtod->epoch_idx = 0;
1165 	if (test_kvm_facility(kvm, 139)) {
1166 		gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
1167 		if (gtod->tod < htod.tod)
1168 			gtod->epoch_idx += 1;
1169 	}
1170 
1171 	preempt_enable();
1172 }
1173 
1174 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1175 {
1176 	struct kvm_s390_vm_tod_clock gtod;
1177 
1178 	memset(&gtod, 0, sizeof(gtod));
1179 	kvm_s390_get_tod_clock(kvm, &gtod);
1180 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1181 		return -EFAULT;
1182 
1183 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1184 		gtod.epoch_idx, gtod.tod);
1185 	return 0;
1186 }
1187 
1188 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1189 {
1190 	u8 gtod_high = 0;
1191 
1192 	if (copy_to_user((void __user *)attr->addr, &gtod_high,
1193 					 sizeof(gtod_high)))
1194 		return -EFAULT;
1195 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1196 
1197 	return 0;
1198 }
1199 
1200 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1201 {
1202 	u64 gtod;
1203 
1204 	gtod = kvm_s390_get_tod_clock_fast(kvm);
1205 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1206 		return -EFAULT;
1207 	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1208 
1209 	return 0;
1210 }
1211 
1212 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1213 {
1214 	int ret;
1215 
1216 	if (attr->flags)
1217 		return -EINVAL;
1218 
1219 	switch (attr->attr) {
1220 	case KVM_S390_VM_TOD_EXT:
1221 		ret = kvm_s390_get_tod_ext(kvm, attr);
1222 		break;
1223 	case KVM_S390_VM_TOD_HIGH:
1224 		ret = kvm_s390_get_tod_high(kvm, attr);
1225 		break;
1226 	case KVM_S390_VM_TOD_LOW:
1227 		ret = kvm_s390_get_tod_low(kvm, attr);
1228 		break;
1229 	default:
1230 		ret = -ENXIO;
1231 		break;
1232 	}
1233 	return ret;
1234 }
1235 
1236 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1237 {
1238 	struct kvm_s390_vm_cpu_processor *proc;
1239 	u16 lowest_ibc, unblocked_ibc;
1240 	int ret = 0;
1241 
1242 	mutex_lock(&kvm->lock);
1243 	if (kvm->created_vcpus) {
1244 		ret = -EBUSY;
1245 		goto out;
1246 	}
1247 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1248 	if (!proc) {
1249 		ret = -ENOMEM;
1250 		goto out;
1251 	}
1252 	if (!copy_from_user(proc, (void __user *)attr->addr,
1253 			    sizeof(*proc))) {
1254 		kvm->arch.model.cpuid = proc->cpuid;
1255 		lowest_ibc = sclp.ibc >> 16 & 0xfff;
1256 		unblocked_ibc = sclp.ibc & 0xfff;
1257 		if (lowest_ibc && proc->ibc) {
1258 			if (proc->ibc > unblocked_ibc)
1259 				kvm->arch.model.ibc = unblocked_ibc;
1260 			else if (proc->ibc < lowest_ibc)
1261 				kvm->arch.model.ibc = lowest_ibc;
1262 			else
1263 				kvm->arch.model.ibc = proc->ibc;
1264 		}
1265 		memcpy(kvm->arch.model.fac_list, proc->fac_list,
1266 		       S390_ARCH_FAC_LIST_SIZE_BYTE);
1267 		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1268 			 kvm->arch.model.ibc,
1269 			 kvm->arch.model.cpuid);
1270 		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1271 			 kvm->arch.model.fac_list[0],
1272 			 kvm->arch.model.fac_list[1],
1273 			 kvm->arch.model.fac_list[2]);
1274 	} else
1275 		ret = -EFAULT;
1276 	kfree(proc);
1277 out:
1278 	mutex_unlock(&kvm->lock);
1279 	return ret;
1280 }
1281 
1282 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1283 				       struct kvm_device_attr *attr)
1284 {
1285 	struct kvm_s390_vm_cpu_feat data;
1286 
1287 	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1288 		return -EFAULT;
1289 	if (!bitmap_subset((unsigned long *) data.feat,
1290 			   kvm_s390_available_cpu_feat,
1291 			   KVM_S390_VM_CPU_FEAT_NR_BITS))
1292 		return -EINVAL;
1293 
1294 	mutex_lock(&kvm->lock);
1295 	if (kvm->created_vcpus) {
1296 		mutex_unlock(&kvm->lock);
1297 		return -EBUSY;
1298 	}
1299 	bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1300 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1301 	mutex_unlock(&kvm->lock);
1302 	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1303 			 data.feat[0],
1304 			 data.feat[1],
1305 			 data.feat[2]);
1306 	return 0;
1307 }
1308 
1309 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1310 					  struct kvm_device_attr *attr)
1311 {
1312 	mutex_lock(&kvm->lock);
1313 	if (kvm->created_vcpus) {
1314 		mutex_unlock(&kvm->lock);
1315 		return -EBUSY;
1316 	}
1317 
1318 	if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1319 			   sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1320 		mutex_unlock(&kvm->lock);
1321 		return -EFAULT;
1322 	}
1323 	mutex_unlock(&kvm->lock);
1324 
1325 	VM_EVENT(kvm, 3, "SET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1326 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1327 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1328 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1329 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1330 	VM_EVENT(kvm, 3, "SET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1331 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1332 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1333 	VM_EVENT(kvm, 3, "SET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1334 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1335 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1336 	VM_EVENT(kvm, 3, "SET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1337 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1338 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1339 	VM_EVENT(kvm, 3, "SET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1340 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1341 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1342 	VM_EVENT(kvm, 3, "SET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1343 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1344 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1345 	VM_EVENT(kvm, 3, "SET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1346 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1347 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1348 	VM_EVENT(kvm, 3, "SET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1349 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1350 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1351 	VM_EVENT(kvm, 3, "SET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1352 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1353 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1354 	VM_EVENT(kvm, 3, "SET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1355 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1356 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1357 	VM_EVENT(kvm, 3, "SET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1358 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1359 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1360 	VM_EVENT(kvm, 3, "SET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1361 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1362 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1363 	VM_EVENT(kvm, 3, "SET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1364 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1365 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1366 	VM_EVENT(kvm, 3, "SET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1367 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1368 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1369 	VM_EVENT(kvm, 3, "SET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1370 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1371 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1372 	VM_EVENT(kvm, 3, "SET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1373 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1374 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1375 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1376 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1377 	VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1378 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1379 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1380 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1381 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1382 
1383 	return 0;
1384 }
1385 
1386 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1387 {
1388 	int ret = -ENXIO;
1389 
1390 	switch (attr->attr) {
1391 	case KVM_S390_VM_CPU_PROCESSOR:
1392 		ret = kvm_s390_set_processor(kvm, attr);
1393 		break;
1394 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1395 		ret = kvm_s390_set_processor_feat(kvm, attr);
1396 		break;
1397 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1398 		ret = kvm_s390_set_processor_subfunc(kvm, attr);
1399 		break;
1400 	}
1401 	return ret;
1402 }
1403 
1404 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1405 {
1406 	struct kvm_s390_vm_cpu_processor *proc;
1407 	int ret = 0;
1408 
1409 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1410 	if (!proc) {
1411 		ret = -ENOMEM;
1412 		goto out;
1413 	}
1414 	proc->cpuid = kvm->arch.model.cpuid;
1415 	proc->ibc = kvm->arch.model.ibc;
1416 	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1417 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1418 	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1419 		 kvm->arch.model.ibc,
1420 		 kvm->arch.model.cpuid);
1421 	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1422 		 kvm->arch.model.fac_list[0],
1423 		 kvm->arch.model.fac_list[1],
1424 		 kvm->arch.model.fac_list[2]);
1425 	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1426 		ret = -EFAULT;
1427 	kfree(proc);
1428 out:
1429 	return ret;
1430 }
1431 
1432 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1433 {
1434 	struct kvm_s390_vm_cpu_machine *mach;
1435 	int ret = 0;
1436 
1437 	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1438 	if (!mach) {
1439 		ret = -ENOMEM;
1440 		goto out;
1441 	}
1442 	get_cpu_id((struct cpuid *) &mach->cpuid);
1443 	mach->ibc = sclp.ibc;
1444 	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1445 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1446 	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1447 	       sizeof(S390_lowcore.stfle_fac_list));
1448 	VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1449 		 kvm->arch.model.ibc,
1450 		 kvm->arch.model.cpuid);
1451 	VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1452 		 mach->fac_mask[0],
1453 		 mach->fac_mask[1],
1454 		 mach->fac_mask[2]);
1455 	VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1456 		 mach->fac_list[0],
1457 		 mach->fac_list[1],
1458 		 mach->fac_list[2]);
1459 	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1460 		ret = -EFAULT;
1461 	kfree(mach);
1462 out:
1463 	return ret;
1464 }
1465 
1466 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1467 				       struct kvm_device_attr *attr)
1468 {
1469 	struct kvm_s390_vm_cpu_feat data;
1470 
1471 	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1472 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1473 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1474 		return -EFAULT;
1475 	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1476 			 data.feat[0],
1477 			 data.feat[1],
1478 			 data.feat[2]);
1479 	return 0;
1480 }
1481 
1482 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1483 				     struct kvm_device_attr *attr)
1484 {
1485 	struct kvm_s390_vm_cpu_feat data;
1486 
1487 	bitmap_copy((unsigned long *) data.feat,
1488 		    kvm_s390_available_cpu_feat,
1489 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1490 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1491 		return -EFAULT;
1492 	VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1493 			 data.feat[0],
1494 			 data.feat[1],
1495 			 data.feat[2]);
1496 	return 0;
1497 }
1498 
1499 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1500 					  struct kvm_device_attr *attr)
1501 {
1502 	if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1503 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1504 		return -EFAULT;
1505 
1506 	VM_EVENT(kvm, 3, "GET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1507 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1508 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1509 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1510 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1511 	VM_EVENT(kvm, 3, "GET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1512 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1513 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1514 	VM_EVENT(kvm, 3, "GET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1515 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1516 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1517 	VM_EVENT(kvm, 3, "GET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1518 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1519 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1520 	VM_EVENT(kvm, 3, "GET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1521 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1522 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1523 	VM_EVENT(kvm, 3, "GET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1524 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1525 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1526 	VM_EVENT(kvm, 3, "GET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1527 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1528 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1529 	VM_EVENT(kvm, 3, "GET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1530 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1531 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1532 	VM_EVENT(kvm, 3, "GET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1533 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1534 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1535 	VM_EVENT(kvm, 3, "GET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1536 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1537 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1538 	VM_EVENT(kvm, 3, "GET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1539 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1540 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1541 	VM_EVENT(kvm, 3, "GET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1542 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1543 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1544 	VM_EVENT(kvm, 3, "GET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1545 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1546 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1547 	VM_EVENT(kvm, 3, "GET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1548 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1549 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1550 	VM_EVENT(kvm, 3, "GET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1551 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1552 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1553 	VM_EVENT(kvm, 3, "GET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1554 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1555 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1556 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1557 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1558 	VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1559 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1560 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1561 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1562 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1563 
1564 	return 0;
1565 }
1566 
1567 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1568 					struct kvm_device_attr *attr)
1569 {
1570 	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1571 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1572 		return -EFAULT;
1573 
1574 	VM_EVENT(kvm, 3, "GET: host  PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1575 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1576 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1577 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1578 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1579 	VM_EVENT(kvm, 3, "GET: host  PTFF   subfunc 0x%16.16lx.%16.16lx",
1580 		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1581 		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1582 	VM_EVENT(kvm, 3, "GET: host  KMAC   subfunc 0x%16.16lx.%16.16lx",
1583 		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1584 		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1585 	VM_EVENT(kvm, 3, "GET: host  KMC    subfunc 0x%16.16lx.%16.16lx",
1586 		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1587 		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1588 	VM_EVENT(kvm, 3, "GET: host  KM     subfunc 0x%16.16lx.%16.16lx",
1589 		 ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1590 		 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1591 	VM_EVENT(kvm, 3, "GET: host  KIMD   subfunc 0x%16.16lx.%16.16lx",
1592 		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1593 		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1594 	VM_EVENT(kvm, 3, "GET: host  KLMD   subfunc 0x%16.16lx.%16.16lx",
1595 		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1596 		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1597 	VM_EVENT(kvm, 3, "GET: host  PCKMO  subfunc 0x%16.16lx.%16.16lx",
1598 		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1599 		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1600 	VM_EVENT(kvm, 3, "GET: host  KMCTR  subfunc 0x%16.16lx.%16.16lx",
1601 		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1602 		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1603 	VM_EVENT(kvm, 3, "GET: host  KMF    subfunc 0x%16.16lx.%16.16lx",
1604 		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1605 		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1606 	VM_EVENT(kvm, 3, "GET: host  KMO    subfunc 0x%16.16lx.%16.16lx",
1607 		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1608 		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1609 	VM_EVENT(kvm, 3, "GET: host  PCC    subfunc 0x%16.16lx.%16.16lx",
1610 		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1611 		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1612 	VM_EVENT(kvm, 3, "GET: host  PPNO   subfunc 0x%16.16lx.%16.16lx",
1613 		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1614 		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1615 	VM_EVENT(kvm, 3, "GET: host  KMA    subfunc 0x%16.16lx.%16.16lx",
1616 		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1617 		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1618 	VM_EVENT(kvm, 3, "GET: host  KDSA   subfunc 0x%16.16lx.%16.16lx",
1619 		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1620 		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1621 	VM_EVENT(kvm, 3, "GET: host  SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1622 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1623 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1624 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1625 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1626 	VM_EVENT(kvm, 3, "GET: host  DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1627 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1628 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1629 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1630 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1631 
1632 	return 0;
1633 }
1634 
1635 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1636 {
1637 	int ret = -ENXIO;
1638 
1639 	switch (attr->attr) {
1640 	case KVM_S390_VM_CPU_PROCESSOR:
1641 		ret = kvm_s390_get_processor(kvm, attr);
1642 		break;
1643 	case KVM_S390_VM_CPU_MACHINE:
1644 		ret = kvm_s390_get_machine(kvm, attr);
1645 		break;
1646 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1647 		ret = kvm_s390_get_processor_feat(kvm, attr);
1648 		break;
1649 	case KVM_S390_VM_CPU_MACHINE_FEAT:
1650 		ret = kvm_s390_get_machine_feat(kvm, attr);
1651 		break;
1652 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1653 		ret = kvm_s390_get_processor_subfunc(kvm, attr);
1654 		break;
1655 	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1656 		ret = kvm_s390_get_machine_subfunc(kvm, attr);
1657 		break;
1658 	}
1659 	return ret;
1660 }
1661 
1662 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1663 {
1664 	int ret;
1665 
1666 	switch (attr->group) {
1667 	case KVM_S390_VM_MEM_CTRL:
1668 		ret = kvm_s390_set_mem_control(kvm, attr);
1669 		break;
1670 	case KVM_S390_VM_TOD:
1671 		ret = kvm_s390_set_tod(kvm, attr);
1672 		break;
1673 	case KVM_S390_VM_CPU_MODEL:
1674 		ret = kvm_s390_set_cpu_model(kvm, attr);
1675 		break;
1676 	case KVM_S390_VM_CRYPTO:
1677 		ret = kvm_s390_vm_set_crypto(kvm, attr);
1678 		break;
1679 	case KVM_S390_VM_MIGRATION:
1680 		ret = kvm_s390_vm_set_migration(kvm, attr);
1681 		break;
1682 	default:
1683 		ret = -ENXIO;
1684 		break;
1685 	}
1686 
1687 	return ret;
1688 }
1689 
1690 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1691 {
1692 	int ret;
1693 
1694 	switch (attr->group) {
1695 	case KVM_S390_VM_MEM_CTRL:
1696 		ret = kvm_s390_get_mem_control(kvm, attr);
1697 		break;
1698 	case KVM_S390_VM_TOD:
1699 		ret = kvm_s390_get_tod(kvm, attr);
1700 		break;
1701 	case KVM_S390_VM_CPU_MODEL:
1702 		ret = kvm_s390_get_cpu_model(kvm, attr);
1703 		break;
1704 	case KVM_S390_VM_MIGRATION:
1705 		ret = kvm_s390_vm_get_migration(kvm, attr);
1706 		break;
1707 	default:
1708 		ret = -ENXIO;
1709 		break;
1710 	}
1711 
1712 	return ret;
1713 }
1714 
1715 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1716 {
1717 	int ret;
1718 
1719 	switch (attr->group) {
1720 	case KVM_S390_VM_MEM_CTRL:
1721 		switch (attr->attr) {
1722 		case KVM_S390_VM_MEM_ENABLE_CMMA:
1723 		case KVM_S390_VM_MEM_CLR_CMMA:
1724 			ret = sclp.has_cmma ? 0 : -ENXIO;
1725 			break;
1726 		case KVM_S390_VM_MEM_LIMIT_SIZE:
1727 			ret = 0;
1728 			break;
1729 		default:
1730 			ret = -ENXIO;
1731 			break;
1732 		}
1733 		break;
1734 	case KVM_S390_VM_TOD:
1735 		switch (attr->attr) {
1736 		case KVM_S390_VM_TOD_LOW:
1737 		case KVM_S390_VM_TOD_HIGH:
1738 			ret = 0;
1739 			break;
1740 		default:
1741 			ret = -ENXIO;
1742 			break;
1743 		}
1744 		break;
1745 	case KVM_S390_VM_CPU_MODEL:
1746 		switch (attr->attr) {
1747 		case KVM_S390_VM_CPU_PROCESSOR:
1748 		case KVM_S390_VM_CPU_MACHINE:
1749 		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1750 		case KVM_S390_VM_CPU_MACHINE_FEAT:
1751 		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1752 		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1753 			ret = 0;
1754 			break;
1755 		default:
1756 			ret = -ENXIO;
1757 			break;
1758 		}
1759 		break;
1760 	case KVM_S390_VM_CRYPTO:
1761 		switch (attr->attr) {
1762 		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1763 		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1764 		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1765 		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1766 			ret = 0;
1767 			break;
1768 		case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1769 		case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1770 			ret = ap_instructions_available() ? 0 : -ENXIO;
1771 			break;
1772 		default:
1773 			ret = -ENXIO;
1774 			break;
1775 		}
1776 		break;
1777 	case KVM_S390_VM_MIGRATION:
1778 		ret = 0;
1779 		break;
1780 	default:
1781 		ret = -ENXIO;
1782 		break;
1783 	}
1784 
1785 	return ret;
1786 }
1787 
1788 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1789 {
1790 	uint8_t *keys;
1791 	uint64_t hva;
1792 	int srcu_idx, i, r = 0;
1793 
1794 	if (args->flags != 0)
1795 		return -EINVAL;
1796 
1797 	/* Is this guest using storage keys? */
1798 	if (!mm_uses_skeys(current->mm))
1799 		return KVM_S390_GET_SKEYS_NONE;
1800 
1801 	/* Enforce sane limit on memory allocation */
1802 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1803 		return -EINVAL;
1804 
1805 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1806 	if (!keys)
1807 		return -ENOMEM;
1808 
1809 	down_read(&current->mm->mmap_sem);
1810 	srcu_idx = srcu_read_lock(&kvm->srcu);
1811 	for (i = 0; i < args->count; i++) {
1812 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1813 		if (kvm_is_error_hva(hva)) {
1814 			r = -EFAULT;
1815 			break;
1816 		}
1817 
1818 		r = get_guest_storage_key(current->mm, hva, &keys[i]);
1819 		if (r)
1820 			break;
1821 	}
1822 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1823 	up_read(&current->mm->mmap_sem);
1824 
1825 	if (!r) {
1826 		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1827 				 sizeof(uint8_t) * args->count);
1828 		if (r)
1829 			r = -EFAULT;
1830 	}
1831 
1832 	kvfree(keys);
1833 	return r;
1834 }
1835 
1836 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1837 {
1838 	uint8_t *keys;
1839 	uint64_t hva;
1840 	int srcu_idx, i, r = 0;
1841 	bool unlocked;
1842 
1843 	if (args->flags != 0)
1844 		return -EINVAL;
1845 
1846 	/* Enforce sane limit on memory allocation */
1847 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1848 		return -EINVAL;
1849 
1850 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1851 	if (!keys)
1852 		return -ENOMEM;
1853 
1854 	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1855 			   sizeof(uint8_t) * args->count);
1856 	if (r) {
1857 		r = -EFAULT;
1858 		goto out;
1859 	}
1860 
1861 	/* Enable storage key handling for the guest */
1862 	r = s390_enable_skey();
1863 	if (r)
1864 		goto out;
1865 
1866 	i = 0;
1867 	down_read(&current->mm->mmap_sem);
1868 	srcu_idx = srcu_read_lock(&kvm->srcu);
1869         while (i < args->count) {
1870 		unlocked = false;
1871 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1872 		if (kvm_is_error_hva(hva)) {
1873 			r = -EFAULT;
1874 			break;
1875 		}
1876 
1877 		/* Lowest order bit is reserved */
1878 		if (keys[i] & 0x01) {
1879 			r = -EINVAL;
1880 			break;
1881 		}
1882 
1883 		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1884 		if (r) {
1885 			r = fixup_user_fault(current, current->mm, hva,
1886 					     FAULT_FLAG_WRITE, &unlocked);
1887 			if (r)
1888 				break;
1889 		}
1890 		if (!r)
1891 			i++;
1892 	}
1893 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1894 	up_read(&current->mm->mmap_sem);
1895 out:
1896 	kvfree(keys);
1897 	return r;
1898 }
1899 
/*
 * Every block that is sent starts with its base address and length, so it is
 * cheaper to include a short run of clean values in the current block than to
 * start a new one, as long as that run is shorter than the size of two longs.
 */
#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
/* Use the same upper bound for CMMA transfers as for storage key transfers */
#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1908 
1909 /*
1910  * Similar to gfn_to_memslot, but returns the index of a memslot also when the
1911  * address falls in a hole. In that case the index of one of the memslots
1912  * bordering the hole is returned.
1913  */
1914 static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1915 {
1916 	int start = 0, end = slots->used_slots;
1917 	int slot = atomic_read(&slots->lru_slot);
1918 	struct kvm_memory_slot *memslots = slots->memslots;
1919 
1920 	if (gfn >= memslots[slot].base_gfn &&
1921 	    gfn < memslots[slot].base_gfn + memslots[slot].npages)
1922 		return slot;
1923 
1924 	while (start < end) {
1925 		slot = start + (end - start) / 2;
1926 
1927 		if (gfn >= memslots[slot].base_gfn)
1928 			end = slot;
1929 		else
1930 			start = slot + 1;
1931 	}
1932 
1933 	if (gfn >= memslots[start].base_gfn &&
1934 	    gfn < memslots[start].base_gfn + memslots[start].npages) {
1935 		atomic_set(&slots->lru_slot, start);
1936 	}
1937 
1938 	return start;
1939 }
1940 
1941 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1942 			      u8 *res, unsigned long bufsize)
1943 {
1944 	unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1945 
1946 	args->count = 0;
1947 	while (args->count < bufsize) {
1948 		hva = gfn_to_hva(kvm, cur_gfn);
1949 		/*
1950 		 * We return an error if the first value was invalid, but we
1951 		 * return successfully if at least one value was copied.
1952 		 */
1953 		if (kvm_is_error_hva(hva))
1954 			return args->count ? 0 : -EFAULT;
1955 		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1956 			pgstev = 0;
1957 		res[args->count++] = (pgstev >> 24) & 0x43;
1958 		cur_gfn++;
1959 	}
1960 
1961 	return 0;
1962 }
1963 
1964 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
1965 					      unsigned long cur_gfn)
1966 {
1967 	int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
1968 	struct kvm_memory_slot *ms = slots->memslots + slotidx;
1969 	unsigned long ofs = cur_gfn - ms->base_gfn;
1970 
1971 	if (ms->base_gfn + ms->npages <= cur_gfn) {
1972 		slotidx--;
1973 		/* If we are above the highest slot, wrap around */
1974 		if (slotidx < 0)
1975 			slotidx = slots->used_slots - 1;
1976 
1977 		ms = slots->memslots + slotidx;
1978 		ofs = 0;
1979 	}
1980 	ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
1981 	while ((slotidx > 0) && (ofs >= ms->npages)) {
1982 		slotidx--;
1983 		ms = slots->memslots + slotidx;
1984 		ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
1985 	}
1986 	return ms->base_gfn + ofs;
1987 }
1988 
1989 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1990 			     u8 *res, unsigned long bufsize)
1991 {
1992 	unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
1993 	struct kvm_memslots *slots = kvm_memslots(kvm);
1994 	struct kvm_memory_slot *ms;
1995 
1996 	cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
1997 	ms = gfn_to_memslot(kvm, cur_gfn);
1998 	args->count = 0;
1999 	args->start_gfn = cur_gfn;
2000 	if (!ms)
2001 		return 0;
2002 	next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2003 	mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
2004 
2005 	while (args->count < bufsize) {
2006 		hva = gfn_to_hva(kvm, cur_gfn);
2007 		if (kvm_is_error_hva(hva))
2008 			return 0;
2009 		/* Decrement only if we actually flipped the bit to 0 */
2010 		if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2011 			atomic64_dec(&kvm->arch.cmma_dirty_pages);
2012 		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2013 			pgstev = 0;
2014 		/* Save the value */
2015 		res[args->count++] = (pgstev >> 24) & 0x43;
2016 		/* If the next bit is too far away, stop. */
2017 		if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2018 			return 0;
2019 		/* If we reached the previous "next", find the next one */
2020 		if (cur_gfn == next_gfn)
2021 			next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2022 		/* Reached the end of memory or of the buffer, stop */
2023 		if ((next_gfn >= mem_end) ||
2024 		    (next_gfn - args->start_gfn >= bufsize))
2025 			return 0;
2026 		cur_gfn++;
2027 		/* Reached the end of the current memslot, take the next one. */
2028 		if (cur_gfn - ms->base_gfn >= ms->npages) {
2029 			ms = gfn_to_memslot(kvm, cur_gfn);
2030 			if (!ms)
2031 				return 0;
2032 		}
2033 	}
2034 	return 0;
2035 }
2036 
2037 /*
2038  * This function searches for the next page with dirty CMMA attributes, and
2039  * saves the attributes in the buffer up to either the end of the buffer or
2040  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2041  * no trailing clean bytes are saved.
2042  * In case no dirty bits were found, or if CMMA was not enabled or used, the
2043  * output buffer will indicate 0 as length.
2044  */
2045 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2046 				  struct kvm_s390_cmma_log *args)
2047 {
2048 	unsigned long bufsize;
2049 	int srcu_idx, peek, ret;
2050 	u8 *values;
2051 
2052 	if (!kvm->arch.use_cmma)
2053 		return -ENXIO;
2054 	/* Invalid/unsupported flags were specified */
2055 	if (args->flags & ~KVM_S390_CMMA_PEEK)
2056 		return -EINVAL;
2057 	/* Migration mode query, and we are not doing a migration */
2058 	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2059 	if (!peek && !kvm->arch.migration_mode)
2060 		return -EINVAL;
2061 	/* CMMA is disabled or was not used, or the buffer has length zero */
2062 	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2063 	if (!bufsize || !kvm->mm->context.uses_cmm) {
2064 		memset(args, 0, sizeof(*args));
2065 		return 0;
2066 	}
2067 	/* We are not peeking, and there are no dirty pages */
2068 	if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2069 		memset(args, 0, sizeof(*args));
2070 		return 0;
2071 	}
2072 
2073 	values = vmalloc(bufsize);
2074 	if (!values)
2075 		return -ENOMEM;
2076 
2077 	down_read(&kvm->mm->mmap_sem);
2078 	srcu_idx = srcu_read_lock(&kvm->srcu);
2079 	if (peek)
2080 		ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2081 	else
2082 		ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2083 	srcu_read_unlock(&kvm->srcu, srcu_idx);
2084 	up_read(&kvm->mm->mmap_sem);
2085 
2086 	if (kvm->arch.migration_mode)
2087 		args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2088 	else
2089 		args->remaining = 0;
2090 
2091 	if (copy_to_user((void __user *)args->values, values, args->count))
2092 		ret = -EFAULT;
2093 
2094 	vfree(values);
2095 	return ret;
2096 }
2097 
2098 /*
2099  * This function sets the CMMA attributes for the given pages. If the input
2100  * buffer has zero length, no action is taken, otherwise the attributes are
2101  * set and the mm->context.uses_cmm flag is set.
2102  */
2103 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2104 				  const struct kvm_s390_cmma_log *args)
2105 {
2106 	unsigned long hva, mask, pgstev, i;
2107 	uint8_t *bits;
2108 	int srcu_idx, r = 0;
2109 
2110 	mask = args->mask;
2111 
2112 	if (!kvm->arch.use_cmma)
2113 		return -ENXIO;
2114 	/* invalid/unsupported flags */
2115 	if (args->flags != 0)
2116 		return -EINVAL;
2117 	/* Enforce sane limit on memory allocation */
2118 	if (args->count > KVM_S390_CMMA_SIZE_MAX)
2119 		return -EINVAL;
2120 	/* Nothing to do */
2121 	if (args->count == 0)
2122 		return 0;
2123 
2124 	bits = vmalloc(array_size(sizeof(*bits), args->count));
2125 	if (!bits)
2126 		return -ENOMEM;
2127 
2128 	r = copy_from_user(bits, (void __user *)args->values, args->count);
2129 	if (r) {
2130 		r = -EFAULT;
2131 		goto out;
2132 	}
2133 
2134 	down_read(&kvm->mm->mmap_sem);
2135 	srcu_idx = srcu_read_lock(&kvm->srcu);
2136 	for (i = 0; i < args->count; i++) {
2137 		hva = gfn_to_hva(kvm, args->start_gfn + i);
2138 		if (kvm_is_error_hva(hva)) {
2139 			r = -EFAULT;
2140 			break;
2141 		}
2142 
2143 		pgstev = bits[i];
2144 		pgstev = pgstev << 24;
2145 		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2146 		set_pgste_bits(kvm->mm, hva, mask, pgstev);
2147 	}
2148 	srcu_read_unlock(&kvm->srcu, srcu_idx);
2149 	up_read(&kvm->mm->mmap_sem);
2150 
2151 	if (!kvm->mm->context.uses_cmm) {
2152 		down_write(&kvm->mm->mmap_sem);
2153 		kvm->mm->context.uses_cmm = 1;
2154 		up_write(&kvm->mm->mmap_sem);
2155 	}
2156 out:
2157 	vfree(bits);
2158 	return r;
2159 }
2160 
2161 long kvm_arch_vm_ioctl(struct file *filp,
2162 		       unsigned int ioctl, unsigned long arg)
2163 {
2164 	struct kvm *kvm = filp->private_data;
2165 	void __user *argp = (void __user *)arg;
2166 	struct kvm_device_attr attr;
2167 	int r;
2168 
2169 	switch (ioctl) {
2170 	case KVM_S390_INTERRUPT: {
2171 		struct kvm_s390_interrupt s390int;
2172 
2173 		r = -EFAULT;
2174 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
2175 			break;
2176 		r = kvm_s390_inject_vm(kvm, &s390int);
2177 		break;
2178 	}
2179 	case KVM_CREATE_IRQCHIP: {
2180 		struct kvm_irq_routing_entry routing;
2181 
2182 		r = -EINVAL;
2183 		if (kvm->arch.use_irqchip) {
2184 			/* Set up dummy routing. */
2185 			memset(&routing, 0, sizeof(routing));
2186 			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2187 		}
2188 		break;
2189 	}
2190 	case KVM_SET_DEVICE_ATTR: {
2191 		r = -EFAULT;
2192 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2193 			break;
2194 		r = kvm_s390_vm_set_attr(kvm, &attr);
2195 		break;
2196 	}
2197 	case KVM_GET_DEVICE_ATTR: {
2198 		r = -EFAULT;
2199 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2200 			break;
2201 		r = kvm_s390_vm_get_attr(kvm, &attr);
2202 		break;
2203 	}
2204 	case KVM_HAS_DEVICE_ATTR: {
2205 		r = -EFAULT;
2206 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2207 			break;
2208 		r = kvm_s390_vm_has_attr(kvm, &attr);
2209 		break;
2210 	}
2211 	case KVM_S390_GET_SKEYS: {
2212 		struct kvm_s390_skeys args;
2213 
2214 		r = -EFAULT;
2215 		if (copy_from_user(&args, argp,
2216 				   sizeof(struct kvm_s390_skeys)))
2217 			break;
2218 		r = kvm_s390_get_skeys(kvm, &args);
2219 		break;
2220 	}
2221 	case KVM_S390_SET_SKEYS: {
2222 		struct kvm_s390_skeys args;
2223 
2224 		r = -EFAULT;
2225 		if (copy_from_user(&args, argp,
2226 				   sizeof(struct kvm_s390_skeys)))
2227 			break;
2228 		r = kvm_s390_set_skeys(kvm, &args);
2229 		break;
2230 	}
2231 	case KVM_S390_GET_CMMA_BITS: {
2232 		struct kvm_s390_cmma_log args;
2233 
2234 		r = -EFAULT;
2235 		if (copy_from_user(&args, argp, sizeof(args)))
2236 			break;
2237 		mutex_lock(&kvm->slots_lock);
2238 		r = kvm_s390_get_cmma_bits(kvm, &args);
2239 		mutex_unlock(&kvm->slots_lock);
2240 		if (!r) {
2241 			r = copy_to_user(argp, &args, sizeof(args));
2242 			if (r)
2243 				r = -EFAULT;
2244 		}
2245 		break;
2246 	}
2247 	case KVM_S390_SET_CMMA_BITS: {
2248 		struct kvm_s390_cmma_log args;
2249 
2250 		r = -EFAULT;
2251 		if (copy_from_user(&args, argp, sizeof(args)))
2252 			break;
2253 		mutex_lock(&kvm->slots_lock);
2254 		r = kvm_s390_set_cmma_bits(kvm, &args);
2255 		mutex_unlock(&kvm->slots_lock);
2256 		break;
2257 	}
2258 	default:
2259 		r = -ENOTTY;
2260 	}
2261 
2262 	return r;
2263 }
2264 
2265 static int kvm_s390_apxa_installed(void)
2266 {
2267 	struct ap_config_info info;
2268 
2269 	if (ap_instructions_available()) {
2270 		if (ap_qci(&info) == 0)
2271 			return info.apxa;
2272 	}
2273 
2274 	return 0;
2275 }
2276 
2277 /*
2278  * The format of the crypto control block (CRYCB) is specified in the 3 low
2279  * order bits of the CRYCB designation (CRYCBD) field as follows:
2280  * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2281  *	     AP extended addressing (APXA) facility are installed.
2282  * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2283  * Format 2: Both the APXA and MSAX3 facilities are installed
2284  */
2285 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2286 {
2287 	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2288 
2289 	/* Clear the CRYCB format bits - i.e., set format 0 by default */
2290 	kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2291 
2292 	/* Check whether MSAX3 is installed */
2293 	if (!test_kvm_facility(kvm, 76))
2294 		return;
2295 
2296 	if (kvm_s390_apxa_installed())
2297 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2298 	else
2299 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2300 }
2301 
2302 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2303 			       unsigned long *aqm, unsigned long *adm)
2304 {
2305 	struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2306 
2307 	mutex_lock(&kvm->lock);
2308 	kvm_s390_vcpu_block_all(kvm);
2309 
2310 	switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2311 	case CRYCB_FORMAT2: /* APCB1 use 256 bits */
2312 		memcpy(crycb->apcb1.apm, apm, 32);
2313 		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2314 			 apm[0], apm[1], apm[2], apm[3]);
2315 		memcpy(crycb->apcb1.aqm, aqm, 32);
2316 		VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2317 			 aqm[0], aqm[1], aqm[2], aqm[3]);
2318 		memcpy(crycb->apcb1.adm, adm, 32);
2319 		VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2320 			 adm[0], adm[1], adm[2], adm[3]);
2321 		break;
2322 	case CRYCB_FORMAT1:
2323 	case CRYCB_FORMAT0: /* Fall through both use APCB0 */
2324 		memcpy(crycb->apcb0.apm, apm, 8);
2325 		memcpy(crycb->apcb0.aqm, aqm, 2);
2326 		memcpy(crycb->apcb0.adm, adm, 2);
2327 		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2328 			 apm[0], *((unsigned short *)aqm),
2329 			 *((unsigned short *)adm));
2330 		break;
2331 	default:	/* Can not happen */
2332 		break;
2333 	}
2334 
2335 	/* recreate the shadow crycb for each vcpu */
2336 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2337 	kvm_s390_vcpu_unblock_all(kvm);
2338 	mutex_unlock(&kvm->lock);
2339 }
2340 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
2341 
2342 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2343 {
2344 	mutex_lock(&kvm->lock);
2345 	kvm_s390_vcpu_block_all(kvm);
2346 
2347 	memset(&kvm->arch.crypto.crycb->apcb0, 0,
2348 	       sizeof(kvm->arch.crypto.crycb->apcb0));
2349 	memset(&kvm->arch.crypto.crycb->apcb1, 0,
2350 	       sizeof(kvm->arch.crypto.crycb->apcb1));
2351 
2352 	VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2353 	/* recreate the shadow crycb for each vcpu */
2354 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2355 	kvm_s390_vcpu_unblock_all(kvm);
2356 	mutex_unlock(&kvm->lock);
2357 }
2358 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2359 
2360 static u64 kvm_s390_get_initial_cpuid(void)
2361 {
2362 	struct cpuid cpuid;
2363 
2364 	get_cpu_id(&cpuid);
2365 	cpuid.version = 0xff;
2366 	return *((u64 *) &cpuid);
2367 }
2368 
2369 static void kvm_s390_crypto_init(struct kvm *kvm)
2370 {
2371 	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2372 	kvm_s390_set_crycb_format(kvm);
2373 
2374 	if (!test_kvm_facility(kvm, 76))
2375 		return;
2376 
2377 	/* Enable AES/DEA protected key functions by default */
2378 	kvm->arch.crypto.aes_kw = 1;
2379 	kvm->arch.crypto.dea_kw = 1;
2380 	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2381 			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2382 	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2383 			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2384 }
2385 
2386 static void sca_dispose(struct kvm *kvm)
2387 {
2388 	if (kvm->arch.use_esca)
2389 		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2390 	else
2391 		free_page((unsigned long)(kvm->arch.sca));
2392 	kvm->arch.sca = NULL;
2393 }
2394 
2395 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2396 {
2397 	gfp_t alloc_flags = GFP_KERNEL;
2398 	int i, rc;
2399 	char debug_name[16];
2400 	static unsigned long sca_offset;
2401 
2402 	rc = -EINVAL;
2403 #ifdef CONFIG_KVM_S390_UCONTROL
2404 	if (type & ~KVM_VM_S390_UCONTROL)
2405 		goto out_err;
2406 	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2407 		goto out_err;
2408 #else
2409 	if (type)
2410 		goto out_err;
2411 #endif
2412 
2413 	rc = s390_enable_sie();
2414 	if (rc)
2415 		goto out_err;
2416 
2417 	rc = -ENOMEM;
2418 
2419 	if (!sclp.has_64bscao)
2420 		alloc_flags |= GFP_DMA;
2421 	rwlock_init(&kvm->arch.sca_lock);
2422 	/* start with basic SCA */
2423 	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2424 	if (!kvm->arch.sca)
2425 		goto out_err;
2426 	mutex_lock(&kvm_lock);
2427 	sca_offset += 16;
2428 	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2429 		sca_offset = 0;
2430 	kvm->arch.sca = (struct bsca_block *)
2431 			((char *) kvm->arch.sca + sca_offset);
2432 	mutex_unlock(&kvm_lock);
2433 
2434 	sprintf(debug_name, "kvm-%u", current->pid);
2435 
2436 	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2437 	if (!kvm->arch.dbf)
2438 		goto out_err;
2439 
2440 	BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2441 	kvm->arch.sie_page2 =
2442 	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
2443 	if (!kvm->arch.sie_page2)
2444 		goto out_err;
2445 
2446 	kvm->arch.sie_page2->kvm = kvm;
2447 	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2448 
2449 	for (i = 0; i < kvm_s390_fac_size(); i++) {
2450 		kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
2451 					      (kvm_s390_fac_base[i] |
2452 					       kvm_s390_fac_ext[i]);
2453 		kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
2454 					      kvm_s390_fac_base[i];
2455 	}
2456 	kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2457 
2458 	/* we are always in czam mode - even on pre z14 machines */
2459 	set_kvm_facility(kvm->arch.model.fac_mask, 138);
2460 	set_kvm_facility(kvm->arch.model.fac_list, 138);
2461 	/* we emulate STHYI in kvm */
2462 	set_kvm_facility(kvm->arch.model.fac_mask, 74);
2463 	set_kvm_facility(kvm->arch.model.fac_list, 74);
2464 	if (MACHINE_HAS_TLB_GUEST) {
2465 		set_kvm_facility(kvm->arch.model.fac_mask, 147);
2466 		set_kvm_facility(kvm->arch.model.fac_list, 147);
2467 	}
2468 
2469 	if (css_general_characteristics.aiv && test_facility(65))
2470 		set_kvm_facility(kvm->arch.model.fac_mask, 65);
2471 
2472 	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2473 	kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2474 
2475 	kvm_s390_crypto_init(kvm);
2476 
2477 	mutex_init(&kvm->arch.float_int.ais_lock);
2478 	spin_lock_init(&kvm->arch.float_int.lock);
2479 	for (i = 0; i < FIRQ_LIST_COUNT; i++)
2480 		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2481 	init_waitqueue_head(&kvm->arch.ipte_wq);
2482 	mutex_init(&kvm->arch.ipte_mutex);
2483 
2484 	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2485 	VM_EVENT(kvm, 3, "vm created with type %lu", type);
2486 
2487 	if (type & KVM_VM_S390_UCONTROL) {
2488 		kvm->arch.gmap = NULL;
2489 		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2490 	} else {
2491 		if (sclp.hamax == U64_MAX)
2492 			kvm->arch.mem_limit = TASK_SIZE_MAX;
2493 		else
2494 			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2495 						    sclp.hamax + 1);
2496 		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2497 		if (!kvm->arch.gmap)
2498 			goto out_err;
2499 		kvm->arch.gmap->private = kvm;
2500 		kvm->arch.gmap->pfault_enabled = 0;
2501 	}
2502 
2503 	kvm->arch.use_pfmfi = sclp.has_pfmfi;
2504 	kvm->arch.use_skf = sclp.has_skey;
2505 	spin_lock_init(&kvm->arch.start_stop_lock);
2506 	kvm_s390_vsie_init(kvm);
2507 	kvm_s390_gisa_init(kvm);
2508 	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2509 
2510 	return 0;
2511 out_err:
2512 	free_page((unsigned long)kvm->arch.sie_page2);
2513 	debug_unregister(kvm->arch.dbf);
2514 	sca_dispose(kvm);
2515 	KVM_EVENT(3, "creation of vm failed: %d", rc);
2516 	return rc;
2517 }
2518 
/* This architecture provides no per-vcpu debugfs entries: always false. */
bool kvm_arch_has_vcpu_debugfs(void)
{
	const bool has_debugfs = false;

	return has_debugfs;
}
2523 
/* Nothing to create for a vcpu; the argument is unused. Always returns 0. */
int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
{
	const int rc = 0;

	return rc;
}
2528 
2529 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2530 {
2531 	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2532 	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2533 	kvm_s390_clear_local_irqs(vcpu);
2534 	kvm_clear_async_pf_completion_queue(vcpu);
2535 	if (!kvm_is_ucontrol(vcpu->kvm))
2536 		sca_del_vcpu(vcpu);
2537 
2538 	if (kvm_is_ucontrol(vcpu->kvm))
2539 		gmap_remove(vcpu->arch.gmap);
2540 
2541 	if (vcpu->kvm->arch.use_cmma)
2542 		kvm_s390_vcpu_unsetup_cmma(vcpu);
2543 	free_page((unsigned long)(vcpu->arch.sie_block));
2544 
2545 	kvm_vcpu_uninit(vcpu);
2546 	kmem_cache_free(kvm_vcpu_cache, vcpu);
2547 }
2548 
2549 static void kvm_free_vcpus(struct kvm *kvm)
2550 {
2551 	unsigned int i;
2552 	struct kvm_vcpu *vcpu;
2553 
2554 	kvm_for_each_vcpu(i, vcpu, kvm)
2555 		kvm_arch_vcpu_destroy(vcpu);
2556 
2557 	mutex_lock(&kvm->lock);
2558 	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2559 		kvm->vcpus[i] = NULL;
2560 
2561 	atomic_set(&kvm->online_vcpus, 0);
2562 	mutex_unlock(&kvm->lock);
2563 }
2564 
2565 void kvm_arch_destroy_vm(struct kvm *kvm)
2566 {
2567 	kvm_free_vcpus(kvm);
2568 	sca_dispose(kvm);
2569 	debug_unregister(kvm->arch.dbf);
2570 	kvm_s390_gisa_destroy(kvm);
2571 	free_page((unsigned long)kvm->arch.sie_page2);
2572 	if (!kvm_is_ucontrol(kvm))
2573 		gmap_remove(kvm->arch.gmap);
2574 	kvm_s390_destroy_adapters(kvm);
2575 	kvm_s390_clear_float_irqs(kvm);
2576 	kvm_s390_vsie_destroy(kvm);
2577 	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2578 }
2579 
2580 /* Section: vcpu related */
2581 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2582 {
2583 	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2584 	if (!vcpu->arch.gmap)
2585 		return -ENOMEM;
2586 	vcpu->arch.gmap->private = vcpu->kvm;
2587 
2588 	return 0;
2589 }
2590 
2591 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2592 {
2593 	if (!kvm_s390_use_sca_entries())
2594 		return;
2595 	read_lock(&vcpu->kvm->arch.sca_lock);
2596 	if (vcpu->kvm->arch.use_esca) {
2597 		struct esca_block *sca = vcpu->kvm->arch.sca;
2598 
2599 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2600 		sca->cpu[vcpu->vcpu_id].sda = 0;
2601 	} else {
2602 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2603 
2604 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2605 		sca->cpu[vcpu->vcpu_id].sda = 0;
2606 	}
2607 	read_unlock(&vcpu->kvm->arch.sca_lock);
2608 }
2609 
2610 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2611 {
2612 	if (!kvm_s390_use_sca_entries()) {
2613 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2614 
2615 		/* we still need the basic sca for the ipte control */
2616 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2617 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2618 		return;
2619 	}
2620 	read_lock(&vcpu->kvm->arch.sca_lock);
2621 	if (vcpu->kvm->arch.use_esca) {
2622 		struct esca_block *sca = vcpu->kvm->arch.sca;
2623 
2624 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2625 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2626 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2627 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2628 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2629 	} else {
2630 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2631 
2632 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2633 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2634 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2635 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2636 	}
2637 	read_unlock(&vcpu->kvm->arch.sca_lock);
2638 }
2639 
2640 /* Basic SCA to Extended SCA data copy routines */
2641 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2642 {
2643 	d->sda = s->sda;
2644 	d->sigp_ctrl.c = s->sigp_ctrl.c;
2645 	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2646 }
2647 
2648 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2649 {
2650 	int i;
2651 
2652 	d->ipte_control = s->ipte_control;
2653 	d->mcn[0] = s->mcn;
2654 	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2655 		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2656 }
2657 
2658 static int sca_switch_to_extended(struct kvm *kvm)
2659 {
2660 	struct bsca_block *old_sca = kvm->arch.sca;
2661 	struct esca_block *new_sca;
2662 	struct kvm_vcpu *vcpu;
2663 	unsigned int vcpu_idx;
2664 	u32 scaol, scaoh;
2665 
2666 	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2667 	if (!new_sca)
2668 		return -ENOMEM;
2669 
2670 	scaoh = (u32)((u64)(new_sca) >> 32);
2671 	scaol = (u32)(u64)(new_sca) & ~0x3fU;
2672 
2673 	kvm_s390_vcpu_block_all(kvm);
2674 	write_lock(&kvm->arch.sca_lock);
2675 
2676 	sca_copy_b_to_e(new_sca, old_sca);
2677 
2678 	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2679 		vcpu->arch.sie_block->scaoh = scaoh;
2680 		vcpu->arch.sie_block->scaol = scaol;
2681 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2682 	}
2683 	kvm->arch.sca = new_sca;
2684 	kvm->arch.use_esca = 1;
2685 
2686 	write_unlock(&kvm->arch.sca_lock);
2687 	kvm_s390_vcpu_unblock_all(kvm);
2688 
2689 	free_page((unsigned long)old_sca);
2690 
2691 	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2692 		 old_sca, kvm->arch.sca);
2693 	return 0;
2694 }
2695 
2696 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2697 {
2698 	int rc;
2699 
2700 	if (!kvm_s390_use_sca_entries()) {
2701 		if (id < KVM_MAX_VCPUS)
2702 			return true;
2703 		return false;
2704 	}
2705 	if (id < KVM_S390_BSCA_CPU_SLOTS)
2706 		return true;
2707 	if (!sclp.has_esca || !sclp.has_64bscao)
2708 		return false;
2709 
2710 	mutex_lock(&kvm->lock);
2711 	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2712 	mutex_unlock(&kvm->lock);
2713 
2714 	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2715 }
2716 
2717 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2718 {
2719 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2720 	kvm_clear_async_pf_completion_queue(vcpu);
2721 	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2722 				    KVM_SYNC_GPRS |
2723 				    KVM_SYNC_ACRS |
2724 				    KVM_SYNC_CRS |
2725 				    KVM_SYNC_ARCH0 |
2726 				    KVM_SYNC_PFAULT;
2727 	kvm_s390_set_prefix(vcpu, 0);
2728 	if (test_kvm_facility(vcpu->kvm, 64))
2729 		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2730 	if (test_kvm_facility(vcpu->kvm, 82))
2731 		vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
2732 	if (test_kvm_facility(vcpu->kvm, 133))
2733 		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2734 	if (test_kvm_facility(vcpu->kvm, 156))
2735 		vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
2736 	/* fprs can be synchronized via vrs, even if the guest has no vx. With
2737 	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2738 	 */
2739 	if (MACHINE_HAS_VX)
2740 		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2741 	else
2742 		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2743 
2744 	if (kvm_is_ucontrol(vcpu->kvm))
2745 		return __kvm_ucontrol_vcpu_init(vcpu);
2746 
2747 	return 0;
2748 }
2749 
2750 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2751 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2752 {
2753 	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2754 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2755 	vcpu->arch.cputm_start = get_tod_clock_fast();
2756 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2757 }
2758 
2759 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2760 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2761 {
2762 	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2763 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2764 	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2765 	vcpu->arch.cputm_start = 0;
2766 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2767 }
2768 
2769 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2770 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2771 {
2772 	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2773 	vcpu->arch.cputm_enabled = true;
2774 	__start_cpu_timer_accounting(vcpu);
2775 }
2776 
2777 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2778 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2779 {
2780 	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2781 	__stop_cpu_timer_accounting(vcpu);
2782 	vcpu->arch.cputm_enabled = false;
2783 }
2784 
/* Preemption-safe wrapper around __enable_cpu_timer_accounting(). */
static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	/* protect from TOD sync and vcpu_load/put */
	preempt_disable();

	__enable_cpu_timer_accounting(vcpu);

	preempt_enable();
}
2791 
/* Preemption-safe wrapper around __disable_cpu_timer_accounting(). */
static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	/* protect from TOD sync and vcpu_load/put */
	preempt_disable();

	__disable_cpu_timer_accounting(vcpu);

	preempt_enable();
}
2798 
2799 /* set the cpu timer - may only be called from the VCPU thread itself */
2800 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2801 {
2802 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2803 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2804 	if (vcpu->arch.cputm_enabled)
2805 		vcpu->arch.cputm_start = get_tod_clock_fast();
2806 	vcpu->arch.sie_block->cputm = cputm;
2807 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2808 	preempt_enable();
2809 }
2810 
/*
 * update and get the cpu timer - can also be called from other VCPU threads
 *
 * If accounting is not enabled, the value stored in the SIE block is
 * returned as is. Otherwise the stored value is read under the
 * cputm_seqcount retry loop and, when cputm_start is non-zero, reduced by
 * the TOD time that has elapsed since cputm_start.
 */
__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
{
	unsigned int seq;
	__u64 value;

	/* fast path: nothing is being accounted, the stored value is current */
	if (unlikely(!vcpu->arch.cputm_enabled))
		return vcpu->arch.sie_block->cputm;

	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	do {
		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
		/*
		 * If the writer would ever execute a read in the critical
		 * section, e.g. in irq context, we have a deadlock.
		 */
		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
		value = vcpu->arch.sie_block->cputm;
		/* if cputm_start is 0, accounting is being started/stopped */
		if (likely(vcpu->arch.cputm_start))
			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
		/*
		 * The low bit of seq is masked off for the retry check, so a
		 * snapshot taken while a write was in progress (odd seq) never
		 * matches and forces another iteration.
		 */
	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
	preempt_enable();
	return value;
}
2836 
/*
 * Arch part of loading @vcpu: enable the gmap remembered in
 * arch.enabled_gmap, set CPUSTAT_RUNNING, start CPU timer accounting if
 * it is enabled and the vcpu is not idle, and record @cpu in vcpu->cpu.
 */
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{

	gmap_enable(vcpu->arch.enabled_gmap);
	kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
		__start_cpu_timer_accounting(vcpu);
	vcpu->cpu = cpu;
}
2846 
/*
 * Arch part of putting @vcpu; undoes kvm_arch_vcpu_load() in reverse
 * order: invalidate vcpu->cpu, stop CPU timer accounting if it is enabled
 * and the vcpu is not idle, clear CPUSTAT_RUNNING, then remember the
 * currently enabled gmap in arch.enabled_gmap and disable it.
 */
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	vcpu->cpu = -1;
	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
		__stop_cpu_timer_accounting(vcpu);
	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
	/* saved so that the next vcpu_load re-enables the same gmap */
	vcpu->arch.enabled_gmap = gmap_get_enabled();
	gmap_disable(vcpu->arch.enabled_gmap);

}
2857 
2858 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2859 {
2860 	/* this equals initial cpu reset in pop, but we don't switch to ESA */
2861 	vcpu->arch.sie_block->gpsw.mask = 0UL;
2862 	vcpu->arch.sie_block->gpsw.addr = 0UL;
2863 	kvm_s390_set_prefix(vcpu, 0);
2864 	kvm_s390_set_cpu_timer(vcpu, 0);
2865 	vcpu->arch.sie_block->ckc       = 0UL;
2866 	vcpu->arch.sie_block->todpr     = 0;
2867 	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2868 	vcpu->arch.sie_block->gcr[0]  = CR0_UNUSED_56 |
2869 					CR0_INTERRUPT_KEY_SUBMASK |
2870 					CR0_MEASUREMENT_ALERT_SUBMASK;
2871 	vcpu->arch.sie_block->gcr[14] = CR14_UNUSED_32 |
2872 					CR14_UNUSED_33 |
2873 					CR14_EXTERNAL_DAMAGE_SUBMASK;
2874 	/* make sure the new fpc will be lazily loaded */
2875 	save_fpu_regs();
2876 	current->thread.fpu.fpc = 0;
2877 	vcpu->arch.sie_block->gbea = 1;
2878 	vcpu->arch.sie_block->pp = 0;
2879 	vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
2880 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2881 	kvm_clear_async_pf_completion_queue(vcpu);
2882 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2883 		kvm_s390_vcpu_stop(vcpu);
2884 	kvm_s390_clear_local_irqs(vcpu);
2885 }
2886 
2887 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2888 {
2889 	mutex_lock(&vcpu->kvm->lock);
2890 	preempt_disable();
2891 	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2892 	vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
2893 	preempt_enable();
2894 	mutex_unlock(&vcpu->kvm->lock);
2895 	if (!kvm_is_ucontrol(vcpu->kvm)) {
2896 		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2897 		sca_add_vcpu(vcpu);
2898 	}
2899 	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2900 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2901 	/* make vcpu_load load the right gmap on the first trigger */
2902 	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2903 }
2904 
2905 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
2906 {
2907 	if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
2908 	    test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
2909 		return true;
2910 	return false;
2911 }
2912 
2913 static bool kvm_has_pckmo_ecc(struct kvm *kvm)
2914 {
2915 	/* At least one ECC subfunction must be present */
2916 	return kvm_has_pckmo_subfunc(kvm, 32) ||
2917 	       kvm_has_pckmo_subfunc(kvm, 33) ||
2918 	       kvm_has_pckmo_subfunc(kvm, 34) ||
2919 	       kvm_has_pckmo_subfunc(kvm, 40) ||
2920 	       kvm_has_pckmo_subfunc(kvm, 41);
2921 
2922 }
2923 
2924 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2925 {
2926 	/*
2927 	 * If the AP instructions are not being interpreted and the MSAX3
2928 	 * facility is not configured for the guest, there is nothing to set up.
2929 	 */
2930 	if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
2931 		return;
2932 
2933 	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2934 	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2935 	vcpu->arch.sie_block->eca &= ~ECA_APIE;
2936 	vcpu->arch.sie_block->ecd &= ~ECD_ECC;
2937 
2938 	if (vcpu->kvm->arch.crypto.apie)
2939 		vcpu->arch.sie_block->eca |= ECA_APIE;
2940 
2941 	/* Set up protected key support */
2942 	if (vcpu->kvm->arch.crypto.aes_kw) {
2943 		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2944 		/* ecc is also wrapped with AES key */
2945 		if (kvm_has_pckmo_ecc(vcpu->kvm))
2946 			vcpu->arch.sie_block->ecd |= ECD_ECC;
2947 	}
2948 
2949 	if (vcpu->kvm->arch.crypto.dea_kw)
2950 		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2951 }
2952 
2953 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2954 {
2955 	free_page(vcpu->arch.sie_block->cbrlo);
2956 	vcpu->arch.sie_block->cbrlo = 0;
2957 }
2958 
2959 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2960 {
2961 	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2962 	if (!vcpu->arch.sie_block->cbrlo)
2963 		return -ENOMEM;
2964 	return 0;
2965 }
2966 
2967 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2968 {
2969 	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2970 
2971 	vcpu->arch.sie_block->ibc = model->ibc;
2972 	if (test_kvm_facility(vcpu->kvm, 7))
2973 		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2974 }
2975 
2976 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2977 {
2978 	int rc = 0;
2979 
2980 	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2981 						    CPUSTAT_SM |
2982 						    CPUSTAT_STOPPED);
2983 
2984 	if (test_kvm_facility(vcpu->kvm, 78))
2985 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
2986 	else if (test_kvm_facility(vcpu->kvm, 8))
2987 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
2988 
2989 	kvm_s390_vcpu_setup_model(vcpu);
2990 
2991 	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2992 	if (MACHINE_HAS_ESOP)
2993 		vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2994 	if (test_kvm_facility(vcpu->kvm, 9))
2995 		vcpu->arch.sie_block->ecb |= ECB_SRSI;
2996 	if (test_kvm_facility(vcpu->kvm, 73))
2997 		vcpu->arch.sie_block->ecb |= ECB_TE;
2998 
2999 	if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
3000 		vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
3001 	if (test_kvm_facility(vcpu->kvm, 130))
3002 		vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
3003 	vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
3004 	if (sclp.has_cei)
3005 		vcpu->arch.sie_block->eca |= ECA_CEI;
3006 	if (sclp.has_ib)
3007 		vcpu->arch.sie_block->eca |= ECA_IB;
3008 	if (sclp.has_siif)
3009 		vcpu->arch.sie_block->eca |= ECA_SII;
3010 	if (sclp.has_sigpif)
3011 		vcpu->arch.sie_block->eca |= ECA_SIGPI;
3012 	if (test_kvm_facility(vcpu->kvm, 129)) {
3013 		vcpu->arch.sie_block->eca |= ECA_VX;
3014 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3015 	}
3016 	if (test_kvm_facility(vcpu->kvm, 139))
3017 		vcpu->arch.sie_block->ecd |= ECD_MEF;
3018 	if (test_kvm_facility(vcpu->kvm, 156))
3019 		vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3020 	if (vcpu->arch.sie_block->gd) {
3021 		vcpu->arch.sie_block->eca |= ECA_AIV;
3022 		VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3023 			   vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3024 	}
3025 	vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
3026 					| SDNXC;
3027 	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
3028 
3029 	if (sclp.has_kss)
3030 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3031 	else
3032 		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3033 
3034 	if (vcpu->kvm->arch.use_cmma) {
3035 		rc = kvm_s390_vcpu_setup_cmma(vcpu);
3036 		if (rc)
3037 			return rc;
3038 	}
3039 	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3040 	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3041 
3042 	vcpu->arch.sie_block->hpid = HPID_KVM;
3043 
3044 	kvm_s390_vcpu_crypto_setup(vcpu);
3045 
3046 	return rc;
3047 }
3048 
3049 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
3050 				      unsigned int id)
3051 {
3052 	struct kvm_vcpu *vcpu;
3053 	struct sie_page *sie_page;
3054 	int rc = -EINVAL;
3055 
3056 	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3057 		goto out;
3058 
3059 	rc = -ENOMEM;
3060 
3061 	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
3062 	if (!vcpu)
3063 		goto out;
3064 
3065 	BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3066 	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
3067 	if (!sie_page)
3068 		goto out_free_cpu;
3069 
3070 	vcpu->arch.sie_block = &sie_page->sie_block;
3071 	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
3072 
3073 	/* the real guest size will always be smaller than msl */
3074 	vcpu->arch.sie_block->mso = 0;
3075 	vcpu->arch.sie_block->msl = sclp.hamax;
3076 
3077 	vcpu->arch.sie_block->icpua = id;
3078 	spin_lock_init(&vcpu->arch.local_int.lock);
3079 	vcpu->arch.sie_block->gd = (u32)(u64)kvm->arch.gisa_int.origin;
3080 	if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
3081 		vcpu->arch.sie_block->gd |= GISA_FORMAT1;
3082 	seqcount_init(&vcpu->arch.cputm_seqcount);
3083 
3084 	rc = kvm_vcpu_init(vcpu, kvm, id);
3085 	if (rc)
3086 		goto out_free_sie_block;
3087 	VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
3088 		 vcpu->arch.sie_block);
3089 	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
3090 
3091 	return vcpu;
3092 out_free_sie_block:
3093 	free_page((unsigned long)(vcpu->arch.sie_block));
3094 out_free_cpu:
3095 	kmem_cache_free(kvm_vcpu_cache, vcpu);
3096 out:
3097 	return ERR_PTR(rc);
3098 }
3099 
/* Returns the result of kvm_s390_vcpu_has_irq(@vcpu, 0). */
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	return kvm_s390_vcpu_has_irq(vcpu, 0);
}
3104 
3105 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3106 {
3107 	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3108 }
3109 
/*
 * Set PROG_BLOCK_SIE in prog20 and then kick the vcpu out of SIE via
 * exit_sie(). The bit is set first so that it is already visible when the
 * vcpu has left SIE.
 */
void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
{
	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}
3115 
/* Clear PROG_BLOCK_SIE in prog20 again (counterpart of kvm_s390_vcpu_block()). */
void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
{
	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
}
3120 
/*
 * Set PROG_REQUEST in prog20 and then kick the vcpu out of SIE via
 * exit_sie(). The bit is cleared again by kvm_s390_vcpu_request_handled().
 */
static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
{
	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}
3126 
3127 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3128 {
3129 	return atomic_read(&vcpu->arch.sie_block->prog20) &
3130 	       (PROG_BLOCK_SIE | PROG_REQUEST);
3131 }
3132 
/* Clear PROG_REQUEST in prog20 (counterpart of kvm_s390_vcpu_request()). */
static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
{
	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
}
3137 
/*
 * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
 * If the CPU is not running (e.g. waiting as idle) the function will
 * return immediately.
 *
 * The kick consists of setting CPUSTAT_STOP_INT and calling
 * kvm_s390_vsie_kick(); afterwards this busy-waits until PROG_IN_SIE is
 * no longer set in prog0c.
 */
void exit_sie(struct kvm_vcpu *vcpu)
{
	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
	kvm_s390_vsie_kick(vcpu);
	/* spin until the vcpu has left SIE */
	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
		cpu_relax();
}
3149 
/*
 * Kick a guest cpu out of SIE to process a request synchronously
 *
 * The request @req is queued first; kvm_s390_vcpu_request() then sets
 * PROG_REQUEST and waits in exit_sie() until the vcpu has left SIE.
 */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
	kvm_make_request(req, vcpu);
	kvm_s390_vcpu_request(vcpu);
}
3156 
3157 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3158 			      unsigned long end)
3159 {
3160 	struct kvm *kvm = gmap->private;
3161 	struct kvm_vcpu *vcpu;
3162 	unsigned long prefix;
3163 	int i;
3164 
3165 	if (gmap_is_shadow(gmap))
3166 		return;
3167 	if (start >= 1UL << 31)
3168 		/* We are only interested in prefix pages */
3169 		return;
3170 	kvm_for_each_vcpu(i, vcpu, kvm) {
3171 		/* match against both prefix pages */
3172 		prefix = kvm_s390_get_prefix(vcpu);
3173 		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3174 			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3175 				   start, end);
3176 			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
3177 		}
3178 	}
3179 }
3180 
3181 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3182 {
3183 	/* do not poll with more than halt_poll_max_steal percent of steal time */
3184 	if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3185 	    halt_poll_max_steal) {
3186 		vcpu->stat.halt_no_poll_steal++;
3187 		return true;
3188 	}
3189 	return false;
3190 }
3191 
/*
 * Exists only to satisfy the reference from kvm common code; reaching it
 * is treated as a bug.
 */
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	/* kvm common code refers to this, but never calls it */
	BUG();
	return 0;
}
3198 
3199 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3200 					   struct kvm_one_reg *reg)
3201 {
3202 	int r = -EINVAL;
3203 
3204 	switch (reg->id) {
3205 	case KVM_REG_S390_TODPR:
3206 		r = put_user(vcpu->arch.sie_block->todpr,
3207 			     (u32 __user *)reg->addr);
3208 		break;
3209 	case KVM_REG_S390_EPOCHDIFF:
3210 		r = put_user(vcpu->arch.sie_block->epoch,
3211 			     (u64 __user *)reg->addr);
3212 		break;
3213 	case KVM_REG_S390_CPU_TIMER:
3214 		r = put_user(kvm_s390_get_cpu_timer(vcpu),
3215 			     (u64 __user *)reg->addr);
3216 		break;
3217 	case KVM_REG_S390_CLOCK_COMP:
3218 		r = put_user(vcpu->arch.sie_block->ckc,
3219 			     (u64 __user *)reg->addr);
3220 		break;
3221 	case KVM_REG_S390_PFTOKEN:
3222 		r = put_user(vcpu->arch.pfault_token,
3223 			     (u64 __user *)reg->addr);
3224 		break;
3225 	case KVM_REG_S390_PFCOMPARE:
3226 		r = put_user(vcpu->arch.pfault_compare,
3227 			     (u64 __user *)reg->addr);
3228 		break;
3229 	case KVM_REG_S390_PFSELECT:
3230 		r = put_user(vcpu->arch.pfault_select,
3231 			     (u64 __user *)reg->addr);
3232 		break;
3233 	case KVM_REG_S390_PP:
3234 		r = put_user(vcpu->arch.sie_block->pp,
3235 			     (u64 __user *)reg->addr);
3236 		break;
3237 	case KVM_REG_S390_GBEA:
3238 		r = put_user(vcpu->arch.sie_block->gbea,
3239 			     (u64 __user *)reg->addr);
3240 		break;
3241 	default:
3242 		break;
3243 	}
3244 
3245 	return r;
3246 }
3247 
3248 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3249 					   struct kvm_one_reg *reg)
3250 {
3251 	int r = -EINVAL;
3252 	__u64 val;
3253 
3254 	switch (reg->id) {
3255 	case KVM_REG_S390_TODPR:
3256 		r = get_user(vcpu->arch.sie_block->todpr,
3257 			     (u32 __user *)reg->addr);
3258 		break;
3259 	case KVM_REG_S390_EPOCHDIFF:
3260 		r = get_user(vcpu->arch.sie_block->epoch,
3261 			     (u64 __user *)reg->addr);
3262 		break;
3263 	case KVM_REG_S390_CPU_TIMER:
3264 		r = get_user(val, (u64 __user *)reg->addr);
3265 		if (!r)
3266 			kvm_s390_set_cpu_timer(vcpu, val);
3267 		break;
3268 	case KVM_REG_S390_CLOCK_COMP:
3269 		r = get_user(vcpu->arch.sie_block->ckc,
3270 			     (u64 __user *)reg->addr);
3271 		break;
3272 	case KVM_REG_S390_PFTOKEN:
3273 		r = get_user(vcpu->arch.pfault_token,
3274 			     (u64 __user *)reg->addr);
3275 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3276 			kvm_clear_async_pf_completion_queue(vcpu);
3277 		break;
3278 	case KVM_REG_S390_PFCOMPARE:
3279 		r = get_user(vcpu->arch.pfault_compare,
3280 			     (u64 __user *)reg->addr);
3281 		break;
3282 	case KVM_REG_S390_PFSELECT:
3283 		r = get_user(vcpu->arch.pfault_select,
3284 			     (u64 __user *)reg->addr);
3285 		break;
3286 	case KVM_REG_S390_PP:
3287 		r = get_user(vcpu->arch.sie_block->pp,
3288 			     (u64 __user *)reg->addr);
3289 		break;
3290 	case KVM_REG_S390_GBEA:
3291 		r = get_user(vcpu->arch.sie_block->gbea,
3292 			     (u64 __user *)reg->addr);
3293 		break;
3294 	default:
3295 		break;
3296 	}
3297 
3298 	return r;
3299 }
3300 
/* ioctl wrapper around kvm_s390_vcpu_initial_reset(); always returns 0. */
static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
	kvm_s390_vcpu_initial_reset(vcpu);
	return 0;
}
3306 
3307 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3308 {
3309 	vcpu_load(vcpu);
3310 	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3311 	vcpu_put(vcpu);
3312 	return 0;
3313 }
3314 
3315 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3316 {
3317 	vcpu_load(vcpu);
3318 	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3319 	vcpu_put(vcpu);
3320 	return 0;
3321 }
3322 
3323 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3324 				  struct kvm_sregs *sregs)
3325 {
3326 	vcpu_load(vcpu);
3327 
3328 	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3329 	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3330 
3331 	vcpu_put(vcpu);
3332 	return 0;
3333 }
3334 
3335 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3336 				  struct kvm_sregs *sregs)
3337 {
3338 	vcpu_load(vcpu);
3339 
3340 	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3341 	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3342 
3343 	vcpu_put(vcpu);
3344 	return 0;
3345 }
3346 
3347 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3348 {
3349 	int ret = 0;
3350 
3351 	vcpu_load(vcpu);
3352 
3353 	if (test_fp_ctl(fpu->fpc)) {
3354 		ret = -EINVAL;
3355 		goto out;
3356 	}
3357 	vcpu->run->s.regs.fpc = fpu->fpc;
3358 	if (MACHINE_HAS_VX)
3359 		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3360 				 (freg_t *) fpu->fprs);
3361 	else
3362 		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3363 
3364 out:
3365 	vcpu_put(vcpu);
3366 	return ret;
3367 }
3368 
3369 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3370 {
3371 	vcpu_load(vcpu);
3372 
3373 	/* make sure we have the latest values */
3374 	save_fpu_regs();
3375 	if (MACHINE_HAS_VX)
3376 		convert_vx_to_fp((freg_t *) fpu->fprs,
3377 				 (__vector128 *) vcpu->run->s.regs.vrs);
3378 	else
3379 		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3380 	fpu->fpc = vcpu->run->s.regs.fpc;
3381 
3382 	vcpu_put(vcpu);
3383 	return 0;
3384 }
3385 
3386 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3387 {
3388 	int rc = 0;
3389 
3390 	if (!is_vcpu_stopped(vcpu))
3391 		rc = -EBUSY;
3392 	else {
3393 		vcpu->run->psw_mask = psw.mask;
3394 		vcpu->run->psw_addr = psw.addr;
3395 	}
3396 	return rc;
3397 }
3398 
/* Address translation ioctl stub: unconditionally fails with -EINVAL. */
int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return -EINVAL; /* not implemented yet */
}
3404 
3405 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3406 			      KVM_GUESTDBG_USE_HW_BP | \
3407 			      KVM_GUESTDBG_ENABLE)
3408 
3409 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3410 					struct kvm_guest_debug *dbg)
3411 {
3412 	int rc = 0;
3413 
3414 	vcpu_load(vcpu);
3415 
3416 	vcpu->guest_debug = 0;
3417 	kvm_s390_clear_bp_data(vcpu);
3418 
3419 	if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3420 		rc = -EINVAL;
3421 		goto out;
3422 	}
3423 	if (!sclp.has_gpere) {
3424 		rc = -EINVAL;
3425 		goto out;
3426 	}
3427 
3428 	if (dbg->control & KVM_GUESTDBG_ENABLE) {
3429 		vcpu->guest_debug = dbg->control;
3430 		/* enforce guest PER */
3431 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3432 
3433 		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3434 			rc = kvm_s390_import_bp_data(vcpu, dbg);
3435 	} else {
3436 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3437 		vcpu->arch.guestdbg.last_bp = 0;
3438 	}
3439 
3440 	if (rc) {
3441 		vcpu->guest_debug = 0;
3442 		kvm_s390_clear_bp_data(vcpu);
3443 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3444 	}
3445 
3446 out:
3447 	vcpu_put(vcpu);
3448 	return rc;
3449 }
3450 
3451 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3452 				    struct kvm_mp_state *mp_state)
3453 {
3454 	int ret;
3455 
3456 	vcpu_load(vcpu);
3457 
3458 	/* CHECK_STOP and LOAD are not supported yet */
3459 	ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3460 				      KVM_MP_STATE_OPERATING;
3461 
3462 	vcpu_put(vcpu);
3463 	return ret;
3464 }
3465 
3466 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3467 				    struct kvm_mp_state *mp_state)
3468 {
3469 	int rc = 0;
3470 
3471 	vcpu_load(vcpu);
3472 
3473 	/* user space knows about this interface - let it control the state */
3474 	vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3475 
3476 	switch (mp_state->mp_state) {
3477 	case KVM_MP_STATE_STOPPED:
3478 		kvm_s390_vcpu_stop(vcpu);
3479 		break;
3480 	case KVM_MP_STATE_OPERATING:
3481 		kvm_s390_vcpu_start(vcpu);
3482 		break;
3483 	case KVM_MP_STATE_LOAD:
3484 	case KVM_MP_STATE_CHECK_STOP:
3485 		/* fall through - CHECK_STOP and LOAD are not supported yet */
3486 	default:
3487 		rc = -ENXIO;
3488 	}
3489 
3490 	vcpu_put(vcpu);
3491 	return rc;
3492 }
3493 
/* True if CPUSTAT_IBS is set in the cpuflags of @vcpu. */
static bool ibs_enabled(struct kvm_vcpu *vcpu)
{
	return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
}
3498 
/*
 * Process all pending requests of @vcpu before (re-)entering SIE.
 *
 * After each handled request the function jumps back to "retry", which
 * clears PROG_REQUEST again and re-checks whether requests are pending.
 *
 * Returns 0 when nothing is left to do, or the error from
 * gmap_mprotect_notify(); in that case KVM_REQ_MMU_RELOAD is queued
 * again before returning.
 */
static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
retry:
	kvm_s390_vcpu_request_handled(vcpu);
	if (!kvm_request_pending(vcpu))
		return 0;
	/*
	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
	 * This ensures that the ipte instruction for this request has
	 * already finished. We might race against a second unmapper that
	 * wants to set the blocking bit. Lets just retry the request loop.
	 */
	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
		int rc;
		rc = gmap_mprotect_notify(vcpu->arch.gmap,
					  kvm_s390_get_prefix(vcpu),
					  PAGE_SIZE * 2, PROT_WRITE);
		if (rc) {
			/* keep the request pending for the next attempt */
			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
			return rc;
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
		vcpu->arch.sie_block->ihcpu = 0xffff;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
		if (!ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
			kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
		if (ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
			kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
		/*
		 * Disable CMM virtualization; we will emulate the ESSA
		 * instruction manually, in order to provide additional
		 * functionalities needed for live migration.
		 */
		vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
		/*
		 * Re-enable CMM virtualization if CMMA is available and
		 * CMM has been used.
		 */
		if ((vcpu->kvm->arch.use_cmma) &&
		    (vcpu->kvm->mm->context.uses_cmm))
			vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
		goto retry;
	}

	/* nothing to do, just clear the request */
	kvm_clear_request(KVM_REQ_UNHALT, vcpu);
	/* we left the vsie handler, nothing to do, just clear the request */
	kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);

	return 0;
}
3578 
/*
 * Set the guest TOD clock of @kvm to @gtod.
 *
 * The guest clock is represented as an epoch difference to the host TOD
 * clock. The new epoch (and, with facility 139, the epoch index) is
 * stored in kvm->arch and copied into the SIE block of every vcpu while
 * all vcpus are blocked. Runs under kvm->lock with preemption disabled.
 */
void kvm_s390_set_tod_clock(struct kvm *kvm,
			    const struct kvm_s390_vm_tod_clock *gtod)
{
	struct kvm_vcpu *vcpu;
	struct kvm_s390_tod_clock_ext htod;
	int i;

	mutex_lock(&kvm->lock);
	preempt_disable();

	get_tod_clock_ext((char *)&htod);

	kvm->arch.epoch = gtod->tod - htod.tod;
	kvm->arch.epdx = 0;
	if (test_kvm_facility(kvm, 139)) {
		kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
		/* the tod subtraction above wrapped: borrow from the index */
		if (kvm->arch.epoch > gtod->tod)
			kvm->arch.epdx -= 1;
	}

	/* update the vcpus only while none of them is inside SIE */
	kvm_s390_vcpu_block_all(kvm);
	kvm_for_each_vcpu(i, vcpu, kvm) {
		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
		vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
	}

	kvm_s390_vcpu_unblock_all(kvm);
	preempt_enable();
	mutex_unlock(&kvm->lock);
}
3609 
3610 /**
3611  * kvm_arch_fault_in_page - fault-in guest page if necessary
3612  * @vcpu: The corresponding virtual cpu
3613  * @gpa: Guest physical address
3614  * @writable: Whether the page should be writable or not
3615  *
3616  * Make sure that a guest page has been faulted-in on the host.
3617  *
3618  * Return: Zero on success, negative error code otherwise.
3619  */
3620 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3621 {
3622 	return gmap_fault(vcpu->arch.gmap, gpa,
3623 			  writable ? FAULT_FLAG_WRITE : 0);
3624 }
3625 
3626 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3627 				      unsigned long token)
3628 {
3629 	struct kvm_s390_interrupt inti;
3630 	struct kvm_s390_irq irq;
3631 
3632 	if (start_token) {
3633 		irq.u.ext.ext_params2 = token;
3634 		irq.type = KVM_S390_INT_PFAULT_INIT;
3635 		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3636 	} else {
3637 		inti.type = KVM_S390_INT_PFAULT_DONE;
3638 		inti.parm64 = token;
3639 		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3640 	}
3641 }
3642 
/*
 * Async pf hook: trace the event and inject the pfault "init"
 * notification with the token stored in @work.
 */
void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
				     struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
}
3649 
/*
 * Async pf hook: trace the event and inject the pfault "done"
 * notification with the token stored in @work.
 */
void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
				 struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
}
3656 
/* Async pf hook that is intentionally empty on s390. */
void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
	/* s390 will always inject the page directly */
}
3662 
/* Async pf hook; unconditionally returns true on s390. */
bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
{
	/*
	 * s390 will always inject the page directly,
	 * but we still want check_async_completion to cleanup
	 */
	return true;
}
3671 
3672 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3673 {
3674 	hva_t hva;
3675 	struct kvm_arch_async_pf arch;
3676 	int rc;
3677 
3678 	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3679 		return 0;
3680 	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3681 	    vcpu->arch.pfault_compare)
3682 		return 0;
3683 	if (psw_extint_disabled(vcpu))
3684 		return 0;
3685 	if (kvm_s390_vcpu_has_irq(vcpu, 0))
3686 		return 0;
3687 	if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
3688 		return 0;
3689 	if (!vcpu->arch.gmap->pfault_enabled)
3690 		return 0;
3691 
3692 	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3693 	hva += current->thread.gmap_addr & ~PAGE_MASK;
3694 	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3695 		return 0;
3696 
3697 	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3698 	return rc;
3699 }
3700 
3701 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3702 {
3703 	int rc, cpuflags;
3704 
3705 	/*
3706 	 * On s390 notifications for arriving pages will be delivered directly
3707 	 * to the guest but the house keeping for completed pfaults is
3708 	 * handled outside the worker.
3709 	 */
3710 	kvm_check_async_pf_completion(vcpu);
3711 
3712 	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3713 	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3714 
3715 	if (need_resched())
3716 		schedule();
3717 
3718 	if (test_cpu_flag(CIF_MCCK_PENDING))
3719 		s390_handle_mcck();
3720 
3721 	if (!kvm_is_ucontrol(vcpu->kvm)) {
3722 		rc = kvm_s390_deliver_pending_interrupts(vcpu);
3723 		if (rc)
3724 			return rc;
3725 	}
3726 
3727 	rc = kvm_s390_handle_requests(vcpu);
3728 	if (rc)
3729 		return rc;
3730 
3731 	if (guestdbg_enabled(vcpu)) {
3732 		kvm_s390_backup_guest_per_regs(vcpu);
3733 		kvm_s390_patch_guest_per_regs(vcpu);
3734 	}
3735 
3736 	clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.gisa_int.kicked_mask);
3737 
3738 	vcpu->arch.sie_block->icptcode = 0;
3739 	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3740 	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3741 	trace_kvm_s390_sie_enter(vcpu, cpuflags);
3742 
3743 	return 0;
3744 }
3745 
3746 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3747 {
3748 	struct kvm_s390_pgm_info pgm_info = {
3749 		.code = PGM_ADDRESSING,
3750 	};
3751 	u8 opcode, ilen;
3752 	int rc;
3753 
3754 	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3755 	trace_kvm_s390_sie_fault(vcpu);
3756 
3757 	/*
3758 	 * We want to inject an addressing exception, which is defined as a
3759 	 * suppressing or terminating exception. However, since we came here
3760 	 * by a DAT access exception, the PSW still points to the faulting
3761 	 * instruction since DAT exceptions are nullifying. So we've got
3762 	 * to look up the current opcode to get the length of the instruction
3763 	 * to be able to forward the PSW.
3764 	 */
3765 	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3766 	ilen = insn_length(opcode);
3767 	if (rc < 0) {
3768 		return rc;
3769 	} else if (rc) {
3770 		/* Instruction-Fetching Exceptions - we can't detect the ilen.
3771 		 * Forward by arbitrary ilc, injection will take care of
3772 		 * nullification if necessary.
3773 		 */
3774 		pgm_info = vcpu->arch.pgm;
3775 		ilen = 4;
3776 	}
3777 	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3778 	kvm_s390_forward_psw(vcpu, ilen);
3779 	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3780 }
3781 
3782 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3783 {
3784 	struct mcck_volatile_info *mcck_info;
3785 	struct sie_page *sie_page;
3786 
3787 	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3788 		   vcpu->arch.sie_block->icptcode);
3789 	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3790 
3791 	if (guestdbg_enabled(vcpu))
3792 		kvm_s390_restore_guest_per_regs(vcpu);
3793 
3794 	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3795 	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3796 
3797 	if (exit_reason == -EINTR) {
3798 		VCPU_EVENT(vcpu, 3, "%s", "machine check");
3799 		sie_page = container_of(vcpu->arch.sie_block,
3800 					struct sie_page, sie_block);
3801 		mcck_info = &sie_page->mcck_info;
3802 		kvm_s390_reinject_machine_check(vcpu, mcck_info);
3803 		return 0;
3804 	}
3805 
3806 	if (vcpu->arch.sie_block->icptcode > 0) {
3807 		int rc = kvm_handle_sie_intercept(vcpu);
3808 
3809 		if (rc != -EOPNOTSUPP)
3810 			return rc;
3811 		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3812 		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3813 		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3814 		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3815 		return -EREMOTE;
3816 	} else if (exit_reason != -EFAULT) {
3817 		vcpu->stat.exit_null++;
3818 		return 0;
3819 	} else if (kvm_is_ucontrol(vcpu->kvm)) {
3820 		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3821 		vcpu->run->s390_ucontrol.trans_exc_code =
3822 						current->thread.gmap_addr;
3823 		vcpu->run->s390_ucontrol.pgm_code = 0x10;
3824 		return -EREMOTE;
3825 	} else if (current->thread.gmap_pfault) {
3826 		trace_kvm_s390_major_guest_pfault(vcpu);
3827 		current->thread.gmap_pfault = 0;
3828 		if (kvm_arch_setup_async_pf(vcpu))
3829 			return 0;
3830 		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3831 	}
3832 	return vcpu_post_run_fault_in_sie(vcpu);
3833 }
3834 
/*
 * Main run loop of a vcpu: vcpu_pre_run(), SIE entry via sie64a(), then
 * vcpu_post_run(), repeated until a signal is pending for the current
 * task, a guest debug exit is pending, or pre/post run return non-zero.
 *
 * Returns the last value of rc (0 if the loop ended only because of a
 * signal or a pending debug exit).
 */
static int __vcpu_run(struct kvm_vcpu *vcpu)
{
	int rc, exit_reason;

	/*
	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
	 * ning the guest), so that memslots (and other stuff) are protected
	 */
	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	do {
		rc = vcpu_pre_run(vcpu);
		if (rc)
			break;

		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
		/*
		 * As PF_VCPU will be used in fault handler, between
		 * guest_enter and guest_exit should be no uaccess.
		 */
		local_irq_disable();
		guest_enter_irqoff();
		/* CPU timer accounting is switched off around the SIE call */
		__disable_cpu_timer_accounting(vcpu);
		local_irq_enable();
		exit_reason = sie64a(vcpu->arch.sie_block,
				     vcpu->run->s.regs.gprs);
		local_irq_disable();
		__enable_cpu_timer_accounting(vcpu);
		guest_exit_irqoff();
		local_irq_enable();
		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

		rc = vcpu_post_run(vcpu, exit_reason);
	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);

	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
	return rc;
}
3873 
3874 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3875 {
3876 	struct runtime_instr_cb *riccb;
3877 	struct gs_cb *gscb;
3878 
3879 	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3880 	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3881 	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3882 	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3883 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3884 		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3885 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3886 		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3887 		/* some control register changes require a tlb flush */
3888 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3889 	}
3890 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3891 		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3892 		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3893 		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3894 		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3895 		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3896 	}
3897 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3898 		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3899 		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3900 		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3901 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3902 			kvm_clear_async_pf_completion_queue(vcpu);
3903 	}
3904 	/*
3905 	 * If userspace sets the riccb (e.g. after migration) to a valid state,
3906 	 * we should enable RI here instead of doing the lazy enablement.
3907 	 */
3908 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3909 	    test_kvm_facility(vcpu->kvm, 64) &&
3910 	    riccb->v &&
3911 	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3912 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3913 		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3914 	}
3915 	/*
3916 	 * If userspace sets the gscb (e.g. after migration) to non-zero,
3917 	 * we should enable GS here instead of doing the lazy enablement.
3918 	 */
3919 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3920 	    test_kvm_facility(vcpu->kvm, 133) &&
3921 	    gscb->gssm &&
3922 	    !vcpu->arch.gs_enabled) {
3923 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3924 		vcpu->arch.sie_block->ecb |= ECB_GS;
3925 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3926 		vcpu->arch.gs_enabled = 1;
3927 	}
3928 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
3929 	    test_kvm_facility(vcpu->kvm, 82)) {
3930 		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3931 		vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
3932 	}
3933 	save_access_regs(vcpu->arch.host_acrs);
3934 	restore_access_regs(vcpu->run->s.regs.acrs);
3935 	/* save host (userspace) fprs/vrs */
3936 	save_fpu_regs();
3937 	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3938 	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
3939 	if (MACHINE_HAS_VX)
3940 		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3941 	else
3942 		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3943 	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3944 	if (test_fp_ctl(current->thread.fpu.fpc))
3945 		/* User space provided an invalid FPC, let's clear it */
3946 		current->thread.fpu.fpc = 0;
3947 	if (MACHINE_HAS_GS) {
3948 		preempt_disable();
3949 		__ctl_set_bit(2, 4);
3950 		if (current->thread.gs_cb) {
3951 			vcpu->arch.host_gscb = current->thread.gs_cb;
3952 			save_gs_cb(vcpu->arch.host_gscb);
3953 		}
3954 		if (vcpu->arch.gs_enabled) {
3955 			current->thread.gs_cb = (struct gs_cb *)
3956 						&vcpu->run->s.regs.gscb;
3957 			restore_gs_cb(current->thread.gs_cb);
3958 		}
3959 		preempt_enable();
3960 	}
3961 	/* SIE will load etoken directly from SDNX and therefore kvm_run */
3962 
3963 	kvm_run->kvm_dirty_regs = 0;
3964 }
3965 
3966 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3967 {
3968 	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3969 	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3970 	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3971 	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3972 	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3973 	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3974 	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3975 	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3976 	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3977 	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3978 	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3979 	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3980 	kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
3981 	save_access_regs(vcpu->run->s.regs.acrs);
3982 	restore_access_regs(vcpu->arch.host_acrs);
3983 	/* Save guest register state */
3984 	save_fpu_regs();
3985 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3986 	/* Restore will be done lazily at return */
3987 	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3988 	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3989 	if (MACHINE_HAS_GS) {
3990 		__ctl_set_bit(2, 4);
3991 		if (vcpu->arch.gs_enabled)
3992 			save_gs_cb(current->thread.gs_cb);
3993 		preempt_disable();
3994 		current->thread.gs_cb = vcpu->arch.host_gscb;
3995 		restore_gs_cb(vcpu->arch.host_gscb);
3996 		preempt_enable();
3997 		if (!vcpu->arch.host_gscb)
3998 			__ctl_clear_bit(2, 4);
3999 		vcpu->arch.host_gscb = NULL;
4000 	}
4001 	/* SIE will save etoken directly into SDNX and therefore kvm_run */
4002 }
4003 
4004 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
4005 {
4006 	int rc;
4007 
4008 	if (kvm_run->immediate_exit)
4009 		return -EINTR;
4010 
4011 	vcpu_load(vcpu);
4012 
4013 	if (guestdbg_exit_pending(vcpu)) {
4014 		kvm_s390_prepare_debug_exit(vcpu);
4015 		rc = 0;
4016 		goto out;
4017 	}
4018 
4019 	kvm_sigset_activate(vcpu);
4020 
4021 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
4022 		kvm_s390_vcpu_start(vcpu);
4023 	} else if (is_vcpu_stopped(vcpu)) {
4024 		pr_err_ratelimited("can't run stopped vcpu %d\n",
4025 				   vcpu->vcpu_id);
4026 		rc = -EINVAL;
4027 		goto out;
4028 	}
4029 
4030 	sync_regs(vcpu, kvm_run);
4031 	enable_cpu_timer_accounting(vcpu);
4032 
4033 	might_fault();
4034 	rc = __vcpu_run(vcpu);
4035 
4036 	if (signal_pending(current) && !rc) {
4037 		kvm_run->exit_reason = KVM_EXIT_INTR;
4038 		rc = -EINTR;
4039 	}
4040 
4041 	if (guestdbg_exit_pending(vcpu) && !rc)  {
4042 		kvm_s390_prepare_debug_exit(vcpu);
4043 		rc = 0;
4044 	}
4045 
4046 	if (rc == -EREMOTE) {
4047 		/* userspace support is needed, kvm_run has been prepared */
4048 		rc = 0;
4049 	}
4050 
4051 	disable_cpu_timer_accounting(vcpu);
4052 	store_regs(vcpu, kvm_run);
4053 
4054 	kvm_sigset_deactivate(vcpu);
4055 
4056 	vcpu->stat.exit_userspace++;
4057 out:
4058 	vcpu_put(vcpu);
4059 	return rc;
4060 }
4061 
4062 /*
4063  * store status at address
4064  * we use have two special cases:
4065  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
4066  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
4067  */
4068 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4069 {
4070 	unsigned char archmode = 1;
4071 	freg_t fprs[NUM_FPRS];
4072 	unsigned int px;
4073 	u64 clkcomp, cputm;
4074 	int rc;
4075 
4076 	px = kvm_s390_get_prefix(vcpu);
4077 	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
4078 		if (write_guest_abs(vcpu, 163, &archmode, 1))
4079 			return -EFAULT;
4080 		gpa = 0;
4081 	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
4082 		if (write_guest_real(vcpu, 163, &archmode, 1))
4083 			return -EFAULT;
4084 		gpa = px;
4085 	} else
4086 		gpa -= __LC_FPREGS_SAVE_AREA;
4087 
4088 	/* manually convert vector registers if necessary */
4089 	if (MACHINE_HAS_VX) {
4090 		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
4091 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4092 				     fprs, 128);
4093 	} else {
4094 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4095 				     vcpu->run->s.regs.fprs, 128);
4096 	}
4097 	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
4098 			      vcpu->run->s.regs.gprs, 128);
4099 	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
4100 			      &vcpu->arch.sie_block->gpsw, 16);
4101 	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
4102 			      &px, 4);
4103 	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
4104 			      &vcpu->run->s.regs.fpc, 4);
4105 	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
4106 			      &vcpu->arch.sie_block->todpr, 4);
4107 	cputm = kvm_s390_get_cpu_timer(vcpu);
4108 	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
4109 			      &cputm, 8);
4110 	clkcomp = vcpu->arch.sie_block->ckc >> 8;
4111 	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
4112 			      &clkcomp, 8);
4113 	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
4114 			      &vcpu->run->s.regs.acrs, 64);
4115 	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
4116 			      &vcpu->arch.sie_block->gcr, 128);
4117 	return rc ? -EFAULT : 0;
4118 }
4119 
/*
 * Store the status of a vcpu whose guest registers are currently loaded:
 * refresh the fpc and access register copies in vcpu->run, then hand over to
 * kvm_s390_store_status_unloaded(), whose return value is passed through.
 */
int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	/*
	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
	 * switch in the run ioctl. Let's update our copies before we save
	 * it into the save area
	 */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	save_access_regs(vcpu->run->s.regs.acrs);

	return kvm_s390_store_status_unloaded(vcpu, addr);
}
4133 
/*
 * Request that IBS be disabled on @vcpu.
 *
 * kvm_check_request() is called only for its side effect on a pending
 * KVM_REQ_ENABLE_IBS (presumably it consumes the request - confirm against
 * the generic KVM request helpers) before the DISABLE request is raised via
 * kvm_s390_sync_request().
 */
static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
}
4139 
4140 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
4141 {
4142 	unsigned int i;
4143 	struct kvm_vcpu *vcpu;
4144 
4145 	kvm_for_each_vcpu(i, vcpu, kvm) {
4146 		__disable_ibs_on_vcpu(vcpu);
4147 	}
4148 }
4149 
4150 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4151 {
4152 	if (!sclp.has_ibs)
4153 		return;
4154 	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
4155 	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
4156 }
4157 
4158 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
4159 {
4160 	int i, online_vcpus, started_vcpus = 0;
4161 
4162 	if (!is_vcpu_stopped(vcpu))
4163 		return;
4164 
4165 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
4166 	/* Only one cpu at a time may enter/leave the STOPPED state. */
4167 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
4168 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4169 
4170 	for (i = 0; i < online_vcpus; i++) {
4171 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
4172 			started_vcpus++;
4173 	}
4174 
4175 	if (started_vcpus == 0) {
4176 		/* we're the only active VCPU -> speed it up */
4177 		__enable_ibs_on_vcpu(vcpu);
4178 	} else if (started_vcpus == 1) {
4179 		/*
4180 		 * As we are starting a second VCPU, we have to disable
4181 		 * the IBS facility on all VCPUs to remove potentially
4182 		 * oustanding ENABLE requests.
4183 		 */
4184 		__disable_ibs_on_all_vcpus(vcpu->kvm);
4185 	}
4186 
4187 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
4188 	/*
4189 	 * Another VCPU might have used IBS while we were offline.
4190 	 * Let's play safe and flush the VCPU at startup.
4191 	 */
4192 	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4193 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4194 	return;
4195 }
4196 
4197 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
4198 {
4199 	int i, online_vcpus, started_vcpus = 0;
4200 	struct kvm_vcpu *started_vcpu = NULL;
4201 
4202 	if (is_vcpu_stopped(vcpu))
4203 		return;
4204 
4205 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
4206 	/* Only one cpu at a time may enter/leave the STOPPED state. */
4207 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
4208 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4209 
4210 	/* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
4211 	kvm_s390_clear_stop_irq(vcpu);
4212 
4213 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
4214 	__disable_ibs_on_vcpu(vcpu);
4215 
4216 	for (i = 0; i < online_vcpus; i++) {
4217 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
4218 			started_vcpus++;
4219 			started_vcpu = vcpu->kvm->vcpus[i];
4220 		}
4221 	}
4222 
4223 	if (started_vcpus == 1) {
4224 		/*
4225 		 * As we only have one VCPU left, we want to enable the
4226 		 * IBS facility for that VCPU to speed it up.
4227 		 */
4228 		__enable_ibs_on_vcpu(started_vcpu);
4229 	}
4230 
4231 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4232 	return;
4233 }
4234 
4235 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4236 				     struct kvm_enable_cap *cap)
4237 {
4238 	int r;
4239 
4240 	if (cap->flags)
4241 		return -EINVAL;
4242 
4243 	switch (cap->cap) {
4244 	case KVM_CAP_S390_CSS_SUPPORT:
4245 		if (!vcpu->kvm->arch.css_support) {
4246 			vcpu->kvm->arch.css_support = 1;
4247 			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
4248 			trace_kvm_s390_enable_css(vcpu->kvm);
4249 		}
4250 		r = 0;
4251 		break;
4252 	default:
4253 		r = -EINVAL;
4254 		break;
4255 	}
4256 	return r;
4257 }
4258 
4259 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
4260 				  struct kvm_s390_mem_op *mop)
4261 {
4262 	void __user *uaddr = (void __user *)mop->buf;
4263 	void *tmpbuf = NULL;
4264 	int r, srcu_idx;
4265 	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
4266 				    | KVM_S390_MEMOP_F_CHECK_ONLY;
4267 
4268 	if (mop->flags & ~supported_flags)
4269 		return -EINVAL;
4270 
4271 	if (mop->size > MEM_OP_MAX_SIZE)
4272 		return -E2BIG;
4273 
4274 	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
4275 		tmpbuf = vmalloc(mop->size);
4276 		if (!tmpbuf)
4277 			return -ENOMEM;
4278 	}
4279 
4280 	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4281 
4282 	switch (mop->op) {
4283 	case KVM_S390_MEMOP_LOGICAL_READ:
4284 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4285 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4286 					    mop->size, GACC_FETCH);
4287 			break;
4288 		}
4289 		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4290 		if (r == 0) {
4291 			if (copy_to_user(uaddr, tmpbuf, mop->size))
4292 				r = -EFAULT;
4293 		}
4294 		break;
4295 	case KVM_S390_MEMOP_LOGICAL_WRITE:
4296 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4297 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4298 					    mop->size, GACC_STORE);
4299 			break;
4300 		}
4301 		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4302 			r = -EFAULT;
4303 			break;
4304 		}
4305 		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4306 		break;
4307 	default:
4308 		r = -EINVAL;
4309 	}
4310 
4311 	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
4312 
4313 	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4314 		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
4315 
4316 	vfree(tmpbuf);
4317 	return r;
4318 }
4319 
4320 long kvm_arch_vcpu_async_ioctl(struct file *filp,
4321 			       unsigned int ioctl, unsigned long arg)
4322 {
4323 	struct kvm_vcpu *vcpu = filp->private_data;
4324 	void __user *argp = (void __user *)arg;
4325 
4326 	switch (ioctl) {
4327 	case KVM_S390_IRQ: {
4328 		struct kvm_s390_irq s390irq;
4329 
4330 		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4331 			return -EFAULT;
4332 		return kvm_s390_inject_vcpu(vcpu, &s390irq);
4333 	}
4334 	case KVM_S390_INTERRUPT: {
4335 		struct kvm_s390_interrupt s390int;
4336 		struct kvm_s390_irq s390irq;
4337 
4338 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
4339 			return -EFAULT;
4340 		if (s390int_to_s390irq(&s390int, &s390irq))
4341 			return -EINVAL;
4342 		return kvm_s390_inject_vcpu(vcpu, &s390irq);
4343 	}
4344 	}
4345 	return -ENOIOCTLCMD;
4346 }
4347 
4348 long kvm_arch_vcpu_ioctl(struct file *filp,
4349 			 unsigned int ioctl, unsigned long arg)
4350 {
4351 	struct kvm_vcpu *vcpu = filp->private_data;
4352 	void __user *argp = (void __user *)arg;
4353 	int idx;
4354 	long r;
4355 
4356 	vcpu_load(vcpu);
4357 
4358 	switch (ioctl) {
4359 	case KVM_S390_STORE_STATUS:
4360 		idx = srcu_read_lock(&vcpu->kvm->srcu);
4361 		r = kvm_s390_vcpu_store_status(vcpu, arg);
4362 		srcu_read_unlock(&vcpu->kvm->srcu, idx);
4363 		break;
4364 	case KVM_S390_SET_INITIAL_PSW: {
4365 		psw_t psw;
4366 
4367 		r = -EFAULT;
4368 		if (copy_from_user(&psw, argp, sizeof(psw)))
4369 			break;
4370 		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4371 		break;
4372 	}
4373 	case KVM_S390_INITIAL_RESET:
4374 		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4375 		break;
4376 	case KVM_SET_ONE_REG:
4377 	case KVM_GET_ONE_REG: {
4378 		struct kvm_one_reg reg;
4379 		r = -EFAULT;
4380 		if (copy_from_user(&reg, argp, sizeof(reg)))
4381 			break;
4382 		if (ioctl == KVM_SET_ONE_REG)
4383 			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4384 		else
4385 			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4386 		break;
4387 	}
4388 #ifdef CONFIG_KVM_S390_UCONTROL
4389 	case KVM_S390_UCAS_MAP: {
4390 		struct kvm_s390_ucas_mapping ucasmap;
4391 
4392 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4393 			r = -EFAULT;
4394 			break;
4395 		}
4396 
4397 		if (!kvm_is_ucontrol(vcpu->kvm)) {
4398 			r = -EINVAL;
4399 			break;
4400 		}
4401 
4402 		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4403 				     ucasmap.vcpu_addr, ucasmap.length);
4404 		break;
4405 	}
4406 	case KVM_S390_UCAS_UNMAP: {
4407 		struct kvm_s390_ucas_mapping ucasmap;
4408 
4409 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4410 			r = -EFAULT;
4411 			break;
4412 		}
4413 
4414 		if (!kvm_is_ucontrol(vcpu->kvm)) {
4415 			r = -EINVAL;
4416 			break;
4417 		}
4418 
4419 		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
4420 			ucasmap.length);
4421 		break;
4422 	}
4423 #endif
4424 	case KVM_S390_VCPU_FAULT: {
4425 		r = gmap_fault(vcpu->arch.gmap, arg, 0);
4426 		break;
4427 	}
4428 	case KVM_ENABLE_CAP:
4429 	{
4430 		struct kvm_enable_cap cap;
4431 		r = -EFAULT;
4432 		if (copy_from_user(&cap, argp, sizeof(cap)))
4433 			break;
4434 		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4435 		break;
4436 	}
4437 	case KVM_S390_MEM_OP: {
4438 		struct kvm_s390_mem_op mem_op;
4439 
4440 		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4441 			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
4442 		else
4443 			r = -EFAULT;
4444 		break;
4445 	}
4446 	case KVM_S390_SET_IRQ_STATE: {
4447 		struct kvm_s390_irq_state irq_state;
4448 
4449 		r = -EFAULT;
4450 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4451 			break;
4452 		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4453 		    irq_state.len == 0 ||
4454 		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4455 			r = -EINVAL;
4456 			break;
4457 		}
4458 		/* do not use irq_state.flags, it will break old QEMUs */
4459 		r = kvm_s390_set_irq_state(vcpu,
4460 					   (void __user *) irq_state.buf,
4461 					   irq_state.len);
4462 		break;
4463 	}
4464 	case KVM_S390_GET_IRQ_STATE: {
4465 		struct kvm_s390_irq_state irq_state;
4466 
4467 		r = -EFAULT;
4468 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4469 			break;
4470 		if (irq_state.len == 0) {
4471 			r = -EINVAL;
4472 			break;
4473 		}
4474 		/* do not use irq_state.flags, it will break old QEMUs */
4475 		r = kvm_s390_get_irq_state(vcpu,
4476 					   (__u8 __user *)  irq_state.buf,
4477 					   irq_state.len);
4478 		break;
4479 	}
4480 	default:
4481 		r = -ENOTTY;
4482 	}
4483 
4484 	vcpu_put(vcpu);
4485 	return r;
4486 }
4487 
4488 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
4489 {
4490 #ifdef CONFIG_KVM_S390_UCONTROL
4491 	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
4492 		 && (kvm_is_ucontrol(vcpu->kvm))) {
4493 		vmf->page = virt_to_page(vcpu->arch.sie_block);
4494 		get_page(vmf->page);
4495 		return 0;
4496 	}
4497 #endif
4498 	return VM_FAULT_SIGBUS;
4499 }
4500 
/*
 * Architecture hook called when a memslot is created. Nothing has to be
 * set up here, so all arguments are ignored and 0 is always returned.
 */
int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	/* no architecture specific per-slot state */
	return 0;
}
4506 
4507 /* Section: memory related */
4508 int kvm_arch_prepare_memory_region(struct kvm *kvm,
4509 				   struct kvm_memory_slot *memslot,
4510 				   const struct kvm_userspace_memory_region *mem,
4511 				   enum kvm_mr_change change)
4512 {
4513 	/* A few sanity checks. We can have memory slots which have to be
4514 	   located/ended at a segment boundary (1MB). The memory in userland is
4515 	   ok to be fragmented into various different vmas. It is okay to mmap()
4516 	   and munmap() stuff in this slot after doing this call at any time */
4517 
4518 	if (mem->userspace_addr & 0xffffful)
4519 		return -EINVAL;
4520 
4521 	if (mem->memory_size & 0xffffful)
4522 		return -EINVAL;
4523 
4524 	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
4525 		return -EINVAL;
4526 
4527 	return 0;
4528 }
4529 
4530 void kvm_arch_commit_memory_region(struct kvm *kvm,
4531 				const struct kvm_userspace_memory_region *mem,
4532 				const struct kvm_memory_slot *old,
4533 				const struct kvm_memory_slot *new,
4534 				enum kvm_mr_change change)
4535 {
4536 	int rc = 0;
4537 
4538 	switch (change) {
4539 	case KVM_MR_DELETE:
4540 		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
4541 					old->npages * PAGE_SIZE);
4542 		break;
4543 	case KVM_MR_MOVE:
4544 		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
4545 					old->npages * PAGE_SIZE);
4546 		if (rc)
4547 			break;
4548 		/* FALLTHROUGH */
4549 	case KVM_MR_CREATE:
4550 		rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
4551 				      mem->guest_phys_addr, mem->memory_size);
4552 		break;
4553 	case KVM_MR_FLAGS_ONLY:
4554 		break;
4555 	default:
4556 		WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
4557 	}
4558 	if (rc)
4559 		pr_warn("failed to commit memory region\n");
4560 	return;
4561 }
4562 
4563 static inline unsigned long nonhyp_mask(int i)
4564 {
4565 	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
4566 
4567 	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
4568 }
4569 
/* Architecture hook: reset the vcpu's valid_wakeup flag to false. */
void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
{
	vcpu->valid_wakeup = false;
}
4574 
4575 static int __init kvm_s390_init(void)
4576 {
4577 	int i;
4578 
4579 	if (!sclp.has_sief2) {
4580 		pr_info("SIE is not available\n");
4581 		return -ENODEV;
4582 	}
4583 
4584 	if (nested && hpage) {
4585 		pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
4586 		return -EINVAL;
4587 	}
4588 
4589 	for (i = 0; i < 16; i++)
4590 		kvm_s390_fac_base[i] |=
4591 			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
4592 
4593 	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
4594 }
4595 
/* Module exit: hand over to the generic kvm_exit(). */
static void __exit kvm_s390_exit(void)
{
	kvm_exit();
}
4600 
/* Register the module entry and exit points defined above. */
module_init(kvm_s390_init);
module_exit(kvm_s390_exit);

/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");
4612