xref: /openbmc/linux/arch/s390/kvm/kvm-s390.c (revision ec2da07c)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * hosting IBM Z kernel virtual machines (s390x)
4  *
5  * Copyright IBM Corp. 2008, 2018
6  *
7  *    Author(s): Carsten Otte <cotte@de.ibm.com>
8  *               Christian Borntraeger <borntraeger@de.ibm.com>
9  *               Heiko Carstens <heiko.carstens@de.ibm.com>
10  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
11  *               Jason J. Herne <jjherne@us.ibm.com>
12  */
13 
/*
 * Log prefix for this file: pr_fmt() prepends KMSG_COMPONENT followed by
 * ": " to whatever format string it is applied to.
 */
14 #define KMSG_COMPONENT "kvm-s390"
15 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
16 
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/moduleparam.h>
27 #include <linux/random.h>
28 #include <linux/slab.h>
29 #include <linux/timer.h>
30 #include <linux/vmalloc.h>
31 #include <linux/bitmap.h>
32 #include <linux/sched/signal.h>
33 #include <linux/string.h>
34 
35 #include <asm/asm-offsets.h>
36 #include <asm/lowcore.h>
37 #include <asm/stp.h>
38 #include <asm/pgtable.h>
39 #include <asm/gmap.h>
40 #include <asm/nmi.h>
41 #include <asm/switch_to.h>
42 #include <asm/isc.h>
43 #include <asm/sclp.h>
44 #include <asm/cpacf.h>
45 #include <asm/timex.h>
46 #include <asm/ap.h>
47 #include "kvm-s390.h"
48 #include "gaccess.h"
49 
50 #define CREATE_TRACE_POINTS
51 #include "trace.h"
52 #include "trace-s390.h"
53 
54 #define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
55 #define LOCAL_IRQS 32
/* Size in bytes of KVM_MAX_VCPUS + LOCAL_IRQS entries of struct kvm_s390_irq */
56 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
57 			   (KVM_MAX_VCPUS + LOCAL_IRQS))
58 
/*
 * Initializer helpers for the statistics table: each expands to two
 * comma-separated values, the offset of the stat.x counter inside
 * struct kvm_vcpu (VCPU_STAT) or struct kvm (VM_STAT), followed by the
 * matching KVM_STAT_* kind.
 */
59 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
60 #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
61 
/*
 * Table of exported statistics counters. Every entry pairs a name with the
 * location of the counter, built by VCPU_STAT() for per-VCPU counters and
 * by VM_STAT() for per-VM counters. The { NULL } entry terminates the table.
 * NOTE(review): presumably walked by the generic KVM code to create the
 * debugfs files - the consumer is not visible in this file; confirm.
 */
62 struct kvm_stats_debugfs_item debugfs_entries[] = {
63 	{ "userspace_handled", VCPU_STAT(exit_userspace) },
64 	{ "exit_null", VCPU_STAT(exit_null) },
65 	{ "exit_validity", VCPU_STAT(exit_validity) },
66 	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
67 	{ "exit_external_request", VCPU_STAT(exit_external_request) },
68 	{ "exit_io_request", VCPU_STAT(exit_io_request) },
69 	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
70 	{ "exit_instruction", VCPU_STAT(exit_instruction) },
71 	{ "exit_pei", VCPU_STAT(exit_pei) },
72 	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
73 	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
74 	{ "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
75 	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
76 	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
77 	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
78 	{ "halt_no_poll_steal", VCPU_STAT(halt_no_poll_steal) },
79 	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
80 	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
81 	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
82 	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
83 	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
84 	{ "deliver_ckc", VCPU_STAT(deliver_ckc) },
85 	{ "deliver_cputm", VCPU_STAT(deliver_cputm) },
86 	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
87 	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
88 	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
89 	{ "deliver_virtio", VCPU_STAT(deliver_virtio) },
90 	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
91 	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
92 	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
93 	{ "deliver_program", VCPU_STAT(deliver_program) },
94 	{ "deliver_io", VCPU_STAT(deliver_io) },
95 	{ "deliver_machine_check", VCPU_STAT(deliver_machine_check) },
96 	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
97 	{ "inject_ckc", VCPU_STAT(inject_ckc) },
98 	{ "inject_cputm", VCPU_STAT(inject_cputm) },
99 	{ "inject_external_call", VCPU_STAT(inject_external_call) },
100 	{ "inject_float_mchk", VM_STAT(inject_float_mchk) },
101 	{ "inject_emergency_signal", VCPU_STAT(inject_emergency_signal) },
102 	{ "inject_io", VM_STAT(inject_io) },
103 	{ "inject_mchk", VCPU_STAT(inject_mchk) },
104 	{ "inject_pfault_done", VM_STAT(inject_pfault_done) },
105 	{ "inject_program", VCPU_STAT(inject_program) },
106 	{ "inject_restart", VCPU_STAT(inject_restart) },
107 	{ "inject_service_signal", VM_STAT(inject_service_signal) },
108 	{ "inject_set_prefix", VCPU_STAT(inject_set_prefix) },
109 	{ "inject_stop_signal", VCPU_STAT(inject_stop_signal) },
110 	{ "inject_pfault_init", VCPU_STAT(inject_pfault_init) },
111 	{ "inject_virtio", VM_STAT(inject_virtio) },
112 	{ "instruction_epsw", VCPU_STAT(instruction_epsw) },
113 	{ "instruction_gs", VCPU_STAT(instruction_gs) },
114 	{ "instruction_io_other", VCPU_STAT(instruction_io_other) },
115 	{ "instruction_lpsw", VCPU_STAT(instruction_lpsw) },
116 	{ "instruction_lpswe", VCPU_STAT(instruction_lpswe) },
117 	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
118 	{ "instruction_ptff", VCPU_STAT(instruction_ptff) },
119 	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
120 	{ "instruction_sck", VCPU_STAT(instruction_sck) },
121 	{ "instruction_sckpf", VCPU_STAT(instruction_sckpf) },
122 	{ "instruction_spx", VCPU_STAT(instruction_spx) },
123 	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
124 	{ "instruction_stap", VCPU_STAT(instruction_stap) },
125 	{ "instruction_iske", VCPU_STAT(instruction_iske) },
126 	{ "instruction_ri", VCPU_STAT(instruction_ri) },
127 	{ "instruction_rrbe", VCPU_STAT(instruction_rrbe) },
128 	{ "instruction_sske", VCPU_STAT(instruction_sske) },
129 	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
130 	{ "instruction_essa", VCPU_STAT(instruction_essa) },
131 	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
132 	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
133 	{ "instruction_tb", VCPU_STAT(instruction_tb) },
134 	{ "instruction_tpi", VCPU_STAT(instruction_tpi) },
135 	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
136 	{ "instruction_tsch", VCPU_STAT(instruction_tsch) },
137 	{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
138 	{ "instruction_sie", VCPU_STAT(instruction_sie) },
139 	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
140 	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
141 	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
142 	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
143 	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
144 	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
145 	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
146 	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
147 	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
148 	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
149 	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
150 	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
151 	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
152 	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
153 	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
154 	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
155 	{ "instruction_diag_10", VCPU_STAT(diagnose_10) },
156 	{ "instruction_diag_44", VCPU_STAT(diagnose_44) },
157 	{ "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
158 	{ "instruction_diag_258", VCPU_STAT(diagnose_258) },
159 	{ "instruction_diag_308", VCPU_STAT(diagnose_308) },
160 	{ "instruction_diag_500", VCPU_STAT(diagnose_500) },
161 	{ "instruction_diag_other", VCPU_STAT(diagnose_other) },
162 	{ NULL }
163 };
164 
/*
 * Packed 16-byte layout: one byte of epoch index, the 64-bit TOD value and
 * seven reserved bytes. __packed places tod directly behind epoch_idx
 * without padding.
 * NOTE(review): presumably the operand format of an extended store-clock
 * operation; the user is not visible in this part of the file - confirm.
 */
165 struct kvm_s390_tod_clock_ext {
166 	__u8 epoch_idx;
167 	__u64 tod;
168 	__u8 reserved[7];
169 } __packed;
170 
171 /* allow nested virtualization in KVM (if enabled by user space) */
172 static int nested;
173 module_param(nested, int, S_IRUGO);
174 MODULE_PARM_DESC(nested, "Nested virtualization support");
175 
176 /* allow 1m huge page guest backing, if !nested */
177 static int hpage;
178 module_param(hpage, int, 0444);
179 MODULE_PARM_DESC(hpage, "1m huge page backing support");
180 
181 /* maximum percentage of steal time for polling.  >100 is treated like 100 */
182 static u8 halt_poll_max_steal = 10;
183 module_param(halt_poll_max_steal, byte, 0644);
184 MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
185 
186 /*
187  * For now we handle at most 16 double words as this is what the s390 base
188  * kernel handles and stores in the prefix page. If we ever need to go beyond
189  * this, this requires changes to code, but the external uapi can stay.
190  */
/* Unit is unsigned long elements; it sizes both facility arrays below. */
191 #define SIZE_INTERNAL 16
192 
193 /*
194  * Base feature mask that defines default mask for facilities. Consists of the
195  * defines in FACILITIES_KVM and the non-hypervisor managed bits.
196  */
197 static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
198 /*
199  * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
200  * and defines the facilities that can be enabled via a cpu model.
201  */
202 static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
203 
204 static unsigned long kvm_s390_fac_size(void)
205 {
206 	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
207 	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
208 	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
209 		sizeof(S390_lowcore.stfle_fac_list));
210 
211 	return SIZE_INTERNAL;
212 }
213 
214 /* available cpu features supported by kvm */
215 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
216 /* available subfunctions indicated via query / "test bit" */
217 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
218 
/*
 * gmap pte notifiers; their callbacks are filled in and registered in
 * kvm_arch_hardware_setup() and unregistered in kvm_arch_hardware_unsetup().
 */
219 static struct gmap_notifier gmap_notifier;
220 static struct gmap_notifier vsie_gmap_notifier;
/* s390 debug feature handle, registered as "kvm-trace" in kvm_arch_init() */
221 debug_info_t *kvm_s390_dbf;
222 
223 /* Section: not file related */
/*
 * Hardware enable hook. There is nothing to switch on, so this always
 * reports success (0).
 */
int kvm_arch_hardware_enable(void)
{
	return 0;	/* every s390 is virtualization enabled ;-) */
}
229 
/* Processor compatibility hook: no check is performed, always returns 0. */
int kvm_arch_check_processor_compat(void)
{
	return 0;
}
234 
235 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
236 			      unsigned long end);
237 
238 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
239 {
240 	u8 delta_idx = 0;
241 
242 	/*
243 	 * The TOD jumps by delta, we have to compensate this by adding
244 	 * -delta to the epoch.
245 	 */
246 	delta = -delta;
247 
248 	/* sign-extension - we're adding to signed values below */
249 	if ((s64)delta < 0)
250 		delta_idx = -1;
251 
252 	scb->epoch += delta;
253 	if (scb->ecd & ECD_MEF) {
254 		scb->epdx += delta_idx;
255 		if (scb->epoch < delta)
256 			scb->epdx += 1;
257 	}
258 }
259 
260 /*
261  * This callback is executed during stop_machine(). All CPUs are therefore
262  * temporarily stopped. In order not to change guest behavior, we have to
263  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
264  * so a CPU won't be stopped while calculating with the epoch.
265  */
266 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
267 			  void *v)
268 {
269 	struct kvm *kvm;
270 	struct kvm_vcpu *vcpu;
271 	int i;
272 	unsigned long long *delta = v;
273 
274 	list_for_each_entry(kvm, &vm_list, vm_list) {
275 		kvm_for_each_vcpu(i, vcpu, kvm) {
276 			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
277 			if (i == 0) {
278 				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
279 				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
280 			}
281 			if (vcpu->arch.cputm_enabled)
282 				vcpu->arch.cputm_start += *delta;
283 			if (vcpu->arch.vsie_block)
284 				kvm_clock_sync_scb(vcpu->arch.vsie_block,
285 						   *delta);
286 		}
287 	}
288 	return NOTIFY_OK;
289 }
290 
/*
 * Notifier block that routes to kvm_clock_sync(); it is registered on
 * s390_epoch_delta_notifier in kvm_arch_hardware_setup().
 */
291 static struct notifier_block kvm_clock_notifier = {
292 	.notifier_call = kvm_clock_sync,
293 };
294 
295 int kvm_arch_hardware_setup(void)
296 {
297 	gmap_notifier.notifier_call = kvm_gmap_notifier;
298 	gmap_register_pte_notifier(&gmap_notifier);
299 	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
300 	gmap_register_pte_notifier(&vsie_gmap_notifier);
301 	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
302 				       &kvm_clock_notifier);
303 	return 0;
304 }
305 
306 void kvm_arch_hardware_unsetup(void)
307 {
308 	gmap_unregister_pte_notifier(&gmap_notifier);
309 	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
310 	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
311 					 &kvm_clock_notifier);
312 }
313 
314 static void allow_cpu_feat(unsigned long nr)
315 {
316 	set_bit_inv(nr, kvm_s390_available_cpu_feat);
317 }
318 
/*
 * Probe one PLO function code using the instruction's "test bit" form.
 *
 * General register 0 is loaded with @nr combined with 0x100, the PLO
 * instruction is issued and the resulting condition code is extracted with
 * ipm/srl. Returns 1 when the condition code is 0 and 0 otherwise;
 * kvm_s390_cpu_feat_init() records a 1 as "subfunction available".
 */
319 static inline int plo_test_bit(unsigned char nr)
320 {
321 	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
322 	int cc;
323 
324 	asm volatile(
325 		/* Parameter registers are ignored for "test bit" */
326 		"	plo	0,0,0,0(0)\n"
327 		"	ipm	%0\n"
328 		"	srl	%0,28\n"
329 		: "=d" (cc)
330 		: "d" (r0)
331 		: "cc");
332 	return cc == 0;
333 }
334 
/*
 * Run the query function (function code 0 in general register 0) of the
 * RRF-format instruction identified by @opcode and store its result in the
 * 32-byte buffer @query, whose address is passed in general register 1.
 * Used below for SORTL and DFLTCC.
 */
335 static inline void __insn32_query(unsigned int opcode, u8 query[32])
336 {
337 	register unsigned long r0 asm("0") = 0;	/* query function */
338 	register unsigned long r1 asm("1") = (unsigned long) query;
339 
340 	asm volatile(
341 		/* Parameter regs are ignored */
342 		"	.insn	rrf,%[opc] << 16,2,4,6,0\n"
343 		: "=m" (*query)
344 		: "d" (r0), "a" (r1), [opc] "i" (opcode)
345 		: "cc");
346 }
347 
348 #define INSN_SORTL 0xb938
349 #define INSN_DFLTCC 0xb939
350 
351 static void kvm_s390_cpu_feat_init(void)
352 {
353 	int i;
354 
355 	for (i = 0; i < 256; ++i) {
356 		if (plo_test_bit(i))
357 			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
358 	}
359 
360 	if (test_facility(28)) /* TOD-clock steering */
361 		ptff(kvm_s390_available_subfunc.ptff,
362 		     sizeof(kvm_s390_available_subfunc.ptff),
363 		     PTFF_QAF);
364 
365 	if (test_facility(17)) { /* MSA */
366 		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
367 			      kvm_s390_available_subfunc.kmac);
368 		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
369 			      kvm_s390_available_subfunc.kmc);
370 		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
371 			      kvm_s390_available_subfunc.km);
372 		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
373 			      kvm_s390_available_subfunc.kimd);
374 		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
375 			      kvm_s390_available_subfunc.klmd);
376 	}
377 	if (test_facility(76)) /* MSA3 */
378 		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
379 			      kvm_s390_available_subfunc.pckmo);
380 	if (test_facility(77)) { /* MSA4 */
381 		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
382 			      kvm_s390_available_subfunc.kmctr);
383 		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
384 			      kvm_s390_available_subfunc.kmf);
385 		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
386 			      kvm_s390_available_subfunc.kmo);
387 		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
388 			      kvm_s390_available_subfunc.pcc);
389 	}
390 	if (test_facility(57)) /* MSA5 */
391 		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
392 			      kvm_s390_available_subfunc.ppno);
393 
394 	if (test_facility(146)) /* MSA8 */
395 		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
396 			      kvm_s390_available_subfunc.kma);
397 
398 	if (test_facility(155)) /* MSA9 */
399 		__cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
400 			      kvm_s390_available_subfunc.kdsa);
401 
402 	if (test_facility(150)) /* SORTL */
403 		__insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
404 
405 	if (test_facility(151)) /* DFLTCC */
406 		__insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
407 
408 	if (MACHINE_HAS_ESOP)
409 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
410 	/*
411 	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
412 	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
413 	 */
414 	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
415 	    !test_facility(3) || !nested)
416 		return;
417 	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
418 	if (sclp.has_64bscao)
419 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
420 	if (sclp.has_siif)
421 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
422 	if (sclp.has_gpere)
423 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
424 	if (sclp.has_gsls)
425 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
426 	if (sclp.has_ib)
427 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
428 	if (sclp.has_cei)
429 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
430 	if (sclp.has_ibs)
431 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
432 	if (sclp.has_kss)
433 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
434 	/*
435 	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
436 	 * all skey handling functions read/set the skey from the PGSTE
437 	 * instead of the real storage key.
438 	 *
439 	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
440 	 * pages being detected as preserved although they are resident.
441 	 *
442 	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
443 	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
444 	 *
445 	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
446 	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
447 	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
448 	 *
449 	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
450 	 * cannot easily shadow the SCA because of the ipte lock.
451 	 */
452 }
453 
454 int kvm_arch_init(void *opaque)
455 {
456 	int rc;
457 
458 	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
459 	if (!kvm_s390_dbf)
460 		return -ENOMEM;
461 
462 	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
463 		rc = -ENOMEM;
464 		goto out_debug_unreg;
465 	}
466 
467 	kvm_s390_cpu_feat_init();
468 
469 	/* Register floating interrupt controller interface. */
470 	rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
471 	if (rc) {
472 		pr_err("A FLIC registration call failed with rc=%d\n", rc);
473 		goto out_debug_unreg;
474 	}
475 
476 	rc = kvm_s390_gib_init(GAL_ISC);
477 	if (rc)
478 		goto out_gib_destroy;
479 
480 	return 0;
481 
482 out_gib_destroy:
483 	kvm_s390_gib_destroy();
484 out_debug_unreg:
485 	debug_unregister(kvm_s390_dbf);
486 	return rc;
487 }
488 
489 void kvm_arch_exit(void)
490 {
491 	kvm_s390_gib_destroy();
492 	debug_unregister(kvm_s390_dbf);
493 }
494 
495 /* Section: device related */
496 long kvm_arch_dev_ioctl(struct file *filp,
497 			unsigned int ioctl, unsigned long arg)
498 {
499 	if (ioctl == KVM_S390_ENABLE_SIE)
500 		return s390_enable_sie();
501 	return -EINVAL;
502 }
503 
504 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
505 {
506 	int r;
507 
508 	switch (ext) {
509 	case KVM_CAP_S390_PSW:
510 	case KVM_CAP_S390_GMAP:
511 	case KVM_CAP_SYNC_MMU:
512 #ifdef CONFIG_KVM_S390_UCONTROL
513 	case KVM_CAP_S390_UCONTROL:
514 #endif
515 	case KVM_CAP_ASYNC_PF:
516 	case KVM_CAP_SYNC_REGS:
517 	case KVM_CAP_ONE_REG:
518 	case KVM_CAP_ENABLE_CAP:
519 	case KVM_CAP_S390_CSS_SUPPORT:
520 	case KVM_CAP_IOEVENTFD:
521 	case KVM_CAP_DEVICE_CTRL:
522 	case KVM_CAP_S390_IRQCHIP:
523 	case KVM_CAP_VM_ATTRIBUTES:
524 	case KVM_CAP_MP_STATE:
525 	case KVM_CAP_IMMEDIATE_EXIT:
526 	case KVM_CAP_S390_INJECT_IRQ:
527 	case KVM_CAP_S390_USER_SIGP:
528 	case KVM_CAP_S390_USER_STSI:
529 	case KVM_CAP_S390_SKEYS:
530 	case KVM_CAP_S390_IRQ_STATE:
531 	case KVM_CAP_S390_USER_INSTR0:
532 	case KVM_CAP_S390_CMMA_MIGRATION:
533 	case KVM_CAP_S390_AIS:
534 	case KVM_CAP_S390_AIS_MIGRATION:
535 		r = 1;
536 		break;
537 	case KVM_CAP_S390_HPAGE_1M:
538 		r = 0;
539 		if (hpage && !kvm_is_ucontrol(kvm))
540 			r = 1;
541 		break;
542 	case KVM_CAP_S390_MEM_OP:
543 		r = MEM_OP_MAX_SIZE;
544 		break;
545 	case KVM_CAP_NR_VCPUS:
546 	case KVM_CAP_MAX_VCPUS:
547 	case KVM_CAP_MAX_VCPU_ID:
548 		r = KVM_S390_BSCA_CPU_SLOTS;
549 		if (!kvm_s390_use_sca_entries())
550 			r = KVM_MAX_VCPUS;
551 		else if (sclp.has_esca && sclp.has_64bscao)
552 			r = KVM_S390_ESCA_CPU_SLOTS;
553 		break;
554 	case KVM_CAP_S390_COW:
555 		r = MACHINE_HAS_ESOP;
556 		break;
557 	case KVM_CAP_S390_VECTOR_REGISTERS:
558 		r = MACHINE_HAS_VX;
559 		break;
560 	case KVM_CAP_S390_RI:
561 		r = test_facility(64);
562 		break;
563 	case KVM_CAP_S390_GS:
564 		r = test_facility(133);
565 		break;
566 	case KVM_CAP_S390_BPB:
567 		r = test_facility(82);
568 		break;
569 	default:
570 		r = 0;
571 	}
572 	return r;
573 }
574 
575 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
576 				    struct kvm_memory_slot *memslot)
577 {
578 	int i;
579 	gfn_t cur_gfn, last_gfn;
580 	unsigned long gaddr, vmaddr;
581 	struct gmap *gmap = kvm->arch.gmap;
582 	DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
583 
584 	/* Loop over all guest segments */
585 	cur_gfn = memslot->base_gfn;
586 	last_gfn = memslot->base_gfn + memslot->npages;
587 	for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
588 		gaddr = gfn_to_gpa(cur_gfn);
589 		vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
590 		if (kvm_is_error_hva(vmaddr))
591 			continue;
592 
593 		bitmap_zero(bitmap, _PAGE_ENTRIES);
594 		gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
595 		for (i = 0; i < _PAGE_ENTRIES; i++) {
596 			if (test_bit(i, bitmap))
597 				mark_page_dirty(kvm, cur_gfn + i);
598 		}
599 
600 		if (fatal_signal_pending(current))
601 			return;
602 		cond_resched();
603 	}
604 }
605 
606 /* Section: vm related */
607 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
608 
609 /*
610  * Get (and clear) the dirty memory log for a memory slot.
611  */
612 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
613 			       struct kvm_dirty_log *log)
614 {
615 	int r;
616 	unsigned long n;
617 	struct kvm_memslots *slots;
618 	struct kvm_memory_slot *memslot;
619 	int is_dirty = 0;
620 
621 	if (kvm_is_ucontrol(kvm))
622 		return -EINVAL;
623 
624 	mutex_lock(&kvm->slots_lock);
625 
626 	r = -EINVAL;
627 	if (log->slot >= KVM_USER_MEM_SLOTS)
628 		goto out;
629 
630 	slots = kvm_memslots(kvm);
631 	memslot = id_to_memslot(slots, log->slot);
632 	r = -ENOENT;
633 	if (!memslot->dirty_bitmap)
634 		goto out;
635 
636 	kvm_s390_sync_dirty_log(kvm, memslot);
637 	r = kvm_get_dirty_log(kvm, log, &is_dirty);
638 	if (r)
639 		goto out;
640 
641 	/* Clear the dirty log */
642 	if (is_dirty) {
643 		n = kvm_dirty_bitmap_bytes(memslot);
644 		memset(memslot->dirty_bitmap, 0, n);
645 	}
646 	r = 0;
647 out:
648 	mutex_unlock(&kvm->slots_lock);
649 	return r;
650 }
651 
652 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
653 {
654 	unsigned int i;
655 	struct kvm_vcpu *vcpu;
656 
657 	kvm_for_each_vcpu(i, vcpu, kvm) {
658 		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
659 	}
660 }
661 
662 int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
663 {
664 	int r;
665 
666 	if (cap->flags)
667 		return -EINVAL;
668 
669 	switch (cap->cap) {
670 	case KVM_CAP_S390_IRQCHIP:
671 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
672 		kvm->arch.use_irqchip = 1;
673 		r = 0;
674 		break;
675 	case KVM_CAP_S390_USER_SIGP:
676 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
677 		kvm->arch.user_sigp = 1;
678 		r = 0;
679 		break;
680 	case KVM_CAP_S390_VECTOR_REGISTERS:
681 		mutex_lock(&kvm->lock);
682 		if (kvm->created_vcpus) {
683 			r = -EBUSY;
684 		} else if (MACHINE_HAS_VX) {
685 			set_kvm_facility(kvm->arch.model.fac_mask, 129);
686 			set_kvm_facility(kvm->arch.model.fac_list, 129);
687 			if (test_facility(134)) {
688 				set_kvm_facility(kvm->arch.model.fac_mask, 134);
689 				set_kvm_facility(kvm->arch.model.fac_list, 134);
690 			}
691 			if (test_facility(135)) {
692 				set_kvm_facility(kvm->arch.model.fac_mask, 135);
693 				set_kvm_facility(kvm->arch.model.fac_list, 135);
694 			}
695 			if (test_facility(148)) {
696 				set_kvm_facility(kvm->arch.model.fac_mask, 148);
697 				set_kvm_facility(kvm->arch.model.fac_list, 148);
698 			}
699 			if (test_facility(152)) {
700 				set_kvm_facility(kvm->arch.model.fac_mask, 152);
701 				set_kvm_facility(kvm->arch.model.fac_list, 152);
702 			}
703 			r = 0;
704 		} else
705 			r = -EINVAL;
706 		mutex_unlock(&kvm->lock);
707 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
708 			 r ? "(not available)" : "(success)");
709 		break;
710 	case KVM_CAP_S390_RI:
711 		r = -EINVAL;
712 		mutex_lock(&kvm->lock);
713 		if (kvm->created_vcpus) {
714 			r = -EBUSY;
715 		} else if (test_facility(64)) {
716 			set_kvm_facility(kvm->arch.model.fac_mask, 64);
717 			set_kvm_facility(kvm->arch.model.fac_list, 64);
718 			r = 0;
719 		}
720 		mutex_unlock(&kvm->lock);
721 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
722 			 r ? "(not available)" : "(success)");
723 		break;
724 	case KVM_CAP_S390_AIS:
725 		mutex_lock(&kvm->lock);
726 		if (kvm->created_vcpus) {
727 			r = -EBUSY;
728 		} else {
729 			set_kvm_facility(kvm->arch.model.fac_mask, 72);
730 			set_kvm_facility(kvm->arch.model.fac_list, 72);
731 			r = 0;
732 		}
733 		mutex_unlock(&kvm->lock);
734 		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
735 			 r ? "(not available)" : "(success)");
736 		break;
737 	case KVM_CAP_S390_GS:
738 		r = -EINVAL;
739 		mutex_lock(&kvm->lock);
740 		if (kvm->created_vcpus) {
741 			r = -EBUSY;
742 		} else if (test_facility(133)) {
743 			set_kvm_facility(kvm->arch.model.fac_mask, 133);
744 			set_kvm_facility(kvm->arch.model.fac_list, 133);
745 			r = 0;
746 		}
747 		mutex_unlock(&kvm->lock);
748 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
749 			 r ? "(not available)" : "(success)");
750 		break;
751 	case KVM_CAP_S390_HPAGE_1M:
752 		mutex_lock(&kvm->lock);
753 		if (kvm->created_vcpus)
754 			r = -EBUSY;
755 		else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
756 			r = -EINVAL;
757 		else {
758 			r = 0;
759 			down_write(&kvm->mm->mmap_sem);
760 			kvm->mm->context.allow_gmap_hpage_1m = 1;
761 			up_write(&kvm->mm->mmap_sem);
762 			/*
763 			 * We might have to create fake 4k page
764 			 * tables. To avoid that the hardware works on
765 			 * stale PGSTEs, we emulate these instructions.
766 			 */
767 			kvm->arch.use_skf = 0;
768 			kvm->arch.use_pfmfi = 0;
769 		}
770 		mutex_unlock(&kvm->lock);
771 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
772 			 r ? "(not available)" : "(success)");
773 		break;
774 	case KVM_CAP_S390_USER_STSI:
775 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
776 		kvm->arch.user_stsi = 1;
777 		r = 0;
778 		break;
779 	case KVM_CAP_S390_USER_INSTR0:
780 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
781 		kvm->arch.user_instr0 = 1;
782 		icpt_operexc_on_all_vcpus(kvm);
783 		r = 0;
784 		break;
785 	default:
786 		r = -EINVAL;
787 		break;
788 	}
789 	return r;
790 }
791 
792 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
793 {
794 	int ret;
795 
796 	switch (attr->attr) {
797 	case KVM_S390_VM_MEM_LIMIT_SIZE:
798 		ret = 0;
799 		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
800 			 kvm->arch.mem_limit);
801 		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
802 			ret = -EFAULT;
803 		break;
804 	default:
805 		ret = -ENXIO;
806 		break;
807 	}
808 	return ret;
809 }
810 
811 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
812 {
813 	int ret;
814 	unsigned int idx;
815 	switch (attr->attr) {
816 	case KVM_S390_VM_MEM_ENABLE_CMMA:
817 		ret = -ENXIO;
818 		if (!sclp.has_cmma)
819 			break;
820 
821 		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
822 		mutex_lock(&kvm->lock);
823 		if (kvm->created_vcpus)
824 			ret = -EBUSY;
825 		else if (kvm->mm->context.allow_gmap_hpage_1m)
826 			ret = -EINVAL;
827 		else {
828 			kvm->arch.use_cmma = 1;
829 			/* Not compatible with cmma. */
830 			kvm->arch.use_pfmfi = 0;
831 			ret = 0;
832 		}
833 		mutex_unlock(&kvm->lock);
834 		break;
835 	case KVM_S390_VM_MEM_CLR_CMMA:
836 		ret = -ENXIO;
837 		if (!sclp.has_cmma)
838 			break;
839 		ret = -EINVAL;
840 		if (!kvm->arch.use_cmma)
841 			break;
842 
843 		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
844 		mutex_lock(&kvm->lock);
845 		idx = srcu_read_lock(&kvm->srcu);
846 		s390_reset_cmma(kvm->arch.gmap->mm);
847 		srcu_read_unlock(&kvm->srcu, idx);
848 		mutex_unlock(&kvm->lock);
849 		ret = 0;
850 		break;
851 	case KVM_S390_VM_MEM_LIMIT_SIZE: {
852 		unsigned long new_limit;
853 
854 		if (kvm_is_ucontrol(kvm))
855 			return -EINVAL;
856 
857 		if (get_user(new_limit, (u64 __user *)attr->addr))
858 			return -EFAULT;
859 
860 		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
861 		    new_limit > kvm->arch.mem_limit)
862 			return -E2BIG;
863 
864 		if (!new_limit)
865 			return -EINVAL;
866 
867 		/* gmap_create takes last usable address */
868 		if (new_limit != KVM_S390_NO_MEM_LIMIT)
869 			new_limit -= 1;
870 
871 		ret = -EBUSY;
872 		mutex_lock(&kvm->lock);
873 		if (!kvm->created_vcpus) {
874 			/* gmap_create will round the limit up */
875 			struct gmap *new = gmap_create(current->mm, new_limit);
876 
877 			if (!new) {
878 				ret = -ENOMEM;
879 			} else {
880 				gmap_remove(kvm->arch.gmap);
881 				new->private = kvm;
882 				kvm->arch.gmap = new;
883 				ret = 0;
884 			}
885 		}
886 		mutex_unlock(&kvm->lock);
887 		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
888 		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
889 			 (void *) kvm->arch.gmap->asce);
890 		break;
891 	}
892 	default:
893 		ret = -ENXIO;
894 		break;
895 	}
896 	return ret;
897 }
898 
899 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
900 
901 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
902 {
903 	struct kvm_vcpu *vcpu;
904 	int i;
905 
906 	kvm_s390_vcpu_block_all(kvm);
907 
908 	kvm_for_each_vcpu(i, vcpu, kvm) {
909 		kvm_s390_vcpu_crypto_setup(vcpu);
910 		/* recreate the shadow crycb by leaving the VSIE handler */
911 		kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
912 	}
913 
914 	kvm_s390_vcpu_unblock_all(kvm);
915 }
916 
917 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
918 {
919 	mutex_lock(&kvm->lock);
920 	switch (attr->attr) {
921 	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
922 		if (!test_kvm_facility(kvm, 76)) {
923 			mutex_unlock(&kvm->lock);
924 			return -EINVAL;
925 		}
926 		get_random_bytes(
927 			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
928 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
929 		kvm->arch.crypto.aes_kw = 1;
930 		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
931 		break;
932 	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
933 		if (!test_kvm_facility(kvm, 76)) {
934 			mutex_unlock(&kvm->lock);
935 			return -EINVAL;
936 		}
937 		get_random_bytes(
938 			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
939 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
940 		kvm->arch.crypto.dea_kw = 1;
941 		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
942 		break;
943 	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
944 		if (!test_kvm_facility(kvm, 76)) {
945 			mutex_unlock(&kvm->lock);
946 			return -EINVAL;
947 		}
948 		kvm->arch.crypto.aes_kw = 0;
949 		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
950 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
951 		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
952 		break;
953 	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
954 		if (!test_kvm_facility(kvm, 76)) {
955 			mutex_unlock(&kvm->lock);
956 			return -EINVAL;
957 		}
958 		kvm->arch.crypto.dea_kw = 0;
959 		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
960 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
961 		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
962 		break;
963 	case KVM_S390_VM_CRYPTO_ENABLE_APIE:
964 		if (!ap_instructions_available()) {
965 			mutex_unlock(&kvm->lock);
966 			return -EOPNOTSUPP;
967 		}
968 		kvm->arch.crypto.apie = 1;
969 		break;
970 	case KVM_S390_VM_CRYPTO_DISABLE_APIE:
971 		if (!ap_instructions_available()) {
972 			mutex_unlock(&kvm->lock);
973 			return -EOPNOTSUPP;
974 		}
975 		kvm->arch.crypto.apie = 0;
976 		break;
977 	default:
978 		mutex_unlock(&kvm->lock);
979 		return -ENXIO;
980 	}
981 
982 	kvm_s390_vcpu_crypto_reset_all(kvm);
983 	mutex_unlock(&kvm->lock);
984 	return 0;
985 }
986 
987 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
988 {
989 	int cx;
990 	struct kvm_vcpu *vcpu;
991 
992 	kvm_for_each_vcpu(cx, vcpu, kvm)
993 		kvm_s390_sync_request(req, vcpu);
994 }
995 
996 /*
997  * Must be called with kvm->srcu held to avoid races on memslots, and with
998  * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
999  */
1000 static int kvm_s390_vm_start_migration(struct kvm *kvm)
1001 {
1002 	struct kvm_memory_slot *ms;
1003 	struct kvm_memslots *slots;
1004 	unsigned long ram_pages = 0;
1005 	int slotnr;
1006 
1007 	/* migration mode already enabled */
1008 	if (kvm->arch.migration_mode)
1009 		return 0;
1010 	slots = kvm_memslots(kvm);
1011 	if (!slots || !slots->used_slots)
1012 		return -EINVAL;
1013 
1014 	if (!kvm->arch.use_cmma) {
1015 		kvm->arch.migration_mode = 1;
1016 		return 0;
1017 	}
1018 	/* mark all the pages in active slots as dirty */
1019 	for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
1020 		ms = slots->memslots + slotnr;
1021 		/*
1022 		 * The second half of the bitmap is only used on x86,
1023 		 * and would be wasted otherwise, so we put it to good
1024 		 * use here to keep track of the state of the storage
1025 		 * attributes.
1026 		 */
1027 		memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
1028 		ram_pages += ms->npages;
1029 	}
1030 	atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
1031 	kvm->arch.migration_mode = 1;
1032 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
1033 	return 0;
1034 }
1035 
1036 /*
1037  * Must be called with kvm->slots_lock to avoid races with ourselves and
1038  * kvm_s390_vm_start_migration.
1039  */
1040 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
1041 {
1042 	/* migration mode already disabled */
1043 	if (!kvm->arch.migration_mode)
1044 		return 0;
1045 	kvm->arch.migration_mode = 0;
1046 	if (kvm->arch.use_cmma)
1047 		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
1048 	return 0;
1049 }
1050 
1051 static int kvm_s390_vm_set_migration(struct kvm *kvm,
1052 				     struct kvm_device_attr *attr)
1053 {
1054 	int res = -ENXIO;
1055 
1056 	mutex_lock(&kvm->slots_lock);
1057 	switch (attr->attr) {
1058 	case KVM_S390_VM_MIGRATION_START:
1059 		res = kvm_s390_vm_start_migration(kvm);
1060 		break;
1061 	case KVM_S390_VM_MIGRATION_STOP:
1062 		res = kvm_s390_vm_stop_migration(kvm);
1063 		break;
1064 	default:
1065 		break;
1066 	}
1067 	mutex_unlock(&kvm->slots_lock);
1068 
1069 	return res;
1070 }
1071 
1072 static int kvm_s390_vm_get_migration(struct kvm *kvm,
1073 				     struct kvm_device_attr *attr)
1074 {
1075 	u64 mig = kvm->arch.migration_mode;
1076 
1077 	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1078 		return -ENXIO;
1079 
1080 	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
1081 		return -EFAULT;
1082 	return 0;
1083 }
1084 
1085 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1086 {
1087 	struct kvm_s390_vm_tod_clock gtod;
1088 
1089 	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1090 		return -EFAULT;
1091 
1092 	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1093 		return -EINVAL;
1094 	kvm_s390_set_tod_clock(kvm, &gtod);
1095 
1096 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1097 		gtod.epoch_idx, gtod.tod);
1098 
1099 	return 0;
1100 }
1101 
1102 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1103 {
1104 	u8 gtod_high;
1105 
1106 	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1107 					   sizeof(gtod_high)))
1108 		return -EFAULT;
1109 
1110 	if (gtod_high != 0)
1111 		return -EINVAL;
1112 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1113 
1114 	return 0;
1115 }
1116 
1117 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1118 {
1119 	struct kvm_s390_vm_tod_clock gtod = { 0 };
1120 
1121 	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1122 			   sizeof(gtod.tod)))
1123 		return -EFAULT;
1124 
1125 	kvm_s390_set_tod_clock(kvm, &gtod);
1126 	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1127 	return 0;
1128 }
1129 
1130 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1131 {
1132 	int ret;
1133 
1134 	if (attr->flags)
1135 		return -EINVAL;
1136 
1137 	switch (attr->attr) {
1138 	case KVM_S390_VM_TOD_EXT:
1139 		ret = kvm_s390_set_tod_ext(kvm, attr);
1140 		break;
1141 	case KVM_S390_VM_TOD_HIGH:
1142 		ret = kvm_s390_set_tod_high(kvm, attr);
1143 		break;
1144 	case KVM_S390_VM_TOD_LOW:
1145 		ret = kvm_s390_set_tod_low(kvm, attr);
1146 		break;
1147 	default:
1148 		ret = -ENXIO;
1149 		break;
1150 	}
1151 	return ret;
1152 }
1153 
1154 static void kvm_s390_get_tod_clock(struct kvm *kvm,
1155 				   struct kvm_s390_vm_tod_clock *gtod)
1156 {
1157 	struct kvm_s390_tod_clock_ext htod;
1158 
1159 	preempt_disable();
1160 
1161 	get_tod_clock_ext((char *)&htod);
1162 
1163 	gtod->tod = htod.tod + kvm->arch.epoch;
1164 	gtod->epoch_idx = 0;
1165 	if (test_kvm_facility(kvm, 139)) {
1166 		gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
1167 		if (gtod->tod < htod.tod)
1168 			gtod->epoch_idx += 1;
1169 	}
1170 
1171 	preempt_enable();
1172 }
1173 
1174 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1175 {
1176 	struct kvm_s390_vm_tod_clock gtod;
1177 
1178 	memset(&gtod, 0, sizeof(gtod));
1179 	kvm_s390_get_tod_clock(kvm, &gtod);
1180 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1181 		return -EFAULT;
1182 
1183 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1184 		gtod.epoch_idx, gtod.tod);
1185 	return 0;
1186 }
1187 
1188 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1189 {
1190 	u8 gtod_high = 0;
1191 
1192 	if (copy_to_user((void __user *)attr->addr, &gtod_high,
1193 					 sizeof(gtod_high)))
1194 		return -EFAULT;
1195 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1196 
1197 	return 0;
1198 }
1199 
1200 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1201 {
1202 	u64 gtod;
1203 
1204 	gtod = kvm_s390_get_tod_clock_fast(kvm);
1205 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1206 		return -EFAULT;
1207 	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1208 
1209 	return 0;
1210 }
1211 
1212 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1213 {
1214 	int ret;
1215 
1216 	if (attr->flags)
1217 		return -EINVAL;
1218 
1219 	switch (attr->attr) {
1220 	case KVM_S390_VM_TOD_EXT:
1221 		ret = kvm_s390_get_tod_ext(kvm, attr);
1222 		break;
1223 	case KVM_S390_VM_TOD_HIGH:
1224 		ret = kvm_s390_get_tod_high(kvm, attr);
1225 		break;
1226 	case KVM_S390_VM_TOD_LOW:
1227 		ret = kvm_s390_get_tod_low(kvm, attr);
1228 		break;
1229 	default:
1230 		ret = -ENXIO;
1231 		break;
1232 	}
1233 	return ret;
1234 }
1235 
1236 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1237 {
1238 	struct kvm_s390_vm_cpu_processor *proc;
1239 	u16 lowest_ibc, unblocked_ibc;
1240 	int ret = 0;
1241 
1242 	mutex_lock(&kvm->lock);
1243 	if (kvm->created_vcpus) {
1244 		ret = -EBUSY;
1245 		goto out;
1246 	}
1247 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1248 	if (!proc) {
1249 		ret = -ENOMEM;
1250 		goto out;
1251 	}
1252 	if (!copy_from_user(proc, (void __user *)attr->addr,
1253 			    sizeof(*proc))) {
1254 		kvm->arch.model.cpuid = proc->cpuid;
1255 		lowest_ibc = sclp.ibc >> 16 & 0xfff;
1256 		unblocked_ibc = sclp.ibc & 0xfff;
1257 		if (lowest_ibc && proc->ibc) {
1258 			if (proc->ibc > unblocked_ibc)
1259 				kvm->arch.model.ibc = unblocked_ibc;
1260 			else if (proc->ibc < lowest_ibc)
1261 				kvm->arch.model.ibc = lowest_ibc;
1262 			else
1263 				kvm->arch.model.ibc = proc->ibc;
1264 		}
1265 		memcpy(kvm->arch.model.fac_list, proc->fac_list,
1266 		       S390_ARCH_FAC_LIST_SIZE_BYTE);
1267 		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1268 			 kvm->arch.model.ibc,
1269 			 kvm->arch.model.cpuid);
1270 		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1271 			 kvm->arch.model.fac_list[0],
1272 			 kvm->arch.model.fac_list[1],
1273 			 kvm->arch.model.fac_list[2]);
1274 	} else
1275 		ret = -EFAULT;
1276 	kfree(proc);
1277 out:
1278 	mutex_unlock(&kvm->lock);
1279 	return ret;
1280 }
1281 
1282 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1283 				       struct kvm_device_attr *attr)
1284 {
1285 	struct kvm_s390_vm_cpu_feat data;
1286 
1287 	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1288 		return -EFAULT;
1289 	if (!bitmap_subset((unsigned long *) data.feat,
1290 			   kvm_s390_available_cpu_feat,
1291 			   KVM_S390_VM_CPU_FEAT_NR_BITS))
1292 		return -EINVAL;
1293 
1294 	mutex_lock(&kvm->lock);
1295 	if (kvm->created_vcpus) {
1296 		mutex_unlock(&kvm->lock);
1297 		return -EBUSY;
1298 	}
1299 	bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1300 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1301 	mutex_unlock(&kvm->lock);
1302 	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1303 			 data.feat[0],
1304 			 data.feat[1],
1305 			 data.feat[2]);
1306 	return 0;
1307 }
1308 
1309 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1310 					  struct kvm_device_attr *attr)
1311 {
1312 	mutex_lock(&kvm->lock);
1313 	if (kvm->created_vcpus) {
1314 		mutex_unlock(&kvm->lock);
1315 		return -EBUSY;
1316 	}
1317 
1318 	if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1319 			   sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1320 		mutex_unlock(&kvm->lock);
1321 		return -EFAULT;
1322 	}
1323 	mutex_unlock(&kvm->lock);
1324 
1325 	VM_EVENT(kvm, 3, "SET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1326 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1327 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1328 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1329 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1330 	VM_EVENT(kvm, 3, "SET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1331 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1332 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1333 	VM_EVENT(kvm, 3, "SET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1334 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1335 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1336 	VM_EVENT(kvm, 3, "SET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1337 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1338 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1339 	VM_EVENT(kvm, 3, "SET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1340 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1341 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1342 	VM_EVENT(kvm, 3, "SET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1343 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1344 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1345 	VM_EVENT(kvm, 3, "SET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1346 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1347 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1348 	VM_EVENT(kvm, 3, "SET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1349 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1350 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1351 	VM_EVENT(kvm, 3, "SET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1352 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1353 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1354 	VM_EVENT(kvm, 3, "SET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1355 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1356 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1357 	VM_EVENT(kvm, 3, "SET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1358 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1359 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1360 	VM_EVENT(kvm, 3, "SET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1361 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1362 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1363 	VM_EVENT(kvm, 3, "SET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1364 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1365 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1366 	VM_EVENT(kvm, 3, "SET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1367 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1368 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1369 	VM_EVENT(kvm, 3, "SET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1370 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1371 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1372 	VM_EVENT(kvm, 3, "SET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1373 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1374 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1375 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1376 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1377 	VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1378 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1379 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1380 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1381 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1382 
1383 	return 0;
1384 }
1385 
1386 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1387 {
1388 	int ret = -ENXIO;
1389 
1390 	switch (attr->attr) {
1391 	case KVM_S390_VM_CPU_PROCESSOR:
1392 		ret = kvm_s390_set_processor(kvm, attr);
1393 		break;
1394 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1395 		ret = kvm_s390_set_processor_feat(kvm, attr);
1396 		break;
1397 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1398 		ret = kvm_s390_set_processor_subfunc(kvm, attr);
1399 		break;
1400 	}
1401 	return ret;
1402 }
1403 
1404 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1405 {
1406 	struct kvm_s390_vm_cpu_processor *proc;
1407 	int ret = 0;
1408 
1409 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1410 	if (!proc) {
1411 		ret = -ENOMEM;
1412 		goto out;
1413 	}
1414 	proc->cpuid = kvm->arch.model.cpuid;
1415 	proc->ibc = kvm->arch.model.ibc;
1416 	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1417 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1418 	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1419 		 kvm->arch.model.ibc,
1420 		 kvm->arch.model.cpuid);
1421 	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1422 		 kvm->arch.model.fac_list[0],
1423 		 kvm->arch.model.fac_list[1],
1424 		 kvm->arch.model.fac_list[2]);
1425 	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1426 		ret = -EFAULT;
1427 	kfree(proc);
1428 out:
1429 	return ret;
1430 }
1431 
1432 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1433 {
1434 	struct kvm_s390_vm_cpu_machine *mach;
1435 	int ret = 0;
1436 
1437 	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1438 	if (!mach) {
1439 		ret = -ENOMEM;
1440 		goto out;
1441 	}
1442 	get_cpu_id((struct cpuid *) &mach->cpuid);
1443 	mach->ibc = sclp.ibc;
1444 	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1445 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1446 	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1447 	       sizeof(S390_lowcore.stfle_fac_list));
1448 	VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1449 		 kvm->arch.model.ibc,
1450 		 kvm->arch.model.cpuid);
1451 	VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1452 		 mach->fac_mask[0],
1453 		 mach->fac_mask[1],
1454 		 mach->fac_mask[2]);
1455 	VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1456 		 mach->fac_list[0],
1457 		 mach->fac_list[1],
1458 		 mach->fac_list[2]);
1459 	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1460 		ret = -EFAULT;
1461 	kfree(mach);
1462 out:
1463 	return ret;
1464 }
1465 
1466 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1467 				       struct kvm_device_attr *attr)
1468 {
1469 	struct kvm_s390_vm_cpu_feat data;
1470 
1471 	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1472 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1473 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1474 		return -EFAULT;
1475 	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1476 			 data.feat[0],
1477 			 data.feat[1],
1478 			 data.feat[2]);
1479 	return 0;
1480 }
1481 
1482 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1483 				     struct kvm_device_attr *attr)
1484 {
1485 	struct kvm_s390_vm_cpu_feat data;
1486 
1487 	bitmap_copy((unsigned long *) data.feat,
1488 		    kvm_s390_available_cpu_feat,
1489 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1490 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1491 		return -EFAULT;
1492 	VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1493 			 data.feat[0],
1494 			 data.feat[1],
1495 			 data.feat[2]);
1496 	return 0;
1497 }
1498 
1499 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1500 					  struct kvm_device_attr *attr)
1501 {
1502 	if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1503 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1504 		return -EFAULT;
1505 
1506 	VM_EVENT(kvm, 3, "GET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1507 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1508 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1509 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1510 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1511 	VM_EVENT(kvm, 3, "GET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1512 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1513 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1514 	VM_EVENT(kvm, 3, "GET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1515 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1516 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1517 	VM_EVENT(kvm, 3, "GET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1518 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1519 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1520 	VM_EVENT(kvm, 3, "GET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1521 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1522 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1523 	VM_EVENT(kvm, 3, "GET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1524 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1525 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1526 	VM_EVENT(kvm, 3, "GET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1527 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1528 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1529 	VM_EVENT(kvm, 3, "GET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1530 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1531 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1532 	VM_EVENT(kvm, 3, "GET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1533 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1534 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1535 	VM_EVENT(kvm, 3, "GET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1536 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1537 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1538 	VM_EVENT(kvm, 3, "GET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1539 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1540 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1541 	VM_EVENT(kvm, 3, "GET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1542 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1543 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1544 	VM_EVENT(kvm, 3, "GET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1545 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1546 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1547 	VM_EVENT(kvm, 3, "GET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1548 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1549 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1550 	VM_EVENT(kvm, 3, "GET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1551 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1552 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1553 	VM_EVENT(kvm, 3, "GET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1554 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1555 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1556 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1557 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1558 	VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1559 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1560 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1561 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1562 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1563 
1564 	return 0;
1565 }
1566 
1567 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1568 					struct kvm_device_attr *attr)
1569 {
1570 	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1571 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1572 		return -EFAULT;
1573 
1574 	VM_EVENT(kvm, 3, "GET: host  PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1575 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1576 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1577 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1578 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1579 	VM_EVENT(kvm, 3, "GET: host  PTFF   subfunc 0x%16.16lx.%16.16lx",
1580 		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1581 		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1582 	VM_EVENT(kvm, 3, "GET: host  KMAC   subfunc 0x%16.16lx.%16.16lx",
1583 		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1584 		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1585 	VM_EVENT(kvm, 3, "GET: host  KMC    subfunc 0x%16.16lx.%16.16lx",
1586 		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1587 		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1588 	VM_EVENT(kvm, 3, "GET: host  KM     subfunc 0x%16.16lx.%16.16lx",
1589 		 ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1590 		 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1591 	VM_EVENT(kvm, 3, "GET: host  KIMD   subfunc 0x%16.16lx.%16.16lx",
1592 		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1593 		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1594 	VM_EVENT(kvm, 3, "GET: host  KLMD   subfunc 0x%16.16lx.%16.16lx",
1595 		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1596 		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1597 	VM_EVENT(kvm, 3, "GET: host  PCKMO  subfunc 0x%16.16lx.%16.16lx",
1598 		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1599 		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1600 	VM_EVENT(kvm, 3, "GET: host  KMCTR  subfunc 0x%16.16lx.%16.16lx",
1601 		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1602 		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1603 	VM_EVENT(kvm, 3, "GET: host  KMF    subfunc 0x%16.16lx.%16.16lx",
1604 		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1605 		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1606 	VM_EVENT(kvm, 3, "GET: host  KMO    subfunc 0x%16.16lx.%16.16lx",
1607 		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1608 		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1609 	VM_EVENT(kvm, 3, "GET: host  PCC    subfunc 0x%16.16lx.%16.16lx",
1610 		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1611 		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1612 	VM_EVENT(kvm, 3, "GET: host  PPNO   subfunc 0x%16.16lx.%16.16lx",
1613 		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1614 		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1615 	VM_EVENT(kvm, 3, "GET: host  KMA    subfunc 0x%16.16lx.%16.16lx",
1616 		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1617 		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1618 	VM_EVENT(kvm, 3, "GET: host  KDSA   subfunc 0x%16.16lx.%16.16lx",
1619 		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1620 		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1621 	VM_EVENT(kvm, 3, "GET: host  SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1622 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1623 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1624 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1625 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1626 	VM_EVENT(kvm, 3, "GET: host  DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1627 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1628 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1629 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1630 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1631 
1632 	return 0;
1633 }
1634 
1635 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1636 {
1637 	int ret = -ENXIO;
1638 
1639 	switch (attr->attr) {
1640 	case KVM_S390_VM_CPU_PROCESSOR:
1641 		ret = kvm_s390_get_processor(kvm, attr);
1642 		break;
1643 	case KVM_S390_VM_CPU_MACHINE:
1644 		ret = kvm_s390_get_machine(kvm, attr);
1645 		break;
1646 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1647 		ret = kvm_s390_get_processor_feat(kvm, attr);
1648 		break;
1649 	case KVM_S390_VM_CPU_MACHINE_FEAT:
1650 		ret = kvm_s390_get_machine_feat(kvm, attr);
1651 		break;
1652 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1653 		ret = kvm_s390_get_processor_subfunc(kvm, attr);
1654 		break;
1655 	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1656 		ret = kvm_s390_get_machine_subfunc(kvm, attr);
1657 		break;
1658 	}
1659 	return ret;
1660 }
1661 
1662 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1663 {
1664 	int ret;
1665 
1666 	switch (attr->group) {
1667 	case KVM_S390_VM_MEM_CTRL:
1668 		ret = kvm_s390_set_mem_control(kvm, attr);
1669 		break;
1670 	case KVM_S390_VM_TOD:
1671 		ret = kvm_s390_set_tod(kvm, attr);
1672 		break;
1673 	case KVM_S390_VM_CPU_MODEL:
1674 		ret = kvm_s390_set_cpu_model(kvm, attr);
1675 		break;
1676 	case KVM_S390_VM_CRYPTO:
1677 		ret = kvm_s390_vm_set_crypto(kvm, attr);
1678 		break;
1679 	case KVM_S390_VM_MIGRATION:
1680 		ret = kvm_s390_vm_set_migration(kvm, attr);
1681 		break;
1682 	default:
1683 		ret = -ENXIO;
1684 		break;
1685 	}
1686 
1687 	return ret;
1688 }
1689 
1690 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1691 {
1692 	int ret;
1693 
1694 	switch (attr->group) {
1695 	case KVM_S390_VM_MEM_CTRL:
1696 		ret = kvm_s390_get_mem_control(kvm, attr);
1697 		break;
1698 	case KVM_S390_VM_TOD:
1699 		ret = kvm_s390_get_tod(kvm, attr);
1700 		break;
1701 	case KVM_S390_VM_CPU_MODEL:
1702 		ret = kvm_s390_get_cpu_model(kvm, attr);
1703 		break;
1704 	case KVM_S390_VM_MIGRATION:
1705 		ret = kvm_s390_vm_get_migration(kvm, attr);
1706 		break;
1707 	default:
1708 		ret = -ENXIO;
1709 		break;
1710 	}
1711 
1712 	return ret;
1713 }
1714 
1715 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1716 {
1717 	int ret;
1718 
1719 	switch (attr->group) {
1720 	case KVM_S390_VM_MEM_CTRL:
1721 		switch (attr->attr) {
1722 		case KVM_S390_VM_MEM_ENABLE_CMMA:
1723 		case KVM_S390_VM_MEM_CLR_CMMA:
1724 			ret = sclp.has_cmma ? 0 : -ENXIO;
1725 			break;
1726 		case KVM_S390_VM_MEM_LIMIT_SIZE:
1727 			ret = 0;
1728 			break;
1729 		default:
1730 			ret = -ENXIO;
1731 			break;
1732 		}
1733 		break;
1734 	case KVM_S390_VM_TOD:
1735 		switch (attr->attr) {
1736 		case KVM_S390_VM_TOD_LOW:
1737 		case KVM_S390_VM_TOD_HIGH:
1738 			ret = 0;
1739 			break;
1740 		default:
1741 			ret = -ENXIO;
1742 			break;
1743 		}
1744 		break;
1745 	case KVM_S390_VM_CPU_MODEL:
1746 		switch (attr->attr) {
1747 		case KVM_S390_VM_CPU_PROCESSOR:
1748 		case KVM_S390_VM_CPU_MACHINE:
1749 		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1750 		case KVM_S390_VM_CPU_MACHINE_FEAT:
1751 		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1752 		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1753 			ret = 0;
1754 			break;
1755 		default:
1756 			ret = -ENXIO;
1757 			break;
1758 		}
1759 		break;
1760 	case KVM_S390_VM_CRYPTO:
1761 		switch (attr->attr) {
1762 		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1763 		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1764 		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1765 		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1766 			ret = 0;
1767 			break;
1768 		case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1769 		case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1770 			ret = ap_instructions_available() ? 0 : -ENXIO;
1771 			break;
1772 		default:
1773 			ret = -ENXIO;
1774 			break;
1775 		}
1776 		break;
1777 	case KVM_S390_VM_MIGRATION:
1778 		ret = 0;
1779 		break;
1780 	default:
1781 		ret = -ENXIO;
1782 		break;
1783 	}
1784 
1785 	return ret;
1786 }
1787 
1788 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1789 {
1790 	uint8_t *keys;
1791 	uint64_t hva;
1792 	int srcu_idx, i, r = 0;
1793 
1794 	if (args->flags != 0)
1795 		return -EINVAL;
1796 
1797 	/* Is this guest using storage keys? */
1798 	if (!mm_uses_skeys(current->mm))
1799 		return KVM_S390_GET_SKEYS_NONE;
1800 
1801 	/* Enforce sane limit on memory allocation */
1802 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1803 		return -EINVAL;
1804 
1805 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1806 	if (!keys)
1807 		return -ENOMEM;
1808 
1809 	down_read(&current->mm->mmap_sem);
1810 	srcu_idx = srcu_read_lock(&kvm->srcu);
1811 	for (i = 0; i < args->count; i++) {
1812 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1813 		if (kvm_is_error_hva(hva)) {
1814 			r = -EFAULT;
1815 			break;
1816 		}
1817 
1818 		r = get_guest_storage_key(current->mm, hva, &keys[i]);
1819 		if (r)
1820 			break;
1821 	}
1822 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1823 	up_read(&current->mm->mmap_sem);
1824 
1825 	if (!r) {
1826 		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1827 				 sizeof(uint8_t) * args->count);
1828 		if (r)
1829 			r = -EFAULT;
1830 	}
1831 
1832 	kvfree(keys);
1833 	return r;
1834 }
1835 
1836 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1837 {
1838 	uint8_t *keys;
1839 	uint64_t hva;
1840 	int srcu_idx, i, r = 0;
1841 	bool unlocked;
1842 
1843 	if (args->flags != 0)
1844 		return -EINVAL;
1845 
1846 	/* Enforce sane limit on memory allocation */
1847 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1848 		return -EINVAL;
1849 
1850 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1851 	if (!keys)
1852 		return -ENOMEM;
1853 
1854 	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1855 			   sizeof(uint8_t) * args->count);
1856 	if (r) {
1857 		r = -EFAULT;
1858 		goto out;
1859 	}
1860 
1861 	/* Enable storage key handling for the guest */
1862 	r = s390_enable_skey();
1863 	if (r)
1864 		goto out;
1865 
1866 	i = 0;
1867 	down_read(&current->mm->mmap_sem);
1868 	srcu_idx = srcu_read_lock(&kvm->srcu);
1869         while (i < args->count) {
1870 		unlocked = false;
1871 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1872 		if (kvm_is_error_hva(hva)) {
1873 			r = -EFAULT;
1874 			break;
1875 		}
1876 
1877 		/* Lowest order bit is reserved */
1878 		if (keys[i] & 0x01) {
1879 			r = -EINVAL;
1880 			break;
1881 		}
1882 
1883 		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1884 		if (r) {
1885 			r = fixup_user_fault(current, current->mm, hva,
1886 					     FAULT_FLAG_WRITE, &unlocked);
1887 			if (r)
1888 				break;
1889 		}
1890 		if (!r)
1891 			i++;
1892 	}
1893 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1894 	up_read(&current->mm->mmap_sem);
1895 out:
1896 	kvfree(keys);
1897 	return r;
1898 }
1899 
1900 /*
1901  * Base address and length must be sent at the start of each block, therefore
1902  * it's cheaper to send some clean data, as long as it's less than the size of
1903  * two longs.
1904  */
1905 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1906 /* for consistency */
1907 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1908 
1909 /*
1910  * Similar to gfn_to_memslot, but returns the index of a memslot also when the
1911  * address falls in a hole. In that case the index of one of the memslots
1912  * bordering the hole is returned.
1913  */
1914 static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1915 {
1916 	int start = 0, end = slots->used_slots;
1917 	int slot = atomic_read(&slots->lru_slot);
1918 	struct kvm_memory_slot *memslots = slots->memslots;
1919 
1920 	if (gfn >= memslots[slot].base_gfn &&
1921 	    gfn < memslots[slot].base_gfn + memslots[slot].npages)
1922 		return slot;
1923 
1924 	while (start < end) {
1925 		slot = start + (end - start) / 2;
1926 
1927 		if (gfn >= memslots[slot].base_gfn)
1928 			end = slot;
1929 		else
1930 			start = slot + 1;
1931 	}
1932 
1933 	if (gfn >= memslots[start].base_gfn &&
1934 	    gfn < memslots[start].base_gfn + memslots[start].npages) {
1935 		atomic_set(&slots->lru_slot, start);
1936 	}
1937 
1938 	return start;
1939 }
1940 
1941 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1942 			      u8 *res, unsigned long bufsize)
1943 {
1944 	unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1945 
1946 	args->count = 0;
1947 	while (args->count < bufsize) {
1948 		hva = gfn_to_hva(kvm, cur_gfn);
1949 		/*
1950 		 * We return an error if the first value was invalid, but we
1951 		 * return successfully if at least one value was copied.
1952 		 */
1953 		if (kvm_is_error_hva(hva))
1954 			return args->count ? 0 : -EFAULT;
1955 		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1956 			pgstev = 0;
1957 		res[args->count++] = (pgstev >> 24) & 0x43;
1958 		cur_gfn++;
1959 	}
1960 
1961 	return 0;
1962 }
1963 
1964 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
1965 					      unsigned long cur_gfn)
1966 {
1967 	int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
1968 	struct kvm_memory_slot *ms = slots->memslots + slotidx;
1969 	unsigned long ofs = cur_gfn - ms->base_gfn;
1970 
1971 	if (ms->base_gfn + ms->npages <= cur_gfn) {
1972 		slotidx--;
1973 		/* If we are above the highest slot, wrap around */
1974 		if (slotidx < 0)
1975 			slotidx = slots->used_slots - 1;
1976 
1977 		ms = slots->memslots + slotidx;
1978 		ofs = 0;
1979 	}
1980 	ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
1981 	while ((slotidx > 0) && (ofs >= ms->npages)) {
1982 		slotidx--;
1983 		ms = slots->memslots + slotidx;
1984 		ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
1985 	}
1986 	return ms->base_gfn + ofs;
1987 }
1988 
/*
 * Collect CMMA values starting at the first dirty frame at or after
 * args->start_gfn, clearing the corresponding bits in the second dirty
 * bitmap as it goes.
 *
 * On return args->start_gfn holds the gfn of the first value stored and
 * args->count the number of values written to @res (at most @bufsize).
 * The function always returns 0; an empty result is signalled through
 * args->count == 0.
 */
static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
			     u8 *res, unsigned long bufsize)
{
	unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
	struct kvm_memslots *slots = kvm_memslots(kvm);
	struct kvm_memory_slot *ms;

	/* Locate the first dirty frame and the memslot that contains it */
	cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
	ms = gfn_to_memslot(kvm, cur_gfn);
	args->count = 0;
	args->start_gfn = cur_gfn;
	/* No memslot for that gfn: nothing to report */
	if (!ms)
		return 0;
	next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
	/* End of the memslot at index 0, used below as the end of memory */
	mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;

	while (args->count < bufsize) {
		hva = gfn_to_hva(kvm, cur_gfn);
		if (kvm_is_error_hva(hva))
			return 0;
		/* Decrement only if we actually flipped the bit to 0 */
		if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
			atomic64_dec(&kvm->arch.cmma_dirty_pages);
		/* A failed PGSTE lookup is reported as a zero value */
		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
			pgstev = 0;
		/* Save the value */
		res[args->count++] = (pgstev >> 24) & 0x43;
		/* If the next bit is too far away, stop. */
		if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
			return 0;
		/* If we reached the previous "next", find the next one */
		if (cur_gfn == next_gfn)
			next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
		/* Reached the end of memory or of the buffer, stop */
		if ((next_gfn >= mem_end) ||
		    (next_gfn - args->start_gfn >= bufsize))
			return 0;
		cur_gfn++;
		/* Reached the end of the current memslot, take the next one. */
		if (cur_gfn - ms->base_gfn >= ms->npages) {
			ms = gfn_to_memslot(kvm, cur_gfn);
			if (!ms)
				return 0;
		}
	}
	return 0;
}
2036 
2037 /*
2038  * This function searches for the next page with dirty CMMA attributes, and
2039  * saves the attributes in the buffer up to either the end of the buffer or
2040  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2041  * no trailing clean bytes are saved.
2042  * In case no dirty bits were found, or if CMMA was not enabled or used, the
2043  * output buffer will indicate 0 as length.
2044  */
2045 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2046 				  struct kvm_s390_cmma_log *args)
2047 {
2048 	unsigned long bufsize;
2049 	int srcu_idx, peek, ret;
2050 	u8 *values;
2051 
2052 	if (!kvm->arch.use_cmma)
2053 		return -ENXIO;
2054 	/* Invalid/unsupported flags were specified */
2055 	if (args->flags & ~KVM_S390_CMMA_PEEK)
2056 		return -EINVAL;
2057 	/* Migration mode query, and we are not doing a migration */
2058 	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2059 	if (!peek && !kvm->arch.migration_mode)
2060 		return -EINVAL;
2061 	/* CMMA is disabled or was not used, or the buffer has length zero */
2062 	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2063 	if (!bufsize || !kvm->mm->context.uses_cmm) {
2064 		memset(args, 0, sizeof(*args));
2065 		return 0;
2066 	}
2067 	/* We are not peeking, and there are no dirty pages */
2068 	if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2069 		memset(args, 0, sizeof(*args));
2070 		return 0;
2071 	}
2072 
2073 	values = vmalloc(bufsize);
2074 	if (!values)
2075 		return -ENOMEM;
2076 
2077 	down_read(&kvm->mm->mmap_sem);
2078 	srcu_idx = srcu_read_lock(&kvm->srcu);
2079 	if (peek)
2080 		ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2081 	else
2082 		ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2083 	srcu_read_unlock(&kvm->srcu, srcu_idx);
2084 	up_read(&kvm->mm->mmap_sem);
2085 
2086 	if (kvm->arch.migration_mode)
2087 		args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2088 	else
2089 		args->remaining = 0;
2090 
2091 	if (copy_to_user((void __user *)args->values, values, args->count))
2092 		ret = -EFAULT;
2093 
2094 	vfree(values);
2095 	return ret;
2096 }
2097 
2098 /*
2099  * This function sets the CMMA attributes for the given pages. If the input
2100  * buffer has zero length, no action is taken, otherwise the attributes are
2101  * set and the mm->context.uses_cmm flag is set.
2102  */
2103 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2104 				  const struct kvm_s390_cmma_log *args)
2105 {
2106 	unsigned long hva, mask, pgstev, i;
2107 	uint8_t *bits;
2108 	int srcu_idx, r = 0;
2109 
2110 	mask = args->mask;
2111 
2112 	if (!kvm->arch.use_cmma)
2113 		return -ENXIO;
2114 	/* invalid/unsupported flags */
2115 	if (args->flags != 0)
2116 		return -EINVAL;
2117 	/* Enforce sane limit on memory allocation */
2118 	if (args->count > KVM_S390_CMMA_SIZE_MAX)
2119 		return -EINVAL;
2120 	/* Nothing to do */
2121 	if (args->count == 0)
2122 		return 0;
2123 
2124 	bits = vmalloc(array_size(sizeof(*bits), args->count));
2125 	if (!bits)
2126 		return -ENOMEM;
2127 
2128 	r = copy_from_user(bits, (void __user *)args->values, args->count);
2129 	if (r) {
2130 		r = -EFAULT;
2131 		goto out;
2132 	}
2133 
2134 	down_read(&kvm->mm->mmap_sem);
2135 	srcu_idx = srcu_read_lock(&kvm->srcu);
2136 	for (i = 0; i < args->count; i++) {
2137 		hva = gfn_to_hva(kvm, args->start_gfn + i);
2138 		if (kvm_is_error_hva(hva)) {
2139 			r = -EFAULT;
2140 			break;
2141 		}
2142 
2143 		pgstev = bits[i];
2144 		pgstev = pgstev << 24;
2145 		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2146 		set_pgste_bits(kvm->mm, hva, mask, pgstev);
2147 	}
2148 	srcu_read_unlock(&kvm->srcu, srcu_idx);
2149 	up_read(&kvm->mm->mmap_sem);
2150 
2151 	if (!kvm->mm->context.uses_cmm) {
2152 		down_write(&kvm->mm->mmap_sem);
2153 		kvm->mm->context.uses_cmm = 1;
2154 		up_write(&kvm->mm->mmap_sem);
2155 	}
2156 out:
2157 	vfree(bits);
2158 	return r;
2159 }
2160 
2161 long kvm_arch_vm_ioctl(struct file *filp,
2162 		       unsigned int ioctl, unsigned long arg)
2163 {
2164 	struct kvm *kvm = filp->private_data;
2165 	void __user *argp = (void __user *)arg;
2166 	struct kvm_device_attr attr;
2167 	int r;
2168 
2169 	switch (ioctl) {
2170 	case KVM_S390_INTERRUPT: {
2171 		struct kvm_s390_interrupt s390int;
2172 
2173 		r = -EFAULT;
2174 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
2175 			break;
2176 		r = kvm_s390_inject_vm(kvm, &s390int);
2177 		break;
2178 	}
2179 	case KVM_CREATE_IRQCHIP: {
2180 		struct kvm_irq_routing_entry routing;
2181 
2182 		r = -EINVAL;
2183 		if (kvm->arch.use_irqchip) {
2184 			/* Set up dummy routing. */
2185 			memset(&routing, 0, sizeof(routing));
2186 			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2187 		}
2188 		break;
2189 	}
2190 	case KVM_SET_DEVICE_ATTR: {
2191 		r = -EFAULT;
2192 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2193 			break;
2194 		r = kvm_s390_vm_set_attr(kvm, &attr);
2195 		break;
2196 	}
2197 	case KVM_GET_DEVICE_ATTR: {
2198 		r = -EFAULT;
2199 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2200 			break;
2201 		r = kvm_s390_vm_get_attr(kvm, &attr);
2202 		break;
2203 	}
2204 	case KVM_HAS_DEVICE_ATTR: {
2205 		r = -EFAULT;
2206 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2207 			break;
2208 		r = kvm_s390_vm_has_attr(kvm, &attr);
2209 		break;
2210 	}
2211 	case KVM_S390_GET_SKEYS: {
2212 		struct kvm_s390_skeys args;
2213 
2214 		r = -EFAULT;
2215 		if (copy_from_user(&args, argp,
2216 				   sizeof(struct kvm_s390_skeys)))
2217 			break;
2218 		r = kvm_s390_get_skeys(kvm, &args);
2219 		break;
2220 	}
2221 	case KVM_S390_SET_SKEYS: {
2222 		struct kvm_s390_skeys args;
2223 
2224 		r = -EFAULT;
2225 		if (copy_from_user(&args, argp,
2226 				   sizeof(struct kvm_s390_skeys)))
2227 			break;
2228 		r = kvm_s390_set_skeys(kvm, &args);
2229 		break;
2230 	}
2231 	case KVM_S390_GET_CMMA_BITS: {
2232 		struct kvm_s390_cmma_log args;
2233 
2234 		r = -EFAULT;
2235 		if (copy_from_user(&args, argp, sizeof(args)))
2236 			break;
2237 		mutex_lock(&kvm->slots_lock);
2238 		r = kvm_s390_get_cmma_bits(kvm, &args);
2239 		mutex_unlock(&kvm->slots_lock);
2240 		if (!r) {
2241 			r = copy_to_user(argp, &args, sizeof(args));
2242 			if (r)
2243 				r = -EFAULT;
2244 		}
2245 		break;
2246 	}
2247 	case KVM_S390_SET_CMMA_BITS: {
2248 		struct kvm_s390_cmma_log args;
2249 
2250 		r = -EFAULT;
2251 		if (copy_from_user(&args, argp, sizeof(args)))
2252 			break;
2253 		mutex_lock(&kvm->slots_lock);
2254 		r = kvm_s390_set_cmma_bits(kvm, &args);
2255 		mutex_unlock(&kvm->slots_lock);
2256 		break;
2257 	}
2258 	default:
2259 		r = -ENOTTY;
2260 	}
2261 
2262 	return r;
2263 }
2264 
2265 static int kvm_s390_apxa_installed(void)
2266 {
2267 	struct ap_config_info info;
2268 
2269 	if (ap_instructions_available()) {
2270 		if (ap_qci(&info) == 0)
2271 			return info.apxa;
2272 	}
2273 
2274 	return 0;
2275 }
2276 
2277 /*
2278  * The format of the crypto control block (CRYCB) is specified in the 3 low
2279  * order bits of the CRYCB designation (CRYCBD) field as follows:
2280  * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2281  *	     AP extended addressing (APXA) facility are installed.
2282  * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2283  * Format 2: Both the APXA and MSAX3 facilities are installed
2284  */
2285 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2286 {
2287 	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2288 
2289 	/* Clear the CRYCB format bits - i.e., set format 0 by default */
2290 	kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2291 
2292 	/* Check whether MSAX3 is installed */
2293 	if (!test_kvm_facility(kvm, 76))
2294 		return;
2295 
2296 	if (kvm_s390_apxa_installed())
2297 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2298 	else
2299 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2300 }
2301 
2302 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2303 			       unsigned long *aqm, unsigned long *adm)
2304 {
2305 	struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2306 
2307 	mutex_lock(&kvm->lock);
2308 	kvm_s390_vcpu_block_all(kvm);
2309 
2310 	switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2311 	case CRYCB_FORMAT2: /* APCB1 use 256 bits */
2312 		memcpy(crycb->apcb1.apm, apm, 32);
2313 		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2314 			 apm[0], apm[1], apm[2], apm[3]);
2315 		memcpy(crycb->apcb1.aqm, aqm, 32);
2316 		VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2317 			 aqm[0], aqm[1], aqm[2], aqm[3]);
2318 		memcpy(crycb->apcb1.adm, adm, 32);
2319 		VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2320 			 adm[0], adm[1], adm[2], adm[3]);
2321 		break;
2322 	case CRYCB_FORMAT1:
2323 	case CRYCB_FORMAT0: /* Fall through both use APCB0 */
2324 		memcpy(crycb->apcb0.apm, apm, 8);
2325 		memcpy(crycb->apcb0.aqm, aqm, 2);
2326 		memcpy(crycb->apcb0.adm, adm, 2);
2327 		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2328 			 apm[0], *((unsigned short *)aqm),
2329 			 *((unsigned short *)adm));
2330 		break;
2331 	default:	/* Can not happen */
2332 		break;
2333 	}
2334 
2335 	/* recreate the shadow crycb for each vcpu */
2336 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2337 	kvm_s390_vcpu_unblock_all(kvm);
2338 	mutex_unlock(&kvm->lock);
2339 }
2340 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
2341 
2342 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2343 {
2344 	mutex_lock(&kvm->lock);
2345 	kvm_s390_vcpu_block_all(kvm);
2346 
2347 	memset(&kvm->arch.crypto.crycb->apcb0, 0,
2348 	       sizeof(kvm->arch.crypto.crycb->apcb0));
2349 	memset(&kvm->arch.crypto.crycb->apcb1, 0,
2350 	       sizeof(kvm->arch.crypto.crycb->apcb1));
2351 
2352 	VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2353 	/* recreate the shadow crycb for each vcpu */
2354 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2355 	kvm_s390_vcpu_unblock_all(kvm);
2356 	mutex_unlock(&kvm->lock);
2357 }
2358 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2359 
2360 static u64 kvm_s390_get_initial_cpuid(void)
2361 {
2362 	struct cpuid cpuid;
2363 
2364 	get_cpu_id(&cpuid);
2365 	cpuid.version = 0xff;
2366 	return *((u64 *) &cpuid);
2367 }
2368 
2369 static void kvm_s390_crypto_init(struct kvm *kvm)
2370 {
2371 	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2372 	kvm_s390_set_crycb_format(kvm);
2373 
2374 	if (!test_kvm_facility(kvm, 76))
2375 		return;
2376 
2377 	/* Enable AES/DEA protected key functions by default */
2378 	kvm->arch.crypto.aes_kw = 1;
2379 	kvm->arch.crypto.dea_kw = 1;
2380 	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2381 			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2382 	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2383 			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2384 }
2385 
2386 static void sca_dispose(struct kvm *kvm)
2387 {
2388 	if (kvm->arch.use_esca)
2389 		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2390 	else
2391 		free_page((unsigned long)(kvm->arch.sca));
2392 	kvm->arch.sca = NULL;
2393 }
2394 
/*
 * Architecture specific part of VM creation.
 *
 * @kvm:  the VM being created
 * @type: VM type; only KVM_VM_S390_UCONTROL is accepted as a flag, and only
 *        when CONFIG_KVM_S390_UCONTROL is set and the caller has
 *        CAP_SYS_ADMIN
 *
 * Returns 0 on success, -EINVAL for an unsupported @type, the error from
 * s390_enable_sie(), or -ENOMEM when any of the allocations (SCA, debug
 * feature, sie_page2, gmap) fails.
 */
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
	gfp_t alloc_flags = GFP_KERNEL;
	int i, rc;
	char debug_name[16];
	/* Shared by all VMs; modified under kvm_lock below */
	static unsigned long sca_offset;

	rc = -EINVAL;
#ifdef CONFIG_KVM_S390_UCONTROL
	if (type & ~KVM_VM_S390_UCONTROL)
		goto out_err;
	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
		goto out_err;
#else
	if (type)
		goto out_err;
#endif

	rc = s390_enable_sie();
	if (rc)
		goto out_err;

	/* Every failure from here on is reported as out of memory */
	rc = -ENOMEM;

	/* Allocate the SCA from DMA memory if 64-bit SCA origins are unsupported */
	if (!sclp.has_64bscao)
		alloc_flags |= GFP_DMA;
	rwlock_init(&kvm->arch.sca_lock);
	/* start with basic SCA */
	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
	if (!kvm->arch.sca)
		goto out_err;
	/*
	 * Place the SCA at a per-VM offset inside its page: the offset
	 * advances by 16 bytes for each VM and wraps to 0 once the block
	 * would no longer fit in the page.
	 */
	mutex_lock(&kvm_lock);
	sca_offset += 16;
	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
		sca_offset = 0;
	kvm->arch.sca = (struct bsca_block *)
			((char *) kvm->arch.sca + sca_offset);
	mutex_unlock(&kvm_lock);

	/* The debug feature is named after the pid of the creating task */
	sprintf(debug_name, "kvm-%u", current->pid);

	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
	if (!kvm->arch.dbf)
		goto out_err;

	BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
	kvm->arch.sie_page2 =
	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
	if (!kvm->arch.sie_page2)
		goto out_err;

	kvm->arch.sie_page2->kvm = kvm;
	/* The facility list lives inside sie_page2 */
	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;

	/*
	 * fac_mask: host facilities that are in the base or extended KVM
	 * sets; fac_list: host facilities that are in the base set only.
	 */
	for (i = 0; i < kvm_s390_fac_size(); i++) {
		kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
					      (kvm_s390_fac_base[i] |
					       kvm_s390_fac_ext[i]);
		kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
					      kvm_s390_fac_base[i];
	}
	kvm->arch.model.subfuncs = kvm_s390_available_subfunc;

	/* we are always in czam mode - even on pre z14 machines */
	set_kvm_facility(kvm->arch.model.fac_mask, 138);
	set_kvm_facility(kvm->arch.model.fac_list, 138);
	/* we emulate STHYI in kvm */
	set_kvm_facility(kvm->arch.model.fac_mask, 74);
	set_kvm_facility(kvm->arch.model.fac_list, 74);
	if (MACHINE_HAS_TLB_GUEST) {
		set_kvm_facility(kvm->arch.model.fac_mask, 147);
		set_kvm_facility(kvm->arch.model.fac_list, 147);
	}

	/* Facility 65 is only added to the mask, not to the list */
	if (css_general_characteristics.aiv && test_facility(65))
		set_kvm_facility(kvm->arch.model.fac_mask, 65);

	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
	kvm->arch.model.ibc = sclp.ibc & 0x0fff;

	kvm_s390_crypto_init(kvm);

	/* Floating interrupt state and IPTE synchronisation primitives */
	mutex_init(&kvm->arch.float_int.ais_lock);
	spin_lock_init(&kvm->arch.float_int.lock);
	for (i = 0; i < FIRQ_LIST_COUNT; i++)
		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
	init_waitqueue_head(&kvm->arch.ipte_wq);
	mutex_init(&kvm->arch.ipte_mutex);

	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
	VM_EVENT(kvm, 3, "vm created with type %lu", type);

	if (type & KVM_VM_S390_UCONTROL) {
		/* User controlled VMs get no VM-wide gmap and no memory limit */
		kvm->arch.gmap = NULL;
		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
	} else {
		/* Limit guest memory to TASK_SIZE_MAX and to sclp.hamax + 1 */
		if (sclp.hamax == U64_MAX)
			kvm->arch.mem_limit = TASK_SIZE_MAX;
		else
			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
						    sclp.hamax + 1);
		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
		if (!kvm->arch.gmap)
			goto out_err;
		kvm->arch.gmap->private = kvm;
		kvm->arch.gmap->pfault_enabled = 0;
	}

	kvm->arch.use_pfmfi = sclp.has_pfmfi;
	kvm->arch.use_skf = sclp.has_skey;
	spin_lock_init(&kvm->arch.start_stop_lock);
	kvm_s390_vsie_init(kvm);
	kvm_s390_gisa_init(kvm);
	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);

	return 0;
out_err:
	/*
	 * Common error exit, reached at any stage of the setup.
	 * NOTE(review): this presumably relies on kvm->arch being zeroed
	 * by the caller so that members not yet set up are NULL - confirm.
	 */
	free_page((unsigned long)kvm->arch.sie_page2);
	debug_unregister(kvm->arch.dbf);
	sca_dispose(kvm);
	KVM_EVENT(3, "creation of vm failed: %d", rc);
	return rc;
}
2518 
2519 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2520 {
2521 	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2522 	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2523 	kvm_s390_clear_local_irqs(vcpu);
2524 	kvm_clear_async_pf_completion_queue(vcpu);
2525 	if (!kvm_is_ucontrol(vcpu->kvm))
2526 		sca_del_vcpu(vcpu);
2527 
2528 	if (kvm_is_ucontrol(vcpu->kvm))
2529 		gmap_remove(vcpu->arch.gmap);
2530 
2531 	if (vcpu->kvm->arch.use_cmma)
2532 		kvm_s390_vcpu_unsetup_cmma(vcpu);
2533 	free_page((unsigned long)(vcpu->arch.sie_block));
2534 
2535 	kvm_vcpu_uninit(vcpu);
2536 	kmem_cache_free(kvm_vcpu_cache, vcpu);
2537 }
2538 
2539 static void kvm_free_vcpus(struct kvm *kvm)
2540 {
2541 	unsigned int i;
2542 	struct kvm_vcpu *vcpu;
2543 
2544 	kvm_for_each_vcpu(i, vcpu, kvm)
2545 		kvm_arch_vcpu_destroy(vcpu);
2546 
2547 	mutex_lock(&kvm->lock);
2548 	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2549 		kvm->vcpus[i] = NULL;
2550 
2551 	atomic_set(&kvm->online_vcpus, 0);
2552 	mutex_unlock(&kvm->lock);
2553 }
2554 
/*
 * Architecture specific part of VM destruction: frees the vcpus first, then
 * the SCA, the debug feature, the GISA, sie_page2, the VM-wide gmap (only
 * for VMs that are not user controlled), the adapters, the floating
 * interrupts and the vsie state.
 */
void kvm_arch_destroy_vm(struct kvm *kvm)
{
	kvm_free_vcpus(kvm);
	sca_dispose(kvm);
	debug_unregister(kvm->arch.dbf);
	kvm_s390_gisa_destroy(kvm);
	free_page((unsigned long)kvm->arch.sie_page2);
	/* ucontrol VMs have no VM-wide gmap; their gmaps belong to the vcpus */
	if (!kvm_is_ucontrol(kvm))
		gmap_remove(kvm->arch.gmap);
	kvm_s390_destroy_adapters(kvm);
	kvm_s390_clear_float_irqs(kvm);
	kvm_s390_vsie_destroy(kvm);
	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
}
2569 
2570 /* Section: vcpu related */
2571 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2572 {
2573 	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2574 	if (!vcpu->arch.gmap)
2575 		return -ENOMEM;
2576 	vcpu->arch.gmap->private = vcpu->kvm;
2577 
2578 	return 0;
2579 }
2580 
2581 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2582 {
2583 	if (!kvm_s390_use_sca_entries())
2584 		return;
2585 	read_lock(&vcpu->kvm->arch.sca_lock);
2586 	if (vcpu->kvm->arch.use_esca) {
2587 		struct esca_block *sca = vcpu->kvm->arch.sca;
2588 
2589 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2590 		sca->cpu[vcpu->vcpu_id].sda = 0;
2591 	} else {
2592 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2593 
2594 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2595 		sca->cpu[vcpu->vcpu_id].sda = 0;
2596 	}
2597 	read_unlock(&vcpu->kvm->arch.sca_lock);
2598 }
2599 
2600 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2601 {
2602 	if (!kvm_s390_use_sca_entries()) {
2603 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2604 
2605 		/* we still need the basic sca for the ipte control */
2606 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2607 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2608 		return;
2609 	}
2610 	read_lock(&vcpu->kvm->arch.sca_lock);
2611 	if (vcpu->kvm->arch.use_esca) {
2612 		struct esca_block *sca = vcpu->kvm->arch.sca;
2613 
2614 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2615 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2616 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2617 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2618 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2619 	} else {
2620 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2621 
2622 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2623 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2624 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2625 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2626 	}
2627 	read_unlock(&vcpu->kvm->arch.sca_lock);
2628 }
2629 
2630 /* Basic SCA to Extended SCA data copy routines */
2631 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2632 {
2633 	d->sda = s->sda;
2634 	d->sigp_ctrl.c = s->sigp_ctrl.c;
2635 	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2636 }
2637 
2638 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2639 {
2640 	int i;
2641 
2642 	d->ipte_control = s->ipte_control;
2643 	d->mcn[0] = s->mcn;
2644 	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2645 		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2646 }
2647 
2648 static int sca_switch_to_extended(struct kvm *kvm)
2649 {
2650 	struct bsca_block *old_sca = kvm->arch.sca;
2651 	struct esca_block *new_sca;
2652 	struct kvm_vcpu *vcpu;
2653 	unsigned int vcpu_idx;
2654 	u32 scaol, scaoh;
2655 
2656 	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2657 	if (!new_sca)
2658 		return -ENOMEM;
2659 
2660 	scaoh = (u32)((u64)(new_sca) >> 32);
2661 	scaol = (u32)(u64)(new_sca) & ~0x3fU;
2662 
2663 	kvm_s390_vcpu_block_all(kvm);
2664 	write_lock(&kvm->arch.sca_lock);
2665 
2666 	sca_copy_b_to_e(new_sca, old_sca);
2667 
2668 	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2669 		vcpu->arch.sie_block->scaoh = scaoh;
2670 		vcpu->arch.sie_block->scaol = scaol;
2671 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2672 	}
2673 	kvm->arch.sca = new_sca;
2674 	kvm->arch.use_esca = 1;
2675 
2676 	write_unlock(&kvm->arch.sca_lock);
2677 	kvm_s390_vcpu_unblock_all(kvm);
2678 
2679 	free_page((unsigned long)old_sca);
2680 
2681 	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2682 		 old_sca, kvm->arch.sca);
2683 	return 0;
2684 }
2685 
2686 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2687 {
2688 	int rc;
2689 
2690 	if (!kvm_s390_use_sca_entries()) {
2691 		if (id < KVM_MAX_VCPUS)
2692 			return true;
2693 		return false;
2694 	}
2695 	if (id < KVM_S390_BSCA_CPU_SLOTS)
2696 		return true;
2697 	if (!sclp.has_esca || !sclp.has_64bscao)
2698 		return false;
2699 
2700 	mutex_lock(&kvm->lock);
2701 	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2702 	mutex_unlock(&kvm->lock);
2703 
2704 	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2705 }
2706 
2707 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2708 {
2709 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2710 	kvm_clear_async_pf_completion_queue(vcpu);
2711 	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2712 				    KVM_SYNC_GPRS |
2713 				    KVM_SYNC_ACRS |
2714 				    KVM_SYNC_CRS |
2715 				    KVM_SYNC_ARCH0 |
2716 				    KVM_SYNC_PFAULT;
2717 	kvm_s390_set_prefix(vcpu, 0);
2718 	if (test_kvm_facility(vcpu->kvm, 64))
2719 		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2720 	if (test_kvm_facility(vcpu->kvm, 82))
2721 		vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
2722 	if (test_kvm_facility(vcpu->kvm, 133))
2723 		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2724 	if (test_kvm_facility(vcpu->kvm, 156))
2725 		vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
2726 	/* fprs can be synchronized via vrs, even if the guest has no vx. With
2727 	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2728 	 */
2729 	if (MACHINE_HAS_VX)
2730 		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2731 	else
2732 		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2733 
2734 	if (kvm_is_ucontrol(vcpu->kvm))
2735 		return __kvm_ucontrol_vcpu_init(vcpu);
2736 
2737 	return 0;
2738 }
2739 
/*
 * Start accounting guest cpu timer consumption: store the current TOD clock
 * value in cputm_start inside a cputm_seqcount write section. Warns once if
 * cputm_start is already non-zero.
 *
 * needs disabled preemption to protect from TOD sync and vcpu_load/put
 */
static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	vcpu->arch.cputm_start = get_tod_clock_fast();
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
}
2748 
/*
 * Stop accounting guest cpu timer consumption: subtract the TOD time that
 * elapsed since cputm_start from the SIE block's cputm and reset
 * cputm_start to 0, all inside a cputm_seqcount write section. Warns once
 * if cputm_start is 0.
 *
 * needs disabled preemption to protect from TOD sync and vcpu_load/put
 */
static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
	vcpu->arch.cputm_start = 0;
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
}
2758 
/*
 * Mark cpu timer accounting as enabled for the vcpu and start it. Warns
 * once if accounting is already enabled.
 *
 * needs disabled preemption to protect from TOD sync and vcpu_load/put
 */
static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
	vcpu->arch.cputm_enabled = true;
	__start_cpu_timer_accounting(vcpu);
}
2766 
/*
 * Stop cpu timer accounting for the vcpu and mark it as disabled. Warns
 * once if accounting is not enabled.
 *
 * needs disabled preemption to protect from TOD sync and vcpu_load/put
 */
static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
	__stop_cpu_timer_accounting(vcpu);
	vcpu->arch.cputm_enabled = false;
}
2774 
/* Preemption-safe wrapper around __enable_cpu_timer_accounting() */
static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	__enable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
2781 
/* Preemption-safe wrapper around __disable_cpu_timer_accounting() */
static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	__disable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
2788 
/*
 * set the cpu timer - may only be called from the VCPU thread itself
 *
 * Writes @cputm to the SIE block inside a cputm_seqcount write section.
 * If accounting is enabled, cputm_start is reset to the current TOD clock
 * so that elapsed time is measured from the new value.
 */
void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	if (vcpu->arch.cputm_enabled)
		vcpu->arch.cputm_start = get_tod_clock_fast();
	vcpu->arch.sie_block->cputm = cputm;
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
	preempt_enable();
}
2800 
/*
 * update and get the cpu timer - can also be called from other VCPU threads
 *
 * With accounting disabled the SIE block's cputm is returned as is.
 * Otherwise the value is read in a cputm_seqcount retry loop and the TOD
 * time elapsed since cputm_start is subtracted from it.
 */
__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
{
	unsigned int seq;
	__u64 value;

	if (unlikely(!vcpu->arch.cputm_enabled))
		return vcpu->arch.sie_block->cputm;

	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	do {
		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
		/*
		 * If the writer would ever execute a read in the critical
		 * section, e.g. in irq context, we have a deadlock.
		 */
		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
		value = vcpu->arch.sie_block->cputm;
		/* if cputm_start is 0, accounting is being started/stopped */
		if (likely(vcpu->arch.cputm_start))
			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
	/* seq & ~1: an odd (write in progress) sequence always forces a retry */
	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
	preempt_enable();
	return value;
}
2826 
2827 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2828 {
2829 
2830 	gmap_enable(vcpu->arch.enabled_gmap);
2831 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
2832 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2833 		__start_cpu_timer_accounting(vcpu);
2834 	vcpu->cpu = cpu;
2835 }
2836 
2837 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2838 {
2839 	vcpu->cpu = -1;
2840 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2841 		__stop_cpu_timer_accounting(vcpu);
2842 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
2843 	vcpu->arch.enabled_gmap = gmap_get_enabled();
2844 	gmap_disable(vcpu->arch.enabled_gmap);
2845 
2846 }
2847 
2848 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2849 {
2850 	/* this equals initial cpu reset in pop, but we don't switch to ESA */
2851 	vcpu->arch.sie_block->gpsw.mask = 0UL;
2852 	vcpu->arch.sie_block->gpsw.addr = 0UL;
2853 	kvm_s390_set_prefix(vcpu, 0);
2854 	kvm_s390_set_cpu_timer(vcpu, 0);
2855 	vcpu->arch.sie_block->ckc       = 0UL;
2856 	vcpu->arch.sie_block->todpr     = 0;
2857 	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2858 	vcpu->arch.sie_block->gcr[0]  = CR0_UNUSED_56 |
2859 					CR0_INTERRUPT_KEY_SUBMASK |
2860 					CR0_MEASUREMENT_ALERT_SUBMASK;
2861 	vcpu->arch.sie_block->gcr[14] = CR14_UNUSED_32 |
2862 					CR14_UNUSED_33 |
2863 					CR14_EXTERNAL_DAMAGE_SUBMASK;
2864 	/* make sure the new fpc will be lazily loaded */
2865 	save_fpu_regs();
2866 	current->thread.fpu.fpc = 0;
2867 	vcpu->arch.sie_block->gbea = 1;
2868 	vcpu->arch.sie_block->pp = 0;
2869 	vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
2870 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2871 	kvm_clear_async_pf_completion_queue(vcpu);
2872 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2873 		kvm_s390_vcpu_stop(vcpu);
2874 	kvm_s390_clear_local_irqs(vcpu);
2875 }
2876 
2877 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2878 {
2879 	mutex_lock(&vcpu->kvm->lock);
2880 	preempt_disable();
2881 	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2882 	vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
2883 	preempt_enable();
2884 	mutex_unlock(&vcpu->kvm->lock);
2885 	if (!kvm_is_ucontrol(vcpu->kvm)) {
2886 		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2887 		sca_add_vcpu(vcpu);
2888 	}
2889 	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2890 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2891 	/* make vcpu_load load the right gmap on the first trigger */
2892 	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2893 }
2894 
2895 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
2896 {
2897 	if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
2898 	    test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
2899 		return true;
2900 	return false;
2901 }
2902 
2903 static bool kvm_has_pckmo_ecc(struct kvm *kvm)
2904 {
2905 	/* At least one ECC subfunction must be present */
2906 	return kvm_has_pckmo_subfunc(kvm, 32) ||
2907 	       kvm_has_pckmo_subfunc(kvm, 33) ||
2908 	       kvm_has_pckmo_subfunc(kvm, 34) ||
2909 	       kvm_has_pckmo_subfunc(kvm, 40) ||
2910 	       kvm_has_pckmo_subfunc(kvm, 41);
2911 
2912 }
2913 
2914 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2915 {
2916 	/*
2917 	 * If the AP instructions are not being interpreted and the MSAX3
2918 	 * facility is not configured for the guest, there is nothing to set up.
2919 	 */
2920 	if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
2921 		return;
2922 
2923 	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2924 	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2925 	vcpu->arch.sie_block->eca &= ~ECA_APIE;
2926 	vcpu->arch.sie_block->ecd &= ~ECD_ECC;
2927 
2928 	if (vcpu->kvm->arch.crypto.apie)
2929 		vcpu->arch.sie_block->eca |= ECA_APIE;
2930 
2931 	/* Set up protected key support */
2932 	if (vcpu->kvm->arch.crypto.aes_kw) {
2933 		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2934 		/* ecc is also wrapped with AES key */
2935 		if (kvm_has_pckmo_ecc(vcpu->kvm))
2936 			vcpu->arch.sie_block->ecd |= ECD_ECC;
2937 	}
2938 
2939 	if (vcpu->kvm->arch.crypto.dea_kw)
2940 		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2941 }
2942 
2943 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2944 {
2945 	free_page(vcpu->arch.sie_block->cbrlo);
2946 	vcpu->arch.sie_block->cbrlo = 0;
2947 }
2948 
2949 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2950 {
2951 	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2952 	if (!vcpu->arch.sie_block->cbrlo)
2953 		return -ENOMEM;
2954 	return 0;
2955 }
2956 
2957 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2958 {
2959 	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2960 
2961 	vcpu->arch.sie_block->ibc = model->ibc;
2962 	if (test_kvm_facility(vcpu->kvm, 7))
2963 		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2964 }
2965 
2966 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2967 {
2968 	int rc = 0;
2969 
2970 	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2971 						    CPUSTAT_SM |
2972 						    CPUSTAT_STOPPED);
2973 
2974 	if (test_kvm_facility(vcpu->kvm, 78))
2975 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
2976 	else if (test_kvm_facility(vcpu->kvm, 8))
2977 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
2978 
2979 	kvm_s390_vcpu_setup_model(vcpu);
2980 
2981 	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2982 	if (MACHINE_HAS_ESOP)
2983 		vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2984 	if (test_kvm_facility(vcpu->kvm, 9))
2985 		vcpu->arch.sie_block->ecb |= ECB_SRSI;
2986 	if (test_kvm_facility(vcpu->kvm, 73))
2987 		vcpu->arch.sie_block->ecb |= ECB_TE;
2988 
2989 	if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
2990 		vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2991 	if (test_kvm_facility(vcpu->kvm, 130))
2992 		vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2993 	vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2994 	if (sclp.has_cei)
2995 		vcpu->arch.sie_block->eca |= ECA_CEI;
2996 	if (sclp.has_ib)
2997 		vcpu->arch.sie_block->eca |= ECA_IB;
2998 	if (sclp.has_siif)
2999 		vcpu->arch.sie_block->eca |= ECA_SII;
3000 	if (sclp.has_sigpif)
3001 		vcpu->arch.sie_block->eca |= ECA_SIGPI;
3002 	if (test_kvm_facility(vcpu->kvm, 129)) {
3003 		vcpu->arch.sie_block->eca |= ECA_VX;
3004 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3005 	}
3006 	if (test_kvm_facility(vcpu->kvm, 139))
3007 		vcpu->arch.sie_block->ecd |= ECD_MEF;
3008 	if (test_kvm_facility(vcpu->kvm, 156))
3009 		vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3010 	if (vcpu->arch.sie_block->gd) {
3011 		vcpu->arch.sie_block->eca |= ECA_AIV;
3012 		VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3013 			   vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3014 	}
3015 	vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
3016 					| SDNXC;
3017 	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
3018 
3019 	if (sclp.has_kss)
3020 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3021 	else
3022 		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3023 
3024 	if (vcpu->kvm->arch.use_cmma) {
3025 		rc = kvm_s390_vcpu_setup_cmma(vcpu);
3026 		if (rc)
3027 			return rc;
3028 	}
3029 	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3030 	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3031 
3032 	vcpu->arch.sie_block->hpid = HPID_KVM;
3033 
3034 	kvm_s390_vcpu_crypto_setup(vcpu);
3035 
3036 	return rc;
3037 }
3038 
3039 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
3040 				      unsigned int id)
3041 {
3042 	struct kvm_vcpu *vcpu;
3043 	struct sie_page *sie_page;
3044 	int rc = -EINVAL;
3045 
3046 	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3047 		goto out;
3048 
3049 	rc = -ENOMEM;
3050 
3051 	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
3052 	if (!vcpu)
3053 		goto out;
3054 
3055 	BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3056 	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
3057 	if (!sie_page)
3058 		goto out_free_cpu;
3059 
3060 	vcpu->arch.sie_block = &sie_page->sie_block;
3061 	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
3062 
3063 	/* the real guest size will always be smaller than msl */
3064 	vcpu->arch.sie_block->mso = 0;
3065 	vcpu->arch.sie_block->msl = sclp.hamax;
3066 
3067 	vcpu->arch.sie_block->icpua = id;
3068 	spin_lock_init(&vcpu->arch.local_int.lock);
3069 	vcpu->arch.sie_block->gd = (u32)(u64)kvm->arch.gisa_int.origin;
3070 	if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
3071 		vcpu->arch.sie_block->gd |= GISA_FORMAT1;
3072 	seqcount_init(&vcpu->arch.cputm_seqcount);
3073 
3074 	rc = kvm_vcpu_init(vcpu, kvm, id);
3075 	if (rc)
3076 		goto out_free_sie_block;
3077 	VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
3078 		 vcpu->arch.sie_block);
3079 	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
3080 
3081 	return vcpu;
3082 out_free_sie_block:
3083 	free_page((unsigned long)(vcpu->arch.sie_block));
3084 out_free_cpu:
3085 	kmem_cache_free(kvm_vcpu_cache, vcpu);
3086 out:
3087 	return ERR_PTR(rc);
3088 }
3089 
/* A vcpu is runnable when kvm_s390_vcpu_has_irq(vcpu, 0) reports an irq. */
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	int has_irq = kvm_s390_vcpu_has_irq(vcpu, 0);

	return has_irq;
}
3094 
3095 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3096 {
3097 	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3098 }
3099 
3100 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3101 {
3102 	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3103 	exit_sie(vcpu);
3104 }
3105 
3106 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3107 {
3108 	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3109 }
3110 
3111 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3112 {
3113 	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3114 	exit_sie(vcpu);
3115 }
3116 
3117 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3118 {
3119 	return atomic_read(&vcpu->arch.sie_block->prog20) &
3120 	       (PROG_BLOCK_SIE | PROG_REQUEST);
3121 }
3122 
3123 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3124 {
3125 	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3126 }
3127 
3128 /*
3129  * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3130  * If the CPU is not running (e.g. waiting as idle) the function will
3131  * return immediately. */
3132 void exit_sie(struct kvm_vcpu *vcpu)
3133 {
3134 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3135 	kvm_s390_vsie_kick(vcpu);
3136 	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3137 		cpu_relax();
3138 }
3139 
/*
 * Kick a guest cpu out of SIE to process a request synchronously:
 * the request is queued first, then the vcpu is forced out of SIE.
 */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
	kvm_make_request(req, vcpu);
	kvm_s390_vcpu_request(vcpu);
}
3146 
3147 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3148 			      unsigned long end)
3149 {
3150 	struct kvm *kvm = gmap->private;
3151 	struct kvm_vcpu *vcpu;
3152 	unsigned long prefix;
3153 	int i;
3154 
3155 	if (gmap_is_shadow(gmap))
3156 		return;
3157 	if (start >= 1UL << 31)
3158 		/* We are only interested in prefix pages */
3159 		return;
3160 	kvm_for_each_vcpu(i, vcpu, kvm) {
3161 		/* match against both prefix pages */
3162 		prefix = kvm_s390_get_prefix(vcpu);
3163 		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3164 			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3165 				   start, end);
3166 			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
3167 		}
3168 	}
3169 }
3170 
3171 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3172 {
3173 	/* do not poll with more than halt_poll_max_steal percent of steal time */
3174 	if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3175 	    halt_poll_max_steal) {
3176 		vcpu->stat.halt_no_poll_steal++;
3177 		return true;
3178 	}
3179 	return false;
3180 }
3181 
/*
 * Present only because kvm common code refers to it; it is never
 * expected to be called, hence the BUG().
 */
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	BUG();

	return 0;
}
3188 
3189 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3190 					   struct kvm_one_reg *reg)
3191 {
3192 	int r = -EINVAL;
3193 
3194 	switch (reg->id) {
3195 	case KVM_REG_S390_TODPR:
3196 		r = put_user(vcpu->arch.sie_block->todpr,
3197 			     (u32 __user *)reg->addr);
3198 		break;
3199 	case KVM_REG_S390_EPOCHDIFF:
3200 		r = put_user(vcpu->arch.sie_block->epoch,
3201 			     (u64 __user *)reg->addr);
3202 		break;
3203 	case KVM_REG_S390_CPU_TIMER:
3204 		r = put_user(kvm_s390_get_cpu_timer(vcpu),
3205 			     (u64 __user *)reg->addr);
3206 		break;
3207 	case KVM_REG_S390_CLOCK_COMP:
3208 		r = put_user(vcpu->arch.sie_block->ckc,
3209 			     (u64 __user *)reg->addr);
3210 		break;
3211 	case KVM_REG_S390_PFTOKEN:
3212 		r = put_user(vcpu->arch.pfault_token,
3213 			     (u64 __user *)reg->addr);
3214 		break;
3215 	case KVM_REG_S390_PFCOMPARE:
3216 		r = put_user(vcpu->arch.pfault_compare,
3217 			     (u64 __user *)reg->addr);
3218 		break;
3219 	case KVM_REG_S390_PFSELECT:
3220 		r = put_user(vcpu->arch.pfault_select,
3221 			     (u64 __user *)reg->addr);
3222 		break;
3223 	case KVM_REG_S390_PP:
3224 		r = put_user(vcpu->arch.sie_block->pp,
3225 			     (u64 __user *)reg->addr);
3226 		break;
3227 	case KVM_REG_S390_GBEA:
3228 		r = put_user(vcpu->arch.sie_block->gbea,
3229 			     (u64 __user *)reg->addr);
3230 		break;
3231 	default:
3232 		break;
3233 	}
3234 
3235 	return r;
3236 }
3237 
3238 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3239 					   struct kvm_one_reg *reg)
3240 {
3241 	int r = -EINVAL;
3242 	__u64 val;
3243 
3244 	switch (reg->id) {
3245 	case KVM_REG_S390_TODPR:
3246 		r = get_user(vcpu->arch.sie_block->todpr,
3247 			     (u32 __user *)reg->addr);
3248 		break;
3249 	case KVM_REG_S390_EPOCHDIFF:
3250 		r = get_user(vcpu->arch.sie_block->epoch,
3251 			     (u64 __user *)reg->addr);
3252 		break;
3253 	case KVM_REG_S390_CPU_TIMER:
3254 		r = get_user(val, (u64 __user *)reg->addr);
3255 		if (!r)
3256 			kvm_s390_set_cpu_timer(vcpu, val);
3257 		break;
3258 	case KVM_REG_S390_CLOCK_COMP:
3259 		r = get_user(vcpu->arch.sie_block->ckc,
3260 			     (u64 __user *)reg->addr);
3261 		break;
3262 	case KVM_REG_S390_PFTOKEN:
3263 		r = get_user(vcpu->arch.pfault_token,
3264 			     (u64 __user *)reg->addr);
3265 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3266 			kvm_clear_async_pf_completion_queue(vcpu);
3267 		break;
3268 	case KVM_REG_S390_PFCOMPARE:
3269 		r = get_user(vcpu->arch.pfault_compare,
3270 			     (u64 __user *)reg->addr);
3271 		break;
3272 	case KVM_REG_S390_PFSELECT:
3273 		r = get_user(vcpu->arch.pfault_select,
3274 			     (u64 __user *)reg->addr);
3275 		break;
3276 	case KVM_REG_S390_PP:
3277 		r = get_user(vcpu->arch.sie_block->pp,
3278 			     (u64 __user *)reg->addr);
3279 		break;
3280 	case KVM_REG_S390_GBEA:
3281 		r = get_user(vcpu->arch.sie_block->gbea,
3282 			     (u64 __user *)reg->addr);
3283 		break;
3284 	default:
3285 		break;
3286 	}
3287 
3288 	return r;
3289 }
3290 
/* ioctl wrapper: perform the initial reset; always returns 0. */
static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
	const int rc = 0;

	kvm_s390_vcpu_initial_reset(vcpu);

	return rc;
}
3296 
3297 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3298 {
3299 	vcpu_load(vcpu);
3300 	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3301 	vcpu_put(vcpu);
3302 	return 0;
3303 }
3304 
3305 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3306 {
3307 	vcpu_load(vcpu);
3308 	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3309 	vcpu_put(vcpu);
3310 	return 0;
3311 }
3312 
3313 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3314 				  struct kvm_sregs *sregs)
3315 {
3316 	vcpu_load(vcpu);
3317 
3318 	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3319 	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3320 
3321 	vcpu_put(vcpu);
3322 	return 0;
3323 }
3324 
3325 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3326 				  struct kvm_sregs *sregs)
3327 {
3328 	vcpu_load(vcpu);
3329 
3330 	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3331 	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3332 
3333 	vcpu_put(vcpu);
3334 	return 0;
3335 }
3336 
3337 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3338 {
3339 	int ret = 0;
3340 
3341 	vcpu_load(vcpu);
3342 
3343 	if (test_fp_ctl(fpu->fpc)) {
3344 		ret = -EINVAL;
3345 		goto out;
3346 	}
3347 	vcpu->run->s.regs.fpc = fpu->fpc;
3348 	if (MACHINE_HAS_VX)
3349 		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3350 				 (freg_t *) fpu->fprs);
3351 	else
3352 		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3353 
3354 out:
3355 	vcpu_put(vcpu);
3356 	return ret;
3357 }
3358 
3359 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3360 {
3361 	vcpu_load(vcpu);
3362 
3363 	/* make sure we have the latest values */
3364 	save_fpu_regs();
3365 	if (MACHINE_HAS_VX)
3366 		convert_vx_to_fp((freg_t *) fpu->fprs,
3367 				 (__vector128 *) vcpu->run->s.regs.vrs);
3368 	else
3369 		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3370 	fpu->fpc = vcpu->run->s.regs.fpc;
3371 
3372 	vcpu_put(vcpu);
3373 	return 0;
3374 }
3375 
3376 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3377 {
3378 	int rc = 0;
3379 
3380 	if (!is_vcpu_stopped(vcpu))
3381 		rc = -EBUSY;
3382 	else {
3383 		vcpu->run->psw_mask = psw.mask;
3384 		vcpu->run->psw_addr = psw.addr;
3385 	}
3386 	return rc;
3387 }
3388 
/* Not implemented yet: always returns -EINVAL. */
int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	const int not_implemented = -EINVAL;

	return not_implemented;
}
3394 
3395 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3396 			      KVM_GUESTDBG_USE_HW_BP | \
3397 			      KVM_GUESTDBG_ENABLE)
3398 
3399 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3400 					struct kvm_guest_debug *dbg)
3401 {
3402 	int rc = 0;
3403 
3404 	vcpu_load(vcpu);
3405 
3406 	vcpu->guest_debug = 0;
3407 	kvm_s390_clear_bp_data(vcpu);
3408 
3409 	if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3410 		rc = -EINVAL;
3411 		goto out;
3412 	}
3413 	if (!sclp.has_gpere) {
3414 		rc = -EINVAL;
3415 		goto out;
3416 	}
3417 
3418 	if (dbg->control & KVM_GUESTDBG_ENABLE) {
3419 		vcpu->guest_debug = dbg->control;
3420 		/* enforce guest PER */
3421 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3422 
3423 		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3424 			rc = kvm_s390_import_bp_data(vcpu, dbg);
3425 	} else {
3426 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3427 		vcpu->arch.guestdbg.last_bp = 0;
3428 	}
3429 
3430 	if (rc) {
3431 		vcpu->guest_debug = 0;
3432 		kvm_s390_clear_bp_data(vcpu);
3433 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3434 	}
3435 
3436 out:
3437 	vcpu_put(vcpu);
3438 	return rc;
3439 }
3440 
3441 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3442 				    struct kvm_mp_state *mp_state)
3443 {
3444 	int ret;
3445 
3446 	vcpu_load(vcpu);
3447 
3448 	/* CHECK_STOP and LOAD are not supported yet */
3449 	ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3450 				      KVM_MP_STATE_OPERATING;
3451 
3452 	vcpu_put(vcpu);
3453 	return ret;
3454 }
3455 
3456 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3457 				    struct kvm_mp_state *mp_state)
3458 {
3459 	int rc = 0;
3460 
3461 	vcpu_load(vcpu);
3462 
3463 	/* user space knows about this interface - let it control the state */
3464 	vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3465 
3466 	switch (mp_state->mp_state) {
3467 	case KVM_MP_STATE_STOPPED:
3468 		kvm_s390_vcpu_stop(vcpu);
3469 		break;
3470 	case KVM_MP_STATE_OPERATING:
3471 		kvm_s390_vcpu_start(vcpu);
3472 		break;
3473 	case KVM_MP_STATE_LOAD:
3474 	case KVM_MP_STATE_CHECK_STOP:
3475 		/* fall through - CHECK_STOP and LOAD are not supported yet */
3476 	default:
3477 		rc = -ENXIO;
3478 	}
3479 
3480 	vcpu_put(vcpu);
3481 	return rc;
3482 }
3483 
3484 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3485 {
3486 	return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3487 }
3488 
3489 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3490 {
3491 retry:
3492 	kvm_s390_vcpu_request_handled(vcpu);
3493 	if (!kvm_request_pending(vcpu))
3494 		return 0;
3495 	/*
3496 	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
3497 	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3498 	 * This ensures that the ipte instruction for this request has
3499 	 * already finished. We might race against a second unmapper that
3500 	 * wants to set the blocking bit. Lets just retry the request loop.
3501 	 */
3502 	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3503 		int rc;
3504 		rc = gmap_mprotect_notify(vcpu->arch.gmap,
3505 					  kvm_s390_get_prefix(vcpu),
3506 					  PAGE_SIZE * 2, PROT_WRITE);
3507 		if (rc) {
3508 			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3509 			return rc;
3510 		}
3511 		goto retry;
3512 	}
3513 
3514 	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3515 		vcpu->arch.sie_block->ihcpu = 0xffff;
3516 		goto retry;
3517 	}
3518 
3519 	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3520 		if (!ibs_enabled(vcpu)) {
3521 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3522 			kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3523 		}
3524 		goto retry;
3525 	}
3526 
3527 	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3528 		if (ibs_enabled(vcpu)) {
3529 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3530 			kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3531 		}
3532 		goto retry;
3533 	}
3534 
3535 	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3536 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3537 		goto retry;
3538 	}
3539 
3540 	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3541 		/*
3542 		 * Disable CMM virtualization; we will emulate the ESSA
3543 		 * instruction manually, in order to provide additional
3544 		 * functionalities needed for live migration.
3545 		 */
3546 		vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3547 		goto retry;
3548 	}
3549 
3550 	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3551 		/*
3552 		 * Re-enable CMM virtualization if CMMA is available and
3553 		 * CMM has been used.
3554 		 */
3555 		if ((vcpu->kvm->arch.use_cmma) &&
3556 		    (vcpu->kvm->mm->context.uses_cmm))
3557 			vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3558 		goto retry;
3559 	}
3560 
3561 	/* nothing to do, just clear the request */
3562 	kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3563 	/* we left the vsie handler, nothing to do, just clear the request */
3564 	kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
3565 
3566 	return 0;
3567 }
3568 
3569 void kvm_s390_set_tod_clock(struct kvm *kvm,
3570 			    const struct kvm_s390_vm_tod_clock *gtod)
3571 {
3572 	struct kvm_vcpu *vcpu;
3573 	struct kvm_s390_tod_clock_ext htod;
3574 	int i;
3575 
3576 	mutex_lock(&kvm->lock);
3577 	preempt_disable();
3578 
3579 	get_tod_clock_ext((char *)&htod);
3580 
3581 	kvm->arch.epoch = gtod->tod - htod.tod;
3582 	kvm->arch.epdx = 0;
3583 	if (test_kvm_facility(kvm, 139)) {
3584 		kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
3585 		if (kvm->arch.epoch > gtod->tod)
3586 			kvm->arch.epdx -= 1;
3587 	}
3588 
3589 	kvm_s390_vcpu_block_all(kvm);
3590 	kvm_for_each_vcpu(i, vcpu, kvm) {
3591 		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3592 		vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
3593 	}
3594 
3595 	kvm_s390_vcpu_unblock_all(kvm);
3596 	preempt_enable();
3597 	mutex_unlock(&kvm->lock);
3598 }
3599 
3600 /**
3601  * kvm_arch_fault_in_page - fault-in guest page if necessary
3602  * @vcpu: The corresponding virtual cpu
3603  * @gpa: Guest physical address
3604  * @writable: Whether the page should be writable or not
3605  *
3606  * Make sure that a guest page has been faulted-in on the host.
3607  *
3608  * Return: Zero on success, negative error code otherwise.
3609  */
3610 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3611 {
3612 	return gmap_fault(vcpu->arch.gmap, gpa,
3613 			  writable ? FAULT_FLAG_WRITE : 0);
3614 }
3615 
3616 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3617 				      unsigned long token)
3618 {
3619 	struct kvm_s390_interrupt inti;
3620 	struct kvm_s390_irq irq;
3621 
3622 	if (start_token) {
3623 		irq.u.ext.ext_params2 = token;
3624 		irq.type = KVM_S390_INT_PFAULT_INIT;
3625 		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3626 	} else {
3627 		inti.type = KVM_S390_INT_PFAULT_DONE;
3628 		inti.parm64 = token;
3629 		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3630 	}
3631 }
3632 
3633 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3634 				     struct kvm_async_pf *work)
3635 {
3636 	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3637 	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3638 }
3639 
3640 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3641 				 struct kvm_async_pf *work)
3642 {
3643 	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3644 	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3645 }
3646 
/*
 * Intentionally empty: s390 will always inject the page directly, so
 * there is nothing to do here.
 */
void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
}
3652 
/*
 * Always true: s390 will always inject the page directly, but we still
 * want check_async_completion to cleanup.
 */
bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
{
	const bool can_inject = true;

	return can_inject;
}
3661 
3662 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3663 {
3664 	hva_t hva;
3665 	struct kvm_arch_async_pf arch;
3666 	int rc;
3667 
3668 	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3669 		return 0;
3670 	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3671 	    vcpu->arch.pfault_compare)
3672 		return 0;
3673 	if (psw_extint_disabled(vcpu))
3674 		return 0;
3675 	if (kvm_s390_vcpu_has_irq(vcpu, 0))
3676 		return 0;
3677 	if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
3678 		return 0;
3679 	if (!vcpu->arch.gmap->pfault_enabled)
3680 		return 0;
3681 
3682 	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3683 	hva += current->thread.gmap_addr & ~PAGE_MASK;
3684 	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3685 		return 0;
3686 
3687 	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3688 	return rc;
3689 }
3690 
3691 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3692 {
3693 	int rc, cpuflags;
3694 
3695 	/*
3696 	 * On s390 notifications for arriving pages will be delivered directly
3697 	 * to the guest but the house keeping for completed pfaults is
3698 	 * handled outside the worker.
3699 	 */
3700 	kvm_check_async_pf_completion(vcpu);
3701 
3702 	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3703 	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3704 
3705 	if (need_resched())
3706 		schedule();
3707 
3708 	if (test_cpu_flag(CIF_MCCK_PENDING))
3709 		s390_handle_mcck();
3710 
3711 	if (!kvm_is_ucontrol(vcpu->kvm)) {
3712 		rc = kvm_s390_deliver_pending_interrupts(vcpu);
3713 		if (rc)
3714 			return rc;
3715 	}
3716 
3717 	rc = kvm_s390_handle_requests(vcpu);
3718 	if (rc)
3719 		return rc;
3720 
3721 	if (guestdbg_enabled(vcpu)) {
3722 		kvm_s390_backup_guest_per_regs(vcpu);
3723 		kvm_s390_patch_guest_per_regs(vcpu);
3724 	}
3725 
3726 	clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.gisa_int.kicked_mask);
3727 
3728 	vcpu->arch.sie_block->icptcode = 0;
3729 	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3730 	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3731 	trace_kvm_s390_sie_enter(vcpu, cpuflags);
3732 
3733 	return 0;
3734 }
3735 
3736 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3737 {
3738 	struct kvm_s390_pgm_info pgm_info = {
3739 		.code = PGM_ADDRESSING,
3740 	};
3741 	u8 opcode, ilen;
3742 	int rc;
3743 
3744 	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3745 	trace_kvm_s390_sie_fault(vcpu);
3746 
3747 	/*
3748 	 * We want to inject an addressing exception, which is defined as a
3749 	 * suppressing or terminating exception. However, since we came here
3750 	 * by a DAT access exception, the PSW still points to the faulting
3751 	 * instruction since DAT exceptions are nullifying. So we've got
3752 	 * to look up the current opcode to get the length of the instruction
3753 	 * to be able to forward the PSW.
3754 	 */
3755 	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3756 	ilen = insn_length(opcode);
3757 	if (rc < 0) {
3758 		return rc;
3759 	} else if (rc) {
3760 		/* Instruction-Fetching Exceptions - we can't detect the ilen.
3761 		 * Forward by arbitrary ilc, injection will take care of
3762 		 * nullification if necessary.
3763 		 */
3764 		pgm_info = vcpu->arch.pgm;
3765 		ilen = 4;
3766 	}
3767 	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3768 	kvm_s390_forward_psw(vcpu, ilen);
3769 	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3770 }
3771 
3772 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3773 {
3774 	struct mcck_volatile_info *mcck_info;
3775 	struct sie_page *sie_page;
3776 
3777 	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3778 		   vcpu->arch.sie_block->icptcode);
3779 	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3780 
3781 	if (guestdbg_enabled(vcpu))
3782 		kvm_s390_restore_guest_per_regs(vcpu);
3783 
3784 	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3785 	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3786 
3787 	if (exit_reason == -EINTR) {
3788 		VCPU_EVENT(vcpu, 3, "%s", "machine check");
3789 		sie_page = container_of(vcpu->arch.sie_block,
3790 					struct sie_page, sie_block);
3791 		mcck_info = &sie_page->mcck_info;
3792 		kvm_s390_reinject_machine_check(vcpu, mcck_info);
3793 		return 0;
3794 	}
3795 
3796 	if (vcpu->arch.sie_block->icptcode > 0) {
3797 		int rc = kvm_handle_sie_intercept(vcpu);
3798 
3799 		if (rc != -EOPNOTSUPP)
3800 			return rc;
3801 		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3802 		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3803 		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3804 		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3805 		return -EREMOTE;
3806 	} else if (exit_reason != -EFAULT) {
3807 		vcpu->stat.exit_null++;
3808 		return 0;
3809 	} else if (kvm_is_ucontrol(vcpu->kvm)) {
3810 		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3811 		vcpu->run->s390_ucontrol.trans_exc_code =
3812 						current->thread.gmap_addr;
3813 		vcpu->run->s390_ucontrol.pgm_code = 0x10;
3814 		return -EREMOTE;
3815 	} else if (current->thread.gmap_pfault) {
3816 		trace_kvm_s390_major_guest_pfault(vcpu);
3817 		current->thread.gmap_pfault = 0;
3818 		if (kvm_arch_setup_async_pf(vcpu))
3819 			return 0;
3820 		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3821 	}
3822 	return vcpu_post_run_fault_in_sie(vcpu);
3823 }
3824 
3825 static int __vcpu_run(struct kvm_vcpu *vcpu)
3826 {
3827 	int rc, exit_reason;
3828 
3829 	/*
3830 	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
3831 	 * ning the guest), so that memslots (and other stuff) are protected
3832 	 */
3833 	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3834 
3835 	do {
3836 		rc = vcpu_pre_run(vcpu);
3837 		if (rc)
3838 			break;
3839 
3840 		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3841 		/*
3842 		 * As PF_VCPU will be used in fault handler, between
3843 		 * guest_enter and guest_exit should be no uaccess.
3844 		 */
3845 		local_irq_disable();
3846 		guest_enter_irqoff();
3847 		__disable_cpu_timer_accounting(vcpu);
3848 		local_irq_enable();
3849 		exit_reason = sie64a(vcpu->arch.sie_block,
3850 				     vcpu->run->s.regs.gprs);
3851 		local_irq_disable();
3852 		__enable_cpu_timer_accounting(vcpu);
3853 		guest_exit_irqoff();
3854 		local_irq_enable();
3855 		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3856 
3857 		rc = vcpu_post_run(vcpu, exit_reason);
3858 	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3859 
3860 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3861 	return rc;
3862 }
3863 
3864 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3865 {
3866 	struct runtime_instr_cb *riccb;
3867 	struct gs_cb *gscb;
3868 
3869 	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3870 	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3871 	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3872 	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3873 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3874 		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3875 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3876 		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3877 		/* some control register changes require a tlb flush */
3878 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3879 	}
3880 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3881 		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3882 		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3883 		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3884 		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3885 		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3886 	}
3887 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3888 		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3889 		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3890 		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3891 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3892 			kvm_clear_async_pf_completion_queue(vcpu);
3893 	}
3894 	/*
3895 	 * If userspace sets the riccb (e.g. after migration) to a valid state,
3896 	 * we should enable RI here instead of doing the lazy enablement.
3897 	 */
3898 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3899 	    test_kvm_facility(vcpu->kvm, 64) &&
3900 	    riccb->v &&
3901 	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3902 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3903 		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3904 	}
3905 	/*
3906 	 * If userspace sets the gscb (e.g. after migration) to non-zero,
3907 	 * we should enable GS here instead of doing the lazy enablement.
3908 	 */
3909 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3910 	    test_kvm_facility(vcpu->kvm, 133) &&
3911 	    gscb->gssm &&
3912 	    !vcpu->arch.gs_enabled) {
3913 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3914 		vcpu->arch.sie_block->ecb |= ECB_GS;
3915 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3916 		vcpu->arch.gs_enabled = 1;
3917 	}
3918 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
3919 	    test_kvm_facility(vcpu->kvm, 82)) {
3920 		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3921 		vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
3922 	}
3923 	save_access_regs(vcpu->arch.host_acrs);
3924 	restore_access_regs(vcpu->run->s.regs.acrs);
3925 	/* save host (userspace) fprs/vrs */
3926 	save_fpu_regs();
3927 	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3928 	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
3929 	if (MACHINE_HAS_VX)
3930 		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3931 	else
3932 		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3933 	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3934 	if (test_fp_ctl(current->thread.fpu.fpc))
3935 		/* User space provided an invalid FPC, let's clear it */
3936 		current->thread.fpu.fpc = 0;
3937 	if (MACHINE_HAS_GS) {
3938 		preempt_disable();
3939 		__ctl_set_bit(2, 4);
3940 		if (current->thread.gs_cb) {
3941 			vcpu->arch.host_gscb = current->thread.gs_cb;
3942 			save_gs_cb(vcpu->arch.host_gscb);
3943 		}
3944 		if (vcpu->arch.gs_enabled) {
3945 			current->thread.gs_cb = (struct gs_cb *)
3946 						&vcpu->run->s.regs.gscb;
3947 			restore_gs_cb(current->thread.gs_cb);
3948 		}
3949 		preempt_enable();
3950 	}
3951 	/* SIE will load etoken directly from SDNX and therefore kvm_run */
3952 
3953 	kvm_run->kvm_dirty_regs = 0;
3954 }
3955 
3956 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3957 {
3958 	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3959 	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3960 	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3961 	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3962 	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3963 	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3964 	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3965 	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3966 	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3967 	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3968 	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3969 	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3970 	kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
3971 	save_access_regs(vcpu->run->s.regs.acrs);
3972 	restore_access_regs(vcpu->arch.host_acrs);
3973 	/* Save guest register state */
3974 	save_fpu_regs();
3975 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3976 	/* Restore will be done lazily at return */
3977 	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3978 	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3979 	if (MACHINE_HAS_GS) {
3980 		__ctl_set_bit(2, 4);
3981 		if (vcpu->arch.gs_enabled)
3982 			save_gs_cb(current->thread.gs_cb);
3983 		preempt_disable();
3984 		current->thread.gs_cb = vcpu->arch.host_gscb;
3985 		restore_gs_cb(vcpu->arch.host_gscb);
3986 		preempt_enable();
3987 		if (!vcpu->arch.host_gscb)
3988 			__ctl_clear_bit(2, 4);
3989 		vcpu->arch.host_gscb = NULL;
3990 	}
3991 	/* SIE will save etoken directly into SDNX and therefore kvm_run */
3992 }
3993 
3994 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3995 {
3996 	int rc;
3997 
3998 	if (kvm_run->immediate_exit)
3999 		return -EINTR;
4000 
4001 	vcpu_load(vcpu);
4002 
4003 	if (guestdbg_exit_pending(vcpu)) {
4004 		kvm_s390_prepare_debug_exit(vcpu);
4005 		rc = 0;
4006 		goto out;
4007 	}
4008 
4009 	kvm_sigset_activate(vcpu);
4010 
4011 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
4012 		kvm_s390_vcpu_start(vcpu);
4013 	} else if (is_vcpu_stopped(vcpu)) {
4014 		pr_err_ratelimited("can't run stopped vcpu %d\n",
4015 				   vcpu->vcpu_id);
4016 		rc = -EINVAL;
4017 		goto out;
4018 	}
4019 
4020 	sync_regs(vcpu, kvm_run);
4021 	enable_cpu_timer_accounting(vcpu);
4022 
4023 	might_fault();
4024 	rc = __vcpu_run(vcpu);
4025 
4026 	if (signal_pending(current) && !rc) {
4027 		kvm_run->exit_reason = KVM_EXIT_INTR;
4028 		rc = -EINTR;
4029 	}
4030 
4031 	if (guestdbg_exit_pending(vcpu) && !rc)  {
4032 		kvm_s390_prepare_debug_exit(vcpu);
4033 		rc = 0;
4034 	}
4035 
4036 	if (rc == -EREMOTE) {
4037 		/* userspace support is needed, kvm_run has been prepared */
4038 		rc = 0;
4039 	}
4040 
4041 	disable_cpu_timer_accounting(vcpu);
4042 	store_regs(vcpu, kvm_run);
4043 
4044 	kvm_sigset_deactivate(vcpu);
4045 
4046 	vcpu->stat.exit_userspace++;
4047 out:
4048 	vcpu_put(vcpu);
4049 	return rc;
4050 }
4051 
4052 /*
4053  * store status at address
4054  * we use have two special cases:
4055  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
4056  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
4057  */
4058 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4059 {
4060 	unsigned char archmode = 1;
4061 	freg_t fprs[NUM_FPRS];
4062 	unsigned int px;
4063 	u64 clkcomp, cputm;
4064 	int rc;
4065 
4066 	px = kvm_s390_get_prefix(vcpu);
4067 	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
4068 		if (write_guest_abs(vcpu, 163, &archmode, 1))
4069 			return -EFAULT;
4070 		gpa = 0;
4071 	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
4072 		if (write_guest_real(vcpu, 163, &archmode, 1))
4073 			return -EFAULT;
4074 		gpa = px;
4075 	} else
4076 		gpa -= __LC_FPREGS_SAVE_AREA;
4077 
4078 	/* manually convert vector registers if necessary */
4079 	if (MACHINE_HAS_VX) {
4080 		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
4081 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4082 				     fprs, 128);
4083 	} else {
4084 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4085 				     vcpu->run->s.regs.fprs, 128);
4086 	}
4087 	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
4088 			      vcpu->run->s.regs.gprs, 128);
4089 	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
4090 			      &vcpu->arch.sie_block->gpsw, 16);
4091 	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
4092 			      &px, 4);
4093 	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
4094 			      &vcpu->run->s.regs.fpc, 4);
4095 	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
4096 			      &vcpu->arch.sie_block->todpr, 4);
4097 	cputm = kvm_s390_get_cpu_timer(vcpu);
4098 	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
4099 			      &cputm, 8);
4100 	clkcomp = vcpu->arch.sie_block->ckc >> 8;
4101 	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
4102 			      &clkcomp, 8);
4103 	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
4104 			      &vcpu->run->s.regs.acrs, 64);
4105 	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
4106 			      &vcpu->arch.sie_block->gcr, 128);
4107 	return rc ? -EFAULT : 0;
4108 }
4109 
4110 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
4111 {
4112 	/*
4113 	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
4114 	 * switch in the run ioctl. Let's update our copies before we save
4115 	 * it into the save area
4116 	 */
4117 	save_fpu_regs();
4118 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4119 	save_access_regs(vcpu->run->s.regs.acrs);
4120 
4121 	return kvm_s390_store_status_unloaded(vcpu, addr);
4122 }
4123 
4124 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4125 {
4126 	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
4127 	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
4128 }
4129 
4130 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
4131 {
4132 	unsigned int i;
4133 	struct kvm_vcpu *vcpu;
4134 
4135 	kvm_for_each_vcpu(i, vcpu, kvm) {
4136 		__disable_ibs_on_vcpu(vcpu);
4137 	}
4138 }
4139 
4140 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4141 {
4142 	if (!sclp.has_ibs)
4143 		return;
4144 	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
4145 	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
4146 }
4147 
4148 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
4149 {
4150 	int i, online_vcpus, started_vcpus = 0;
4151 
4152 	if (!is_vcpu_stopped(vcpu))
4153 		return;
4154 
4155 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
4156 	/* Only one cpu at a time may enter/leave the STOPPED state. */
4157 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
4158 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4159 
4160 	for (i = 0; i < online_vcpus; i++) {
4161 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
4162 			started_vcpus++;
4163 	}
4164 
4165 	if (started_vcpus == 0) {
4166 		/* we're the only active VCPU -> speed it up */
4167 		__enable_ibs_on_vcpu(vcpu);
4168 	} else if (started_vcpus == 1) {
4169 		/*
4170 		 * As we are starting a second VCPU, we have to disable
4171 		 * the IBS facility on all VCPUs to remove potentially
4172 		 * oustanding ENABLE requests.
4173 		 */
4174 		__disable_ibs_on_all_vcpus(vcpu->kvm);
4175 	}
4176 
4177 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
4178 	/*
4179 	 * Another VCPU might have used IBS while we were offline.
4180 	 * Let's play safe and flush the VCPU at startup.
4181 	 */
4182 	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4183 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4184 	return;
4185 }
4186 
4187 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
4188 {
4189 	int i, online_vcpus, started_vcpus = 0;
4190 	struct kvm_vcpu *started_vcpu = NULL;
4191 
4192 	if (is_vcpu_stopped(vcpu))
4193 		return;
4194 
4195 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
4196 	/* Only one cpu at a time may enter/leave the STOPPED state. */
4197 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
4198 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4199 
4200 	/* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
4201 	kvm_s390_clear_stop_irq(vcpu);
4202 
4203 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
4204 	__disable_ibs_on_vcpu(vcpu);
4205 
4206 	for (i = 0; i < online_vcpus; i++) {
4207 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
4208 			started_vcpus++;
4209 			started_vcpu = vcpu->kvm->vcpus[i];
4210 		}
4211 	}
4212 
4213 	if (started_vcpus == 1) {
4214 		/*
4215 		 * As we only have one VCPU left, we want to enable the
4216 		 * IBS facility for that VCPU to speed it up.
4217 		 */
4218 		__enable_ibs_on_vcpu(started_vcpu);
4219 	}
4220 
4221 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4222 	return;
4223 }
4224 
4225 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4226 				     struct kvm_enable_cap *cap)
4227 {
4228 	int r;
4229 
4230 	if (cap->flags)
4231 		return -EINVAL;
4232 
4233 	switch (cap->cap) {
4234 	case KVM_CAP_S390_CSS_SUPPORT:
4235 		if (!vcpu->kvm->arch.css_support) {
4236 			vcpu->kvm->arch.css_support = 1;
4237 			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
4238 			trace_kvm_s390_enable_css(vcpu->kvm);
4239 		}
4240 		r = 0;
4241 		break;
4242 	default:
4243 		r = -EINVAL;
4244 		break;
4245 	}
4246 	return r;
4247 }
4248 
4249 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
4250 				  struct kvm_s390_mem_op *mop)
4251 {
4252 	void __user *uaddr = (void __user *)mop->buf;
4253 	void *tmpbuf = NULL;
4254 	int r, srcu_idx;
4255 	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
4256 				    | KVM_S390_MEMOP_F_CHECK_ONLY;
4257 
4258 	if (mop->flags & ~supported_flags)
4259 		return -EINVAL;
4260 
4261 	if (mop->size > MEM_OP_MAX_SIZE)
4262 		return -E2BIG;
4263 
4264 	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
4265 		tmpbuf = vmalloc(mop->size);
4266 		if (!tmpbuf)
4267 			return -ENOMEM;
4268 	}
4269 
4270 	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4271 
4272 	switch (mop->op) {
4273 	case KVM_S390_MEMOP_LOGICAL_READ:
4274 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4275 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4276 					    mop->size, GACC_FETCH);
4277 			break;
4278 		}
4279 		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4280 		if (r == 0) {
4281 			if (copy_to_user(uaddr, tmpbuf, mop->size))
4282 				r = -EFAULT;
4283 		}
4284 		break;
4285 	case KVM_S390_MEMOP_LOGICAL_WRITE:
4286 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4287 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4288 					    mop->size, GACC_STORE);
4289 			break;
4290 		}
4291 		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4292 			r = -EFAULT;
4293 			break;
4294 		}
4295 		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4296 		break;
4297 	default:
4298 		r = -EINVAL;
4299 	}
4300 
4301 	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
4302 
4303 	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4304 		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
4305 
4306 	vfree(tmpbuf);
4307 	return r;
4308 }
4309 
4310 long kvm_arch_vcpu_async_ioctl(struct file *filp,
4311 			       unsigned int ioctl, unsigned long arg)
4312 {
4313 	struct kvm_vcpu *vcpu = filp->private_data;
4314 	void __user *argp = (void __user *)arg;
4315 
4316 	switch (ioctl) {
4317 	case KVM_S390_IRQ: {
4318 		struct kvm_s390_irq s390irq;
4319 
4320 		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4321 			return -EFAULT;
4322 		return kvm_s390_inject_vcpu(vcpu, &s390irq);
4323 	}
4324 	case KVM_S390_INTERRUPT: {
4325 		struct kvm_s390_interrupt s390int;
4326 		struct kvm_s390_irq s390irq;
4327 
4328 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
4329 			return -EFAULT;
4330 		if (s390int_to_s390irq(&s390int, &s390irq))
4331 			return -EINVAL;
4332 		return kvm_s390_inject_vcpu(vcpu, &s390irq);
4333 	}
4334 	}
4335 	return -ENOIOCTLCMD;
4336 }
4337 
4338 long kvm_arch_vcpu_ioctl(struct file *filp,
4339 			 unsigned int ioctl, unsigned long arg)
4340 {
4341 	struct kvm_vcpu *vcpu = filp->private_data;
4342 	void __user *argp = (void __user *)arg;
4343 	int idx;
4344 	long r;
4345 
4346 	vcpu_load(vcpu);
4347 
4348 	switch (ioctl) {
4349 	case KVM_S390_STORE_STATUS:
4350 		idx = srcu_read_lock(&vcpu->kvm->srcu);
4351 		r = kvm_s390_vcpu_store_status(vcpu, arg);
4352 		srcu_read_unlock(&vcpu->kvm->srcu, idx);
4353 		break;
4354 	case KVM_S390_SET_INITIAL_PSW: {
4355 		psw_t psw;
4356 
4357 		r = -EFAULT;
4358 		if (copy_from_user(&psw, argp, sizeof(psw)))
4359 			break;
4360 		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4361 		break;
4362 	}
4363 	case KVM_S390_INITIAL_RESET:
4364 		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4365 		break;
4366 	case KVM_SET_ONE_REG:
4367 	case KVM_GET_ONE_REG: {
4368 		struct kvm_one_reg reg;
4369 		r = -EFAULT;
4370 		if (copy_from_user(&reg, argp, sizeof(reg)))
4371 			break;
4372 		if (ioctl == KVM_SET_ONE_REG)
4373 			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4374 		else
4375 			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4376 		break;
4377 	}
4378 #ifdef CONFIG_KVM_S390_UCONTROL
4379 	case KVM_S390_UCAS_MAP: {
4380 		struct kvm_s390_ucas_mapping ucasmap;
4381 
4382 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4383 			r = -EFAULT;
4384 			break;
4385 		}
4386 
4387 		if (!kvm_is_ucontrol(vcpu->kvm)) {
4388 			r = -EINVAL;
4389 			break;
4390 		}
4391 
4392 		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4393 				     ucasmap.vcpu_addr, ucasmap.length);
4394 		break;
4395 	}
4396 	case KVM_S390_UCAS_UNMAP: {
4397 		struct kvm_s390_ucas_mapping ucasmap;
4398 
4399 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4400 			r = -EFAULT;
4401 			break;
4402 		}
4403 
4404 		if (!kvm_is_ucontrol(vcpu->kvm)) {
4405 			r = -EINVAL;
4406 			break;
4407 		}
4408 
4409 		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
4410 			ucasmap.length);
4411 		break;
4412 	}
4413 #endif
4414 	case KVM_S390_VCPU_FAULT: {
4415 		r = gmap_fault(vcpu->arch.gmap, arg, 0);
4416 		break;
4417 	}
4418 	case KVM_ENABLE_CAP:
4419 	{
4420 		struct kvm_enable_cap cap;
4421 		r = -EFAULT;
4422 		if (copy_from_user(&cap, argp, sizeof(cap)))
4423 			break;
4424 		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4425 		break;
4426 	}
4427 	case KVM_S390_MEM_OP: {
4428 		struct kvm_s390_mem_op mem_op;
4429 
4430 		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4431 			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
4432 		else
4433 			r = -EFAULT;
4434 		break;
4435 	}
4436 	case KVM_S390_SET_IRQ_STATE: {
4437 		struct kvm_s390_irq_state irq_state;
4438 
4439 		r = -EFAULT;
4440 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4441 			break;
4442 		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4443 		    irq_state.len == 0 ||
4444 		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4445 			r = -EINVAL;
4446 			break;
4447 		}
4448 		/* do not use irq_state.flags, it will break old QEMUs */
4449 		r = kvm_s390_set_irq_state(vcpu,
4450 					   (void __user *) irq_state.buf,
4451 					   irq_state.len);
4452 		break;
4453 	}
4454 	case KVM_S390_GET_IRQ_STATE: {
4455 		struct kvm_s390_irq_state irq_state;
4456 
4457 		r = -EFAULT;
4458 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4459 			break;
4460 		if (irq_state.len == 0) {
4461 			r = -EINVAL;
4462 			break;
4463 		}
4464 		/* do not use irq_state.flags, it will break old QEMUs */
4465 		r = kvm_s390_get_irq_state(vcpu,
4466 					   (__u8 __user *)  irq_state.buf,
4467 					   irq_state.len);
4468 		break;
4469 	}
4470 	default:
4471 		r = -ENOTTY;
4472 	}
4473 
4474 	vcpu_put(vcpu);
4475 	return r;
4476 }
4477 
4478 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
4479 {
4480 #ifdef CONFIG_KVM_S390_UCONTROL
4481 	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
4482 		 && (kvm_is_ucontrol(vcpu->kvm))) {
4483 		vmf->page = virt_to_page(vcpu->arch.sie_block);
4484 		get_page(vmf->page);
4485 		return 0;
4486 	}
4487 #endif
4488 	return VM_FAULT_SIGBUS;
4489 }
4490 
/* Nothing to set up for a new memslot here; always reports success (0). */
int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	const int rc = 0;

	return rc;
}
4496 
4497 /* Section: memory related */
4498 int kvm_arch_prepare_memory_region(struct kvm *kvm,
4499 				   struct kvm_memory_slot *memslot,
4500 				   const struct kvm_userspace_memory_region *mem,
4501 				   enum kvm_mr_change change)
4502 {
4503 	/* A few sanity checks. We can have memory slots which have to be
4504 	   located/ended at a segment boundary (1MB). The memory in userland is
4505 	   ok to be fragmented into various different vmas. It is okay to mmap()
4506 	   and munmap() stuff in this slot after doing this call at any time */
4507 
4508 	if (mem->userspace_addr & 0xffffful)
4509 		return -EINVAL;
4510 
4511 	if (mem->memory_size & 0xffffful)
4512 		return -EINVAL;
4513 
4514 	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
4515 		return -EINVAL;
4516 
4517 	return 0;
4518 }
4519 
4520 void kvm_arch_commit_memory_region(struct kvm *kvm,
4521 				const struct kvm_userspace_memory_region *mem,
4522 				const struct kvm_memory_slot *old,
4523 				const struct kvm_memory_slot *new,
4524 				enum kvm_mr_change change)
4525 {
4526 	int rc = 0;
4527 
4528 	switch (change) {
4529 	case KVM_MR_DELETE:
4530 		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
4531 					old->npages * PAGE_SIZE);
4532 		break;
4533 	case KVM_MR_MOVE:
4534 		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
4535 					old->npages * PAGE_SIZE);
4536 		if (rc)
4537 			break;
4538 		/* FALLTHROUGH */
4539 	case KVM_MR_CREATE:
4540 		rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
4541 				      mem->guest_phys_addr, mem->memory_size);
4542 		break;
4543 	case KVM_MR_FLAGS_ONLY:
4544 		break;
4545 	default:
4546 		WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
4547 	}
4548 	if (rc)
4549 		pr_warn("failed to commit memory region\n");
4550 	return;
4551 }
4552 
4553 static inline unsigned long nonhyp_mask(int i)
4554 {
4555 	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
4556 
4557 	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
4558 }
4559 
4560 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
4561 {
4562 	vcpu->valid_wakeup = false;
4563 }
4564 
4565 static int __init kvm_s390_init(void)
4566 {
4567 	int i;
4568 
4569 	if (!sclp.has_sief2) {
4570 		pr_info("SIE is not available\n");
4571 		return -ENODEV;
4572 	}
4573 
4574 	if (nested && hpage) {
4575 		pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
4576 		return -EINVAL;
4577 	}
4578 
4579 	for (i = 0; i < 16; i++)
4580 		kvm_s390_fac_base[i] |=
4581 			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
4582 
4583 	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
4584 }
4585 
4586 static void __exit kvm_s390_exit(void)
4587 {
4588 	kvm_exit();
4589 }
4590 
4591 module_init(kvm_s390_init);
4592 module_exit(kvm_s390_exit);
4593 
4594 /*
4595  * Enable autoloading of the kvm module.
4596  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
4597  * since x86 takes a different approach.
4598  */
4599 #include <linux/miscdevice.h>
4600 MODULE_ALIAS_MISCDEV(KVM_MINOR);
4601 MODULE_ALIAS("devname:kvm");
4602