xref: /openbmc/linux/arch/s390/kvm/kvm-s390.c (revision 151f4e2b)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * hosting IBM Z kernel virtual machines (s390x)
4  *
5  * Copyright IBM Corp. 2008, 2018
6  *
7  *    Author(s): Carsten Otte <cotte@de.ibm.com>
8  *               Christian Borntraeger <borntraeger@de.ibm.com>
9  *               Heiko Carstens <heiko.carstens@de.ibm.com>
10  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
11  *               Jason J. Herne <jjherne@us.ibm.com>
12  */
13 
14 #define KMSG_COMPONENT "kvm-s390"
15 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
16 
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/moduleparam.h>
27 #include <linux/random.h>
28 #include <linux/slab.h>
29 #include <linux/timer.h>
30 #include <linux/vmalloc.h>
31 #include <linux/bitmap.h>
32 #include <linux/sched/signal.h>
33 #include <linux/string.h>
34 
35 #include <asm/asm-offsets.h>
36 #include <asm/lowcore.h>
37 #include <asm/stp.h>
38 #include <asm/pgtable.h>
39 #include <asm/gmap.h>
40 #include <asm/nmi.h>
41 #include <asm/switch_to.h>
42 #include <asm/isc.h>
43 #include <asm/sclp.h>
44 #include <asm/cpacf.h>
45 #include <asm/timex.h>
46 #include <asm/ap.h>
47 #include "kvm-s390.h"
48 #include "gaccess.h"
49 
50 #define CREATE_TRACE_POINTS
51 #include "trace.h"
52 #include "trace-s390.h"
53 
54 #define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
55 #define LOCAL_IRQS 32
56 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
57 			   (KVM_MAX_VCPUS + LOCAL_IRQS))
58 
59 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
60 #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
61 
62 struct kvm_stats_debugfs_item debugfs_entries[] = {
63 	{ "userspace_handled", VCPU_STAT(exit_userspace) },
64 	{ "exit_null", VCPU_STAT(exit_null) },
65 	{ "exit_validity", VCPU_STAT(exit_validity) },
66 	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
67 	{ "exit_external_request", VCPU_STAT(exit_external_request) },
68 	{ "exit_io_request", VCPU_STAT(exit_io_request) },
69 	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
70 	{ "exit_instruction", VCPU_STAT(exit_instruction) },
71 	{ "exit_pei", VCPU_STAT(exit_pei) },
72 	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
73 	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
74 	{ "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
75 	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
76 	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
77 	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
78 	{ "halt_no_poll_steal", VCPU_STAT(halt_no_poll_steal) },
79 	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
80 	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
81 	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
82 	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
83 	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
84 	{ "deliver_ckc", VCPU_STAT(deliver_ckc) },
85 	{ "deliver_cputm", VCPU_STAT(deliver_cputm) },
86 	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
87 	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
88 	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
89 	{ "deliver_virtio", VCPU_STAT(deliver_virtio) },
90 	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
91 	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
92 	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
93 	{ "deliver_program", VCPU_STAT(deliver_program) },
94 	{ "deliver_io", VCPU_STAT(deliver_io) },
95 	{ "deliver_machine_check", VCPU_STAT(deliver_machine_check) },
96 	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
97 	{ "inject_ckc", VCPU_STAT(inject_ckc) },
98 	{ "inject_cputm", VCPU_STAT(inject_cputm) },
99 	{ "inject_external_call", VCPU_STAT(inject_external_call) },
100 	{ "inject_float_mchk", VM_STAT(inject_float_mchk) },
101 	{ "inject_emergency_signal", VCPU_STAT(inject_emergency_signal) },
102 	{ "inject_io", VM_STAT(inject_io) },
103 	{ "inject_mchk", VCPU_STAT(inject_mchk) },
104 	{ "inject_pfault_done", VM_STAT(inject_pfault_done) },
105 	{ "inject_program", VCPU_STAT(inject_program) },
106 	{ "inject_restart", VCPU_STAT(inject_restart) },
107 	{ "inject_service_signal", VM_STAT(inject_service_signal) },
108 	{ "inject_set_prefix", VCPU_STAT(inject_set_prefix) },
109 	{ "inject_stop_signal", VCPU_STAT(inject_stop_signal) },
110 	{ "inject_pfault_init", VCPU_STAT(inject_pfault_init) },
111 	{ "inject_virtio", VM_STAT(inject_virtio) },
112 	{ "instruction_epsw", VCPU_STAT(instruction_epsw) },
113 	{ "instruction_gs", VCPU_STAT(instruction_gs) },
114 	{ "instruction_io_other", VCPU_STAT(instruction_io_other) },
115 	{ "instruction_lpsw", VCPU_STAT(instruction_lpsw) },
116 	{ "instruction_lpswe", VCPU_STAT(instruction_lpswe) },
117 	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
118 	{ "instruction_ptff", VCPU_STAT(instruction_ptff) },
119 	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
120 	{ "instruction_sck", VCPU_STAT(instruction_sck) },
121 	{ "instruction_sckpf", VCPU_STAT(instruction_sckpf) },
122 	{ "instruction_spx", VCPU_STAT(instruction_spx) },
123 	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
124 	{ "instruction_stap", VCPU_STAT(instruction_stap) },
125 	{ "instruction_iske", VCPU_STAT(instruction_iske) },
126 	{ "instruction_ri", VCPU_STAT(instruction_ri) },
127 	{ "instruction_rrbe", VCPU_STAT(instruction_rrbe) },
128 	{ "instruction_sske", VCPU_STAT(instruction_sske) },
129 	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
130 	{ "instruction_essa", VCPU_STAT(instruction_essa) },
131 	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
132 	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
133 	{ "instruction_tb", VCPU_STAT(instruction_tb) },
134 	{ "instruction_tpi", VCPU_STAT(instruction_tpi) },
135 	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
136 	{ "instruction_tsch", VCPU_STAT(instruction_tsch) },
137 	{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
138 	{ "instruction_sie", VCPU_STAT(instruction_sie) },
139 	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
140 	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
141 	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
142 	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
143 	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
144 	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
145 	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
146 	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
147 	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
148 	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
149 	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
150 	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
151 	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
152 	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
153 	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
154 	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
155 	{ "instruction_diag_10", VCPU_STAT(diagnose_10) },
156 	{ "instruction_diag_44", VCPU_STAT(diagnose_44) },
157 	{ "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
158 	{ "instruction_diag_258", VCPU_STAT(diagnose_258) },
159 	{ "instruction_diag_308", VCPU_STAT(diagnose_308) },
160 	{ "instruction_diag_500", VCPU_STAT(diagnose_500) },
161 	{ "instruction_diag_other", VCPU_STAT(diagnose_other) },
162 	{ NULL }
163 };
164 
165 struct kvm_s390_tod_clock_ext {
166 	__u8 epoch_idx;
167 	__u64 tod;
168 	__u8 reserved[7];
169 } __packed;
170 
171 /* allow nested virtualization in KVM (if enabled by user space) */
172 static int nested;
173 module_param(nested, int, S_IRUGO);
174 MODULE_PARM_DESC(nested, "Nested virtualization support");
175 
176 /* allow 1m huge page guest backing, if !nested */
177 static int hpage;
178 module_param(hpage, int, 0444);
179 MODULE_PARM_DESC(hpage, "1m huge page backing support");
180 
181 /* maximum percentage of steal time for polling.  >100 is treated like 100 */
182 static u8 halt_poll_max_steal = 10;
183 module_param(halt_poll_max_steal, byte, 0644);
184 MODULE_PARM_DESC(hpage, "Maximum percentage of steal time to allow polling");
185 
186 /*
187  * For now we handle at most 16 double words as this is what the s390 base
188  * kernel handles and stores in the prefix page. If we ever need to go beyond
189  * this, this requires changes to code, but the external uapi can stay.
190  */
191 #define SIZE_INTERNAL 16
192 
193 /*
194  * Base feature mask that defines default mask for facilities. Consists of the
195  * defines in FACILITIES_KVM and the non-hypervisor managed bits.
196  */
197 static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
198 /*
199  * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
200  * and defines the facilities that can be enabled via a cpu model.
201  */
202 static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
203 
204 static unsigned long kvm_s390_fac_size(void)
205 {
206 	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
207 	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
208 	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
209 		sizeof(S390_lowcore.stfle_fac_list));
210 
211 	return SIZE_INTERNAL;
212 }
213 
214 /* available cpu features supported by kvm */
215 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
216 /* available subfunctions indicated via query / "test bit" */
217 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
218 
219 static struct gmap_notifier gmap_notifier;
220 static struct gmap_notifier vsie_gmap_notifier;
221 debug_info_t *kvm_s390_dbf;
222 
223 /* Section: not file related */
224 int kvm_arch_hardware_enable(void)
225 {
226 	/* every s390 is virtualization enabled ;-) */
227 	return 0;
228 }
229 
230 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
231 			      unsigned long end);
232 
233 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
234 {
235 	u8 delta_idx = 0;
236 
237 	/*
238 	 * The TOD jumps by delta, we have to compensate this by adding
239 	 * -delta to the epoch.
240 	 */
241 	delta = -delta;
242 
243 	/* sign-extension - we're adding to signed values below */
244 	if ((s64)delta < 0)
245 		delta_idx = -1;
246 
247 	scb->epoch += delta;
248 	if (scb->ecd & ECD_MEF) {
249 		scb->epdx += delta_idx;
250 		if (scb->epoch < delta)
251 			scb->epdx += 1;
252 	}
253 }
254 
255 /*
256  * This callback is executed during stop_machine(). All CPUs are therefore
257  * temporarily stopped. In order not to change guest behavior, we have to
258  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
259  * so a CPU won't be stopped while calculating with the epoch.
260  */
261 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
262 			  void *v)
263 {
264 	struct kvm *kvm;
265 	struct kvm_vcpu *vcpu;
266 	int i;
267 	unsigned long long *delta = v;
268 
269 	list_for_each_entry(kvm, &vm_list, vm_list) {
270 		kvm_for_each_vcpu(i, vcpu, kvm) {
271 			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
272 			if (i == 0) {
273 				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
274 				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
275 			}
276 			if (vcpu->arch.cputm_enabled)
277 				vcpu->arch.cputm_start += *delta;
278 			if (vcpu->arch.vsie_block)
279 				kvm_clock_sync_scb(vcpu->arch.vsie_block,
280 						   *delta);
281 		}
282 	}
283 	return NOTIFY_OK;
284 }
285 
286 static struct notifier_block kvm_clock_notifier = {
287 	.notifier_call = kvm_clock_sync,
288 };
289 
290 int kvm_arch_hardware_setup(void)
291 {
292 	gmap_notifier.notifier_call = kvm_gmap_notifier;
293 	gmap_register_pte_notifier(&gmap_notifier);
294 	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
295 	gmap_register_pte_notifier(&vsie_gmap_notifier);
296 	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
297 				       &kvm_clock_notifier);
298 	return 0;
299 }
300 
301 void kvm_arch_hardware_unsetup(void)
302 {
303 	gmap_unregister_pte_notifier(&gmap_notifier);
304 	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
305 	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
306 					 &kvm_clock_notifier);
307 }
308 
309 static void allow_cpu_feat(unsigned long nr)
310 {
311 	set_bit_inv(nr, kvm_s390_available_cpu_feat);
312 }
313 
314 static inline int plo_test_bit(unsigned char nr)
315 {
316 	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
317 	int cc;
318 
319 	asm volatile(
320 		/* Parameter registers are ignored for "test bit" */
321 		"	plo	0,0,0,0(0)\n"
322 		"	ipm	%0\n"
323 		"	srl	%0,28\n"
324 		: "=d" (cc)
325 		: "d" (r0)
326 		: "cc");
327 	return cc == 0;
328 }
329 
330 static inline void __insn32_query(unsigned int opcode, u8 query[32])
331 {
332 	register unsigned long r0 asm("0") = 0;	/* query function */
333 	register unsigned long r1 asm("1") = (unsigned long) query;
334 
335 	asm volatile(
336 		/* Parameter regs are ignored */
337 		"	.insn	rrf,%[opc] << 16,2,4,6,0\n"
338 		: "=m" (*query)
339 		: "d" (r0), "a" (r1), [opc] "i" (opcode)
340 		: "cc");
341 }
342 
343 #define INSN_SORTL 0xb938
344 #define INSN_DFLTCC 0xb939
345 
346 static void kvm_s390_cpu_feat_init(void)
347 {
348 	int i;
349 
350 	for (i = 0; i < 256; ++i) {
351 		if (plo_test_bit(i))
352 			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
353 	}
354 
355 	if (test_facility(28)) /* TOD-clock steering */
356 		ptff(kvm_s390_available_subfunc.ptff,
357 		     sizeof(kvm_s390_available_subfunc.ptff),
358 		     PTFF_QAF);
359 
360 	if (test_facility(17)) { /* MSA */
361 		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
362 			      kvm_s390_available_subfunc.kmac);
363 		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
364 			      kvm_s390_available_subfunc.kmc);
365 		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
366 			      kvm_s390_available_subfunc.km);
367 		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
368 			      kvm_s390_available_subfunc.kimd);
369 		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
370 			      kvm_s390_available_subfunc.klmd);
371 	}
372 	if (test_facility(76)) /* MSA3 */
373 		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
374 			      kvm_s390_available_subfunc.pckmo);
375 	if (test_facility(77)) { /* MSA4 */
376 		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
377 			      kvm_s390_available_subfunc.kmctr);
378 		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
379 			      kvm_s390_available_subfunc.kmf);
380 		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
381 			      kvm_s390_available_subfunc.kmo);
382 		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
383 			      kvm_s390_available_subfunc.pcc);
384 	}
385 	if (test_facility(57)) /* MSA5 */
386 		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
387 			      kvm_s390_available_subfunc.ppno);
388 
389 	if (test_facility(146)) /* MSA8 */
390 		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
391 			      kvm_s390_available_subfunc.kma);
392 
393 	if (test_facility(155)) /* MSA9 */
394 		__cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
395 			      kvm_s390_available_subfunc.kdsa);
396 
397 	if (test_facility(150)) /* SORTL */
398 		__insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
399 
400 	if (test_facility(151)) /* DFLTCC */
401 		__insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
402 
403 	if (MACHINE_HAS_ESOP)
404 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
405 	/*
406 	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
407 	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
408 	 */
409 	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
410 	    !test_facility(3) || !nested)
411 		return;
412 	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
413 	if (sclp.has_64bscao)
414 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
415 	if (sclp.has_siif)
416 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
417 	if (sclp.has_gpere)
418 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
419 	if (sclp.has_gsls)
420 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
421 	if (sclp.has_ib)
422 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
423 	if (sclp.has_cei)
424 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
425 	if (sclp.has_ibs)
426 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
427 	if (sclp.has_kss)
428 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
429 	/*
430 	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
431 	 * all skey handling functions read/set the skey from the PGSTE
432 	 * instead of the real storage key.
433 	 *
434 	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
435 	 * pages being detected as preserved although they are resident.
436 	 *
437 	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
438 	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
439 	 *
440 	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
441 	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
442 	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
443 	 *
444 	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
445 	 * cannot easily shadow the SCA because of the ipte lock.
446 	 */
447 }
448 
449 int kvm_arch_init(void *opaque)
450 {
451 	int rc;
452 
453 	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
454 	if (!kvm_s390_dbf)
455 		return -ENOMEM;
456 
457 	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
458 		rc = -ENOMEM;
459 		goto out_debug_unreg;
460 	}
461 
462 	kvm_s390_cpu_feat_init();
463 
464 	/* Register floating interrupt controller interface. */
465 	rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
466 	if (rc) {
467 		pr_err("A FLIC registration call failed with rc=%d\n", rc);
468 		goto out_debug_unreg;
469 	}
470 
471 	rc = kvm_s390_gib_init(GAL_ISC);
472 	if (rc)
473 		goto out_gib_destroy;
474 
475 	return 0;
476 
477 out_gib_destroy:
478 	kvm_s390_gib_destroy();
479 out_debug_unreg:
480 	debug_unregister(kvm_s390_dbf);
481 	return rc;
482 }
483 
484 void kvm_arch_exit(void)
485 {
486 	kvm_s390_gib_destroy();
487 	debug_unregister(kvm_s390_dbf);
488 }
489 
490 /* Section: device related */
491 long kvm_arch_dev_ioctl(struct file *filp,
492 			unsigned int ioctl, unsigned long arg)
493 {
494 	if (ioctl == KVM_S390_ENABLE_SIE)
495 		return s390_enable_sie();
496 	return -EINVAL;
497 }
498 
499 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
500 {
501 	int r;
502 
503 	switch (ext) {
504 	case KVM_CAP_S390_PSW:
505 	case KVM_CAP_S390_GMAP:
506 	case KVM_CAP_SYNC_MMU:
507 #ifdef CONFIG_KVM_S390_UCONTROL
508 	case KVM_CAP_S390_UCONTROL:
509 #endif
510 	case KVM_CAP_ASYNC_PF:
511 	case KVM_CAP_SYNC_REGS:
512 	case KVM_CAP_ONE_REG:
513 	case KVM_CAP_ENABLE_CAP:
514 	case KVM_CAP_S390_CSS_SUPPORT:
515 	case KVM_CAP_IOEVENTFD:
516 	case KVM_CAP_DEVICE_CTRL:
517 	case KVM_CAP_S390_IRQCHIP:
518 	case KVM_CAP_VM_ATTRIBUTES:
519 	case KVM_CAP_MP_STATE:
520 	case KVM_CAP_IMMEDIATE_EXIT:
521 	case KVM_CAP_S390_INJECT_IRQ:
522 	case KVM_CAP_S390_USER_SIGP:
523 	case KVM_CAP_S390_USER_STSI:
524 	case KVM_CAP_S390_SKEYS:
525 	case KVM_CAP_S390_IRQ_STATE:
526 	case KVM_CAP_S390_USER_INSTR0:
527 	case KVM_CAP_S390_CMMA_MIGRATION:
528 	case KVM_CAP_S390_AIS:
529 	case KVM_CAP_S390_AIS_MIGRATION:
530 		r = 1;
531 		break;
532 	case KVM_CAP_S390_HPAGE_1M:
533 		r = 0;
534 		if (hpage && !kvm_is_ucontrol(kvm))
535 			r = 1;
536 		break;
537 	case KVM_CAP_S390_MEM_OP:
538 		r = MEM_OP_MAX_SIZE;
539 		break;
540 	case KVM_CAP_NR_VCPUS:
541 	case KVM_CAP_MAX_VCPUS:
542 		r = KVM_S390_BSCA_CPU_SLOTS;
543 		if (!kvm_s390_use_sca_entries())
544 			r = KVM_MAX_VCPUS;
545 		else if (sclp.has_esca && sclp.has_64bscao)
546 			r = KVM_S390_ESCA_CPU_SLOTS;
547 		break;
548 	case KVM_CAP_S390_COW:
549 		r = MACHINE_HAS_ESOP;
550 		break;
551 	case KVM_CAP_S390_VECTOR_REGISTERS:
552 		r = MACHINE_HAS_VX;
553 		break;
554 	case KVM_CAP_S390_RI:
555 		r = test_facility(64);
556 		break;
557 	case KVM_CAP_S390_GS:
558 		r = test_facility(133);
559 		break;
560 	case KVM_CAP_S390_BPB:
561 		r = test_facility(82);
562 		break;
563 	default:
564 		r = 0;
565 	}
566 	return r;
567 }
568 
569 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
570 				    struct kvm_memory_slot *memslot)
571 {
572 	int i;
573 	gfn_t cur_gfn, last_gfn;
574 	unsigned long gaddr, vmaddr;
575 	struct gmap *gmap = kvm->arch.gmap;
576 	DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
577 
578 	/* Loop over all guest segments */
579 	cur_gfn = memslot->base_gfn;
580 	last_gfn = memslot->base_gfn + memslot->npages;
581 	for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
582 		gaddr = gfn_to_gpa(cur_gfn);
583 		vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
584 		if (kvm_is_error_hva(vmaddr))
585 			continue;
586 
587 		bitmap_zero(bitmap, _PAGE_ENTRIES);
588 		gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
589 		for (i = 0; i < _PAGE_ENTRIES; i++) {
590 			if (test_bit(i, bitmap))
591 				mark_page_dirty(kvm, cur_gfn + i);
592 		}
593 
594 		if (fatal_signal_pending(current))
595 			return;
596 		cond_resched();
597 	}
598 }
599 
600 /* Section: vm related */
601 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
602 
603 /*
604  * Get (and clear) the dirty memory log for a memory slot.
605  */
606 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
607 			       struct kvm_dirty_log *log)
608 {
609 	int r;
610 	unsigned long n;
611 	struct kvm_memslots *slots;
612 	struct kvm_memory_slot *memslot;
613 	int is_dirty = 0;
614 
615 	if (kvm_is_ucontrol(kvm))
616 		return -EINVAL;
617 
618 	mutex_lock(&kvm->slots_lock);
619 
620 	r = -EINVAL;
621 	if (log->slot >= KVM_USER_MEM_SLOTS)
622 		goto out;
623 
624 	slots = kvm_memslots(kvm);
625 	memslot = id_to_memslot(slots, log->slot);
626 	r = -ENOENT;
627 	if (!memslot->dirty_bitmap)
628 		goto out;
629 
630 	kvm_s390_sync_dirty_log(kvm, memslot);
631 	r = kvm_get_dirty_log(kvm, log, &is_dirty);
632 	if (r)
633 		goto out;
634 
635 	/* Clear the dirty log */
636 	if (is_dirty) {
637 		n = kvm_dirty_bitmap_bytes(memslot);
638 		memset(memslot->dirty_bitmap, 0, n);
639 	}
640 	r = 0;
641 out:
642 	mutex_unlock(&kvm->slots_lock);
643 	return r;
644 }
645 
646 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
647 {
648 	unsigned int i;
649 	struct kvm_vcpu *vcpu;
650 
651 	kvm_for_each_vcpu(i, vcpu, kvm) {
652 		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
653 	}
654 }
655 
656 int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
657 {
658 	int r;
659 
660 	if (cap->flags)
661 		return -EINVAL;
662 
663 	switch (cap->cap) {
664 	case KVM_CAP_S390_IRQCHIP:
665 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
666 		kvm->arch.use_irqchip = 1;
667 		r = 0;
668 		break;
669 	case KVM_CAP_S390_USER_SIGP:
670 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
671 		kvm->arch.user_sigp = 1;
672 		r = 0;
673 		break;
674 	case KVM_CAP_S390_VECTOR_REGISTERS:
675 		mutex_lock(&kvm->lock);
676 		if (kvm->created_vcpus) {
677 			r = -EBUSY;
678 		} else if (MACHINE_HAS_VX) {
679 			set_kvm_facility(kvm->arch.model.fac_mask, 129);
680 			set_kvm_facility(kvm->arch.model.fac_list, 129);
681 			if (test_facility(134)) {
682 				set_kvm_facility(kvm->arch.model.fac_mask, 134);
683 				set_kvm_facility(kvm->arch.model.fac_list, 134);
684 			}
685 			if (test_facility(135)) {
686 				set_kvm_facility(kvm->arch.model.fac_mask, 135);
687 				set_kvm_facility(kvm->arch.model.fac_list, 135);
688 			}
689 			if (test_facility(148)) {
690 				set_kvm_facility(kvm->arch.model.fac_mask, 148);
691 				set_kvm_facility(kvm->arch.model.fac_list, 148);
692 			}
693 			if (test_facility(152)) {
694 				set_kvm_facility(kvm->arch.model.fac_mask, 152);
695 				set_kvm_facility(kvm->arch.model.fac_list, 152);
696 			}
697 			r = 0;
698 		} else
699 			r = -EINVAL;
700 		mutex_unlock(&kvm->lock);
701 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
702 			 r ? "(not available)" : "(success)");
703 		break;
704 	case KVM_CAP_S390_RI:
705 		r = -EINVAL;
706 		mutex_lock(&kvm->lock);
707 		if (kvm->created_vcpus) {
708 			r = -EBUSY;
709 		} else if (test_facility(64)) {
710 			set_kvm_facility(kvm->arch.model.fac_mask, 64);
711 			set_kvm_facility(kvm->arch.model.fac_list, 64);
712 			r = 0;
713 		}
714 		mutex_unlock(&kvm->lock);
715 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
716 			 r ? "(not available)" : "(success)");
717 		break;
718 	case KVM_CAP_S390_AIS:
719 		mutex_lock(&kvm->lock);
720 		if (kvm->created_vcpus) {
721 			r = -EBUSY;
722 		} else {
723 			set_kvm_facility(kvm->arch.model.fac_mask, 72);
724 			set_kvm_facility(kvm->arch.model.fac_list, 72);
725 			r = 0;
726 		}
727 		mutex_unlock(&kvm->lock);
728 		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
729 			 r ? "(not available)" : "(success)");
730 		break;
731 	case KVM_CAP_S390_GS:
732 		r = -EINVAL;
733 		mutex_lock(&kvm->lock);
734 		if (kvm->created_vcpus) {
735 			r = -EBUSY;
736 		} else if (test_facility(133)) {
737 			set_kvm_facility(kvm->arch.model.fac_mask, 133);
738 			set_kvm_facility(kvm->arch.model.fac_list, 133);
739 			r = 0;
740 		}
741 		mutex_unlock(&kvm->lock);
742 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
743 			 r ? "(not available)" : "(success)");
744 		break;
745 	case KVM_CAP_S390_HPAGE_1M:
746 		mutex_lock(&kvm->lock);
747 		if (kvm->created_vcpus)
748 			r = -EBUSY;
749 		else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
750 			r = -EINVAL;
751 		else {
752 			r = 0;
753 			down_write(&kvm->mm->mmap_sem);
754 			kvm->mm->context.allow_gmap_hpage_1m = 1;
755 			up_write(&kvm->mm->mmap_sem);
756 			/*
757 			 * We might have to create fake 4k page
758 			 * tables. To avoid that the hardware works on
759 			 * stale PGSTEs, we emulate these instructions.
760 			 */
761 			kvm->arch.use_skf = 0;
762 			kvm->arch.use_pfmfi = 0;
763 		}
764 		mutex_unlock(&kvm->lock);
765 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
766 			 r ? "(not available)" : "(success)");
767 		break;
768 	case KVM_CAP_S390_USER_STSI:
769 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
770 		kvm->arch.user_stsi = 1;
771 		r = 0;
772 		break;
773 	case KVM_CAP_S390_USER_INSTR0:
774 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
775 		kvm->arch.user_instr0 = 1;
776 		icpt_operexc_on_all_vcpus(kvm);
777 		r = 0;
778 		break;
779 	default:
780 		r = -EINVAL;
781 		break;
782 	}
783 	return r;
784 }
785 
786 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
787 {
788 	int ret;
789 
790 	switch (attr->attr) {
791 	case KVM_S390_VM_MEM_LIMIT_SIZE:
792 		ret = 0;
793 		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
794 			 kvm->arch.mem_limit);
795 		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
796 			ret = -EFAULT;
797 		break;
798 	default:
799 		ret = -ENXIO;
800 		break;
801 	}
802 	return ret;
803 }
804 
805 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
806 {
807 	int ret;
808 	unsigned int idx;
809 	switch (attr->attr) {
810 	case KVM_S390_VM_MEM_ENABLE_CMMA:
811 		ret = -ENXIO;
812 		if (!sclp.has_cmma)
813 			break;
814 
815 		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
816 		mutex_lock(&kvm->lock);
817 		if (kvm->created_vcpus)
818 			ret = -EBUSY;
819 		else if (kvm->mm->context.allow_gmap_hpage_1m)
820 			ret = -EINVAL;
821 		else {
822 			kvm->arch.use_cmma = 1;
823 			/* Not compatible with cmma. */
824 			kvm->arch.use_pfmfi = 0;
825 			ret = 0;
826 		}
827 		mutex_unlock(&kvm->lock);
828 		break;
829 	case KVM_S390_VM_MEM_CLR_CMMA:
830 		ret = -ENXIO;
831 		if (!sclp.has_cmma)
832 			break;
833 		ret = -EINVAL;
834 		if (!kvm->arch.use_cmma)
835 			break;
836 
837 		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
838 		mutex_lock(&kvm->lock);
839 		idx = srcu_read_lock(&kvm->srcu);
840 		s390_reset_cmma(kvm->arch.gmap->mm);
841 		srcu_read_unlock(&kvm->srcu, idx);
842 		mutex_unlock(&kvm->lock);
843 		ret = 0;
844 		break;
845 	case KVM_S390_VM_MEM_LIMIT_SIZE: {
846 		unsigned long new_limit;
847 
848 		if (kvm_is_ucontrol(kvm))
849 			return -EINVAL;
850 
851 		if (get_user(new_limit, (u64 __user *)attr->addr))
852 			return -EFAULT;
853 
854 		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
855 		    new_limit > kvm->arch.mem_limit)
856 			return -E2BIG;
857 
858 		if (!new_limit)
859 			return -EINVAL;
860 
861 		/* gmap_create takes last usable address */
862 		if (new_limit != KVM_S390_NO_MEM_LIMIT)
863 			new_limit -= 1;
864 
865 		ret = -EBUSY;
866 		mutex_lock(&kvm->lock);
867 		if (!kvm->created_vcpus) {
868 			/* gmap_create will round the limit up */
869 			struct gmap *new = gmap_create(current->mm, new_limit);
870 
871 			if (!new) {
872 				ret = -ENOMEM;
873 			} else {
874 				gmap_remove(kvm->arch.gmap);
875 				new->private = kvm;
876 				kvm->arch.gmap = new;
877 				ret = 0;
878 			}
879 		}
880 		mutex_unlock(&kvm->lock);
881 		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
882 		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
883 			 (void *) kvm->arch.gmap->asce);
884 		break;
885 	}
886 	default:
887 		ret = -ENXIO;
888 		break;
889 	}
890 	return ret;
891 }
892 
893 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
894 
895 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
896 {
897 	struct kvm_vcpu *vcpu;
898 	int i;
899 
900 	kvm_s390_vcpu_block_all(kvm);
901 
902 	kvm_for_each_vcpu(i, vcpu, kvm) {
903 		kvm_s390_vcpu_crypto_setup(vcpu);
904 		/* recreate the shadow crycb by leaving the VSIE handler */
905 		kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
906 	}
907 
908 	kvm_s390_vcpu_unblock_all(kvm);
909 }
910 
911 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
912 {
913 	mutex_lock(&kvm->lock);
914 	switch (attr->attr) {
915 	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
916 		if (!test_kvm_facility(kvm, 76)) {
917 			mutex_unlock(&kvm->lock);
918 			return -EINVAL;
919 		}
920 		get_random_bytes(
921 			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
922 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
923 		kvm->arch.crypto.aes_kw = 1;
924 		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
925 		break;
926 	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
927 		if (!test_kvm_facility(kvm, 76)) {
928 			mutex_unlock(&kvm->lock);
929 			return -EINVAL;
930 		}
931 		get_random_bytes(
932 			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
933 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
934 		kvm->arch.crypto.dea_kw = 1;
935 		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
936 		break;
937 	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
938 		if (!test_kvm_facility(kvm, 76)) {
939 			mutex_unlock(&kvm->lock);
940 			return -EINVAL;
941 		}
942 		kvm->arch.crypto.aes_kw = 0;
943 		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
944 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
945 		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
946 		break;
947 	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
948 		if (!test_kvm_facility(kvm, 76)) {
949 			mutex_unlock(&kvm->lock);
950 			return -EINVAL;
951 		}
952 		kvm->arch.crypto.dea_kw = 0;
953 		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
954 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
955 		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
956 		break;
957 	case KVM_S390_VM_CRYPTO_ENABLE_APIE:
958 		if (!ap_instructions_available()) {
959 			mutex_unlock(&kvm->lock);
960 			return -EOPNOTSUPP;
961 		}
962 		kvm->arch.crypto.apie = 1;
963 		break;
964 	case KVM_S390_VM_CRYPTO_DISABLE_APIE:
965 		if (!ap_instructions_available()) {
966 			mutex_unlock(&kvm->lock);
967 			return -EOPNOTSUPP;
968 		}
969 		kvm->arch.crypto.apie = 0;
970 		break;
971 	default:
972 		mutex_unlock(&kvm->lock);
973 		return -ENXIO;
974 	}
975 
976 	kvm_s390_vcpu_crypto_reset_all(kvm);
977 	mutex_unlock(&kvm->lock);
978 	return 0;
979 }
980 
981 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
982 {
983 	int cx;
984 	struct kvm_vcpu *vcpu;
985 
986 	kvm_for_each_vcpu(cx, vcpu, kvm)
987 		kvm_s390_sync_request(req, vcpu);
988 }
989 
990 /*
991  * Must be called with kvm->srcu held to avoid races on memslots, and with
992  * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
993  */
994 static int kvm_s390_vm_start_migration(struct kvm *kvm)
995 {
996 	struct kvm_memory_slot *ms;
997 	struct kvm_memslots *slots;
998 	unsigned long ram_pages = 0;
999 	int slotnr;
1000 
1001 	/* migration mode already enabled */
1002 	if (kvm->arch.migration_mode)
1003 		return 0;
1004 	slots = kvm_memslots(kvm);
1005 	if (!slots || !slots->used_slots)
1006 		return -EINVAL;
1007 
1008 	if (!kvm->arch.use_cmma) {
1009 		kvm->arch.migration_mode = 1;
1010 		return 0;
1011 	}
1012 	/* mark all the pages in active slots as dirty */
1013 	for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
1014 		ms = slots->memslots + slotnr;
1015 		/*
1016 		 * The second half of the bitmap is only used on x86,
1017 		 * and would be wasted otherwise, so we put it to good
1018 		 * use here to keep track of the state of the storage
1019 		 * attributes.
1020 		 */
1021 		memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
1022 		ram_pages += ms->npages;
1023 	}
1024 	atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
1025 	kvm->arch.migration_mode = 1;
1026 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
1027 	return 0;
1028 }
1029 
1030 /*
1031  * Must be called with kvm->slots_lock to avoid races with ourselves and
1032  * kvm_s390_vm_start_migration.
1033  */
1034 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
1035 {
1036 	/* migration mode already disabled */
1037 	if (!kvm->arch.migration_mode)
1038 		return 0;
1039 	kvm->arch.migration_mode = 0;
1040 	if (kvm->arch.use_cmma)
1041 		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
1042 	return 0;
1043 }
1044 
1045 static int kvm_s390_vm_set_migration(struct kvm *kvm,
1046 				     struct kvm_device_attr *attr)
1047 {
1048 	int res = -ENXIO;
1049 
1050 	mutex_lock(&kvm->slots_lock);
1051 	switch (attr->attr) {
1052 	case KVM_S390_VM_MIGRATION_START:
1053 		res = kvm_s390_vm_start_migration(kvm);
1054 		break;
1055 	case KVM_S390_VM_MIGRATION_STOP:
1056 		res = kvm_s390_vm_stop_migration(kvm);
1057 		break;
1058 	default:
1059 		break;
1060 	}
1061 	mutex_unlock(&kvm->slots_lock);
1062 
1063 	return res;
1064 }
1065 
1066 static int kvm_s390_vm_get_migration(struct kvm *kvm,
1067 				     struct kvm_device_attr *attr)
1068 {
1069 	u64 mig = kvm->arch.migration_mode;
1070 
1071 	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1072 		return -ENXIO;
1073 
1074 	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
1075 		return -EFAULT;
1076 	return 0;
1077 }
1078 
1079 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1080 {
1081 	struct kvm_s390_vm_tod_clock gtod;
1082 
1083 	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1084 		return -EFAULT;
1085 
1086 	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1087 		return -EINVAL;
1088 	kvm_s390_set_tod_clock(kvm, &gtod);
1089 
1090 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1091 		gtod.epoch_idx, gtod.tod);
1092 
1093 	return 0;
1094 }
1095 
1096 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1097 {
1098 	u8 gtod_high;
1099 
1100 	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1101 					   sizeof(gtod_high)))
1102 		return -EFAULT;
1103 
1104 	if (gtod_high != 0)
1105 		return -EINVAL;
1106 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1107 
1108 	return 0;
1109 }
1110 
1111 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1112 {
1113 	struct kvm_s390_vm_tod_clock gtod = { 0 };
1114 
1115 	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1116 			   sizeof(gtod.tod)))
1117 		return -EFAULT;
1118 
1119 	kvm_s390_set_tod_clock(kvm, &gtod);
1120 	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1121 	return 0;
1122 }
1123 
1124 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1125 {
1126 	int ret;
1127 
1128 	if (attr->flags)
1129 		return -EINVAL;
1130 
1131 	switch (attr->attr) {
1132 	case KVM_S390_VM_TOD_EXT:
1133 		ret = kvm_s390_set_tod_ext(kvm, attr);
1134 		break;
1135 	case KVM_S390_VM_TOD_HIGH:
1136 		ret = kvm_s390_set_tod_high(kvm, attr);
1137 		break;
1138 	case KVM_S390_VM_TOD_LOW:
1139 		ret = kvm_s390_set_tod_low(kvm, attr);
1140 		break;
1141 	default:
1142 		ret = -ENXIO;
1143 		break;
1144 	}
1145 	return ret;
1146 }
1147 
1148 static void kvm_s390_get_tod_clock(struct kvm *kvm,
1149 				   struct kvm_s390_vm_tod_clock *gtod)
1150 {
1151 	struct kvm_s390_tod_clock_ext htod;
1152 
1153 	preempt_disable();
1154 
1155 	get_tod_clock_ext((char *)&htod);
1156 
1157 	gtod->tod = htod.tod + kvm->arch.epoch;
1158 	gtod->epoch_idx = 0;
1159 	if (test_kvm_facility(kvm, 139)) {
1160 		gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
1161 		if (gtod->tod < htod.tod)
1162 			gtod->epoch_idx += 1;
1163 	}
1164 
1165 	preempt_enable();
1166 }
1167 
1168 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1169 {
1170 	struct kvm_s390_vm_tod_clock gtod;
1171 
1172 	memset(&gtod, 0, sizeof(gtod));
1173 	kvm_s390_get_tod_clock(kvm, &gtod);
1174 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1175 		return -EFAULT;
1176 
1177 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1178 		gtod.epoch_idx, gtod.tod);
1179 	return 0;
1180 }
1181 
1182 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1183 {
1184 	u8 gtod_high = 0;
1185 
1186 	if (copy_to_user((void __user *)attr->addr, &gtod_high,
1187 					 sizeof(gtod_high)))
1188 		return -EFAULT;
1189 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1190 
1191 	return 0;
1192 }
1193 
1194 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1195 {
1196 	u64 gtod;
1197 
1198 	gtod = kvm_s390_get_tod_clock_fast(kvm);
1199 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1200 		return -EFAULT;
1201 	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1202 
1203 	return 0;
1204 }
1205 
1206 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1207 {
1208 	int ret;
1209 
1210 	if (attr->flags)
1211 		return -EINVAL;
1212 
1213 	switch (attr->attr) {
1214 	case KVM_S390_VM_TOD_EXT:
1215 		ret = kvm_s390_get_tod_ext(kvm, attr);
1216 		break;
1217 	case KVM_S390_VM_TOD_HIGH:
1218 		ret = kvm_s390_get_tod_high(kvm, attr);
1219 		break;
1220 	case KVM_S390_VM_TOD_LOW:
1221 		ret = kvm_s390_get_tod_low(kvm, attr);
1222 		break;
1223 	default:
1224 		ret = -ENXIO;
1225 		break;
1226 	}
1227 	return ret;
1228 }
1229 
1230 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1231 {
1232 	struct kvm_s390_vm_cpu_processor *proc;
1233 	u16 lowest_ibc, unblocked_ibc;
1234 	int ret = 0;
1235 
1236 	mutex_lock(&kvm->lock);
1237 	if (kvm->created_vcpus) {
1238 		ret = -EBUSY;
1239 		goto out;
1240 	}
1241 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1242 	if (!proc) {
1243 		ret = -ENOMEM;
1244 		goto out;
1245 	}
1246 	if (!copy_from_user(proc, (void __user *)attr->addr,
1247 			    sizeof(*proc))) {
1248 		kvm->arch.model.cpuid = proc->cpuid;
1249 		lowest_ibc = sclp.ibc >> 16 & 0xfff;
1250 		unblocked_ibc = sclp.ibc & 0xfff;
1251 		if (lowest_ibc && proc->ibc) {
1252 			if (proc->ibc > unblocked_ibc)
1253 				kvm->arch.model.ibc = unblocked_ibc;
1254 			else if (proc->ibc < lowest_ibc)
1255 				kvm->arch.model.ibc = lowest_ibc;
1256 			else
1257 				kvm->arch.model.ibc = proc->ibc;
1258 		}
1259 		memcpy(kvm->arch.model.fac_list, proc->fac_list,
1260 		       S390_ARCH_FAC_LIST_SIZE_BYTE);
1261 		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1262 			 kvm->arch.model.ibc,
1263 			 kvm->arch.model.cpuid);
1264 		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1265 			 kvm->arch.model.fac_list[0],
1266 			 kvm->arch.model.fac_list[1],
1267 			 kvm->arch.model.fac_list[2]);
1268 	} else
1269 		ret = -EFAULT;
1270 	kfree(proc);
1271 out:
1272 	mutex_unlock(&kvm->lock);
1273 	return ret;
1274 }
1275 
1276 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1277 				       struct kvm_device_attr *attr)
1278 {
1279 	struct kvm_s390_vm_cpu_feat data;
1280 
1281 	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1282 		return -EFAULT;
1283 	if (!bitmap_subset((unsigned long *) data.feat,
1284 			   kvm_s390_available_cpu_feat,
1285 			   KVM_S390_VM_CPU_FEAT_NR_BITS))
1286 		return -EINVAL;
1287 
1288 	mutex_lock(&kvm->lock);
1289 	if (kvm->created_vcpus) {
1290 		mutex_unlock(&kvm->lock);
1291 		return -EBUSY;
1292 	}
1293 	bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1294 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1295 	mutex_unlock(&kvm->lock);
1296 	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1297 			 data.feat[0],
1298 			 data.feat[1],
1299 			 data.feat[2]);
1300 	return 0;
1301 }
1302 
1303 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1304 					  struct kvm_device_attr *attr)
1305 {
1306 	mutex_lock(&kvm->lock);
1307 	if (kvm->created_vcpus) {
1308 		mutex_unlock(&kvm->lock);
1309 		return -EBUSY;
1310 	}
1311 
1312 	if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1313 			   sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1314 		mutex_unlock(&kvm->lock);
1315 		return -EFAULT;
1316 	}
1317 	mutex_unlock(&kvm->lock);
1318 
1319 	VM_EVENT(kvm, 3, "SET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1320 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1321 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1322 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1323 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1324 	VM_EVENT(kvm, 3, "SET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1325 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1326 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1327 	VM_EVENT(kvm, 3, "SET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1328 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1329 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1330 	VM_EVENT(kvm, 3, "SET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1331 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1332 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1333 	VM_EVENT(kvm, 3, "SET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1334 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1335 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1336 	VM_EVENT(kvm, 3, "SET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1337 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1338 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1339 	VM_EVENT(kvm, 3, "SET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1340 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1341 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1342 	VM_EVENT(kvm, 3, "SET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1343 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1344 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1345 	VM_EVENT(kvm, 3, "SET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1346 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1347 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1348 	VM_EVENT(kvm, 3, "SET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1349 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1350 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1351 	VM_EVENT(kvm, 3, "SET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1352 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1353 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1354 	VM_EVENT(kvm, 3, "SET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1355 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1356 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1357 	VM_EVENT(kvm, 3, "SET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1358 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1359 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1360 	VM_EVENT(kvm, 3, "SET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1361 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1362 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1363 	VM_EVENT(kvm, 3, "SET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1364 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1365 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1366 	VM_EVENT(kvm, 3, "SET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1367 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1368 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1369 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1370 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1371 	VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1372 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1373 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1374 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1375 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1376 
1377 	return 0;
1378 }
1379 
1380 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1381 {
1382 	int ret = -ENXIO;
1383 
1384 	switch (attr->attr) {
1385 	case KVM_S390_VM_CPU_PROCESSOR:
1386 		ret = kvm_s390_set_processor(kvm, attr);
1387 		break;
1388 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1389 		ret = kvm_s390_set_processor_feat(kvm, attr);
1390 		break;
1391 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1392 		ret = kvm_s390_set_processor_subfunc(kvm, attr);
1393 		break;
1394 	}
1395 	return ret;
1396 }
1397 
1398 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1399 {
1400 	struct kvm_s390_vm_cpu_processor *proc;
1401 	int ret = 0;
1402 
1403 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1404 	if (!proc) {
1405 		ret = -ENOMEM;
1406 		goto out;
1407 	}
1408 	proc->cpuid = kvm->arch.model.cpuid;
1409 	proc->ibc = kvm->arch.model.ibc;
1410 	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1411 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1412 	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1413 		 kvm->arch.model.ibc,
1414 		 kvm->arch.model.cpuid);
1415 	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1416 		 kvm->arch.model.fac_list[0],
1417 		 kvm->arch.model.fac_list[1],
1418 		 kvm->arch.model.fac_list[2]);
1419 	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1420 		ret = -EFAULT;
1421 	kfree(proc);
1422 out:
1423 	return ret;
1424 }
1425 
1426 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1427 {
1428 	struct kvm_s390_vm_cpu_machine *mach;
1429 	int ret = 0;
1430 
1431 	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1432 	if (!mach) {
1433 		ret = -ENOMEM;
1434 		goto out;
1435 	}
1436 	get_cpu_id((struct cpuid *) &mach->cpuid);
1437 	mach->ibc = sclp.ibc;
1438 	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1439 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1440 	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1441 	       sizeof(S390_lowcore.stfle_fac_list));
1442 	VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1443 		 kvm->arch.model.ibc,
1444 		 kvm->arch.model.cpuid);
1445 	VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1446 		 mach->fac_mask[0],
1447 		 mach->fac_mask[1],
1448 		 mach->fac_mask[2]);
1449 	VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1450 		 mach->fac_list[0],
1451 		 mach->fac_list[1],
1452 		 mach->fac_list[2]);
1453 	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1454 		ret = -EFAULT;
1455 	kfree(mach);
1456 out:
1457 	return ret;
1458 }
1459 
1460 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1461 				       struct kvm_device_attr *attr)
1462 {
1463 	struct kvm_s390_vm_cpu_feat data;
1464 
1465 	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1466 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1467 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1468 		return -EFAULT;
1469 	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1470 			 data.feat[0],
1471 			 data.feat[1],
1472 			 data.feat[2]);
1473 	return 0;
1474 }
1475 
1476 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1477 				     struct kvm_device_attr *attr)
1478 {
1479 	struct kvm_s390_vm_cpu_feat data;
1480 
1481 	bitmap_copy((unsigned long *) data.feat,
1482 		    kvm_s390_available_cpu_feat,
1483 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1484 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1485 		return -EFAULT;
1486 	VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1487 			 data.feat[0],
1488 			 data.feat[1],
1489 			 data.feat[2]);
1490 	return 0;
1491 }
1492 
1493 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1494 					  struct kvm_device_attr *attr)
1495 {
1496 	if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1497 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1498 		return -EFAULT;
1499 
1500 	VM_EVENT(kvm, 3, "GET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1501 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1502 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1503 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1504 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1505 	VM_EVENT(kvm, 3, "GET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1506 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1507 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1508 	VM_EVENT(kvm, 3, "GET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1509 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1510 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1511 	VM_EVENT(kvm, 3, "GET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1512 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1513 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1514 	VM_EVENT(kvm, 3, "GET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1515 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1516 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1517 	VM_EVENT(kvm, 3, "GET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1518 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1519 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1520 	VM_EVENT(kvm, 3, "GET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1521 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1522 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1523 	VM_EVENT(kvm, 3, "GET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1524 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1525 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1526 	VM_EVENT(kvm, 3, "GET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1527 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1528 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1529 	VM_EVENT(kvm, 3, "GET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1530 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1531 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1532 	VM_EVENT(kvm, 3, "GET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1533 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1534 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1535 	VM_EVENT(kvm, 3, "GET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1536 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1537 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1538 	VM_EVENT(kvm, 3, "GET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1539 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1540 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1541 	VM_EVENT(kvm, 3, "GET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1542 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1543 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1544 	VM_EVENT(kvm, 3, "GET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1545 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1546 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1547 	VM_EVENT(kvm, 3, "GET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1548 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1549 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1550 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1551 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1552 	VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1553 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1554 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1555 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1556 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1557 
1558 	return 0;
1559 }
1560 
1561 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1562 					struct kvm_device_attr *attr)
1563 {
1564 	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1565 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1566 		return -EFAULT;
1567 
1568 	VM_EVENT(kvm, 3, "GET: host  PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1569 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1570 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1571 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1572 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1573 	VM_EVENT(kvm, 3, "GET: host  PTFF   subfunc 0x%16.16lx.%16.16lx",
1574 		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1575 		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1576 	VM_EVENT(kvm, 3, "GET: host  KMAC   subfunc 0x%16.16lx.%16.16lx",
1577 		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1578 		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1579 	VM_EVENT(kvm, 3, "GET: host  KMC    subfunc 0x%16.16lx.%16.16lx",
1580 		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1581 		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1582 	VM_EVENT(kvm, 3, "GET: host  KM     subfunc 0x%16.16lx.%16.16lx",
1583 		 ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1584 		 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1585 	VM_EVENT(kvm, 3, "GET: host  KIMD   subfunc 0x%16.16lx.%16.16lx",
1586 		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1587 		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1588 	VM_EVENT(kvm, 3, "GET: host  KLMD   subfunc 0x%16.16lx.%16.16lx",
1589 		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1590 		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1591 	VM_EVENT(kvm, 3, "GET: host  PCKMO  subfunc 0x%16.16lx.%16.16lx",
1592 		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1593 		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1594 	VM_EVENT(kvm, 3, "GET: host  KMCTR  subfunc 0x%16.16lx.%16.16lx",
1595 		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1596 		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1597 	VM_EVENT(kvm, 3, "GET: host  KMF    subfunc 0x%16.16lx.%16.16lx",
1598 		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1599 		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1600 	VM_EVENT(kvm, 3, "GET: host  KMO    subfunc 0x%16.16lx.%16.16lx",
1601 		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1602 		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1603 	VM_EVENT(kvm, 3, "GET: host  PCC    subfunc 0x%16.16lx.%16.16lx",
1604 		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1605 		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1606 	VM_EVENT(kvm, 3, "GET: host  PPNO   subfunc 0x%16.16lx.%16.16lx",
1607 		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1608 		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1609 	VM_EVENT(kvm, 3, "GET: host  KMA    subfunc 0x%16.16lx.%16.16lx",
1610 		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1611 		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1612 	VM_EVENT(kvm, 3, "GET: host  KDSA   subfunc 0x%16.16lx.%16.16lx",
1613 		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1614 		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1615 	VM_EVENT(kvm, 3, "GET: host  SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1616 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1617 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1618 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1619 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1620 	VM_EVENT(kvm, 3, "GET: host  DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1621 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1622 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1623 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1624 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1625 
1626 	return 0;
1627 }
1628 
1629 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1630 {
1631 	int ret = -ENXIO;
1632 
1633 	switch (attr->attr) {
1634 	case KVM_S390_VM_CPU_PROCESSOR:
1635 		ret = kvm_s390_get_processor(kvm, attr);
1636 		break;
1637 	case KVM_S390_VM_CPU_MACHINE:
1638 		ret = kvm_s390_get_machine(kvm, attr);
1639 		break;
1640 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1641 		ret = kvm_s390_get_processor_feat(kvm, attr);
1642 		break;
1643 	case KVM_S390_VM_CPU_MACHINE_FEAT:
1644 		ret = kvm_s390_get_machine_feat(kvm, attr);
1645 		break;
1646 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1647 		ret = kvm_s390_get_processor_subfunc(kvm, attr);
1648 		break;
1649 	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1650 		ret = kvm_s390_get_machine_subfunc(kvm, attr);
1651 		break;
1652 	}
1653 	return ret;
1654 }
1655 
1656 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1657 {
1658 	int ret;
1659 
1660 	switch (attr->group) {
1661 	case KVM_S390_VM_MEM_CTRL:
1662 		ret = kvm_s390_set_mem_control(kvm, attr);
1663 		break;
1664 	case KVM_S390_VM_TOD:
1665 		ret = kvm_s390_set_tod(kvm, attr);
1666 		break;
1667 	case KVM_S390_VM_CPU_MODEL:
1668 		ret = kvm_s390_set_cpu_model(kvm, attr);
1669 		break;
1670 	case KVM_S390_VM_CRYPTO:
1671 		ret = kvm_s390_vm_set_crypto(kvm, attr);
1672 		break;
1673 	case KVM_S390_VM_MIGRATION:
1674 		ret = kvm_s390_vm_set_migration(kvm, attr);
1675 		break;
1676 	default:
1677 		ret = -ENXIO;
1678 		break;
1679 	}
1680 
1681 	return ret;
1682 }
1683 
1684 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1685 {
1686 	int ret;
1687 
1688 	switch (attr->group) {
1689 	case KVM_S390_VM_MEM_CTRL:
1690 		ret = kvm_s390_get_mem_control(kvm, attr);
1691 		break;
1692 	case KVM_S390_VM_TOD:
1693 		ret = kvm_s390_get_tod(kvm, attr);
1694 		break;
1695 	case KVM_S390_VM_CPU_MODEL:
1696 		ret = kvm_s390_get_cpu_model(kvm, attr);
1697 		break;
1698 	case KVM_S390_VM_MIGRATION:
1699 		ret = kvm_s390_vm_get_migration(kvm, attr);
1700 		break;
1701 	default:
1702 		ret = -ENXIO;
1703 		break;
1704 	}
1705 
1706 	return ret;
1707 }
1708 
1709 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1710 {
1711 	int ret;
1712 
1713 	switch (attr->group) {
1714 	case KVM_S390_VM_MEM_CTRL:
1715 		switch (attr->attr) {
1716 		case KVM_S390_VM_MEM_ENABLE_CMMA:
1717 		case KVM_S390_VM_MEM_CLR_CMMA:
1718 			ret = sclp.has_cmma ? 0 : -ENXIO;
1719 			break;
1720 		case KVM_S390_VM_MEM_LIMIT_SIZE:
1721 			ret = 0;
1722 			break;
1723 		default:
1724 			ret = -ENXIO;
1725 			break;
1726 		}
1727 		break;
1728 	case KVM_S390_VM_TOD:
1729 		switch (attr->attr) {
1730 		case KVM_S390_VM_TOD_LOW:
1731 		case KVM_S390_VM_TOD_HIGH:
1732 			ret = 0;
1733 			break;
1734 		default:
1735 			ret = -ENXIO;
1736 			break;
1737 		}
1738 		break;
1739 	case KVM_S390_VM_CPU_MODEL:
1740 		switch (attr->attr) {
1741 		case KVM_S390_VM_CPU_PROCESSOR:
1742 		case KVM_S390_VM_CPU_MACHINE:
1743 		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1744 		case KVM_S390_VM_CPU_MACHINE_FEAT:
1745 		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1746 		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1747 			ret = 0;
1748 			break;
1749 		default:
1750 			ret = -ENXIO;
1751 			break;
1752 		}
1753 		break;
1754 	case KVM_S390_VM_CRYPTO:
1755 		switch (attr->attr) {
1756 		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1757 		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1758 		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1759 		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1760 			ret = 0;
1761 			break;
1762 		case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1763 		case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1764 			ret = ap_instructions_available() ? 0 : -ENXIO;
1765 			break;
1766 		default:
1767 			ret = -ENXIO;
1768 			break;
1769 		}
1770 		break;
1771 	case KVM_S390_VM_MIGRATION:
1772 		ret = 0;
1773 		break;
1774 	default:
1775 		ret = -ENXIO;
1776 		break;
1777 	}
1778 
1779 	return ret;
1780 }
1781 
1782 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1783 {
1784 	uint8_t *keys;
1785 	uint64_t hva;
1786 	int srcu_idx, i, r = 0;
1787 
1788 	if (args->flags != 0)
1789 		return -EINVAL;
1790 
1791 	/* Is this guest using storage keys? */
1792 	if (!mm_uses_skeys(current->mm))
1793 		return KVM_S390_GET_SKEYS_NONE;
1794 
1795 	/* Enforce sane limit on memory allocation */
1796 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1797 		return -EINVAL;
1798 
1799 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1800 	if (!keys)
1801 		return -ENOMEM;
1802 
1803 	down_read(&current->mm->mmap_sem);
1804 	srcu_idx = srcu_read_lock(&kvm->srcu);
1805 	for (i = 0; i < args->count; i++) {
1806 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1807 		if (kvm_is_error_hva(hva)) {
1808 			r = -EFAULT;
1809 			break;
1810 		}
1811 
1812 		r = get_guest_storage_key(current->mm, hva, &keys[i]);
1813 		if (r)
1814 			break;
1815 	}
1816 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1817 	up_read(&current->mm->mmap_sem);
1818 
1819 	if (!r) {
1820 		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1821 				 sizeof(uint8_t) * args->count);
1822 		if (r)
1823 			r = -EFAULT;
1824 	}
1825 
1826 	kvfree(keys);
1827 	return r;
1828 }
1829 
1830 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1831 {
1832 	uint8_t *keys;
1833 	uint64_t hva;
1834 	int srcu_idx, i, r = 0;
1835 	bool unlocked;
1836 
1837 	if (args->flags != 0)
1838 		return -EINVAL;
1839 
1840 	/* Enforce sane limit on memory allocation */
1841 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1842 		return -EINVAL;
1843 
1844 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1845 	if (!keys)
1846 		return -ENOMEM;
1847 
1848 	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1849 			   sizeof(uint8_t) * args->count);
1850 	if (r) {
1851 		r = -EFAULT;
1852 		goto out;
1853 	}
1854 
1855 	/* Enable storage key handling for the guest */
1856 	r = s390_enable_skey();
1857 	if (r)
1858 		goto out;
1859 
1860 	i = 0;
1861 	down_read(&current->mm->mmap_sem);
1862 	srcu_idx = srcu_read_lock(&kvm->srcu);
1863         while (i < args->count) {
1864 		unlocked = false;
1865 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1866 		if (kvm_is_error_hva(hva)) {
1867 			r = -EFAULT;
1868 			break;
1869 		}
1870 
1871 		/* Lowest order bit is reserved */
1872 		if (keys[i] & 0x01) {
1873 			r = -EINVAL;
1874 			break;
1875 		}
1876 
1877 		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1878 		if (r) {
1879 			r = fixup_user_fault(current, current->mm, hva,
1880 					     FAULT_FLAG_WRITE, &unlocked);
1881 			if (r)
1882 				break;
1883 		}
1884 		if (!r)
1885 			i++;
1886 	}
1887 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1888 	up_read(&current->mm->mmap_sem);
1889 out:
1890 	kvfree(keys);
1891 	return r;
1892 }
1893 
1894 /*
1895  * Base address and length must be sent at the start of each block, therefore
1896  * it's cheaper to send some clean data, as long as it's less than the size of
1897  * two longs.
1898  */
1899 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1900 /* for consistency */
1901 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1902 
1903 /*
1904  * Similar to gfn_to_memslot, but returns the index of a memslot also when the
1905  * address falls in a hole. In that case the index of one of the memslots
1906  * bordering the hole is returned.
1907  */
1908 static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1909 {
1910 	int start = 0, end = slots->used_slots;
1911 	int slot = atomic_read(&slots->lru_slot);
1912 	struct kvm_memory_slot *memslots = slots->memslots;
1913 
1914 	if (gfn >= memslots[slot].base_gfn &&
1915 	    gfn < memslots[slot].base_gfn + memslots[slot].npages)
1916 		return slot;
1917 
1918 	while (start < end) {
1919 		slot = start + (end - start) / 2;
1920 
1921 		if (gfn >= memslots[slot].base_gfn)
1922 			end = slot;
1923 		else
1924 			start = slot + 1;
1925 	}
1926 
1927 	if (gfn >= memslots[start].base_gfn &&
1928 	    gfn < memslots[start].base_gfn + memslots[start].npages) {
1929 		atomic_set(&slots->lru_slot, start);
1930 	}
1931 
1932 	return start;
1933 }
1934 
1935 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1936 			      u8 *res, unsigned long bufsize)
1937 {
1938 	unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1939 
1940 	args->count = 0;
1941 	while (args->count < bufsize) {
1942 		hva = gfn_to_hva(kvm, cur_gfn);
1943 		/*
1944 		 * We return an error if the first value was invalid, but we
1945 		 * return successfully if at least one value was copied.
1946 		 */
1947 		if (kvm_is_error_hva(hva))
1948 			return args->count ? 0 : -EFAULT;
1949 		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1950 			pgstev = 0;
1951 		res[args->count++] = (pgstev >> 24) & 0x43;
1952 		cur_gfn++;
1953 	}
1954 
1955 	return 0;
1956 }
1957 
1958 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
1959 					      unsigned long cur_gfn)
1960 {
1961 	int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
1962 	struct kvm_memory_slot *ms = slots->memslots + slotidx;
1963 	unsigned long ofs = cur_gfn - ms->base_gfn;
1964 
1965 	if (ms->base_gfn + ms->npages <= cur_gfn) {
1966 		slotidx--;
1967 		/* If we are above the highest slot, wrap around */
1968 		if (slotidx < 0)
1969 			slotidx = slots->used_slots - 1;
1970 
1971 		ms = slots->memslots + slotidx;
1972 		ofs = 0;
1973 	}
1974 	ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
1975 	while ((slotidx > 0) && (ofs >= ms->npages)) {
1976 		slotidx--;
1977 		ms = slots->memslots + slotidx;
1978 		ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
1979 	}
1980 	return ms->base_gfn + ofs;
1981 }
1982 
1983 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1984 			     u8 *res, unsigned long bufsize)
1985 {
1986 	unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
1987 	struct kvm_memslots *slots = kvm_memslots(kvm);
1988 	struct kvm_memory_slot *ms;
1989 
1990 	cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
1991 	ms = gfn_to_memslot(kvm, cur_gfn);
1992 	args->count = 0;
1993 	args->start_gfn = cur_gfn;
1994 	if (!ms)
1995 		return 0;
1996 	next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
1997 	mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
1998 
1999 	while (args->count < bufsize) {
2000 		hva = gfn_to_hva(kvm, cur_gfn);
2001 		if (kvm_is_error_hva(hva))
2002 			return 0;
2003 		/* Decrement only if we actually flipped the bit to 0 */
2004 		if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2005 			atomic64_dec(&kvm->arch.cmma_dirty_pages);
2006 		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2007 			pgstev = 0;
2008 		/* Save the value */
2009 		res[args->count++] = (pgstev >> 24) & 0x43;
2010 		/* If the next bit is too far away, stop. */
2011 		if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2012 			return 0;
2013 		/* If we reached the previous "next", find the next one */
2014 		if (cur_gfn == next_gfn)
2015 			next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2016 		/* Reached the end of memory or of the buffer, stop */
2017 		if ((next_gfn >= mem_end) ||
2018 		    (next_gfn - args->start_gfn >= bufsize))
2019 			return 0;
2020 		cur_gfn++;
2021 		/* Reached the end of the current memslot, take the next one. */
2022 		if (cur_gfn - ms->base_gfn >= ms->npages) {
2023 			ms = gfn_to_memslot(kvm, cur_gfn);
2024 			if (!ms)
2025 				return 0;
2026 		}
2027 	}
2028 	return 0;
2029 }
2030 
2031 /*
2032  * This function searches for the next page with dirty CMMA attributes, and
2033  * saves the attributes in the buffer up to either the end of the buffer or
2034  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2035  * no trailing clean bytes are saved.
2036  * In case no dirty bits were found, or if CMMA was not enabled or used, the
2037  * output buffer will indicate 0 as length.
2038  */
2039 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2040 				  struct kvm_s390_cmma_log *args)
2041 {
2042 	unsigned long bufsize;
2043 	int srcu_idx, peek, ret;
2044 	u8 *values;
2045 
2046 	if (!kvm->arch.use_cmma)
2047 		return -ENXIO;
2048 	/* Invalid/unsupported flags were specified */
2049 	if (args->flags & ~KVM_S390_CMMA_PEEK)
2050 		return -EINVAL;
2051 	/* Migration mode query, and we are not doing a migration */
2052 	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2053 	if (!peek && !kvm->arch.migration_mode)
2054 		return -EINVAL;
2055 	/* CMMA is disabled or was not used, or the buffer has length zero */
2056 	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2057 	if (!bufsize || !kvm->mm->context.uses_cmm) {
2058 		memset(args, 0, sizeof(*args));
2059 		return 0;
2060 	}
2061 	/* We are not peeking, and there are no dirty pages */
2062 	if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2063 		memset(args, 0, sizeof(*args));
2064 		return 0;
2065 	}
2066 
2067 	values = vmalloc(bufsize);
2068 	if (!values)
2069 		return -ENOMEM;
2070 
2071 	down_read(&kvm->mm->mmap_sem);
2072 	srcu_idx = srcu_read_lock(&kvm->srcu);
2073 	if (peek)
2074 		ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2075 	else
2076 		ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2077 	srcu_read_unlock(&kvm->srcu, srcu_idx);
2078 	up_read(&kvm->mm->mmap_sem);
2079 
2080 	if (kvm->arch.migration_mode)
2081 		args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2082 	else
2083 		args->remaining = 0;
2084 
2085 	if (copy_to_user((void __user *)args->values, values, args->count))
2086 		ret = -EFAULT;
2087 
2088 	vfree(values);
2089 	return ret;
2090 }
2091 
2092 /*
2093  * This function sets the CMMA attributes for the given pages. If the input
2094  * buffer has zero length, no action is taken, otherwise the attributes are
2095  * set and the mm->context.uses_cmm flag is set.
2096  */
2097 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2098 				  const struct kvm_s390_cmma_log *args)
2099 {
2100 	unsigned long hva, mask, pgstev, i;
2101 	uint8_t *bits;
2102 	int srcu_idx, r = 0;
2103 
2104 	mask = args->mask;
2105 
2106 	if (!kvm->arch.use_cmma)
2107 		return -ENXIO;
2108 	/* invalid/unsupported flags */
2109 	if (args->flags != 0)
2110 		return -EINVAL;
2111 	/* Enforce sane limit on memory allocation */
2112 	if (args->count > KVM_S390_CMMA_SIZE_MAX)
2113 		return -EINVAL;
2114 	/* Nothing to do */
2115 	if (args->count == 0)
2116 		return 0;
2117 
2118 	bits = vmalloc(array_size(sizeof(*bits), args->count));
2119 	if (!bits)
2120 		return -ENOMEM;
2121 
2122 	r = copy_from_user(bits, (void __user *)args->values, args->count);
2123 	if (r) {
2124 		r = -EFAULT;
2125 		goto out;
2126 	}
2127 
2128 	down_read(&kvm->mm->mmap_sem);
2129 	srcu_idx = srcu_read_lock(&kvm->srcu);
2130 	for (i = 0; i < args->count; i++) {
2131 		hva = gfn_to_hva(kvm, args->start_gfn + i);
2132 		if (kvm_is_error_hva(hva)) {
2133 			r = -EFAULT;
2134 			break;
2135 		}
2136 
2137 		pgstev = bits[i];
2138 		pgstev = pgstev << 24;
2139 		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2140 		set_pgste_bits(kvm->mm, hva, mask, pgstev);
2141 	}
2142 	srcu_read_unlock(&kvm->srcu, srcu_idx);
2143 	up_read(&kvm->mm->mmap_sem);
2144 
2145 	if (!kvm->mm->context.uses_cmm) {
2146 		down_write(&kvm->mm->mmap_sem);
2147 		kvm->mm->context.uses_cmm = 1;
2148 		up_write(&kvm->mm->mmap_sem);
2149 	}
2150 out:
2151 	vfree(bits);
2152 	return r;
2153 }
2154 
2155 long kvm_arch_vm_ioctl(struct file *filp,
2156 		       unsigned int ioctl, unsigned long arg)
2157 {
2158 	struct kvm *kvm = filp->private_data;
2159 	void __user *argp = (void __user *)arg;
2160 	struct kvm_device_attr attr;
2161 	int r;
2162 
2163 	switch (ioctl) {
2164 	case KVM_S390_INTERRUPT: {
2165 		struct kvm_s390_interrupt s390int;
2166 
2167 		r = -EFAULT;
2168 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
2169 			break;
2170 		r = kvm_s390_inject_vm(kvm, &s390int);
2171 		break;
2172 	}
2173 	case KVM_CREATE_IRQCHIP: {
2174 		struct kvm_irq_routing_entry routing;
2175 
2176 		r = -EINVAL;
2177 		if (kvm->arch.use_irqchip) {
2178 			/* Set up dummy routing. */
2179 			memset(&routing, 0, sizeof(routing));
2180 			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2181 		}
2182 		break;
2183 	}
2184 	case KVM_SET_DEVICE_ATTR: {
2185 		r = -EFAULT;
2186 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2187 			break;
2188 		r = kvm_s390_vm_set_attr(kvm, &attr);
2189 		break;
2190 	}
2191 	case KVM_GET_DEVICE_ATTR: {
2192 		r = -EFAULT;
2193 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2194 			break;
2195 		r = kvm_s390_vm_get_attr(kvm, &attr);
2196 		break;
2197 	}
2198 	case KVM_HAS_DEVICE_ATTR: {
2199 		r = -EFAULT;
2200 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2201 			break;
2202 		r = kvm_s390_vm_has_attr(kvm, &attr);
2203 		break;
2204 	}
2205 	case KVM_S390_GET_SKEYS: {
2206 		struct kvm_s390_skeys args;
2207 
2208 		r = -EFAULT;
2209 		if (copy_from_user(&args, argp,
2210 				   sizeof(struct kvm_s390_skeys)))
2211 			break;
2212 		r = kvm_s390_get_skeys(kvm, &args);
2213 		break;
2214 	}
2215 	case KVM_S390_SET_SKEYS: {
2216 		struct kvm_s390_skeys args;
2217 
2218 		r = -EFAULT;
2219 		if (copy_from_user(&args, argp,
2220 				   sizeof(struct kvm_s390_skeys)))
2221 			break;
2222 		r = kvm_s390_set_skeys(kvm, &args);
2223 		break;
2224 	}
2225 	case KVM_S390_GET_CMMA_BITS: {
2226 		struct kvm_s390_cmma_log args;
2227 
2228 		r = -EFAULT;
2229 		if (copy_from_user(&args, argp, sizeof(args)))
2230 			break;
2231 		mutex_lock(&kvm->slots_lock);
2232 		r = kvm_s390_get_cmma_bits(kvm, &args);
2233 		mutex_unlock(&kvm->slots_lock);
2234 		if (!r) {
2235 			r = copy_to_user(argp, &args, sizeof(args));
2236 			if (r)
2237 				r = -EFAULT;
2238 		}
2239 		break;
2240 	}
2241 	case KVM_S390_SET_CMMA_BITS: {
2242 		struct kvm_s390_cmma_log args;
2243 
2244 		r = -EFAULT;
2245 		if (copy_from_user(&args, argp, sizeof(args)))
2246 			break;
2247 		mutex_lock(&kvm->slots_lock);
2248 		r = kvm_s390_set_cmma_bits(kvm, &args);
2249 		mutex_unlock(&kvm->slots_lock);
2250 		break;
2251 	}
2252 	default:
2253 		r = -ENOTTY;
2254 	}
2255 
2256 	return r;
2257 }
2258 
2259 static int kvm_s390_apxa_installed(void)
2260 {
2261 	struct ap_config_info info;
2262 
2263 	if (ap_instructions_available()) {
2264 		if (ap_qci(&info) == 0)
2265 			return info.apxa;
2266 	}
2267 
2268 	return 0;
2269 }
2270 
2271 /*
2272  * The format of the crypto control block (CRYCB) is specified in the 3 low
2273  * order bits of the CRYCB designation (CRYCBD) field as follows:
2274  * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2275  *	     AP extended addressing (APXA) facility are installed.
2276  * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2277  * Format 2: Both the APXA and MSAX3 facilities are installed
2278  */
2279 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2280 {
2281 	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2282 
2283 	/* Clear the CRYCB format bits - i.e., set format 0 by default */
2284 	kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2285 
2286 	/* Check whether MSAX3 is installed */
2287 	if (!test_kvm_facility(kvm, 76))
2288 		return;
2289 
2290 	if (kvm_s390_apxa_installed())
2291 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2292 	else
2293 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2294 }
2295 
2296 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2297 			       unsigned long *aqm, unsigned long *adm)
2298 {
2299 	struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2300 
2301 	mutex_lock(&kvm->lock);
2302 	kvm_s390_vcpu_block_all(kvm);
2303 
2304 	switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2305 	case CRYCB_FORMAT2: /* APCB1 use 256 bits */
2306 		memcpy(crycb->apcb1.apm, apm, 32);
2307 		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2308 			 apm[0], apm[1], apm[2], apm[3]);
2309 		memcpy(crycb->apcb1.aqm, aqm, 32);
2310 		VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2311 			 aqm[0], aqm[1], aqm[2], aqm[3]);
2312 		memcpy(crycb->apcb1.adm, adm, 32);
2313 		VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2314 			 adm[0], adm[1], adm[2], adm[3]);
2315 		break;
2316 	case CRYCB_FORMAT1:
2317 	case CRYCB_FORMAT0: /* Fall through both use APCB0 */
2318 		memcpy(crycb->apcb0.apm, apm, 8);
2319 		memcpy(crycb->apcb0.aqm, aqm, 2);
2320 		memcpy(crycb->apcb0.adm, adm, 2);
2321 		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2322 			 apm[0], *((unsigned short *)aqm),
2323 			 *((unsigned short *)adm));
2324 		break;
2325 	default:	/* Can not happen */
2326 		break;
2327 	}
2328 
2329 	/* recreate the shadow crycb for each vcpu */
2330 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2331 	kvm_s390_vcpu_unblock_all(kvm);
2332 	mutex_unlock(&kvm->lock);
2333 }
2334 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
2335 
2336 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2337 {
2338 	mutex_lock(&kvm->lock);
2339 	kvm_s390_vcpu_block_all(kvm);
2340 
2341 	memset(&kvm->arch.crypto.crycb->apcb0, 0,
2342 	       sizeof(kvm->arch.crypto.crycb->apcb0));
2343 	memset(&kvm->arch.crypto.crycb->apcb1, 0,
2344 	       sizeof(kvm->arch.crypto.crycb->apcb1));
2345 
2346 	VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2347 	/* recreate the shadow crycb for each vcpu */
2348 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2349 	kvm_s390_vcpu_unblock_all(kvm);
2350 	mutex_unlock(&kvm->lock);
2351 }
2352 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2353 
2354 static u64 kvm_s390_get_initial_cpuid(void)
2355 {
2356 	struct cpuid cpuid;
2357 
2358 	get_cpu_id(&cpuid);
2359 	cpuid.version = 0xff;
2360 	return *((u64 *) &cpuid);
2361 }
2362 
2363 static void kvm_s390_crypto_init(struct kvm *kvm)
2364 {
2365 	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2366 	kvm_s390_set_crycb_format(kvm);
2367 
2368 	if (!test_kvm_facility(kvm, 76))
2369 		return;
2370 
2371 	/* Enable AES/DEA protected key functions by default */
2372 	kvm->arch.crypto.aes_kw = 1;
2373 	kvm->arch.crypto.dea_kw = 1;
2374 	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2375 			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2376 	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2377 			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2378 }
2379 
2380 static void sca_dispose(struct kvm *kvm)
2381 {
2382 	if (kvm->arch.use_esca)
2383 		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2384 	else
2385 		free_page((unsigned long)(kvm->arch.sca));
2386 	kvm->arch.sca = NULL;
2387 }
2388 
2389 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2390 {
2391 	gfp_t alloc_flags = GFP_KERNEL;
2392 	int i, rc;
2393 	char debug_name[16];
2394 	static unsigned long sca_offset;
2395 
2396 	rc = -EINVAL;
2397 #ifdef CONFIG_KVM_S390_UCONTROL
2398 	if (type & ~KVM_VM_S390_UCONTROL)
2399 		goto out_err;
2400 	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2401 		goto out_err;
2402 #else
2403 	if (type)
2404 		goto out_err;
2405 #endif
2406 
2407 	rc = s390_enable_sie();
2408 	if (rc)
2409 		goto out_err;
2410 
2411 	rc = -ENOMEM;
2412 
2413 	if (!sclp.has_64bscao)
2414 		alloc_flags |= GFP_DMA;
2415 	rwlock_init(&kvm->arch.sca_lock);
2416 	/* start with basic SCA */
2417 	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2418 	if (!kvm->arch.sca)
2419 		goto out_err;
2420 	spin_lock(&kvm_lock);
2421 	sca_offset += 16;
2422 	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2423 		sca_offset = 0;
2424 	kvm->arch.sca = (struct bsca_block *)
2425 			((char *) kvm->arch.sca + sca_offset);
2426 	spin_unlock(&kvm_lock);
2427 
2428 	sprintf(debug_name, "kvm-%u", current->pid);
2429 
2430 	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2431 	if (!kvm->arch.dbf)
2432 		goto out_err;
2433 
2434 	BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2435 	kvm->arch.sie_page2 =
2436 	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
2437 	if (!kvm->arch.sie_page2)
2438 		goto out_err;
2439 
2440 	kvm->arch.sie_page2->kvm = kvm;
2441 	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2442 
2443 	for (i = 0; i < kvm_s390_fac_size(); i++) {
2444 		kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
2445 					      (kvm_s390_fac_base[i] |
2446 					       kvm_s390_fac_ext[i]);
2447 		kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
2448 					      kvm_s390_fac_base[i];
2449 	}
2450 	kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2451 
2452 	/* we are always in czam mode - even on pre z14 machines */
2453 	set_kvm_facility(kvm->arch.model.fac_mask, 138);
2454 	set_kvm_facility(kvm->arch.model.fac_list, 138);
2455 	/* we emulate STHYI in kvm */
2456 	set_kvm_facility(kvm->arch.model.fac_mask, 74);
2457 	set_kvm_facility(kvm->arch.model.fac_list, 74);
2458 	if (MACHINE_HAS_TLB_GUEST) {
2459 		set_kvm_facility(kvm->arch.model.fac_mask, 147);
2460 		set_kvm_facility(kvm->arch.model.fac_list, 147);
2461 	}
2462 
2463 	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2464 	kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2465 
2466 	kvm_s390_crypto_init(kvm);
2467 
2468 	mutex_init(&kvm->arch.float_int.ais_lock);
2469 	spin_lock_init(&kvm->arch.float_int.lock);
2470 	for (i = 0; i < FIRQ_LIST_COUNT; i++)
2471 		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2472 	init_waitqueue_head(&kvm->arch.ipte_wq);
2473 	mutex_init(&kvm->arch.ipte_mutex);
2474 
2475 	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2476 	VM_EVENT(kvm, 3, "vm created with type %lu", type);
2477 
2478 	if (type & KVM_VM_S390_UCONTROL) {
2479 		kvm->arch.gmap = NULL;
2480 		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2481 	} else {
2482 		if (sclp.hamax == U64_MAX)
2483 			kvm->arch.mem_limit = TASK_SIZE_MAX;
2484 		else
2485 			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2486 						    sclp.hamax + 1);
2487 		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2488 		if (!kvm->arch.gmap)
2489 			goto out_err;
2490 		kvm->arch.gmap->private = kvm;
2491 		kvm->arch.gmap->pfault_enabled = 0;
2492 	}
2493 
2494 	kvm->arch.use_pfmfi = sclp.has_pfmfi;
2495 	kvm->arch.use_skf = sclp.has_skey;
2496 	spin_lock_init(&kvm->arch.start_stop_lock);
2497 	kvm_s390_vsie_init(kvm);
2498 	kvm_s390_gisa_init(kvm);
2499 	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2500 
2501 	return 0;
2502 out_err:
2503 	free_page((unsigned long)kvm->arch.sie_page2);
2504 	debug_unregister(kvm->arch.dbf);
2505 	sca_dispose(kvm);
2506 	KVM_EVENT(3, "creation of vm failed: %d", rc);
2507 	return rc;
2508 }
2509 
2510 bool kvm_arch_has_vcpu_debugfs(void)
2511 {
2512 	return false;
2513 }
2514 
2515 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
2516 {
2517 	return 0;
2518 }
2519 
2520 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2521 {
2522 	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2523 	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2524 	kvm_s390_clear_local_irqs(vcpu);
2525 	kvm_clear_async_pf_completion_queue(vcpu);
2526 	if (!kvm_is_ucontrol(vcpu->kvm))
2527 		sca_del_vcpu(vcpu);
2528 
2529 	if (kvm_is_ucontrol(vcpu->kvm))
2530 		gmap_remove(vcpu->arch.gmap);
2531 
2532 	if (vcpu->kvm->arch.use_cmma)
2533 		kvm_s390_vcpu_unsetup_cmma(vcpu);
2534 	free_page((unsigned long)(vcpu->arch.sie_block));
2535 
2536 	kvm_vcpu_uninit(vcpu);
2537 	kmem_cache_free(kvm_vcpu_cache, vcpu);
2538 }
2539 
2540 static void kvm_free_vcpus(struct kvm *kvm)
2541 {
2542 	unsigned int i;
2543 	struct kvm_vcpu *vcpu;
2544 
2545 	kvm_for_each_vcpu(i, vcpu, kvm)
2546 		kvm_arch_vcpu_destroy(vcpu);
2547 
2548 	mutex_lock(&kvm->lock);
2549 	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2550 		kvm->vcpus[i] = NULL;
2551 
2552 	atomic_set(&kvm->online_vcpus, 0);
2553 	mutex_unlock(&kvm->lock);
2554 }
2555 
2556 void kvm_arch_destroy_vm(struct kvm *kvm)
2557 {
2558 	kvm_free_vcpus(kvm);
2559 	sca_dispose(kvm);
2560 	debug_unregister(kvm->arch.dbf);
2561 	kvm_s390_gisa_destroy(kvm);
2562 	free_page((unsigned long)kvm->arch.sie_page2);
2563 	if (!kvm_is_ucontrol(kvm))
2564 		gmap_remove(kvm->arch.gmap);
2565 	kvm_s390_destroy_adapters(kvm);
2566 	kvm_s390_clear_float_irqs(kvm);
2567 	kvm_s390_vsie_destroy(kvm);
2568 	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2569 }
2570 
2571 /* Section: vcpu related */
2572 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2573 {
2574 	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2575 	if (!vcpu->arch.gmap)
2576 		return -ENOMEM;
2577 	vcpu->arch.gmap->private = vcpu->kvm;
2578 
2579 	return 0;
2580 }
2581 
2582 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2583 {
2584 	if (!kvm_s390_use_sca_entries())
2585 		return;
2586 	read_lock(&vcpu->kvm->arch.sca_lock);
2587 	if (vcpu->kvm->arch.use_esca) {
2588 		struct esca_block *sca = vcpu->kvm->arch.sca;
2589 
2590 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2591 		sca->cpu[vcpu->vcpu_id].sda = 0;
2592 	} else {
2593 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2594 
2595 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2596 		sca->cpu[vcpu->vcpu_id].sda = 0;
2597 	}
2598 	read_unlock(&vcpu->kvm->arch.sca_lock);
2599 }
2600 
2601 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2602 {
2603 	if (!kvm_s390_use_sca_entries()) {
2604 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2605 
2606 		/* we still need the basic sca for the ipte control */
2607 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2608 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2609 		return;
2610 	}
2611 	read_lock(&vcpu->kvm->arch.sca_lock);
2612 	if (vcpu->kvm->arch.use_esca) {
2613 		struct esca_block *sca = vcpu->kvm->arch.sca;
2614 
2615 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2616 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2617 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2618 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2619 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2620 	} else {
2621 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2622 
2623 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2624 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2625 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2626 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2627 	}
2628 	read_unlock(&vcpu->kvm->arch.sca_lock);
2629 }
2630 
2631 /* Basic SCA to Extended SCA data copy routines */
2632 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2633 {
2634 	d->sda = s->sda;
2635 	d->sigp_ctrl.c = s->sigp_ctrl.c;
2636 	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2637 }
2638 
2639 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2640 {
2641 	int i;
2642 
2643 	d->ipte_control = s->ipte_control;
2644 	d->mcn[0] = s->mcn;
2645 	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2646 		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2647 }
2648 
2649 static int sca_switch_to_extended(struct kvm *kvm)
2650 {
2651 	struct bsca_block *old_sca = kvm->arch.sca;
2652 	struct esca_block *new_sca;
2653 	struct kvm_vcpu *vcpu;
2654 	unsigned int vcpu_idx;
2655 	u32 scaol, scaoh;
2656 
2657 	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2658 	if (!new_sca)
2659 		return -ENOMEM;
2660 
2661 	scaoh = (u32)((u64)(new_sca) >> 32);
2662 	scaol = (u32)(u64)(new_sca) & ~0x3fU;
2663 
2664 	kvm_s390_vcpu_block_all(kvm);
2665 	write_lock(&kvm->arch.sca_lock);
2666 
2667 	sca_copy_b_to_e(new_sca, old_sca);
2668 
2669 	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2670 		vcpu->arch.sie_block->scaoh = scaoh;
2671 		vcpu->arch.sie_block->scaol = scaol;
2672 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2673 	}
2674 	kvm->arch.sca = new_sca;
2675 	kvm->arch.use_esca = 1;
2676 
2677 	write_unlock(&kvm->arch.sca_lock);
2678 	kvm_s390_vcpu_unblock_all(kvm);
2679 
2680 	free_page((unsigned long)old_sca);
2681 
2682 	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2683 		 old_sca, kvm->arch.sca);
2684 	return 0;
2685 }
2686 
2687 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2688 {
2689 	int rc;
2690 
2691 	if (!kvm_s390_use_sca_entries()) {
2692 		if (id < KVM_MAX_VCPUS)
2693 			return true;
2694 		return false;
2695 	}
2696 	if (id < KVM_S390_BSCA_CPU_SLOTS)
2697 		return true;
2698 	if (!sclp.has_esca || !sclp.has_64bscao)
2699 		return false;
2700 
2701 	mutex_lock(&kvm->lock);
2702 	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2703 	mutex_unlock(&kvm->lock);
2704 
2705 	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2706 }
2707 
2708 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2709 {
2710 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2711 	kvm_clear_async_pf_completion_queue(vcpu);
2712 	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2713 				    KVM_SYNC_GPRS |
2714 				    KVM_SYNC_ACRS |
2715 				    KVM_SYNC_CRS |
2716 				    KVM_SYNC_ARCH0 |
2717 				    KVM_SYNC_PFAULT;
2718 	kvm_s390_set_prefix(vcpu, 0);
2719 	if (test_kvm_facility(vcpu->kvm, 64))
2720 		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2721 	if (test_kvm_facility(vcpu->kvm, 82))
2722 		vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
2723 	if (test_kvm_facility(vcpu->kvm, 133))
2724 		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2725 	if (test_kvm_facility(vcpu->kvm, 156))
2726 		vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
2727 	/* fprs can be synchronized via vrs, even if the guest has no vx. With
2728 	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2729 	 */
2730 	if (MACHINE_HAS_VX)
2731 		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2732 	else
2733 		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2734 
2735 	if (kvm_is_ucontrol(vcpu->kvm))
2736 		return __kvm_ucontrol_vcpu_init(vcpu);
2737 
2738 	return 0;
2739 }
2740 
2741 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2742 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2743 {
2744 	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2745 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2746 	vcpu->arch.cputm_start = get_tod_clock_fast();
2747 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2748 }
2749 
2750 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2751 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2752 {
2753 	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2754 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2755 	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2756 	vcpu->arch.cputm_start = 0;
2757 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2758 }
2759 
2760 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2761 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2762 {
2763 	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2764 	vcpu->arch.cputm_enabled = true;
2765 	__start_cpu_timer_accounting(vcpu);
2766 }
2767 
2768 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2769 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2770 {
2771 	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2772 	__stop_cpu_timer_accounting(vcpu);
2773 	vcpu->arch.cputm_enabled = false;
2774 }
2775 
2776 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2777 {
2778 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2779 	__enable_cpu_timer_accounting(vcpu);
2780 	preempt_enable();
2781 }
2782 
2783 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2784 {
2785 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2786 	__disable_cpu_timer_accounting(vcpu);
2787 	preempt_enable();
2788 }
2789 
2790 /* set the cpu timer - may only be called from the VCPU thread itself */
2791 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2792 {
2793 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2794 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2795 	if (vcpu->arch.cputm_enabled)
2796 		vcpu->arch.cputm_start = get_tod_clock_fast();
2797 	vcpu->arch.sie_block->cputm = cputm;
2798 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2799 	preempt_enable();
2800 }
2801 
2802 /* update and get the cpu timer - can also be called from other VCPU threads */
2803 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2804 {
2805 	unsigned int seq;
2806 	__u64 value;
2807 
2808 	if (unlikely(!vcpu->arch.cputm_enabled))
2809 		return vcpu->arch.sie_block->cputm;
2810 
2811 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2812 	do {
2813 		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2814 		/*
2815 		 * If the writer would ever execute a read in the critical
2816 		 * section, e.g. in irq context, we have a deadlock.
2817 		 */
2818 		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2819 		value = vcpu->arch.sie_block->cputm;
2820 		/* if cputm_start is 0, accounting is being started/stopped */
2821 		if (likely(vcpu->arch.cputm_start))
2822 			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2823 	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2824 	preempt_enable();
2825 	return value;
2826 }
2827 
2828 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2829 {
2830 
2831 	gmap_enable(vcpu->arch.enabled_gmap);
2832 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
2833 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2834 		__start_cpu_timer_accounting(vcpu);
2835 	vcpu->cpu = cpu;
2836 }
2837 
2838 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2839 {
2840 	vcpu->cpu = -1;
2841 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2842 		__stop_cpu_timer_accounting(vcpu);
2843 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
2844 	vcpu->arch.enabled_gmap = gmap_get_enabled();
2845 	gmap_disable(vcpu->arch.enabled_gmap);
2846 
2847 }
2848 
2849 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2850 {
2851 	/* this equals initial cpu reset in pop, but we don't switch to ESA */
2852 	vcpu->arch.sie_block->gpsw.mask = 0UL;
2853 	vcpu->arch.sie_block->gpsw.addr = 0UL;
2854 	kvm_s390_set_prefix(vcpu, 0);
2855 	kvm_s390_set_cpu_timer(vcpu, 0);
2856 	vcpu->arch.sie_block->ckc       = 0UL;
2857 	vcpu->arch.sie_block->todpr     = 0;
2858 	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2859 	vcpu->arch.sie_block->gcr[0]  = CR0_UNUSED_56 |
2860 					CR0_INTERRUPT_KEY_SUBMASK |
2861 					CR0_MEASUREMENT_ALERT_SUBMASK;
2862 	vcpu->arch.sie_block->gcr[14] = CR14_UNUSED_32 |
2863 					CR14_UNUSED_33 |
2864 					CR14_EXTERNAL_DAMAGE_SUBMASK;
2865 	/* make sure the new fpc will be lazily loaded */
2866 	save_fpu_regs();
2867 	current->thread.fpu.fpc = 0;
2868 	vcpu->arch.sie_block->gbea = 1;
2869 	vcpu->arch.sie_block->pp = 0;
2870 	vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
2871 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2872 	kvm_clear_async_pf_completion_queue(vcpu);
2873 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2874 		kvm_s390_vcpu_stop(vcpu);
2875 	kvm_s390_clear_local_irqs(vcpu);
2876 }
2877 
2878 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2879 {
2880 	mutex_lock(&vcpu->kvm->lock);
2881 	preempt_disable();
2882 	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2883 	vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
2884 	preempt_enable();
2885 	mutex_unlock(&vcpu->kvm->lock);
2886 	if (!kvm_is_ucontrol(vcpu->kvm)) {
2887 		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2888 		sca_add_vcpu(vcpu);
2889 	}
2890 	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2891 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2892 	/* make vcpu_load load the right gmap on the first trigger */
2893 	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2894 }
2895 
2896 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
2897 {
2898 	if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
2899 	    test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
2900 		return true;
2901 	return false;
2902 }
2903 
2904 static bool kvm_has_pckmo_ecc(struct kvm *kvm)
2905 {
2906 	/* At least one ECC subfunction must be present */
2907 	return kvm_has_pckmo_subfunc(kvm, 32) ||
2908 	       kvm_has_pckmo_subfunc(kvm, 33) ||
2909 	       kvm_has_pckmo_subfunc(kvm, 34) ||
2910 	       kvm_has_pckmo_subfunc(kvm, 40) ||
2911 	       kvm_has_pckmo_subfunc(kvm, 41);
2912 
2913 }
2914 
2915 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2916 {
2917 	/*
2918 	 * If the AP instructions are not being interpreted and the MSAX3
2919 	 * facility is not configured for the guest, there is nothing to set up.
2920 	 */
2921 	if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
2922 		return;
2923 
2924 	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2925 	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2926 	vcpu->arch.sie_block->eca &= ~ECA_APIE;
2927 	vcpu->arch.sie_block->ecd &= ~ECD_ECC;
2928 
2929 	if (vcpu->kvm->arch.crypto.apie)
2930 		vcpu->arch.sie_block->eca |= ECA_APIE;
2931 
2932 	/* Set up protected key support */
2933 	if (vcpu->kvm->arch.crypto.aes_kw) {
2934 		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2935 		/* ecc is also wrapped with AES key */
2936 		if (kvm_has_pckmo_ecc(vcpu->kvm))
2937 			vcpu->arch.sie_block->ecd |= ECD_ECC;
2938 	}
2939 
2940 	if (vcpu->kvm->arch.crypto.dea_kw)
2941 		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2942 }
2943 
2944 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2945 {
2946 	free_page(vcpu->arch.sie_block->cbrlo);
2947 	vcpu->arch.sie_block->cbrlo = 0;
2948 }
2949 
2950 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2951 {
2952 	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2953 	if (!vcpu->arch.sie_block->cbrlo)
2954 		return -ENOMEM;
2955 	return 0;
2956 }
2957 
2958 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2959 {
2960 	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2961 
2962 	vcpu->arch.sie_block->ibc = model->ibc;
2963 	if (test_kvm_facility(vcpu->kvm, 7))
2964 		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2965 }
2966 
2967 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2968 {
2969 	int rc = 0;
2970 
2971 	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2972 						    CPUSTAT_SM |
2973 						    CPUSTAT_STOPPED);
2974 
2975 	if (test_kvm_facility(vcpu->kvm, 78))
2976 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
2977 	else if (test_kvm_facility(vcpu->kvm, 8))
2978 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
2979 
2980 	kvm_s390_vcpu_setup_model(vcpu);
2981 
2982 	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2983 	if (MACHINE_HAS_ESOP)
2984 		vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2985 	if (test_kvm_facility(vcpu->kvm, 9))
2986 		vcpu->arch.sie_block->ecb |= ECB_SRSI;
2987 	if (test_kvm_facility(vcpu->kvm, 73))
2988 		vcpu->arch.sie_block->ecb |= ECB_TE;
2989 
2990 	if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
2991 		vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2992 	if (test_kvm_facility(vcpu->kvm, 130))
2993 		vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2994 	vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2995 	if (sclp.has_cei)
2996 		vcpu->arch.sie_block->eca |= ECA_CEI;
2997 	if (sclp.has_ib)
2998 		vcpu->arch.sie_block->eca |= ECA_IB;
2999 	if (sclp.has_siif)
3000 		vcpu->arch.sie_block->eca |= ECA_SII;
3001 	if (sclp.has_sigpif)
3002 		vcpu->arch.sie_block->eca |= ECA_SIGPI;
3003 	if (test_kvm_facility(vcpu->kvm, 129)) {
3004 		vcpu->arch.sie_block->eca |= ECA_VX;
3005 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3006 	}
3007 	if (test_kvm_facility(vcpu->kvm, 139))
3008 		vcpu->arch.sie_block->ecd |= ECD_MEF;
3009 	if (test_kvm_facility(vcpu->kvm, 156))
3010 		vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3011 	if (vcpu->arch.sie_block->gd) {
3012 		vcpu->arch.sie_block->eca |= ECA_AIV;
3013 		VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3014 			   vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3015 	}
3016 	vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
3017 					| SDNXC;
3018 	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
3019 
3020 	if (sclp.has_kss)
3021 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3022 	else
3023 		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3024 
3025 	if (vcpu->kvm->arch.use_cmma) {
3026 		rc = kvm_s390_vcpu_setup_cmma(vcpu);
3027 		if (rc)
3028 			return rc;
3029 	}
3030 	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3031 	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3032 
3033 	vcpu->arch.sie_block->hpid = HPID_KVM;
3034 
3035 	kvm_s390_vcpu_crypto_setup(vcpu);
3036 
3037 	return rc;
3038 }
3039 
3040 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
3041 				      unsigned int id)
3042 {
3043 	struct kvm_vcpu *vcpu;
3044 	struct sie_page *sie_page;
3045 	int rc = -EINVAL;
3046 
3047 	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3048 		goto out;
3049 
3050 	rc = -ENOMEM;
3051 
3052 	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
3053 	if (!vcpu)
3054 		goto out;
3055 
3056 	BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3057 	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
3058 	if (!sie_page)
3059 		goto out_free_cpu;
3060 
3061 	vcpu->arch.sie_block = &sie_page->sie_block;
3062 	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
3063 
3064 	/* the real guest size will always be smaller than msl */
3065 	vcpu->arch.sie_block->mso = 0;
3066 	vcpu->arch.sie_block->msl = sclp.hamax;
3067 
3068 	vcpu->arch.sie_block->icpua = id;
3069 	spin_lock_init(&vcpu->arch.local_int.lock);
3070 	vcpu->arch.sie_block->gd = (u32)(u64)kvm->arch.gisa_int.origin;
3071 	if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
3072 		vcpu->arch.sie_block->gd |= GISA_FORMAT1;
3073 	seqcount_init(&vcpu->arch.cputm_seqcount);
3074 
3075 	rc = kvm_vcpu_init(vcpu, kvm, id);
3076 	if (rc)
3077 		goto out_free_sie_block;
3078 	VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
3079 		 vcpu->arch.sie_block);
3080 	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
3081 
3082 	return vcpu;
3083 out_free_sie_block:
3084 	free_page((unsigned long)(vcpu->arch.sie_block));
3085 out_free_cpu:
3086 	kmem_cache_free(kvm_vcpu_cache, vcpu);
3087 out:
3088 	return ERR_PTR(rc);
3089 }
3090 
3091 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3092 {
3093 	return kvm_s390_vcpu_has_irq(vcpu, 0);
3094 }
3095 
3096 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3097 {
3098 	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3099 }
3100 
3101 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3102 {
3103 	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3104 	exit_sie(vcpu);
3105 }
3106 
3107 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3108 {
3109 	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3110 }
3111 
3112 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3113 {
3114 	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3115 	exit_sie(vcpu);
3116 }
3117 
3118 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3119 {
3120 	return atomic_read(&vcpu->arch.sie_block->prog20) &
3121 	       (PROG_BLOCK_SIE | PROG_REQUEST);
3122 }
3123 
3124 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3125 {
3126 	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3127 }
3128 
3129 /*
3130  * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3131  * If the CPU is not running (e.g. waiting as idle) the function will
3132  * return immediately. */
3133 void exit_sie(struct kvm_vcpu *vcpu)
3134 {
3135 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3136 	kvm_s390_vsie_kick(vcpu);
3137 	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3138 		cpu_relax();
3139 }
3140 
3141 /* Kick a guest cpu out of SIE to process a request synchronously */
3142 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
3143 {
3144 	kvm_make_request(req, vcpu);
3145 	kvm_s390_vcpu_request(vcpu);
3146 }
3147 
3148 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3149 			      unsigned long end)
3150 {
3151 	struct kvm *kvm = gmap->private;
3152 	struct kvm_vcpu *vcpu;
3153 	unsigned long prefix;
3154 	int i;
3155 
3156 	if (gmap_is_shadow(gmap))
3157 		return;
3158 	if (start >= 1UL << 31)
3159 		/* We are only interested in prefix pages */
3160 		return;
3161 	kvm_for_each_vcpu(i, vcpu, kvm) {
3162 		/* match against both prefix pages */
3163 		prefix = kvm_s390_get_prefix(vcpu);
3164 		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3165 			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3166 				   start, end);
3167 			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
3168 		}
3169 	}
3170 }
3171 
3172 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3173 {
3174 	/* do not poll with more than halt_poll_max_steal percent of steal time */
3175 	if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3176 	    halt_poll_max_steal) {
3177 		vcpu->stat.halt_no_poll_steal++;
3178 		return true;
3179 	}
3180 	return false;
3181 }
3182 
3183 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
3184 {
3185 	/* kvm common code refers to this, but never calls it */
3186 	BUG();
3187 	return 0;
3188 }
3189 
3190 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3191 					   struct kvm_one_reg *reg)
3192 {
3193 	int r = -EINVAL;
3194 
3195 	switch (reg->id) {
3196 	case KVM_REG_S390_TODPR:
3197 		r = put_user(vcpu->arch.sie_block->todpr,
3198 			     (u32 __user *)reg->addr);
3199 		break;
3200 	case KVM_REG_S390_EPOCHDIFF:
3201 		r = put_user(vcpu->arch.sie_block->epoch,
3202 			     (u64 __user *)reg->addr);
3203 		break;
3204 	case KVM_REG_S390_CPU_TIMER:
3205 		r = put_user(kvm_s390_get_cpu_timer(vcpu),
3206 			     (u64 __user *)reg->addr);
3207 		break;
3208 	case KVM_REG_S390_CLOCK_COMP:
3209 		r = put_user(vcpu->arch.sie_block->ckc,
3210 			     (u64 __user *)reg->addr);
3211 		break;
3212 	case KVM_REG_S390_PFTOKEN:
3213 		r = put_user(vcpu->arch.pfault_token,
3214 			     (u64 __user *)reg->addr);
3215 		break;
3216 	case KVM_REG_S390_PFCOMPARE:
3217 		r = put_user(vcpu->arch.pfault_compare,
3218 			     (u64 __user *)reg->addr);
3219 		break;
3220 	case KVM_REG_S390_PFSELECT:
3221 		r = put_user(vcpu->arch.pfault_select,
3222 			     (u64 __user *)reg->addr);
3223 		break;
3224 	case KVM_REG_S390_PP:
3225 		r = put_user(vcpu->arch.sie_block->pp,
3226 			     (u64 __user *)reg->addr);
3227 		break;
3228 	case KVM_REG_S390_GBEA:
3229 		r = put_user(vcpu->arch.sie_block->gbea,
3230 			     (u64 __user *)reg->addr);
3231 		break;
3232 	default:
3233 		break;
3234 	}
3235 
3236 	return r;
3237 }
3238 
3239 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3240 					   struct kvm_one_reg *reg)
3241 {
3242 	int r = -EINVAL;
3243 	__u64 val;
3244 
3245 	switch (reg->id) {
3246 	case KVM_REG_S390_TODPR:
3247 		r = get_user(vcpu->arch.sie_block->todpr,
3248 			     (u32 __user *)reg->addr);
3249 		break;
3250 	case KVM_REG_S390_EPOCHDIFF:
3251 		r = get_user(vcpu->arch.sie_block->epoch,
3252 			     (u64 __user *)reg->addr);
3253 		break;
3254 	case KVM_REG_S390_CPU_TIMER:
3255 		r = get_user(val, (u64 __user *)reg->addr);
3256 		if (!r)
3257 			kvm_s390_set_cpu_timer(vcpu, val);
3258 		break;
3259 	case KVM_REG_S390_CLOCK_COMP:
3260 		r = get_user(vcpu->arch.sie_block->ckc,
3261 			     (u64 __user *)reg->addr);
3262 		break;
3263 	case KVM_REG_S390_PFTOKEN:
3264 		r = get_user(vcpu->arch.pfault_token,
3265 			     (u64 __user *)reg->addr);
3266 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3267 			kvm_clear_async_pf_completion_queue(vcpu);
3268 		break;
3269 	case KVM_REG_S390_PFCOMPARE:
3270 		r = get_user(vcpu->arch.pfault_compare,
3271 			     (u64 __user *)reg->addr);
3272 		break;
3273 	case KVM_REG_S390_PFSELECT:
3274 		r = get_user(vcpu->arch.pfault_select,
3275 			     (u64 __user *)reg->addr);
3276 		break;
3277 	case KVM_REG_S390_PP:
3278 		r = get_user(vcpu->arch.sie_block->pp,
3279 			     (u64 __user *)reg->addr);
3280 		break;
3281 	case KVM_REG_S390_GBEA:
3282 		r = get_user(vcpu->arch.sie_block->gbea,
3283 			     (u64 __user *)reg->addr);
3284 		break;
3285 	default:
3286 		break;
3287 	}
3288 
3289 	return r;
3290 }
3291 
3292 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3293 {
3294 	kvm_s390_vcpu_initial_reset(vcpu);
3295 	return 0;
3296 }
3297 
3298 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3299 {
3300 	vcpu_load(vcpu);
3301 	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3302 	vcpu_put(vcpu);
3303 	return 0;
3304 }
3305 
3306 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3307 {
3308 	vcpu_load(vcpu);
3309 	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3310 	vcpu_put(vcpu);
3311 	return 0;
3312 }
3313 
3314 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3315 				  struct kvm_sregs *sregs)
3316 {
3317 	vcpu_load(vcpu);
3318 
3319 	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3320 	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3321 
3322 	vcpu_put(vcpu);
3323 	return 0;
3324 }
3325 
3326 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3327 				  struct kvm_sregs *sregs)
3328 {
3329 	vcpu_load(vcpu);
3330 
3331 	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3332 	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3333 
3334 	vcpu_put(vcpu);
3335 	return 0;
3336 }
3337 
3338 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3339 {
3340 	int ret = 0;
3341 
3342 	vcpu_load(vcpu);
3343 
3344 	if (test_fp_ctl(fpu->fpc)) {
3345 		ret = -EINVAL;
3346 		goto out;
3347 	}
3348 	vcpu->run->s.regs.fpc = fpu->fpc;
3349 	if (MACHINE_HAS_VX)
3350 		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3351 				 (freg_t *) fpu->fprs);
3352 	else
3353 		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3354 
3355 out:
3356 	vcpu_put(vcpu);
3357 	return ret;
3358 }
3359 
3360 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3361 {
3362 	vcpu_load(vcpu);
3363 
3364 	/* make sure we have the latest values */
3365 	save_fpu_regs();
3366 	if (MACHINE_HAS_VX)
3367 		convert_vx_to_fp((freg_t *) fpu->fprs,
3368 				 (__vector128 *) vcpu->run->s.regs.vrs);
3369 	else
3370 		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3371 	fpu->fpc = vcpu->run->s.regs.fpc;
3372 
3373 	vcpu_put(vcpu);
3374 	return 0;
3375 }
3376 
3377 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3378 {
3379 	int rc = 0;
3380 
3381 	if (!is_vcpu_stopped(vcpu))
3382 		rc = -EBUSY;
3383 	else {
3384 		vcpu->run->psw_mask = psw.mask;
3385 		vcpu->run->psw_addr = psw.addr;
3386 	}
3387 	return rc;
3388 }
3389 
3390 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3391 				  struct kvm_translation *tr)
3392 {
3393 	return -EINVAL; /* not implemented yet */
3394 }
3395 
3396 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3397 			      KVM_GUESTDBG_USE_HW_BP | \
3398 			      KVM_GUESTDBG_ENABLE)
3399 
3400 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3401 					struct kvm_guest_debug *dbg)
3402 {
3403 	int rc = 0;
3404 
3405 	vcpu_load(vcpu);
3406 
3407 	vcpu->guest_debug = 0;
3408 	kvm_s390_clear_bp_data(vcpu);
3409 
3410 	if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3411 		rc = -EINVAL;
3412 		goto out;
3413 	}
3414 	if (!sclp.has_gpere) {
3415 		rc = -EINVAL;
3416 		goto out;
3417 	}
3418 
3419 	if (dbg->control & KVM_GUESTDBG_ENABLE) {
3420 		vcpu->guest_debug = dbg->control;
3421 		/* enforce guest PER */
3422 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3423 
3424 		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3425 			rc = kvm_s390_import_bp_data(vcpu, dbg);
3426 	} else {
3427 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3428 		vcpu->arch.guestdbg.last_bp = 0;
3429 	}
3430 
3431 	if (rc) {
3432 		vcpu->guest_debug = 0;
3433 		kvm_s390_clear_bp_data(vcpu);
3434 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3435 	}
3436 
3437 out:
3438 	vcpu_put(vcpu);
3439 	return rc;
3440 }
3441 
3442 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3443 				    struct kvm_mp_state *mp_state)
3444 {
3445 	int ret;
3446 
3447 	vcpu_load(vcpu);
3448 
3449 	/* CHECK_STOP and LOAD are not supported yet */
3450 	ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3451 				      KVM_MP_STATE_OPERATING;
3452 
3453 	vcpu_put(vcpu);
3454 	return ret;
3455 }
3456 
3457 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3458 				    struct kvm_mp_state *mp_state)
3459 {
3460 	int rc = 0;
3461 
3462 	vcpu_load(vcpu);
3463 
3464 	/* user space knows about this interface - let it control the state */
3465 	vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3466 
3467 	switch (mp_state->mp_state) {
3468 	case KVM_MP_STATE_STOPPED:
3469 		kvm_s390_vcpu_stop(vcpu);
3470 		break;
3471 	case KVM_MP_STATE_OPERATING:
3472 		kvm_s390_vcpu_start(vcpu);
3473 		break;
3474 	case KVM_MP_STATE_LOAD:
3475 	case KVM_MP_STATE_CHECK_STOP:
3476 		/* fall through - CHECK_STOP and LOAD are not supported yet */
3477 	default:
3478 		rc = -ENXIO;
3479 	}
3480 
3481 	vcpu_put(vcpu);
3482 	return rc;
3483 }
3484 
3485 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3486 {
3487 	return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3488 }
3489 
3490 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3491 {
3492 retry:
3493 	kvm_s390_vcpu_request_handled(vcpu);
3494 	if (!kvm_request_pending(vcpu))
3495 		return 0;
3496 	/*
3497 	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
3498 	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3499 	 * This ensures that the ipte instruction for this request has
3500 	 * already finished. We might race against a second unmapper that
3501 	 * wants to set the blocking bit. Lets just retry the request loop.
3502 	 */
3503 	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3504 		int rc;
3505 		rc = gmap_mprotect_notify(vcpu->arch.gmap,
3506 					  kvm_s390_get_prefix(vcpu),
3507 					  PAGE_SIZE * 2, PROT_WRITE);
3508 		if (rc) {
3509 			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3510 			return rc;
3511 		}
3512 		goto retry;
3513 	}
3514 
3515 	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3516 		vcpu->arch.sie_block->ihcpu = 0xffff;
3517 		goto retry;
3518 	}
3519 
3520 	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3521 		if (!ibs_enabled(vcpu)) {
3522 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3523 			kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3524 		}
3525 		goto retry;
3526 	}
3527 
3528 	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3529 		if (ibs_enabled(vcpu)) {
3530 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3531 			kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3532 		}
3533 		goto retry;
3534 	}
3535 
3536 	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3537 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3538 		goto retry;
3539 	}
3540 
3541 	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3542 		/*
3543 		 * Disable CMM virtualization; we will emulate the ESSA
3544 		 * instruction manually, in order to provide additional
3545 		 * functionalities needed for live migration.
3546 		 */
3547 		vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3548 		goto retry;
3549 	}
3550 
3551 	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3552 		/*
3553 		 * Re-enable CMM virtualization if CMMA is available and
3554 		 * CMM has been used.
3555 		 */
3556 		if ((vcpu->kvm->arch.use_cmma) &&
3557 		    (vcpu->kvm->mm->context.uses_cmm))
3558 			vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3559 		goto retry;
3560 	}
3561 
3562 	/* nothing to do, just clear the request */
3563 	kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3564 	/* we left the vsie handler, nothing to do, just clear the request */
3565 	kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
3566 
3567 	return 0;
3568 }
3569 
3570 void kvm_s390_set_tod_clock(struct kvm *kvm,
3571 			    const struct kvm_s390_vm_tod_clock *gtod)
3572 {
3573 	struct kvm_vcpu *vcpu;
3574 	struct kvm_s390_tod_clock_ext htod;
3575 	int i;
3576 
3577 	mutex_lock(&kvm->lock);
3578 	preempt_disable();
3579 
3580 	get_tod_clock_ext((char *)&htod);
3581 
3582 	kvm->arch.epoch = gtod->tod - htod.tod;
3583 	kvm->arch.epdx = 0;
3584 	if (test_kvm_facility(kvm, 139)) {
3585 		kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
3586 		if (kvm->arch.epoch > gtod->tod)
3587 			kvm->arch.epdx -= 1;
3588 	}
3589 
3590 	kvm_s390_vcpu_block_all(kvm);
3591 	kvm_for_each_vcpu(i, vcpu, kvm) {
3592 		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3593 		vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
3594 	}
3595 
3596 	kvm_s390_vcpu_unblock_all(kvm);
3597 	preempt_enable();
3598 	mutex_unlock(&kvm->lock);
3599 }
3600 
3601 /**
3602  * kvm_arch_fault_in_page - fault-in guest page if necessary
3603  * @vcpu: The corresponding virtual cpu
3604  * @gpa: Guest physical address
3605  * @writable: Whether the page should be writable or not
3606  *
3607  * Make sure that a guest page has been faulted-in on the host.
3608  *
3609  * Return: Zero on success, negative error code otherwise.
3610  */
3611 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3612 {
3613 	return gmap_fault(vcpu->arch.gmap, gpa,
3614 			  writable ? FAULT_FLAG_WRITE : 0);
3615 }
3616 
3617 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3618 				      unsigned long token)
3619 {
3620 	struct kvm_s390_interrupt inti;
3621 	struct kvm_s390_irq irq;
3622 
3623 	if (start_token) {
3624 		irq.u.ext.ext_params2 = token;
3625 		irq.type = KVM_S390_INT_PFAULT_INIT;
3626 		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3627 	} else {
3628 		inti.type = KVM_S390_INT_PFAULT_DONE;
3629 		inti.parm64 = token;
3630 		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3631 	}
3632 }
3633 
3634 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3635 				     struct kvm_async_pf *work)
3636 {
3637 	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3638 	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3639 }
3640 
3641 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3642 				 struct kvm_async_pf *work)
3643 {
3644 	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3645 	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3646 }
3647 
3648 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3649 			       struct kvm_async_pf *work)
3650 {
3651 	/* s390 will always inject the page directly */
3652 }
3653 
3654 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3655 {
3656 	/*
3657 	 * s390 will always inject the page directly,
3658 	 * but we still want check_async_completion to cleanup
3659 	 */
3660 	return true;
3661 }
3662 
3663 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3664 {
3665 	hva_t hva;
3666 	struct kvm_arch_async_pf arch;
3667 	int rc;
3668 
3669 	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3670 		return 0;
3671 	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3672 	    vcpu->arch.pfault_compare)
3673 		return 0;
3674 	if (psw_extint_disabled(vcpu))
3675 		return 0;
3676 	if (kvm_s390_vcpu_has_irq(vcpu, 0))
3677 		return 0;
3678 	if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
3679 		return 0;
3680 	if (!vcpu->arch.gmap->pfault_enabled)
3681 		return 0;
3682 
3683 	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3684 	hva += current->thread.gmap_addr & ~PAGE_MASK;
3685 	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3686 		return 0;
3687 
3688 	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3689 	return rc;
3690 }
3691 
3692 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3693 {
3694 	int rc, cpuflags;
3695 
3696 	/*
3697 	 * On s390 notifications for arriving pages will be delivered directly
3698 	 * to the guest but the house keeping for completed pfaults is
3699 	 * handled outside the worker.
3700 	 */
3701 	kvm_check_async_pf_completion(vcpu);
3702 
3703 	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3704 	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3705 
3706 	if (need_resched())
3707 		schedule();
3708 
3709 	if (test_cpu_flag(CIF_MCCK_PENDING))
3710 		s390_handle_mcck();
3711 
3712 	if (!kvm_is_ucontrol(vcpu->kvm)) {
3713 		rc = kvm_s390_deliver_pending_interrupts(vcpu);
3714 		if (rc)
3715 			return rc;
3716 	}
3717 
3718 	rc = kvm_s390_handle_requests(vcpu);
3719 	if (rc)
3720 		return rc;
3721 
3722 	if (guestdbg_enabled(vcpu)) {
3723 		kvm_s390_backup_guest_per_regs(vcpu);
3724 		kvm_s390_patch_guest_per_regs(vcpu);
3725 	}
3726 
3727 	clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.gisa_int.kicked_mask);
3728 
3729 	vcpu->arch.sie_block->icptcode = 0;
3730 	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3731 	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3732 	trace_kvm_s390_sie_enter(vcpu, cpuflags);
3733 
3734 	return 0;
3735 }
3736 
3737 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3738 {
3739 	struct kvm_s390_pgm_info pgm_info = {
3740 		.code = PGM_ADDRESSING,
3741 	};
3742 	u8 opcode, ilen;
3743 	int rc;
3744 
3745 	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3746 	trace_kvm_s390_sie_fault(vcpu);
3747 
3748 	/*
3749 	 * We want to inject an addressing exception, which is defined as a
3750 	 * suppressing or terminating exception. However, since we came here
3751 	 * by a DAT access exception, the PSW still points to the faulting
3752 	 * instruction since DAT exceptions are nullifying. So we've got
3753 	 * to look up the current opcode to get the length of the instruction
3754 	 * to be able to forward the PSW.
3755 	 */
3756 	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3757 	ilen = insn_length(opcode);
3758 	if (rc < 0) {
3759 		return rc;
3760 	} else if (rc) {
3761 		/* Instruction-Fetching Exceptions - we can't detect the ilen.
3762 		 * Forward by arbitrary ilc, injection will take care of
3763 		 * nullification if necessary.
3764 		 */
3765 		pgm_info = vcpu->arch.pgm;
3766 		ilen = 4;
3767 	}
3768 	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3769 	kvm_s390_forward_psw(vcpu, ilen);
3770 	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3771 }
3772 
3773 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3774 {
3775 	struct mcck_volatile_info *mcck_info;
3776 	struct sie_page *sie_page;
3777 
3778 	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3779 		   vcpu->arch.sie_block->icptcode);
3780 	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3781 
3782 	if (guestdbg_enabled(vcpu))
3783 		kvm_s390_restore_guest_per_regs(vcpu);
3784 
3785 	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3786 	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3787 
3788 	if (exit_reason == -EINTR) {
3789 		VCPU_EVENT(vcpu, 3, "%s", "machine check");
3790 		sie_page = container_of(vcpu->arch.sie_block,
3791 					struct sie_page, sie_block);
3792 		mcck_info = &sie_page->mcck_info;
3793 		kvm_s390_reinject_machine_check(vcpu, mcck_info);
3794 		return 0;
3795 	}
3796 
3797 	if (vcpu->arch.sie_block->icptcode > 0) {
3798 		int rc = kvm_handle_sie_intercept(vcpu);
3799 
3800 		if (rc != -EOPNOTSUPP)
3801 			return rc;
3802 		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3803 		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3804 		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3805 		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3806 		return -EREMOTE;
3807 	} else if (exit_reason != -EFAULT) {
3808 		vcpu->stat.exit_null++;
3809 		return 0;
3810 	} else if (kvm_is_ucontrol(vcpu->kvm)) {
3811 		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3812 		vcpu->run->s390_ucontrol.trans_exc_code =
3813 						current->thread.gmap_addr;
3814 		vcpu->run->s390_ucontrol.pgm_code = 0x10;
3815 		return -EREMOTE;
3816 	} else if (current->thread.gmap_pfault) {
3817 		trace_kvm_s390_major_guest_pfault(vcpu);
3818 		current->thread.gmap_pfault = 0;
3819 		if (kvm_arch_setup_async_pf(vcpu))
3820 			return 0;
3821 		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3822 	}
3823 	return vcpu_post_run_fault_in_sie(vcpu);
3824 }
3825 
3826 static int __vcpu_run(struct kvm_vcpu *vcpu)
3827 {
3828 	int rc, exit_reason;
3829 
3830 	/*
3831 	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
3832 	 * ning the guest), so that memslots (and other stuff) are protected
3833 	 */
3834 	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3835 
3836 	do {
3837 		rc = vcpu_pre_run(vcpu);
3838 		if (rc)
3839 			break;
3840 
3841 		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3842 		/*
3843 		 * As PF_VCPU will be used in fault handler, between
3844 		 * guest_enter and guest_exit should be no uaccess.
3845 		 */
3846 		local_irq_disable();
3847 		guest_enter_irqoff();
3848 		__disable_cpu_timer_accounting(vcpu);
3849 		local_irq_enable();
3850 		exit_reason = sie64a(vcpu->arch.sie_block,
3851 				     vcpu->run->s.regs.gprs);
3852 		local_irq_disable();
3853 		__enable_cpu_timer_accounting(vcpu);
3854 		guest_exit_irqoff();
3855 		local_irq_enable();
3856 		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3857 
3858 		rc = vcpu_post_run(vcpu, exit_reason);
3859 	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3860 
3861 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3862 	return rc;
3863 }
3864 
3865 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3866 {
3867 	struct runtime_instr_cb *riccb;
3868 	struct gs_cb *gscb;
3869 
3870 	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3871 	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3872 	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3873 	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3874 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3875 		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3876 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3877 		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3878 		/* some control register changes require a tlb flush */
3879 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3880 	}
3881 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3882 		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3883 		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3884 		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3885 		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3886 		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3887 	}
3888 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3889 		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3890 		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3891 		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3892 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3893 			kvm_clear_async_pf_completion_queue(vcpu);
3894 	}
3895 	/*
3896 	 * If userspace sets the riccb (e.g. after migration) to a valid state,
3897 	 * we should enable RI here instead of doing the lazy enablement.
3898 	 */
3899 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3900 	    test_kvm_facility(vcpu->kvm, 64) &&
3901 	    riccb->v &&
3902 	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3903 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3904 		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3905 	}
3906 	/*
3907 	 * If userspace sets the gscb (e.g. after migration) to non-zero,
3908 	 * we should enable GS here instead of doing the lazy enablement.
3909 	 */
3910 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3911 	    test_kvm_facility(vcpu->kvm, 133) &&
3912 	    gscb->gssm &&
3913 	    !vcpu->arch.gs_enabled) {
3914 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3915 		vcpu->arch.sie_block->ecb |= ECB_GS;
3916 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3917 		vcpu->arch.gs_enabled = 1;
3918 	}
3919 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
3920 	    test_kvm_facility(vcpu->kvm, 82)) {
3921 		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3922 		vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
3923 	}
3924 	save_access_regs(vcpu->arch.host_acrs);
3925 	restore_access_regs(vcpu->run->s.regs.acrs);
3926 	/* save host (userspace) fprs/vrs */
3927 	save_fpu_regs();
3928 	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3929 	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
3930 	if (MACHINE_HAS_VX)
3931 		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3932 	else
3933 		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3934 	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3935 	if (test_fp_ctl(current->thread.fpu.fpc))
3936 		/* User space provided an invalid FPC, let's clear it */
3937 		current->thread.fpu.fpc = 0;
3938 	if (MACHINE_HAS_GS) {
3939 		preempt_disable();
3940 		__ctl_set_bit(2, 4);
3941 		if (current->thread.gs_cb) {
3942 			vcpu->arch.host_gscb = current->thread.gs_cb;
3943 			save_gs_cb(vcpu->arch.host_gscb);
3944 		}
3945 		if (vcpu->arch.gs_enabled) {
3946 			current->thread.gs_cb = (struct gs_cb *)
3947 						&vcpu->run->s.regs.gscb;
3948 			restore_gs_cb(current->thread.gs_cb);
3949 		}
3950 		preempt_enable();
3951 	}
3952 	/* SIE will load etoken directly from SDNX and therefore kvm_run */
3953 
3954 	kvm_run->kvm_dirty_regs = 0;
3955 }
3956 
3957 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3958 {
3959 	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3960 	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3961 	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3962 	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3963 	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3964 	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3965 	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3966 	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3967 	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3968 	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3969 	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3970 	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3971 	kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
3972 	save_access_regs(vcpu->run->s.regs.acrs);
3973 	restore_access_regs(vcpu->arch.host_acrs);
3974 	/* Save guest register state */
3975 	save_fpu_regs();
3976 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3977 	/* Restore will be done lazily at return */
3978 	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3979 	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3980 	if (MACHINE_HAS_GS) {
3981 		__ctl_set_bit(2, 4);
3982 		if (vcpu->arch.gs_enabled)
3983 			save_gs_cb(current->thread.gs_cb);
3984 		preempt_disable();
3985 		current->thread.gs_cb = vcpu->arch.host_gscb;
3986 		restore_gs_cb(vcpu->arch.host_gscb);
3987 		preempt_enable();
3988 		if (!vcpu->arch.host_gscb)
3989 			__ctl_clear_bit(2, 4);
3990 		vcpu->arch.host_gscb = NULL;
3991 	}
3992 	/* SIE will save etoken directly into SDNX and therefore kvm_run */
3993 }
3994 
3995 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3996 {
3997 	int rc;
3998 
3999 	if (kvm_run->immediate_exit)
4000 		return -EINTR;
4001 
4002 	vcpu_load(vcpu);
4003 
4004 	if (guestdbg_exit_pending(vcpu)) {
4005 		kvm_s390_prepare_debug_exit(vcpu);
4006 		rc = 0;
4007 		goto out;
4008 	}
4009 
4010 	kvm_sigset_activate(vcpu);
4011 
4012 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
4013 		kvm_s390_vcpu_start(vcpu);
4014 	} else if (is_vcpu_stopped(vcpu)) {
4015 		pr_err_ratelimited("can't run stopped vcpu %d\n",
4016 				   vcpu->vcpu_id);
4017 		rc = -EINVAL;
4018 		goto out;
4019 	}
4020 
4021 	sync_regs(vcpu, kvm_run);
4022 	enable_cpu_timer_accounting(vcpu);
4023 
4024 	might_fault();
4025 	rc = __vcpu_run(vcpu);
4026 
4027 	if (signal_pending(current) && !rc) {
4028 		kvm_run->exit_reason = KVM_EXIT_INTR;
4029 		rc = -EINTR;
4030 	}
4031 
4032 	if (guestdbg_exit_pending(vcpu) && !rc)  {
4033 		kvm_s390_prepare_debug_exit(vcpu);
4034 		rc = 0;
4035 	}
4036 
4037 	if (rc == -EREMOTE) {
4038 		/* userspace support is needed, kvm_run has been prepared */
4039 		rc = 0;
4040 	}
4041 
4042 	disable_cpu_timer_accounting(vcpu);
4043 	store_regs(vcpu, kvm_run);
4044 
4045 	kvm_sigset_deactivate(vcpu);
4046 
4047 	vcpu->stat.exit_userspace++;
4048 out:
4049 	vcpu_put(vcpu);
4050 	return rc;
4051 }
4052 
4053 /*
4054  * store status at address
4055  * we use have two special cases:
4056  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
4057  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
4058  */
4059 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4060 {
4061 	unsigned char archmode = 1;
4062 	freg_t fprs[NUM_FPRS];
4063 	unsigned int px;
4064 	u64 clkcomp, cputm;
4065 	int rc;
4066 
4067 	px = kvm_s390_get_prefix(vcpu);
4068 	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
4069 		if (write_guest_abs(vcpu, 163, &archmode, 1))
4070 			return -EFAULT;
4071 		gpa = 0;
4072 	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
4073 		if (write_guest_real(vcpu, 163, &archmode, 1))
4074 			return -EFAULT;
4075 		gpa = px;
4076 	} else
4077 		gpa -= __LC_FPREGS_SAVE_AREA;
4078 
4079 	/* manually convert vector registers if necessary */
4080 	if (MACHINE_HAS_VX) {
4081 		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
4082 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4083 				     fprs, 128);
4084 	} else {
4085 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4086 				     vcpu->run->s.regs.fprs, 128);
4087 	}
4088 	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
4089 			      vcpu->run->s.regs.gprs, 128);
4090 	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
4091 			      &vcpu->arch.sie_block->gpsw, 16);
4092 	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
4093 			      &px, 4);
4094 	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
4095 			      &vcpu->run->s.regs.fpc, 4);
4096 	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
4097 			      &vcpu->arch.sie_block->todpr, 4);
4098 	cputm = kvm_s390_get_cpu_timer(vcpu);
4099 	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
4100 			      &cputm, 8);
4101 	clkcomp = vcpu->arch.sie_block->ckc >> 8;
4102 	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
4103 			      &clkcomp, 8);
4104 	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
4105 			      &vcpu->run->s.regs.acrs, 64);
4106 	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
4107 			      &vcpu->arch.sie_block->gcr, 128);
4108 	return rc ? -EFAULT : 0;
4109 }
4110 
4111 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
4112 {
4113 	/*
4114 	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
4115 	 * switch in the run ioctl. Let's update our copies before we save
4116 	 * it into the save area
4117 	 */
4118 	save_fpu_regs();
4119 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4120 	save_access_regs(vcpu->run->s.regs.acrs);
4121 
4122 	return kvm_s390_store_status_unloaded(vcpu, addr);
4123 }
4124 
4125 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4126 {
4127 	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
4128 	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
4129 }
4130 
4131 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
4132 {
4133 	unsigned int i;
4134 	struct kvm_vcpu *vcpu;
4135 
4136 	kvm_for_each_vcpu(i, vcpu, kvm) {
4137 		__disable_ibs_on_vcpu(vcpu);
4138 	}
4139 }
4140 
4141 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4142 {
4143 	if (!sclp.has_ibs)
4144 		return;
4145 	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
4146 	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
4147 }
4148 
4149 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
4150 {
4151 	int i, online_vcpus, started_vcpus = 0;
4152 
4153 	if (!is_vcpu_stopped(vcpu))
4154 		return;
4155 
4156 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
4157 	/* Only one cpu at a time may enter/leave the STOPPED state. */
4158 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
4159 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4160 
4161 	for (i = 0; i < online_vcpus; i++) {
4162 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
4163 			started_vcpus++;
4164 	}
4165 
4166 	if (started_vcpus == 0) {
4167 		/* we're the only active VCPU -> speed it up */
4168 		__enable_ibs_on_vcpu(vcpu);
4169 	} else if (started_vcpus == 1) {
4170 		/*
4171 		 * As we are starting a second VCPU, we have to disable
4172 		 * the IBS facility on all VCPUs to remove potentially
4173 		 * oustanding ENABLE requests.
4174 		 */
4175 		__disable_ibs_on_all_vcpus(vcpu->kvm);
4176 	}
4177 
4178 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
4179 	/*
4180 	 * Another VCPU might have used IBS while we were offline.
4181 	 * Let's play safe and flush the VCPU at startup.
4182 	 */
4183 	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4184 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4185 	return;
4186 }
4187 
4188 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
4189 {
4190 	int i, online_vcpus, started_vcpus = 0;
4191 	struct kvm_vcpu *started_vcpu = NULL;
4192 
4193 	if (is_vcpu_stopped(vcpu))
4194 		return;
4195 
4196 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
4197 	/* Only one cpu at a time may enter/leave the STOPPED state. */
4198 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
4199 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4200 
4201 	/* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
4202 	kvm_s390_clear_stop_irq(vcpu);
4203 
4204 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
4205 	__disable_ibs_on_vcpu(vcpu);
4206 
4207 	for (i = 0; i < online_vcpus; i++) {
4208 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
4209 			started_vcpus++;
4210 			started_vcpu = vcpu->kvm->vcpus[i];
4211 		}
4212 	}
4213 
4214 	if (started_vcpus == 1) {
4215 		/*
4216 		 * As we only have one VCPU left, we want to enable the
4217 		 * IBS facility for that VCPU to speed it up.
4218 		 */
4219 		__enable_ibs_on_vcpu(started_vcpu);
4220 	}
4221 
4222 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4223 	return;
4224 }
4225 
4226 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4227 				     struct kvm_enable_cap *cap)
4228 {
4229 	int r;
4230 
4231 	if (cap->flags)
4232 		return -EINVAL;
4233 
4234 	switch (cap->cap) {
4235 	case KVM_CAP_S390_CSS_SUPPORT:
4236 		if (!vcpu->kvm->arch.css_support) {
4237 			vcpu->kvm->arch.css_support = 1;
4238 			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
4239 			trace_kvm_s390_enable_css(vcpu->kvm);
4240 		}
4241 		r = 0;
4242 		break;
4243 	default:
4244 		r = -EINVAL;
4245 		break;
4246 	}
4247 	return r;
4248 }
4249 
4250 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
4251 				  struct kvm_s390_mem_op *mop)
4252 {
4253 	void __user *uaddr = (void __user *)mop->buf;
4254 	void *tmpbuf = NULL;
4255 	int r, srcu_idx;
4256 	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
4257 				    | KVM_S390_MEMOP_F_CHECK_ONLY;
4258 
4259 	if (mop->flags & ~supported_flags)
4260 		return -EINVAL;
4261 
4262 	if (mop->size > MEM_OP_MAX_SIZE)
4263 		return -E2BIG;
4264 
4265 	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
4266 		tmpbuf = vmalloc(mop->size);
4267 		if (!tmpbuf)
4268 			return -ENOMEM;
4269 	}
4270 
4271 	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4272 
4273 	switch (mop->op) {
4274 	case KVM_S390_MEMOP_LOGICAL_READ:
4275 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4276 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4277 					    mop->size, GACC_FETCH);
4278 			break;
4279 		}
4280 		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4281 		if (r == 0) {
4282 			if (copy_to_user(uaddr, tmpbuf, mop->size))
4283 				r = -EFAULT;
4284 		}
4285 		break;
4286 	case KVM_S390_MEMOP_LOGICAL_WRITE:
4287 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4288 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4289 					    mop->size, GACC_STORE);
4290 			break;
4291 		}
4292 		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4293 			r = -EFAULT;
4294 			break;
4295 		}
4296 		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4297 		break;
4298 	default:
4299 		r = -EINVAL;
4300 	}
4301 
4302 	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
4303 
4304 	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4305 		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
4306 
4307 	vfree(tmpbuf);
4308 	return r;
4309 }
4310 
4311 long kvm_arch_vcpu_async_ioctl(struct file *filp,
4312 			       unsigned int ioctl, unsigned long arg)
4313 {
4314 	struct kvm_vcpu *vcpu = filp->private_data;
4315 	void __user *argp = (void __user *)arg;
4316 
4317 	switch (ioctl) {
4318 	case KVM_S390_IRQ: {
4319 		struct kvm_s390_irq s390irq;
4320 
4321 		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4322 			return -EFAULT;
4323 		return kvm_s390_inject_vcpu(vcpu, &s390irq);
4324 	}
4325 	case KVM_S390_INTERRUPT: {
4326 		struct kvm_s390_interrupt s390int;
4327 		struct kvm_s390_irq s390irq;
4328 
4329 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
4330 			return -EFAULT;
4331 		if (s390int_to_s390irq(&s390int, &s390irq))
4332 			return -EINVAL;
4333 		return kvm_s390_inject_vcpu(vcpu, &s390irq);
4334 	}
4335 	}
4336 	return -ENOIOCTLCMD;
4337 }
4338 
4339 long kvm_arch_vcpu_ioctl(struct file *filp,
4340 			 unsigned int ioctl, unsigned long arg)
4341 {
4342 	struct kvm_vcpu *vcpu = filp->private_data;
4343 	void __user *argp = (void __user *)arg;
4344 	int idx;
4345 	long r;
4346 
4347 	vcpu_load(vcpu);
4348 
4349 	switch (ioctl) {
4350 	case KVM_S390_STORE_STATUS:
4351 		idx = srcu_read_lock(&vcpu->kvm->srcu);
4352 		r = kvm_s390_vcpu_store_status(vcpu, arg);
4353 		srcu_read_unlock(&vcpu->kvm->srcu, idx);
4354 		break;
4355 	case KVM_S390_SET_INITIAL_PSW: {
4356 		psw_t psw;
4357 
4358 		r = -EFAULT;
4359 		if (copy_from_user(&psw, argp, sizeof(psw)))
4360 			break;
4361 		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4362 		break;
4363 	}
4364 	case KVM_S390_INITIAL_RESET:
4365 		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4366 		break;
4367 	case KVM_SET_ONE_REG:
4368 	case KVM_GET_ONE_REG: {
4369 		struct kvm_one_reg reg;
4370 		r = -EFAULT;
4371 		if (copy_from_user(&reg, argp, sizeof(reg)))
4372 			break;
4373 		if (ioctl == KVM_SET_ONE_REG)
4374 			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4375 		else
4376 			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4377 		break;
4378 	}
4379 #ifdef CONFIG_KVM_S390_UCONTROL
4380 	case KVM_S390_UCAS_MAP: {
4381 		struct kvm_s390_ucas_mapping ucasmap;
4382 
4383 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4384 			r = -EFAULT;
4385 			break;
4386 		}
4387 
4388 		if (!kvm_is_ucontrol(vcpu->kvm)) {
4389 			r = -EINVAL;
4390 			break;
4391 		}
4392 
4393 		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4394 				     ucasmap.vcpu_addr, ucasmap.length);
4395 		break;
4396 	}
4397 	case KVM_S390_UCAS_UNMAP: {
4398 		struct kvm_s390_ucas_mapping ucasmap;
4399 
4400 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4401 			r = -EFAULT;
4402 			break;
4403 		}
4404 
4405 		if (!kvm_is_ucontrol(vcpu->kvm)) {
4406 			r = -EINVAL;
4407 			break;
4408 		}
4409 
4410 		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
4411 			ucasmap.length);
4412 		break;
4413 	}
4414 #endif
4415 	case KVM_S390_VCPU_FAULT: {
4416 		r = gmap_fault(vcpu->arch.gmap, arg, 0);
4417 		break;
4418 	}
4419 	case KVM_ENABLE_CAP:
4420 	{
4421 		struct kvm_enable_cap cap;
4422 		r = -EFAULT;
4423 		if (copy_from_user(&cap, argp, sizeof(cap)))
4424 			break;
4425 		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4426 		break;
4427 	}
4428 	case KVM_S390_MEM_OP: {
4429 		struct kvm_s390_mem_op mem_op;
4430 
4431 		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4432 			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
4433 		else
4434 			r = -EFAULT;
4435 		break;
4436 	}
4437 	case KVM_S390_SET_IRQ_STATE: {
4438 		struct kvm_s390_irq_state irq_state;
4439 
4440 		r = -EFAULT;
4441 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4442 			break;
4443 		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4444 		    irq_state.len == 0 ||
4445 		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4446 			r = -EINVAL;
4447 			break;
4448 		}
4449 		/* do not use irq_state.flags, it will break old QEMUs */
4450 		r = kvm_s390_set_irq_state(vcpu,
4451 					   (void __user *) irq_state.buf,
4452 					   irq_state.len);
4453 		break;
4454 	}
4455 	case KVM_S390_GET_IRQ_STATE: {
4456 		struct kvm_s390_irq_state irq_state;
4457 
4458 		r = -EFAULT;
4459 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4460 			break;
4461 		if (irq_state.len == 0) {
4462 			r = -EINVAL;
4463 			break;
4464 		}
4465 		/* do not use irq_state.flags, it will break old QEMUs */
4466 		r = kvm_s390_get_irq_state(vcpu,
4467 					   (__u8 __user *)  irq_state.buf,
4468 					   irq_state.len);
4469 		break;
4470 	}
4471 	default:
4472 		r = -ENOTTY;
4473 	}
4474 
4475 	vcpu_put(vcpu);
4476 	return r;
4477 }
4478 
4479 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
4480 {
4481 #ifdef CONFIG_KVM_S390_UCONTROL
4482 	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
4483 		 && (kvm_is_ucontrol(vcpu->kvm))) {
4484 		vmf->page = virt_to_page(vcpu->arch.sie_block);
4485 		get_page(vmf->page);
4486 		return 0;
4487 	}
4488 #endif
4489 	return VM_FAULT_SIGBUS;
4490 }
4491 
4492 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
4493 			    unsigned long npages)
4494 {
4495 	return 0;
4496 }
4497 
4498 /* Section: memory related */
4499 int kvm_arch_prepare_memory_region(struct kvm *kvm,
4500 				   struct kvm_memory_slot *memslot,
4501 				   const struct kvm_userspace_memory_region *mem,
4502 				   enum kvm_mr_change change)
4503 {
4504 	/* A few sanity checks. We can have memory slots which have to be
4505 	   located/ended at a segment boundary (1MB). The memory in userland is
4506 	   ok to be fragmented into various different vmas. It is okay to mmap()
4507 	   and munmap() stuff in this slot after doing this call at any time */
4508 
4509 	if (mem->userspace_addr & 0xffffful)
4510 		return -EINVAL;
4511 
4512 	if (mem->memory_size & 0xffffful)
4513 		return -EINVAL;
4514 
4515 	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
4516 		return -EINVAL;
4517 
4518 	return 0;
4519 }
4520 
4521 void kvm_arch_commit_memory_region(struct kvm *kvm,
4522 				const struct kvm_userspace_memory_region *mem,
4523 				const struct kvm_memory_slot *old,
4524 				const struct kvm_memory_slot *new,
4525 				enum kvm_mr_change change)
4526 {
4527 	int rc;
4528 
4529 	/* If the basics of the memslot do not change, we do not want
4530 	 * to update the gmap. Every update causes several unnecessary
4531 	 * segment translation exceptions. This is usually handled just
4532 	 * fine by the normal fault handler + gmap, but it will also
4533 	 * cause faults on the prefix page of running guest CPUs.
4534 	 */
4535 	if (old->userspace_addr == mem->userspace_addr &&
4536 	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
4537 	    old->npages * PAGE_SIZE == mem->memory_size)
4538 		return;
4539 
4540 	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
4541 		mem->guest_phys_addr, mem->memory_size);
4542 	if (rc)
4543 		pr_warn("failed to commit memory region\n");
4544 	return;
4545 }
4546 
4547 static inline unsigned long nonhyp_mask(int i)
4548 {
4549 	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
4550 
4551 	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
4552 }
4553 
4554 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
4555 {
4556 	vcpu->valid_wakeup = false;
4557 }
4558 
4559 static int __init kvm_s390_init(void)
4560 {
4561 	int i;
4562 
4563 	if (!sclp.has_sief2) {
4564 		pr_info("SIE is not available\n");
4565 		return -ENODEV;
4566 	}
4567 
4568 	if (nested && hpage) {
4569 		pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
4570 		return -EINVAL;
4571 	}
4572 
4573 	for (i = 0; i < 16; i++)
4574 		kvm_s390_fac_base[i] |=
4575 			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
4576 
4577 	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
4578 }
4579 
4580 static void __exit kvm_s390_exit(void)
4581 {
4582 	kvm_exit();
4583 }
4584 
4585 module_init(kvm_s390_init);
4586 module_exit(kvm_s390_exit);
4587 
4588 /*
4589  * Enable autoloading of the kvm module.
4590  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
4591  * since x86 takes a different approach.
4592  */
4593 #include <linux/miscdevice.h>
4594 MODULE_ALIAS_MISCDEV(KVM_MINOR);
4595 MODULE_ALIAS("devname:kvm");
4596