xref: /openbmc/linux/arch/s390/kvm/kvm-s390.c (revision f9834f18)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * hosting IBM Z kernel virtual machines (s390x)
4  *
5  * Copyright IBM Corp. 2008, 2018
6  *
7  *    Author(s): Carsten Otte <cotte@de.ibm.com>
8  *               Christian Borntraeger <borntraeger@de.ibm.com>
9  *               Heiko Carstens <heiko.carstens@de.ibm.com>
10  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
11  *               Jason J. Herne <jjherne@us.ibm.com>
12  */
13 
14 #define KMSG_COMPONENT "kvm-s390"
15 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
16 
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/moduleparam.h>
27 #include <linux/random.h>
28 #include <linux/slab.h>
29 #include <linux/timer.h>
30 #include <linux/vmalloc.h>
31 #include <linux/bitmap.h>
32 #include <linux/sched/signal.h>
33 #include <linux/string.h>
34 
35 #include <asm/asm-offsets.h>
36 #include <asm/lowcore.h>
37 #include <asm/stp.h>
38 #include <asm/pgtable.h>
39 #include <asm/gmap.h>
40 #include <asm/nmi.h>
41 #include <asm/switch_to.h>
42 #include <asm/isc.h>
43 #include <asm/sclp.h>
44 #include <asm/cpacf.h>
45 #include <asm/timex.h>
46 #include <asm/ap.h>
47 #include "kvm-s390.h"
48 #include "gaccess.h"
49 
50 #define CREATE_TRACE_POINTS
51 #include "trace.h"
52 #include "trace-s390.h"
53 
#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
/* NOTE(review): presumably the number of vcpu-local interrupt slots - confirm */
#define LOCAL_IRQS 32
/* Size in bytes of KVM_MAX_VCPUS + LOCAL_IRQS entries of struct kvm_s390_irq */
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))

/*
 * Each macro expands to two comma-separated initializers: the offset of the
 * counter stat.x inside struct kvm_vcpu (resp. struct kvm), followed by the
 * matching KVM_STAT_VCPU (resp. KVM_STAT_VM) kind.
 */
#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
61 
/*
 * Table of named statistics counters. Every entry pairs a name with the
 * offset/kind produced by VCPU_STAT() (per-vcpu counter) or VM_STAT()
 * (per-VM counter); the table is terminated by a { NULL } entry.
 * NOTE(review): presumably consumed by the generic KVM code to create the
 * debugfs files - the consumer is not visible in this file section.
 */
struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "userspace_handled", VCPU_STAT(exit_userspace) },
	{ "exit_null", VCPU_STAT(exit_null) },
	{ "exit_validity", VCPU_STAT(exit_validity) },
	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
	{ "exit_external_request", VCPU_STAT(exit_external_request) },
	{ "exit_io_request", VCPU_STAT(exit_io_request) },
	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
	{ "exit_instruction", VCPU_STAT(exit_instruction) },
	{ "exit_pei", VCPU_STAT(exit_pei) },
	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
	{ "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
	{ "halt_no_poll_steal", VCPU_STAT(halt_no_poll_steal) },
	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
	{ "deliver_ckc", VCPU_STAT(deliver_ckc) },
	{ "deliver_cputm", VCPU_STAT(deliver_cputm) },
	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
	{ "deliver_virtio", VCPU_STAT(deliver_virtio) },
	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
	{ "deliver_program", VCPU_STAT(deliver_program) },
	{ "deliver_io", VCPU_STAT(deliver_io) },
	{ "deliver_machine_check", VCPU_STAT(deliver_machine_check) },
	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
	{ "inject_ckc", VCPU_STAT(inject_ckc) },
	{ "inject_cputm", VCPU_STAT(inject_cputm) },
	{ "inject_external_call", VCPU_STAT(inject_external_call) },
	{ "inject_float_mchk", VM_STAT(inject_float_mchk) },
	{ "inject_emergency_signal", VCPU_STAT(inject_emergency_signal) },
	{ "inject_io", VM_STAT(inject_io) },
	{ "inject_mchk", VCPU_STAT(inject_mchk) },
	{ "inject_pfault_done", VM_STAT(inject_pfault_done) },
	{ "inject_program", VCPU_STAT(inject_program) },
	{ "inject_restart", VCPU_STAT(inject_restart) },
	{ "inject_service_signal", VM_STAT(inject_service_signal) },
	{ "inject_set_prefix", VCPU_STAT(inject_set_prefix) },
	{ "inject_stop_signal", VCPU_STAT(inject_stop_signal) },
	{ "inject_pfault_init", VCPU_STAT(inject_pfault_init) },
	{ "inject_virtio", VM_STAT(inject_virtio) },
	{ "instruction_epsw", VCPU_STAT(instruction_epsw) },
	{ "instruction_gs", VCPU_STAT(instruction_gs) },
	{ "instruction_io_other", VCPU_STAT(instruction_io_other) },
	{ "instruction_lpsw", VCPU_STAT(instruction_lpsw) },
	{ "instruction_lpswe", VCPU_STAT(instruction_lpswe) },
	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
	{ "instruction_ptff", VCPU_STAT(instruction_ptff) },
	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
	{ "instruction_sck", VCPU_STAT(instruction_sck) },
	{ "instruction_sckpf", VCPU_STAT(instruction_sckpf) },
	{ "instruction_spx", VCPU_STAT(instruction_spx) },
	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
	{ "instruction_stap", VCPU_STAT(instruction_stap) },
	{ "instruction_iske", VCPU_STAT(instruction_iske) },
	{ "instruction_ri", VCPU_STAT(instruction_ri) },
	{ "instruction_rrbe", VCPU_STAT(instruction_rrbe) },
	{ "instruction_sske", VCPU_STAT(instruction_sske) },
	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
	{ "instruction_essa", VCPU_STAT(instruction_essa) },
	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
	{ "instruction_tb", VCPU_STAT(instruction_tb) },
	{ "instruction_tpi", VCPU_STAT(instruction_tpi) },
	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
	{ "instruction_tsch", VCPU_STAT(instruction_tsch) },
	{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
	{ "instruction_sie", VCPU_STAT(instruction_sie) },
	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
	{ "instruction_diag_10", VCPU_STAT(diagnose_10) },
	{ "instruction_diag_44", VCPU_STAT(diagnose_44) },
	{ "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
	{ "diag_9c_ignored", VCPU_STAT(diagnose_9c_ignored) },
	{ "instruction_diag_258", VCPU_STAT(diagnose_258) },
	{ "instruction_diag_308", VCPU_STAT(diagnose_308) },
	{ "instruction_diag_500", VCPU_STAT(diagnose_500) },
	{ "instruction_diag_other", VCPU_STAT(diagnose_other) },
	{ NULL }
};
165 
/*
 * Packed 16-byte layout: a one-byte epoch index, a 64-bit TOD value and
 * seven reserved bytes.
 * NOTE(review): presumably the buffer format of an extended TOD clock
 * value - the users of this struct are not visible in this file section.
 */
struct kvm_s390_tod_clock_ext {
	__u8 epoch_idx;
	__u64 tod;
	__u8 reserved[7];
} __packed;
171 
172 /* allow nested virtualization in KVM (if enabled by user space) */
173 static int nested;
174 module_param(nested, int, S_IRUGO);
175 MODULE_PARM_DESC(nested, "Nested virtualization support");
176 
177 /* allow 1m huge page guest backing, if !nested */
178 static int hpage;
179 module_param(hpage, int, 0444);
180 MODULE_PARM_DESC(hpage, "1m huge page backing support");
181 
182 /* maximum percentage of steal time for polling.  >100 is treated like 100 */
183 static u8 halt_poll_max_steal = 10;
184 module_param(halt_poll_max_steal, byte, 0644);
185 MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
186 
/*
 * For now we handle at most 16 double words as this is what the s390 base
 * kernel handles and stores in the prefix page. If we ever need to go beyond
 * this, this requires changes to code, but the external uapi can stay.
 * kvm_s390_fac_size() checks this value against the uapi and lowcore sizes
 * at build time.
 */
#define SIZE_INTERNAL 16

/*
 * Base feature mask that defines default mask for facilities. Consists of the
 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
 * Only the leading words are explicitly initialized from FACILITIES_KVM; the
 * remainder of the SIZE_INTERNAL words starts out as zero.
 */
static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
/*
 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
 * and defines the facilities that can be enabled via a cpu model.
 */
static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
204 
205 static unsigned long kvm_s390_fac_size(void)
206 {
207 	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
208 	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
209 	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
210 		sizeof(S390_lowcore.stfle_fac_list));
211 
212 	return SIZE_INTERNAL;
213 }
214 
/* available cpu features supported by kvm (filled by kvm_s390_cpu_feat_init) */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

/* pte notifiers, registered in kvm_arch_hardware_setup() */
static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
/* debug feature handle, registered as "kvm-trace" in kvm_arch_init() */
debug_info_t *kvm_s390_dbf;
223 
224 /* Section: not file related */
/*
 * Nothing has to be switched on: every s390 is virtualization enabled.
 *
 * Return: always 0.
 */
int kvm_arch_hardware_enable(void)
{
	return 0;
}
230 
/*
 * No per-processor compatibility check is performed here.
 *
 * Return: always 0.
 */
int kvm_arch_check_processor_compat(void)
{
	return 0;
}
235 
/*
 * Forward declaration: installed as gmap_notifier.notifier_call in
 * kvm_arch_hardware_setup(); the definition is not part of this section.
 */
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end);
238 
239 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
240 {
241 	u8 delta_idx = 0;
242 
243 	/*
244 	 * The TOD jumps by delta, we have to compensate this by adding
245 	 * -delta to the epoch.
246 	 */
247 	delta = -delta;
248 
249 	/* sign-extension - we're adding to signed values below */
250 	if ((s64)delta < 0)
251 		delta_idx = -1;
252 
253 	scb->epoch += delta;
254 	if (scb->ecd & ECD_MEF) {
255 		scb->epdx += delta_idx;
256 		if (scb->epoch < delta)
257 			scb->epdx += 1;
258 	}
259 }
260 
261 /*
262  * This callback is executed during stop_machine(). All CPUs are therefore
263  * temporarily stopped. In order not to change guest behavior, we have to
264  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
265  * so a CPU won't be stopped while calculating with the epoch.
266  */
267 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
268 			  void *v)
269 {
270 	struct kvm *kvm;
271 	struct kvm_vcpu *vcpu;
272 	int i;
273 	unsigned long long *delta = v;
274 
275 	list_for_each_entry(kvm, &vm_list, vm_list) {
276 		kvm_for_each_vcpu(i, vcpu, kvm) {
277 			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
278 			if (i == 0) {
279 				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
280 				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
281 			}
282 			if (vcpu->arch.cputm_enabled)
283 				vcpu->arch.cputm_start += *delta;
284 			if (vcpu->arch.vsie_block)
285 				kvm_clock_sync_scb(vcpu->arch.vsie_block,
286 						   *delta);
287 		}
288 	}
289 	return NOTIFY_OK;
290 }
291 
/*
 * Hooks kvm_clock_sync() into s390_epoch_delta_notifier; registered in
 * kvm_arch_hardware_setup() and removed in kvm_arch_hardware_unsetup().
 */
static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};
295 
296 int kvm_arch_hardware_setup(void)
297 {
298 	gmap_notifier.notifier_call = kvm_gmap_notifier;
299 	gmap_register_pte_notifier(&gmap_notifier);
300 	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
301 	gmap_register_pte_notifier(&vsie_gmap_notifier);
302 	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
303 				       &kvm_clock_notifier);
304 	return 0;
305 }
306 
307 void kvm_arch_hardware_unsetup(void)
308 {
309 	gmap_unregister_pte_notifier(&gmap_notifier);
310 	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
311 	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
312 					 &kvm_clock_notifier);
313 }
314 
315 static void allow_cpu_feat(unsigned long nr)
316 {
317 	set_bit_inv(nr, kvm_s390_available_cpu_feat);
318 }
319 
/*
 * Probe one PLO function code with the "test bit" form of the instruction.
 *
 * @nr: function code to probe; it is passed in register 0 with 0x100 ORed in
 *      (NOTE(review): presumably 0x100 is what selects "test bit" - confirm
 *      against the architecture documentation).
 *
 * Return: 1 if the instruction completed with condition code 0, else 0.
 */
static inline int plo_test_bit(unsigned char nr)
{
	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
	int cc;

	asm volatile(
		/* Parameter registers are ignored for "test bit" */
		"	plo	0,0,0,0(0)\n"
		/* extract the condition code into the low bits of cc */
		"	ipm	%0\n"
		"	srl	%0,28\n"
		: "=d" (cc)
		: "d" (r0)
		: "cc");
	return cc == 0;
}
335 
/*
 * Issue the query function (function code 0 in register 0) of a 32-bit
 * RRF-format instruction.
 *
 * @opcode: 16-bit opcode, must be a compile-time constant ("i" constraint)
 * @query:  buffer whose address is passed in register 1
 *          (NOTE(review): presumably the instruction stores its query result
 *          there, which would explain the "memory" clobber - confirm).
 */
static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
{
	register unsigned long r0 asm("0") = 0;	/* query function */
	register unsigned long r1 asm("1") = (unsigned long) query;

	asm volatile(
		/* Parameter regs are ignored */
		"	.insn	rrf,%[opc] << 16,2,4,6,0\n"
		:
		: "d" (r0), "a" (r1), [opc] "i" (opcode)
		: "cc", "memory");
}
348 
/* opcodes handed to __insn32_query() for the SORTL and DFLTCC queries */
#define INSN_SORTL 0xb938
#define INSN_DFLTCC 0xb939
351 
352 static void kvm_s390_cpu_feat_init(void)
353 {
354 	int i;
355 
356 	for (i = 0; i < 256; ++i) {
357 		if (plo_test_bit(i))
358 			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
359 	}
360 
361 	if (test_facility(28)) /* TOD-clock steering */
362 		ptff(kvm_s390_available_subfunc.ptff,
363 		     sizeof(kvm_s390_available_subfunc.ptff),
364 		     PTFF_QAF);
365 
366 	if (test_facility(17)) { /* MSA */
367 		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
368 			      kvm_s390_available_subfunc.kmac);
369 		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
370 			      kvm_s390_available_subfunc.kmc);
371 		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
372 			      kvm_s390_available_subfunc.km);
373 		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
374 			      kvm_s390_available_subfunc.kimd);
375 		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
376 			      kvm_s390_available_subfunc.klmd);
377 	}
378 	if (test_facility(76)) /* MSA3 */
379 		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
380 			      kvm_s390_available_subfunc.pckmo);
381 	if (test_facility(77)) { /* MSA4 */
382 		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
383 			      kvm_s390_available_subfunc.kmctr);
384 		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
385 			      kvm_s390_available_subfunc.kmf);
386 		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
387 			      kvm_s390_available_subfunc.kmo);
388 		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
389 			      kvm_s390_available_subfunc.pcc);
390 	}
391 	if (test_facility(57)) /* MSA5 */
392 		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
393 			      kvm_s390_available_subfunc.ppno);
394 
395 	if (test_facility(146)) /* MSA8 */
396 		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
397 			      kvm_s390_available_subfunc.kma);
398 
399 	if (test_facility(155)) /* MSA9 */
400 		__cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
401 			      kvm_s390_available_subfunc.kdsa);
402 
403 	if (test_facility(150)) /* SORTL */
404 		__insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
405 
406 	if (test_facility(151)) /* DFLTCC */
407 		__insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
408 
409 	if (MACHINE_HAS_ESOP)
410 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
411 	/*
412 	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
413 	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
414 	 */
415 	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
416 	    !test_facility(3) || !nested)
417 		return;
418 	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
419 	if (sclp.has_64bscao)
420 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
421 	if (sclp.has_siif)
422 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
423 	if (sclp.has_gpere)
424 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
425 	if (sclp.has_gsls)
426 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
427 	if (sclp.has_ib)
428 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
429 	if (sclp.has_cei)
430 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
431 	if (sclp.has_ibs)
432 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
433 	if (sclp.has_kss)
434 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
435 	/*
436 	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
437 	 * all skey handling functions read/set the skey from the PGSTE
438 	 * instead of the real storage key.
439 	 *
440 	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
441 	 * pages being detected as preserved although they are resident.
442 	 *
443 	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
444 	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
445 	 *
446 	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
447 	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
448 	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
449 	 *
450 	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
451 	 * cannot easily shadow the SCA because of the ipte lock.
452 	 */
453 }
454 
455 int kvm_arch_init(void *opaque)
456 {
457 	int rc = -ENOMEM;
458 
459 	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
460 	if (!kvm_s390_dbf)
461 		return -ENOMEM;
462 
463 	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view))
464 		goto out;
465 
466 	kvm_s390_cpu_feat_init();
467 
468 	/* Register floating interrupt controller interface. */
469 	rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
470 	if (rc) {
471 		pr_err("A FLIC registration call failed with rc=%d\n", rc);
472 		goto out;
473 	}
474 
475 	rc = kvm_s390_gib_init(GAL_ISC);
476 	if (rc)
477 		goto out;
478 
479 	return 0;
480 
481 out:
482 	kvm_arch_exit();
483 	return rc;
484 }
485 
486 void kvm_arch_exit(void)
487 {
488 	kvm_s390_gib_destroy();
489 	debug_unregister(kvm_s390_dbf);
490 }
491 
492 /* Section: device related */
493 long kvm_arch_dev_ioctl(struct file *filp,
494 			unsigned int ioctl, unsigned long arg)
495 {
496 	if (ioctl == KVM_S390_ENABLE_SIE)
497 		return s390_enable_sie();
498 	return -EINVAL;
499 }
500 
501 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
502 {
503 	int r;
504 
505 	switch (ext) {
506 	case KVM_CAP_S390_PSW:
507 	case KVM_CAP_S390_GMAP:
508 	case KVM_CAP_SYNC_MMU:
509 #ifdef CONFIG_KVM_S390_UCONTROL
510 	case KVM_CAP_S390_UCONTROL:
511 #endif
512 	case KVM_CAP_ASYNC_PF:
513 	case KVM_CAP_SYNC_REGS:
514 	case KVM_CAP_ONE_REG:
515 	case KVM_CAP_ENABLE_CAP:
516 	case KVM_CAP_S390_CSS_SUPPORT:
517 	case KVM_CAP_IOEVENTFD:
518 	case KVM_CAP_DEVICE_CTRL:
519 	case KVM_CAP_S390_IRQCHIP:
520 	case KVM_CAP_VM_ATTRIBUTES:
521 	case KVM_CAP_MP_STATE:
522 	case KVM_CAP_IMMEDIATE_EXIT:
523 	case KVM_CAP_S390_INJECT_IRQ:
524 	case KVM_CAP_S390_USER_SIGP:
525 	case KVM_CAP_S390_USER_STSI:
526 	case KVM_CAP_S390_SKEYS:
527 	case KVM_CAP_S390_IRQ_STATE:
528 	case KVM_CAP_S390_USER_INSTR0:
529 	case KVM_CAP_S390_CMMA_MIGRATION:
530 	case KVM_CAP_S390_AIS:
531 	case KVM_CAP_S390_AIS_MIGRATION:
532 	case KVM_CAP_S390_VCPU_RESETS:
533 		r = 1;
534 		break;
535 	case KVM_CAP_S390_HPAGE_1M:
536 		r = 0;
537 		if (hpage && !kvm_is_ucontrol(kvm))
538 			r = 1;
539 		break;
540 	case KVM_CAP_S390_MEM_OP:
541 		r = MEM_OP_MAX_SIZE;
542 		break;
543 	case KVM_CAP_NR_VCPUS:
544 	case KVM_CAP_MAX_VCPUS:
545 	case KVM_CAP_MAX_VCPU_ID:
546 		r = KVM_S390_BSCA_CPU_SLOTS;
547 		if (!kvm_s390_use_sca_entries())
548 			r = KVM_MAX_VCPUS;
549 		else if (sclp.has_esca && sclp.has_64bscao)
550 			r = KVM_S390_ESCA_CPU_SLOTS;
551 		break;
552 	case KVM_CAP_S390_COW:
553 		r = MACHINE_HAS_ESOP;
554 		break;
555 	case KVM_CAP_S390_VECTOR_REGISTERS:
556 		r = MACHINE_HAS_VX;
557 		break;
558 	case KVM_CAP_S390_RI:
559 		r = test_facility(64);
560 		break;
561 	case KVM_CAP_S390_GS:
562 		r = test_facility(133);
563 		break;
564 	case KVM_CAP_S390_BPB:
565 		r = test_facility(82);
566 		break;
567 	default:
568 		r = 0;
569 	}
570 	return r;
571 }
572 
573 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
574 				    struct kvm_memory_slot *memslot)
575 {
576 	int i;
577 	gfn_t cur_gfn, last_gfn;
578 	unsigned long gaddr, vmaddr;
579 	struct gmap *gmap = kvm->arch.gmap;
580 	DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
581 
582 	/* Loop over all guest segments */
583 	cur_gfn = memslot->base_gfn;
584 	last_gfn = memslot->base_gfn + memslot->npages;
585 	for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
586 		gaddr = gfn_to_gpa(cur_gfn);
587 		vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
588 		if (kvm_is_error_hva(vmaddr))
589 			continue;
590 
591 		bitmap_zero(bitmap, _PAGE_ENTRIES);
592 		gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
593 		for (i = 0; i < _PAGE_ENTRIES; i++) {
594 			if (test_bit(i, bitmap))
595 				mark_page_dirty(kvm, cur_gfn + i);
596 		}
597 
598 		if (fatal_signal_pending(current))
599 			return;
600 		cond_resched();
601 	}
602 }
603 
604 /* Section: vm related */
/* Forward declaration; the definition is not part of this file section. */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);
606 
607 /*
608  * Get (and clear) the dirty memory log for a memory slot.
609  */
610 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
611 			       struct kvm_dirty_log *log)
612 {
613 	int r;
614 	unsigned long n;
615 	struct kvm_memslots *slots;
616 	struct kvm_memory_slot *memslot;
617 	int is_dirty = 0;
618 
619 	if (kvm_is_ucontrol(kvm))
620 		return -EINVAL;
621 
622 	mutex_lock(&kvm->slots_lock);
623 
624 	r = -EINVAL;
625 	if (log->slot >= KVM_USER_MEM_SLOTS)
626 		goto out;
627 
628 	slots = kvm_memslots(kvm);
629 	memslot = id_to_memslot(slots, log->slot);
630 	r = -ENOENT;
631 	if (!memslot->dirty_bitmap)
632 		goto out;
633 
634 	kvm_s390_sync_dirty_log(kvm, memslot);
635 	r = kvm_get_dirty_log(kvm, log, &is_dirty);
636 	if (r)
637 		goto out;
638 
639 	/* Clear the dirty log */
640 	if (is_dirty) {
641 		n = kvm_dirty_bitmap_bytes(memslot);
642 		memset(memslot->dirty_bitmap, 0, n);
643 	}
644 	r = 0;
645 out:
646 	mutex_unlock(&kvm->slots_lock);
647 	return r;
648 }
649 
650 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
651 {
652 	unsigned int i;
653 	struct kvm_vcpu *vcpu;
654 
655 	kvm_for_each_vcpu(i, vcpu, kvm) {
656 		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
657 	}
658 }
659 
660 int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
661 {
662 	int r;
663 
664 	if (cap->flags)
665 		return -EINVAL;
666 
667 	switch (cap->cap) {
668 	case KVM_CAP_S390_IRQCHIP:
669 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
670 		kvm->arch.use_irqchip = 1;
671 		r = 0;
672 		break;
673 	case KVM_CAP_S390_USER_SIGP:
674 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
675 		kvm->arch.user_sigp = 1;
676 		r = 0;
677 		break;
678 	case KVM_CAP_S390_VECTOR_REGISTERS:
679 		mutex_lock(&kvm->lock);
680 		if (kvm->created_vcpus) {
681 			r = -EBUSY;
682 		} else if (MACHINE_HAS_VX) {
683 			set_kvm_facility(kvm->arch.model.fac_mask, 129);
684 			set_kvm_facility(kvm->arch.model.fac_list, 129);
685 			if (test_facility(134)) {
686 				set_kvm_facility(kvm->arch.model.fac_mask, 134);
687 				set_kvm_facility(kvm->arch.model.fac_list, 134);
688 			}
689 			if (test_facility(135)) {
690 				set_kvm_facility(kvm->arch.model.fac_mask, 135);
691 				set_kvm_facility(kvm->arch.model.fac_list, 135);
692 			}
693 			if (test_facility(148)) {
694 				set_kvm_facility(kvm->arch.model.fac_mask, 148);
695 				set_kvm_facility(kvm->arch.model.fac_list, 148);
696 			}
697 			if (test_facility(152)) {
698 				set_kvm_facility(kvm->arch.model.fac_mask, 152);
699 				set_kvm_facility(kvm->arch.model.fac_list, 152);
700 			}
701 			r = 0;
702 		} else
703 			r = -EINVAL;
704 		mutex_unlock(&kvm->lock);
705 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
706 			 r ? "(not available)" : "(success)");
707 		break;
708 	case KVM_CAP_S390_RI:
709 		r = -EINVAL;
710 		mutex_lock(&kvm->lock);
711 		if (kvm->created_vcpus) {
712 			r = -EBUSY;
713 		} else if (test_facility(64)) {
714 			set_kvm_facility(kvm->arch.model.fac_mask, 64);
715 			set_kvm_facility(kvm->arch.model.fac_list, 64);
716 			r = 0;
717 		}
718 		mutex_unlock(&kvm->lock);
719 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
720 			 r ? "(not available)" : "(success)");
721 		break;
722 	case KVM_CAP_S390_AIS:
723 		mutex_lock(&kvm->lock);
724 		if (kvm->created_vcpus) {
725 			r = -EBUSY;
726 		} else {
727 			set_kvm_facility(kvm->arch.model.fac_mask, 72);
728 			set_kvm_facility(kvm->arch.model.fac_list, 72);
729 			r = 0;
730 		}
731 		mutex_unlock(&kvm->lock);
732 		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
733 			 r ? "(not available)" : "(success)");
734 		break;
735 	case KVM_CAP_S390_GS:
736 		r = -EINVAL;
737 		mutex_lock(&kvm->lock);
738 		if (kvm->created_vcpus) {
739 			r = -EBUSY;
740 		} else if (test_facility(133)) {
741 			set_kvm_facility(kvm->arch.model.fac_mask, 133);
742 			set_kvm_facility(kvm->arch.model.fac_list, 133);
743 			r = 0;
744 		}
745 		mutex_unlock(&kvm->lock);
746 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
747 			 r ? "(not available)" : "(success)");
748 		break;
749 	case KVM_CAP_S390_HPAGE_1M:
750 		mutex_lock(&kvm->lock);
751 		if (kvm->created_vcpus)
752 			r = -EBUSY;
753 		else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
754 			r = -EINVAL;
755 		else {
756 			r = 0;
757 			down_write(&kvm->mm->mmap_sem);
758 			kvm->mm->context.allow_gmap_hpage_1m = 1;
759 			up_write(&kvm->mm->mmap_sem);
760 			/*
761 			 * We might have to create fake 4k page
762 			 * tables. To avoid that the hardware works on
763 			 * stale PGSTEs, we emulate these instructions.
764 			 */
765 			kvm->arch.use_skf = 0;
766 			kvm->arch.use_pfmfi = 0;
767 		}
768 		mutex_unlock(&kvm->lock);
769 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
770 			 r ? "(not available)" : "(success)");
771 		break;
772 	case KVM_CAP_S390_USER_STSI:
773 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
774 		kvm->arch.user_stsi = 1;
775 		r = 0;
776 		break;
777 	case KVM_CAP_S390_USER_INSTR0:
778 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
779 		kvm->arch.user_instr0 = 1;
780 		icpt_operexc_on_all_vcpus(kvm);
781 		r = 0;
782 		break;
783 	default:
784 		r = -EINVAL;
785 		break;
786 	}
787 	return r;
788 }
789 
790 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
791 {
792 	int ret;
793 
794 	switch (attr->attr) {
795 	case KVM_S390_VM_MEM_LIMIT_SIZE:
796 		ret = 0;
797 		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
798 			 kvm->arch.mem_limit);
799 		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
800 			ret = -EFAULT;
801 		break;
802 	default:
803 		ret = -ENXIO;
804 		break;
805 	}
806 	return ret;
807 }
808 
809 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
810 {
811 	int ret;
812 	unsigned int idx;
813 	switch (attr->attr) {
814 	case KVM_S390_VM_MEM_ENABLE_CMMA:
815 		ret = -ENXIO;
816 		if (!sclp.has_cmma)
817 			break;
818 
819 		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
820 		mutex_lock(&kvm->lock);
821 		if (kvm->created_vcpus)
822 			ret = -EBUSY;
823 		else if (kvm->mm->context.allow_gmap_hpage_1m)
824 			ret = -EINVAL;
825 		else {
826 			kvm->arch.use_cmma = 1;
827 			/* Not compatible with cmma. */
828 			kvm->arch.use_pfmfi = 0;
829 			ret = 0;
830 		}
831 		mutex_unlock(&kvm->lock);
832 		break;
833 	case KVM_S390_VM_MEM_CLR_CMMA:
834 		ret = -ENXIO;
835 		if (!sclp.has_cmma)
836 			break;
837 		ret = -EINVAL;
838 		if (!kvm->arch.use_cmma)
839 			break;
840 
841 		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
842 		mutex_lock(&kvm->lock);
843 		idx = srcu_read_lock(&kvm->srcu);
844 		s390_reset_cmma(kvm->arch.gmap->mm);
845 		srcu_read_unlock(&kvm->srcu, idx);
846 		mutex_unlock(&kvm->lock);
847 		ret = 0;
848 		break;
849 	case KVM_S390_VM_MEM_LIMIT_SIZE: {
850 		unsigned long new_limit;
851 
852 		if (kvm_is_ucontrol(kvm))
853 			return -EINVAL;
854 
855 		if (get_user(new_limit, (u64 __user *)attr->addr))
856 			return -EFAULT;
857 
858 		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
859 		    new_limit > kvm->arch.mem_limit)
860 			return -E2BIG;
861 
862 		if (!new_limit)
863 			return -EINVAL;
864 
865 		/* gmap_create takes last usable address */
866 		if (new_limit != KVM_S390_NO_MEM_LIMIT)
867 			new_limit -= 1;
868 
869 		ret = -EBUSY;
870 		mutex_lock(&kvm->lock);
871 		if (!kvm->created_vcpus) {
872 			/* gmap_create will round the limit up */
873 			struct gmap *new = gmap_create(current->mm, new_limit);
874 
875 			if (!new) {
876 				ret = -ENOMEM;
877 			} else {
878 				gmap_remove(kvm->arch.gmap);
879 				new->private = kvm;
880 				kvm->arch.gmap = new;
881 				ret = 0;
882 			}
883 		}
884 		mutex_unlock(&kvm->lock);
885 		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
886 		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
887 			 (void *) kvm->arch.gmap->asce);
888 		break;
889 	}
890 	default:
891 		ret = -ENXIO;
892 		break;
893 	}
894 	return ret;
895 }
896 
/*
 * Forward declaration, needed by kvm_s390_vcpu_crypto_reset_all(); the
 * definition is not part of this file section.
 */
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
898 
899 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
900 {
901 	struct kvm_vcpu *vcpu;
902 	int i;
903 
904 	kvm_s390_vcpu_block_all(kvm);
905 
906 	kvm_for_each_vcpu(i, vcpu, kvm) {
907 		kvm_s390_vcpu_crypto_setup(vcpu);
908 		/* recreate the shadow crycb by leaving the VSIE handler */
909 		kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
910 	}
911 
912 	kvm_s390_vcpu_unblock_all(kvm);
913 }
914 
915 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
916 {
917 	mutex_lock(&kvm->lock);
918 	switch (attr->attr) {
919 	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
920 		if (!test_kvm_facility(kvm, 76)) {
921 			mutex_unlock(&kvm->lock);
922 			return -EINVAL;
923 		}
924 		get_random_bytes(
925 			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
926 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
927 		kvm->arch.crypto.aes_kw = 1;
928 		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
929 		break;
930 	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
931 		if (!test_kvm_facility(kvm, 76)) {
932 			mutex_unlock(&kvm->lock);
933 			return -EINVAL;
934 		}
935 		get_random_bytes(
936 			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
937 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
938 		kvm->arch.crypto.dea_kw = 1;
939 		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
940 		break;
941 	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
942 		if (!test_kvm_facility(kvm, 76)) {
943 			mutex_unlock(&kvm->lock);
944 			return -EINVAL;
945 		}
946 		kvm->arch.crypto.aes_kw = 0;
947 		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
948 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
949 		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
950 		break;
951 	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
952 		if (!test_kvm_facility(kvm, 76)) {
953 			mutex_unlock(&kvm->lock);
954 			return -EINVAL;
955 		}
956 		kvm->arch.crypto.dea_kw = 0;
957 		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
958 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
959 		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
960 		break;
961 	case KVM_S390_VM_CRYPTO_ENABLE_APIE:
962 		if (!ap_instructions_available()) {
963 			mutex_unlock(&kvm->lock);
964 			return -EOPNOTSUPP;
965 		}
966 		kvm->arch.crypto.apie = 1;
967 		break;
968 	case KVM_S390_VM_CRYPTO_DISABLE_APIE:
969 		if (!ap_instructions_available()) {
970 			mutex_unlock(&kvm->lock);
971 			return -EOPNOTSUPP;
972 		}
973 		kvm->arch.crypto.apie = 0;
974 		break;
975 	default:
976 		mutex_unlock(&kvm->lock);
977 		return -ENXIO;
978 	}
979 
980 	kvm_s390_vcpu_crypto_reset_all(kvm);
981 	mutex_unlock(&kvm->lock);
982 	return 0;
983 }
984 
985 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
986 {
987 	int cx;
988 	struct kvm_vcpu *vcpu;
989 
990 	kvm_for_each_vcpu(cx, vcpu, kvm)
991 		kvm_s390_sync_request(req, vcpu);
992 }
993 
994 /*
995  * Must be called with kvm->srcu held to avoid races on memslots, and with
996  * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
997  */
998 static int kvm_s390_vm_start_migration(struct kvm *kvm)
999 {
1000 	struct kvm_memory_slot *ms;
1001 	struct kvm_memslots *slots;
1002 	unsigned long ram_pages = 0;
1003 	int slotnr;
1004 
1005 	/* migration mode already enabled */
1006 	if (kvm->arch.migration_mode)
1007 		return 0;
1008 	slots = kvm_memslots(kvm);
1009 	if (!slots || !slots->used_slots)
1010 		return -EINVAL;
1011 
1012 	if (!kvm->arch.use_cmma) {
1013 		kvm->arch.migration_mode = 1;
1014 		return 0;
1015 	}
1016 	/* mark all the pages in active slots as dirty */
1017 	for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
1018 		ms = slots->memslots + slotnr;
1019 		if (!ms->dirty_bitmap)
1020 			return -EINVAL;
1021 		/*
1022 		 * The second half of the bitmap is only used on x86,
1023 		 * and would be wasted otherwise, so we put it to good
1024 		 * use here to keep track of the state of the storage
1025 		 * attributes.
1026 		 */
1027 		memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
1028 		ram_pages += ms->npages;
1029 	}
1030 	atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
1031 	kvm->arch.migration_mode = 1;
1032 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
1033 	return 0;
1034 }
1035 
1036 /*
1037  * Must be called with kvm->slots_lock to avoid races with ourselves and
1038  * kvm_s390_vm_start_migration.
1039  */
1040 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
1041 {
1042 	/* migration mode already disabled */
1043 	if (!kvm->arch.migration_mode)
1044 		return 0;
1045 	kvm->arch.migration_mode = 0;
1046 	if (kvm->arch.use_cmma)
1047 		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
1048 	return 0;
1049 }
1050 
1051 static int kvm_s390_vm_set_migration(struct kvm *kvm,
1052 				     struct kvm_device_attr *attr)
1053 {
1054 	int res = -ENXIO;
1055 
1056 	mutex_lock(&kvm->slots_lock);
1057 	switch (attr->attr) {
1058 	case KVM_S390_VM_MIGRATION_START:
1059 		res = kvm_s390_vm_start_migration(kvm);
1060 		break;
1061 	case KVM_S390_VM_MIGRATION_STOP:
1062 		res = kvm_s390_vm_stop_migration(kvm);
1063 		break;
1064 	default:
1065 		break;
1066 	}
1067 	mutex_unlock(&kvm->slots_lock);
1068 
1069 	return res;
1070 }
1071 
1072 static int kvm_s390_vm_get_migration(struct kvm *kvm,
1073 				     struct kvm_device_attr *attr)
1074 {
1075 	u64 mig = kvm->arch.migration_mode;
1076 
1077 	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1078 		return -ENXIO;
1079 
1080 	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
1081 		return -EFAULT;
1082 	return 0;
1083 }
1084 
1085 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1086 {
1087 	struct kvm_s390_vm_tod_clock gtod;
1088 
1089 	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1090 		return -EFAULT;
1091 
1092 	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1093 		return -EINVAL;
1094 	kvm_s390_set_tod_clock(kvm, &gtod);
1095 
1096 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1097 		gtod.epoch_idx, gtod.tod);
1098 
1099 	return 0;
1100 }
1101 
1102 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1103 {
1104 	u8 gtod_high;
1105 
1106 	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1107 					   sizeof(gtod_high)))
1108 		return -EFAULT;
1109 
1110 	if (gtod_high != 0)
1111 		return -EINVAL;
1112 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1113 
1114 	return 0;
1115 }
1116 
1117 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1118 {
1119 	struct kvm_s390_vm_tod_clock gtod = { 0 };
1120 
1121 	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1122 			   sizeof(gtod.tod)))
1123 		return -EFAULT;
1124 
1125 	kvm_s390_set_tod_clock(kvm, &gtod);
1126 	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1127 	return 0;
1128 }
1129 
1130 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1131 {
1132 	int ret;
1133 
1134 	if (attr->flags)
1135 		return -EINVAL;
1136 
1137 	switch (attr->attr) {
1138 	case KVM_S390_VM_TOD_EXT:
1139 		ret = kvm_s390_set_tod_ext(kvm, attr);
1140 		break;
1141 	case KVM_S390_VM_TOD_HIGH:
1142 		ret = kvm_s390_set_tod_high(kvm, attr);
1143 		break;
1144 	case KVM_S390_VM_TOD_LOW:
1145 		ret = kvm_s390_set_tod_low(kvm, attr);
1146 		break;
1147 	default:
1148 		ret = -ENXIO;
1149 		break;
1150 	}
1151 	return ret;
1152 }
1153 
1154 static void kvm_s390_get_tod_clock(struct kvm *kvm,
1155 				   struct kvm_s390_vm_tod_clock *gtod)
1156 {
1157 	struct kvm_s390_tod_clock_ext htod;
1158 
1159 	preempt_disable();
1160 
1161 	get_tod_clock_ext((char *)&htod);
1162 
1163 	gtod->tod = htod.tod + kvm->arch.epoch;
1164 	gtod->epoch_idx = 0;
1165 	if (test_kvm_facility(kvm, 139)) {
1166 		gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
1167 		if (gtod->tod < htod.tod)
1168 			gtod->epoch_idx += 1;
1169 	}
1170 
1171 	preempt_enable();
1172 }
1173 
1174 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1175 {
1176 	struct kvm_s390_vm_tod_clock gtod;
1177 
1178 	memset(&gtod, 0, sizeof(gtod));
1179 	kvm_s390_get_tod_clock(kvm, &gtod);
1180 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1181 		return -EFAULT;
1182 
1183 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1184 		gtod.epoch_idx, gtod.tod);
1185 	return 0;
1186 }
1187 
1188 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1189 {
1190 	u8 gtod_high = 0;
1191 
1192 	if (copy_to_user((void __user *)attr->addr, &gtod_high,
1193 					 sizeof(gtod_high)))
1194 		return -EFAULT;
1195 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1196 
1197 	return 0;
1198 }
1199 
1200 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1201 {
1202 	u64 gtod;
1203 
1204 	gtod = kvm_s390_get_tod_clock_fast(kvm);
1205 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1206 		return -EFAULT;
1207 	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1208 
1209 	return 0;
1210 }
1211 
1212 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1213 {
1214 	int ret;
1215 
1216 	if (attr->flags)
1217 		return -EINVAL;
1218 
1219 	switch (attr->attr) {
1220 	case KVM_S390_VM_TOD_EXT:
1221 		ret = kvm_s390_get_tod_ext(kvm, attr);
1222 		break;
1223 	case KVM_S390_VM_TOD_HIGH:
1224 		ret = kvm_s390_get_tod_high(kvm, attr);
1225 		break;
1226 	case KVM_S390_VM_TOD_LOW:
1227 		ret = kvm_s390_get_tod_low(kvm, attr);
1228 		break;
1229 	default:
1230 		ret = -ENXIO;
1231 		break;
1232 	}
1233 	return ret;
1234 }
1235 
1236 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1237 {
1238 	struct kvm_s390_vm_cpu_processor *proc;
1239 	u16 lowest_ibc, unblocked_ibc;
1240 	int ret = 0;
1241 
1242 	mutex_lock(&kvm->lock);
1243 	if (kvm->created_vcpus) {
1244 		ret = -EBUSY;
1245 		goto out;
1246 	}
1247 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1248 	if (!proc) {
1249 		ret = -ENOMEM;
1250 		goto out;
1251 	}
1252 	if (!copy_from_user(proc, (void __user *)attr->addr,
1253 			    sizeof(*proc))) {
1254 		kvm->arch.model.cpuid = proc->cpuid;
1255 		lowest_ibc = sclp.ibc >> 16 & 0xfff;
1256 		unblocked_ibc = sclp.ibc & 0xfff;
1257 		if (lowest_ibc && proc->ibc) {
1258 			if (proc->ibc > unblocked_ibc)
1259 				kvm->arch.model.ibc = unblocked_ibc;
1260 			else if (proc->ibc < lowest_ibc)
1261 				kvm->arch.model.ibc = lowest_ibc;
1262 			else
1263 				kvm->arch.model.ibc = proc->ibc;
1264 		}
1265 		memcpy(kvm->arch.model.fac_list, proc->fac_list,
1266 		       S390_ARCH_FAC_LIST_SIZE_BYTE);
1267 		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1268 			 kvm->arch.model.ibc,
1269 			 kvm->arch.model.cpuid);
1270 		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1271 			 kvm->arch.model.fac_list[0],
1272 			 kvm->arch.model.fac_list[1],
1273 			 kvm->arch.model.fac_list[2]);
1274 	} else
1275 		ret = -EFAULT;
1276 	kfree(proc);
1277 out:
1278 	mutex_unlock(&kvm->lock);
1279 	return ret;
1280 }
1281 
1282 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1283 				       struct kvm_device_attr *attr)
1284 {
1285 	struct kvm_s390_vm_cpu_feat data;
1286 
1287 	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1288 		return -EFAULT;
1289 	if (!bitmap_subset((unsigned long *) data.feat,
1290 			   kvm_s390_available_cpu_feat,
1291 			   KVM_S390_VM_CPU_FEAT_NR_BITS))
1292 		return -EINVAL;
1293 
1294 	mutex_lock(&kvm->lock);
1295 	if (kvm->created_vcpus) {
1296 		mutex_unlock(&kvm->lock);
1297 		return -EBUSY;
1298 	}
1299 	bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1300 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1301 	mutex_unlock(&kvm->lock);
1302 	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1303 			 data.feat[0],
1304 			 data.feat[1],
1305 			 data.feat[2]);
1306 	return 0;
1307 }
1308 
1309 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1310 					  struct kvm_device_attr *attr)
1311 {
1312 	mutex_lock(&kvm->lock);
1313 	if (kvm->created_vcpus) {
1314 		mutex_unlock(&kvm->lock);
1315 		return -EBUSY;
1316 	}
1317 
1318 	if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1319 			   sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1320 		mutex_unlock(&kvm->lock);
1321 		return -EFAULT;
1322 	}
1323 	mutex_unlock(&kvm->lock);
1324 
1325 	VM_EVENT(kvm, 3, "SET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1326 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1327 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1328 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1329 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1330 	VM_EVENT(kvm, 3, "SET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1331 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1332 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1333 	VM_EVENT(kvm, 3, "SET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1334 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1335 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1336 	VM_EVENT(kvm, 3, "SET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1337 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1338 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1339 	VM_EVENT(kvm, 3, "SET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1340 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1341 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1342 	VM_EVENT(kvm, 3, "SET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1343 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1344 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1345 	VM_EVENT(kvm, 3, "SET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1346 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1347 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1348 	VM_EVENT(kvm, 3, "SET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1349 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1350 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1351 	VM_EVENT(kvm, 3, "SET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1352 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1353 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1354 	VM_EVENT(kvm, 3, "SET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1355 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1356 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1357 	VM_EVENT(kvm, 3, "SET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1358 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1359 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1360 	VM_EVENT(kvm, 3, "SET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1361 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1362 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1363 	VM_EVENT(kvm, 3, "SET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1364 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1365 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1366 	VM_EVENT(kvm, 3, "SET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1367 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1368 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1369 	VM_EVENT(kvm, 3, "SET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1370 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1371 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1372 	VM_EVENT(kvm, 3, "SET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1373 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1374 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1375 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1376 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1377 	VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1378 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1379 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1380 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1381 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1382 
1383 	return 0;
1384 }
1385 
1386 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1387 {
1388 	int ret = -ENXIO;
1389 
1390 	switch (attr->attr) {
1391 	case KVM_S390_VM_CPU_PROCESSOR:
1392 		ret = kvm_s390_set_processor(kvm, attr);
1393 		break;
1394 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1395 		ret = kvm_s390_set_processor_feat(kvm, attr);
1396 		break;
1397 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1398 		ret = kvm_s390_set_processor_subfunc(kvm, attr);
1399 		break;
1400 	}
1401 	return ret;
1402 }
1403 
1404 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1405 {
1406 	struct kvm_s390_vm_cpu_processor *proc;
1407 	int ret = 0;
1408 
1409 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1410 	if (!proc) {
1411 		ret = -ENOMEM;
1412 		goto out;
1413 	}
1414 	proc->cpuid = kvm->arch.model.cpuid;
1415 	proc->ibc = kvm->arch.model.ibc;
1416 	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1417 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1418 	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1419 		 kvm->arch.model.ibc,
1420 		 kvm->arch.model.cpuid);
1421 	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1422 		 kvm->arch.model.fac_list[0],
1423 		 kvm->arch.model.fac_list[1],
1424 		 kvm->arch.model.fac_list[2]);
1425 	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1426 		ret = -EFAULT;
1427 	kfree(proc);
1428 out:
1429 	return ret;
1430 }
1431 
1432 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1433 {
1434 	struct kvm_s390_vm_cpu_machine *mach;
1435 	int ret = 0;
1436 
1437 	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1438 	if (!mach) {
1439 		ret = -ENOMEM;
1440 		goto out;
1441 	}
1442 	get_cpu_id((struct cpuid *) &mach->cpuid);
1443 	mach->ibc = sclp.ibc;
1444 	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1445 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1446 	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1447 	       sizeof(S390_lowcore.stfle_fac_list));
1448 	VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1449 		 kvm->arch.model.ibc,
1450 		 kvm->arch.model.cpuid);
1451 	VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1452 		 mach->fac_mask[0],
1453 		 mach->fac_mask[1],
1454 		 mach->fac_mask[2]);
1455 	VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1456 		 mach->fac_list[0],
1457 		 mach->fac_list[1],
1458 		 mach->fac_list[2]);
1459 	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1460 		ret = -EFAULT;
1461 	kfree(mach);
1462 out:
1463 	return ret;
1464 }
1465 
1466 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1467 				       struct kvm_device_attr *attr)
1468 {
1469 	struct kvm_s390_vm_cpu_feat data;
1470 
1471 	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1472 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1473 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1474 		return -EFAULT;
1475 	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1476 			 data.feat[0],
1477 			 data.feat[1],
1478 			 data.feat[2]);
1479 	return 0;
1480 }
1481 
1482 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1483 				     struct kvm_device_attr *attr)
1484 {
1485 	struct kvm_s390_vm_cpu_feat data;
1486 
1487 	bitmap_copy((unsigned long *) data.feat,
1488 		    kvm_s390_available_cpu_feat,
1489 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1490 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1491 		return -EFAULT;
1492 	VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1493 			 data.feat[0],
1494 			 data.feat[1],
1495 			 data.feat[2]);
1496 	return 0;
1497 }
1498 
1499 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1500 					  struct kvm_device_attr *attr)
1501 {
1502 	if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1503 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1504 		return -EFAULT;
1505 
1506 	VM_EVENT(kvm, 3, "GET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1507 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1508 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1509 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1510 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1511 	VM_EVENT(kvm, 3, "GET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1512 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1513 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1514 	VM_EVENT(kvm, 3, "GET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1515 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1516 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1517 	VM_EVENT(kvm, 3, "GET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1518 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1519 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1520 	VM_EVENT(kvm, 3, "GET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1521 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1522 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1523 	VM_EVENT(kvm, 3, "GET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1524 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1525 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1526 	VM_EVENT(kvm, 3, "GET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1527 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1528 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1529 	VM_EVENT(kvm, 3, "GET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1530 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1531 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1532 	VM_EVENT(kvm, 3, "GET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1533 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1534 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1535 	VM_EVENT(kvm, 3, "GET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1536 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1537 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1538 	VM_EVENT(kvm, 3, "GET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1539 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1540 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1541 	VM_EVENT(kvm, 3, "GET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1542 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1543 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1544 	VM_EVENT(kvm, 3, "GET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1545 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1546 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1547 	VM_EVENT(kvm, 3, "GET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1548 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1549 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1550 	VM_EVENT(kvm, 3, "GET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1551 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1552 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1553 	VM_EVENT(kvm, 3, "GET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1554 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1555 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1556 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1557 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1558 	VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1559 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1560 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1561 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1562 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1563 
1564 	return 0;
1565 }
1566 
1567 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1568 					struct kvm_device_attr *attr)
1569 {
1570 	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1571 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1572 		return -EFAULT;
1573 
1574 	VM_EVENT(kvm, 3, "GET: host  PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1575 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1576 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1577 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1578 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1579 	VM_EVENT(kvm, 3, "GET: host  PTFF   subfunc 0x%16.16lx.%16.16lx",
1580 		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1581 		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1582 	VM_EVENT(kvm, 3, "GET: host  KMAC   subfunc 0x%16.16lx.%16.16lx",
1583 		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1584 		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1585 	VM_EVENT(kvm, 3, "GET: host  KMC    subfunc 0x%16.16lx.%16.16lx",
1586 		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1587 		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1588 	VM_EVENT(kvm, 3, "GET: host  KM     subfunc 0x%16.16lx.%16.16lx",
1589 		 ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1590 		 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1591 	VM_EVENT(kvm, 3, "GET: host  KIMD   subfunc 0x%16.16lx.%16.16lx",
1592 		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1593 		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1594 	VM_EVENT(kvm, 3, "GET: host  KLMD   subfunc 0x%16.16lx.%16.16lx",
1595 		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1596 		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1597 	VM_EVENT(kvm, 3, "GET: host  PCKMO  subfunc 0x%16.16lx.%16.16lx",
1598 		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1599 		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1600 	VM_EVENT(kvm, 3, "GET: host  KMCTR  subfunc 0x%16.16lx.%16.16lx",
1601 		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1602 		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1603 	VM_EVENT(kvm, 3, "GET: host  KMF    subfunc 0x%16.16lx.%16.16lx",
1604 		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1605 		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1606 	VM_EVENT(kvm, 3, "GET: host  KMO    subfunc 0x%16.16lx.%16.16lx",
1607 		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1608 		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1609 	VM_EVENT(kvm, 3, "GET: host  PCC    subfunc 0x%16.16lx.%16.16lx",
1610 		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1611 		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1612 	VM_EVENT(kvm, 3, "GET: host  PPNO   subfunc 0x%16.16lx.%16.16lx",
1613 		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1614 		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1615 	VM_EVENT(kvm, 3, "GET: host  KMA    subfunc 0x%16.16lx.%16.16lx",
1616 		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1617 		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1618 	VM_EVENT(kvm, 3, "GET: host  KDSA   subfunc 0x%16.16lx.%16.16lx",
1619 		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1620 		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1621 	VM_EVENT(kvm, 3, "GET: host  SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1622 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1623 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1624 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1625 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1626 	VM_EVENT(kvm, 3, "GET: host  DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1627 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1628 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1629 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1630 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1631 
1632 	return 0;
1633 }
1634 
1635 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1636 {
1637 	int ret = -ENXIO;
1638 
1639 	switch (attr->attr) {
1640 	case KVM_S390_VM_CPU_PROCESSOR:
1641 		ret = kvm_s390_get_processor(kvm, attr);
1642 		break;
1643 	case KVM_S390_VM_CPU_MACHINE:
1644 		ret = kvm_s390_get_machine(kvm, attr);
1645 		break;
1646 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1647 		ret = kvm_s390_get_processor_feat(kvm, attr);
1648 		break;
1649 	case KVM_S390_VM_CPU_MACHINE_FEAT:
1650 		ret = kvm_s390_get_machine_feat(kvm, attr);
1651 		break;
1652 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1653 		ret = kvm_s390_get_processor_subfunc(kvm, attr);
1654 		break;
1655 	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1656 		ret = kvm_s390_get_machine_subfunc(kvm, attr);
1657 		break;
1658 	}
1659 	return ret;
1660 }
1661 
1662 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1663 {
1664 	int ret;
1665 
1666 	switch (attr->group) {
1667 	case KVM_S390_VM_MEM_CTRL:
1668 		ret = kvm_s390_set_mem_control(kvm, attr);
1669 		break;
1670 	case KVM_S390_VM_TOD:
1671 		ret = kvm_s390_set_tod(kvm, attr);
1672 		break;
1673 	case KVM_S390_VM_CPU_MODEL:
1674 		ret = kvm_s390_set_cpu_model(kvm, attr);
1675 		break;
1676 	case KVM_S390_VM_CRYPTO:
1677 		ret = kvm_s390_vm_set_crypto(kvm, attr);
1678 		break;
1679 	case KVM_S390_VM_MIGRATION:
1680 		ret = kvm_s390_vm_set_migration(kvm, attr);
1681 		break;
1682 	default:
1683 		ret = -ENXIO;
1684 		break;
1685 	}
1686 
1687 	return ret;
1688 }
1689 
1690 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1691 {
1692 	int ret;
1693 
1694 	switch (attr->group) {
1695 	case KVM_S390_VM_MEM_CTRL:
1696 		ret = kvm_s390_get_mem_control(kvm, attr);
1697 		break;
1698 	case KVM_S390_VM_TOD:
1699 		ret = kvm_s390_get_tod(kvm, attr);
1700 		break;
1701 	case KVM_S390_VM_CPU_MODEL:
1702 		ret = kvm_s390_get_cpu_model(kvm, attr);
1703 		break;
1704 	case KVM_S390_VM_MIGRATION:
1705 		ret = kvm_s390_vm_get_migration(kvm, attr);
1706 		break;
1707 	default:
1708 		ret = -ENXIO;
1709 		break;
1710 	}
1711 
1712 	return ret;
1713 }
1714 
1715 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1716 {
1717 	int ret;
1718 
1719 	switch (attr->group) {
1720 	case KVM_S390_VM_MEM_CTRL:
1721 		switch (attr->attr) {
1722 		case KVM_S390_VM_MEM_ENABLE_CMMA:
1723 		case KVM_S390_VM_MEM_CLR_CMMA:
1724 			ret = sclp.has_cmma ? 0 : -ENXIO;
1725 			break;
1726 		case KVM_S390_VM_MEM_LIMIT_SIZE:
1727 			ret = 0;
1728 			break;
1729 		default:
1730 			ret = -ENXIO;
1731 			break;
1732 		}
1733 		break;
1734 	case KVM_S390_VM_TOD:
1735 		switch (attr->attr) {
1736 		case KVM_S390_VM_TOD_LOW:
1737 		case KVM_S390_VM_TOD_HIGH:
1738 			ret = 0;
1739 			break;
1740 		default:
1741 			ret = -ENXIO;
1742 			break;
1743 		}
1744 		break;
1745 	case KVM_S390_VM_CPU_MODEL:
1746 		switch (attr->attr) {
1747 		case KVM_S390_VM_CPU_PROCESSOR:
1748 		case KVM_S390_VM_CPU_MACHINE:
1749 		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1750 		case KVM_S390_VM_CPU_MACHINE_FEAT:
1751 		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1752 		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1753 			ret = 0;
1754 			break;
1755 		default:
1756 			ret = -ENXIO;
1757 			break;
1758 		}
1759 		break;
1760 	case KVM_S390_VM_CRYPTO:
1761 		switch (attr->attr) {
1762 		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1763 		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1764 		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1765 		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1766 			ret = 0;
1767 			break;
1768 		case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1769 		case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1770 			ret = ap_instructions_available() ? 0 : -ENXIO;
1771 			break;
1772 		default:
1773 			ret = -ENXIO;
1774 			break;
1775 		}
1776 		break;
1777 	case KVM_S390_VM_MIGRATION:
1778 		ret = 0;
1779 		break;
1780 	default:
1781 		ret = -ENXIO;
1782 		break;
1783 	}
1784 
1785 	return ret;
1786 }
1787 
1788 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1789 {
1790 	uint8_t *keys;
1791 	uint64_t hva;
1792 	int srcu_idx, i, r = 0;
1793 
1794 	if (args->flags != 0)
1795 		return -EINVAL;
1796 
1797 	/* Is this guest using storage keys? */
1798 	if (!mm_uses_skeys(current->mm))
1799 		return KVM_S390_GET_SKEYS_NONE;
1800 
1801 	/* Enforce sane limit on memory allocation */
1802 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1803 		return -EINVAL;
1804 
1805 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1806 	if (!keys)
1807 		return -ENOMEM;
1808 
1809 	down_read(&current->mm->mmap_sem);
1810 	srcu_idx = srcu_read_lock(&kvm->srcu);
1811 	for (i = 0; i < args->count; i++) {
1812 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1813 		if (kvm_is_error_hva(hva)) {
1814 			r = -EFAULT;
1815 			break;
1816 		}
1817 
1818 		r = get_guest_storage_key(current->mm, hva, &keys[i]);
1819 		if (r)
1820 			break;
1821 	}
1822 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1823 	up_read(&current->mm->mmap_sem);
1824 
1825 	if (!r) {
1826 		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1827 				 sizeof(uint8_t) * args->count);
1828 		if (r)
1829 			r = -EFAULT;
1830 	}
1831 
1832 	kvfree(keys);
1833 	return r;
1834 }
1835 
1836 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1837 {
1838 	uint8_t *keys;
1839 	uint64_t hva;
1840 	int srcu_idx, i, r = 0;
1841 	bool unlocked;
1842 
1843 	if (args->flags != 0)
1844 		return -EINVAL;
1845 
1846 	/* Enforce sane limit on memory allocation */
1847 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1848 		return -EINVAL;
1849 
1850 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1851 	if (!keys)
1852 		return -ENOMEM;
1853 
1854 	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1855 			   sizeof(uint8_t) * args->count);
1856 	if (r) {
1857 		r = -EFAULT;
1858 		goto out;
1859 	}
1860 
1861 	/* Enable storage key handling for the guest */
1862 	r = s390_enable_skey();
1863 	if (r)
1864 		goto out;
1865 
1866 	i = 0;
1867 	down_read(&current->mm->mmap_sem);
1868 	srcu_idx = srcu_read_lock(&kvm->srcu);
1869         while (i < args->count) {
1870 		unlocked = false;
1871 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1872 		if (kvm_is_error_hva(hva)) {
1873 			r = -EFAULT;
1874 			break;
1875 		}
1876 
1877 		/* Lowest order bit is reserved */
1878 		if (keys[i] & 0x01) {
1879 			r = -EINVAL;
1880 			break;
1881 		}
1882 
1883 		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1884 		if (r) {
1885 			r = fixup_user_fault(current, current->mm, hva,
1886 					     FAULT_FLAG_WRITE, &unlocked);
1887 			if (r)
1888 				break;
1889 		}
1890 		if (!r)
1891 			i++;
1892 	}
1893 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1894 	up_read(&current->mm->mmap_sem);
1895 out:
1896 	kvfree(keys);
1897 	return r;
1898 }
1899 
1900 /*
1901  * Base address and length must be sent at the start of each block, therefore
1902  * it's cheaper to send some clean data, as long as it's less than the size of
1903  * two longs.
1904  */
1905 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1906 /* for consistency */
1907 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1908 
1909 /*
1910  * Similar to gfn_to_memslot, but returns the index of a memslot also when the
1911  * address falls in a hole. In that case the index of one of the memslots
1912  * bordering the hole is returned.
1913  */
1914 static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1915 {
1916 	int start = 0, end = slots->used_slots;
1917 	int slot = atomic_read(&slots->lru_slot);
1918 	struct kvm_memory_slot *memslots = slots->memslots;
1919 
1920 	if (gfn >= memslots[slot].base_gfn &&
1921 	    gfn < memslots[slot].base_gfn + memslots[slot].npages)
1922 		return slot;
1923 
1924 	while (start < end) {
1925 		slot = start + (end - start) / 2;
1926 
1927 		if (gfn >= memslots[slot].base_gfn)
1928 			end = slot;
1929 		else
1930 			start = slot + 1;
1931 	}
1932 
1933 	if (gfn >= memslots[start].base_gfn &&
1934 	    gfn < memslots[start].base_gfn + memslots[start].npages) {
1935 		atomic_set(&slots->lru_slot, start);
1936 	}
1937 
1938 	return start;
1939 }
1940 
1941 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1942 			      u8 *res, unsigned long bufsize)
1943 {
1944 	unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1945 
1946 	args->count = 0;
1947 	while (args->count < bufsize) {
1948 		hva = gfn_to_hva(kvm, cur_gfn);
1949 		/*
1950 		 * We return an error if the first value was invalid, but we
1951 		 * return successfully if at least one value was copied.
1952 		 */
1953 		if (kvm_is_error_hva(hva))
1954 			return args->count ? 0 : -EFAULT;
1955 		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1956 			pgstev = 0;
1957 		res[args->count++] = (pgstev >> 24) & 0x43;
1958 		cur_gfn++;
1959 	}
1960 
1961 	return 0;
1962 }
1963 
1964 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
1965 					      unsigned long cur_gfn)
1966 {
1967 	int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
1968 	struct kvm_memory_slot *ms = slots->memslots + slotidx;
1969 	unsigned long ofs = cur_gfn - ms->base_gfn;
1970 
1971 	if (ms->base_gfn + ms->npages <= cur_gfn) {
1972 		slotidx--;
1973 		/* If we are above the highest slot, wrap around */
1974 		if (slotidx < 0)
1975 			slotidx = slots->used_slots - 1;
1976 
1977 		ms = slots->memslots + slotidx;
1978 		ofs = 0;
1979 	}
1980 	ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
1981 	while ((slotidx > 0) && (ofs >= ms->npages)) {
1982 		slotidx--;
1983 		ms = slots->memslots + slotidx;
1984 		ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
1985 	}
1986 	return ms->base_gfn + ofs;
1987 }
1988 
/*
 * Collect the CMMA values of a run of guest frames that starts at the first
 * dirty frame at or after args->start_gfn, clearing the corresponding bits
 * in the second dirty bitmap as it goes.
 *
 * On return args->start_gfn holds the gfn of the first value stored in res
 * and args->count the number of values stored (at most bufsize). The run
 * ends when the buffer is full, when the next dirty frame is more than
 * KVM_S390_MAX_BIT_DISTANCE frames away, when the next dirty frame is at or
 * beyond the end of memslots[0], or when an unmapped frame is reached.
 *
 * Always returns 0.
 */
static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
			     u8 *res, unsigned long bufsize)
{
	unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
	struct kvm_memslots *slots = kvm_memslots(kvm);
	struct kvm_memory_slot *ms;

	cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
	ms = gfn_to_memslot(kvm, cur_gfn);
	args->count = 0;
	args->start_gfn = cur_gfn;
	/* The search did not end inside a memslot: nothing to report */
	if (!ms)
		return 0;
	next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
	/* End of the slot stored at index 0 of the memslot array */
	mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;

	while (args->count < bufsize) {
		hva = gfn_to_hva(kvm, cur_gfn);
		if (kvm_is_error_hva(hva))
			return 0;
		/* Decrement only if we actually flipped the bit to 0 */
		if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
			atomic64_dec(&kvm->arch.cmma_dirty_pages);
		/* A page without a readable PGSTE is reported as all zeroes */
		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
			pgstev = 0;
		/* Save the value */
		res[args->count++] = (pgstev >> 24) & 0x43;
		/* If the next bit is too far away, stop. */
		if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
			return 0;
		/* If we reached the previous "next", find the next one */
		if (cur_gfn == next_gfn)
			next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
		/* Reached the end of memory or of the buffer, stop */
		if ((next_gfn >= mem_end) ||
		    (next_gfn - args->start_gfn >= bufsize))
			return 0;
		cur_gfn++;
		/* Reached the end of the current memslot, take the next one. */
		if (cur_gfn - ms->base_gfn >= ms->npages) {
			ms = gfn_to_memslot(kvm, cur_gfn);
			if (!ms)
				return 0;
		}
	}
	return 0;
}
2036 
2037 /*
2038  * This function searches for the next page with dirty CMMA attributes, and
2039  * saves the attributes in the buffer up to either the end of the buffer or
2040  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2041  * no trailing clean bytes are saved.
2042  * In case no dirty bits were found, or if CMMA was not enabled or used, the
2043  * output buffer will indicate 0 as length.
2044  */
2045 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2046 				  struct kvm_s390_cmma_log *args)
2047 {
2048 	unsigned long bufsize;
2049 	int srcu_idx, peek, ret;
2050 	u8 *values;
2051 
2052 	if (!kvm->arch.use_cmma)
2053 		return -ENXIO;
2054 	/* Invalid/unsupported flags were specified */
2055 	if (args->flags & ~KVM_S390_CMMA_PEEK)
2056 		return -EINVAL;
2057 	/* Migration mode query, and we are not doing a migration */
2058 	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2059 	if (!peek && !kvm->arch.migration_mode)
2060 		return -EINVAL;
2061 	/* CMMA is disabled or was not used, or the buffer has length zero */
2062 	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2063 	if (!bufsize || !kvm->mm->context.uses_cmm) {
2064 		memset(args, 0, sizeof(*args));
2065 		return 0;
2066 	}
2067 	/* We are not peeking, and there are no dirty pages */
2068 	if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2069 		memset(args, 0, sizeof(*args));
2070 		return 0;
2071 	}
2072 
2073 	values = vmalloc(bufsize);
2074 	if (!values)
2075 		return -ENOMEM;
2076 
2077 	down_read(&kvm->mm->mmap_sem);
2078 	srcu_idx = srcu_read_lock(&kvm->srcu);
2079 	if (peek)
2080 		ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2081 	else
2082 		ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2083 	srcu_read_unlock(&kvm->srcu, srcu_idx);
2084 	up_read(&kvm->mm->mmap_sem);
2085 
2086 	if (kvm->arch.migration_mode)
2087 		args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2088 	else
2089 		args->remaining = 0;
2090 
2091 	if (copy_to_user((void __user *)args->values, values, args->count))
2092 		ret = -EFAULT;
2093 
2094 	vfree(values);
2095 	return ret;
2096 }
2097 
2098 /*
2099  * This function sets the CMMA attributes for the given pages. If the input
2100  * buffer has zero length, no action is taken, otherwise the attributes are
2101  * set and the mm->context.uses_cmm flag is set.
2102  */
2103 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2104 				  const struct kvm_s390_cmma_log *args)
2105 {
2106 	unsigned long hva, mask, pgstev, i;
2107 	uint8_t *bits;
2108 	int srcu_idx, r = 0;
2109 
2110 	mask = args->mask;
2111 
2112 	if (!kvm->arch.use_cmma)
2113 		return -ENXIO;
2114 	/* invalid/unsupported flags */
2115 	if (args->flags != 0)
2116 		return -EINVAL;
2117 	/* Enforce sane limit on memory allocation */
2118 	if (args->count > KVM_S390_CMMA_SIZE_MAX)
2119 		return -EINVAL;
2120 	/* Nothing to do */
2121 	if (args->count == 0)
2122 		return 0;
2123 
2124 	bits = vmalloc(array_size(sizeof(*bits), args->count));
2125 	if (!bits)
2126 		return -ENOMEM;
2127 
2128 	r = copy_from_user(bits, (void __user *)args->values, args->count);
2129 	if (r) {
2130 		r = -EFAULT;
2131 		goto out;
2132 	}
2133 
2134 	down_read(&kvm->mm->mmap_sem);
2135 	srcu_idx = srcu_read_lock(&kvm->srcu);
2136 	for (i = 0; i < args->count; i++) {
2137 		hva = gfn_to_hva(kvm, args->start_gfn + i);
2138 		if (kvm_is_error_hva(hva)) {
2139 			r = -EFAULT;
2140 			break;
2141 		}
2142 
2143 		pgstev = bits[i];
2144 		pgstev = pgstev << 24;
2145 		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2146 		set_pgste_bits(kvm->mm, hva, mask, pgstev);
2147 	}
2148 	srcu_read_unlock(&kvm->srcu, srcu_idx);
2149 	up_read(&kvm->mm->mmap_sem);
2150 
2151 	if (!kvm->mm->context.uses_cmm) {
2152 		down_write(&kvm->mm->mmap_sem);
2153 		kvm->mm->context.uses_cmm = 1;
2154 		up_write(&kvm->mm->mmap_sem);
2155 	}
2156 out:
2157 	vfree(bits);
2158 	return r;
2159 }
2160 
2161 long kvm_arch_vm_ioctl(struct file *filp,
2162 		       unsigned int ioctl, unsigned long arg)
2163 {
2164 	struct kvm *kvm = filp->private_data;
2165 	void __user *argp = (void __user *)arg;
2166 	struct kvm_device_attr attr;
2167 	int r;
2168 
2169 	switch (ioctl) {
2170 	case KVM_S390_INTERRUPT: {
2171 		struct kvm_s390_interrupt s390int;
2172 
2173 		r = -EFAULT;
2174 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
2175 			break;
2176 		r = kvm_s390_inject_vm(kvm, &s390int);
2177 		break;
2178 	}
2179 	case KVM_CREATE_IRQCHIP: {
2180 		struct kvm_irq_routing_entry routing;
2181 
2182 		r = -EINVAL;
2183 		if (kvm->arch.use_irqchip) {
2184 			/* Set up dummy routing. */
2185 			memset(&routing, 0, sizeof(routing));
2186 			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2187 		}
2188 		break;
2189 	}
2190 	case KVM_SET_DEVICE_ATTR: {
2191 		r = -EFAULT;
2192 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2193 			break;
2194 		r = kvm_s390_vm_set_attr(kvm, &attr);
2195 		break;
2196 	}
2197 	case KVM_GET_DEVICE_ATTR: {
2198 		r = -EFAULT;
2199 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2200 			break;
2201 		r = kvm_s390_vm_get_attr(kvm, &attr);
2202 		break;
2203 	}
2204 	case KVM_HAS_DEVICE_ATTR: {
2205 		r = -EFAULT;
2206 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2207 			break;
2208 		r = kvm_s390_vm_has_attr(kvm, &attr);
2209 		break;
2210 	}
2211 	case KVM_S390_GET_SKEYS: {
2212 		struct kvm_s390_skeys args;
2213 
2214 		r = -EFAULT;
2215 		if (copy_from_user(&args, argp,
2216 				   sizeof(struct kvm_s390_skeys)))
2217 			break;
2218 		r = kvm_s390_get_skeys(kvm, &args);
2219 		break;
2220 	}
2221 	case KVM_S390_SET_SKEYS: {
2222 		struct kvm_s390_skeys args;
2223 
2224 		r = -EFAULT;
2225 		if (copy_from_user(&args, argp,
2226 				   sizeof(struct kvm_s390_skeys)))
2227 			break;
2228 		r = kvm_s390_set_skeys(kvm, &args);
2229 		break;
2230 	}
2231 	case KVM_S390_GET_CMMA_BITS: {
2232 		struct kvm_s390_cmma_log args;
2233 
2234 		r = -EFAULT;
2235 		if (copy_from_user(&args, argp, sizeof(args)))
2236 			break;
2237 		mutex_lock(&kvm->slots_lock);
2238 		r = kvm_s390_get_cmma_bits(kvm, &args);
2239 		mutex_unlock(&kvm->slots_lock);
2240 		if (!r) {
2241 			r = copy_to_user(argp, &args, sizeof(args));
2242 			if (r)
2243 				r = -EFAULT;
2244 		}
2245 		break;
2246 	}
2247 	case KVM_S390_SET_CMMA_BITS: {
2248 		struct kvm_s390_cmma_log args;
2249 
2250 		r = -EFAULT;
2251 		if (copy_from_user(&args, argp, sizeof(args)))
2252 			break;
2253 		mutex_lock(&kvm->slots_lock);
2254 		r = kvm_s390_set_cmma_bits(kvm, &args);
2255 		mutex_unlock(&kvm->slots_lock);
2256 		break;
2257 	}
2258 	default:
2259 		r = -ENOTTY;
2260 	}
2261 
2262 	return r;
2263 }
2264 
2265 static int kvm_s390_apxa_installed(void)
2266 {
2267 	struct ap_config_info info;
2268 
2269 	if (ap_instructions_available()) {
2270 		if (ap_qci(&info) == 0)
2271 			return info.apxa;
2272 	}
2273 
2274 	return 0;
2275 }
2276 
2277 /*
2278  * The format of the crypto control block (CRYCB) is specified in the 3 low
2279  * order bits of the CRYCB designation (CRYCBD) field as follows:
2280  * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2281  *	     AP extended addressing (APXA) facility are installed.
2282  * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2283  * Format 2: Both the APXA and MSAX3 facilities are installed
2284  */
2285 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2286 {
2287 	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2288 
2289 	/* Clear the CRYCB format bits - i.e., set format 0 by default */
2290 	kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2291 
2292 	/* Check whether MSAX3 is installed */
2293 	if (!test_kvm_facility(kvm, 76))
2294 		return;
2295 
2296 	if (kvm_s390_apxa_installed())
2297 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2298 	else
2299 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2300 }
2301 
2302 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2303 			       unsigned long *aqm, unsigned long *adm)
2304 {
2305 	struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2306 
2307 	mutex_lock(&kvm->lock);
2308 	kvm_s390_vcpu_block_all(kvm);
2309 
2310 	switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2311 	case CRYCB_FORMAT2: /* APCB1 use 256 bits */
2312 		memcpy(crycb->apcb1.apm, apm, 32);
2313 		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2314 			 apm[0], apm[1], apm[2], apm[3]);
2315 		memcpy(crycb->apcb1.aqm, aqm, 32);
2316 		VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2317 			 aqm[0], aqm[1], aqm[2], aqm[3]);
2318 		memcpy(crycb->apcb1.adm, adm, 32);
2319 		VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2320 			 adm[0], adm[1], adm[2], adm[3]);
2321 		break;
2322 	case CRYCB_FORMAT1:
2323 	case CRYCB_FORMAT0: /* Fall through both use APCB0 */
2324 		memcpy(crycb->apcb0.apm, apm, 8);
2325 		memcpy(crycb->apcb0.aqm, aqm, 2);
2326 		memcpy(crycb->apcb0.adm, adm, 2);
2327 		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2328 			 apm[0], *((unsigned short *)aqm),
2329 			 *((unsigned short *)adm));
2330 		break;
2331 	default:	/* Can not happen */
2332 		break;
2333 	}
2334 
2335 	/* recreate the shadow crycb for each vcpu */
2336 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2337 	kvm_s390_vcpu_unblock_all(kvm);
2338 	mutex_unlock(&kvm->lock);
2339 }
2340 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
2341 
2342 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2343 {
2344 	mutex_lock(&kvm->lock);
2345 	kvm_s390_vcpu_block_all(kvm);
2346 
2347 	memset(&kvm->arch.crypto.crycb->apcb0, 0,
2348 	       sizeof(kvm->arch.crypto.crycb->apcb0));
2349 	memset(&kvm->arch.crypto.crycb->apcb1, 0,
2350 	       sizeof(kvm->arch.crypto.crycb->apcb1));
2351 
2352 	VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2353 	/* recreate the shadow crycb for each vcpu */
2354 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2355 	kvm_s390_vcpu_unblock_all(kvm);
2356 	mutex_unlock(&kvm->lock);
2357 }
2358 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2359 
2360 static u64 kvm_s390_get_initial_cpuid(void)
2361 {
2362 	struct cpuid cpuid;
2363 
2364 	get_cpu_id(&cpuid);
2365 	cpuid.version = 0xff;
2366 	return *((u64 *) &cpuid);
2367 }
2368 
2369 static void kvm_s390_crypto_init(struct kvm *kvm)
2370 {
2371 	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2372 	kvm_s390_set_crycb_format(kvm);
2373 
2374 	if (!test_kvm_facility(kvm, 76))
2375 		return;
2376 
2377 	/* Enable AES/DEA protected key functions by default */
2378 	kvm->arch.crypto.aes_kw = 1;
2379 	kvm->arch.crypto.dea_kw = 1;
2380 	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2381 			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2382 	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2383 			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2384 }
2385 
2386 static void sca_dispose(struct kvm *kvm)
2387 {
2388 	if (kvm->arch.use_esca)
2389 		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2390 	else
2391 		free_page((unsigned long)(kvm->arch.sca));
2392 	kvm->arch.sca = NULL;
2393 }
2394 
/*
 * Architecture specific part of VM creation.
 *
 * Validates @type, enables SIE for the calling mm, allocates the basic SCA,
 * the debug feature area and sie_page2, derives the facility mask/list and
 * CPU model from the host, initialises crypto, floating interrupt and IPTE
 * state, and creates the guest address space (gmap) unless the VM is of the
 * user-controlled kind.
 *
 * Returns 0 on success. On failure the page, debug area and SCA allocated so
 * far are released and -EINVAL (bad or unpermitted type), the error from
 * s390_enable_sie(), or -ENOMEM is returned.
 */
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
	gfp_t alloc_flags = GFP_KERNEL;
	int i, rc;
	char debug_name[16];
	/* Shared by all VMs; modified under kvm_lock below */
	static unsigned long sca_offset;

	rc = -EINVAL;
#ifdef CONFIG_KVM_S390_UCONTROL
	if (type & ~KVM_VM_S390_UCONTROL)
		goto out_err;
	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
		goto out_err;
#else
	if (type)
		goto out_err;
#endif

	rc = s390_enable_sie();
	if (rc)
		goto out_err;

	/* Every failure from here on is an allocation failure */
	rc = -ENOMEM;

	if (!sclp.has_64bscao)
		alloc_flags |= GFP_DMA;
	rwlock_init(&kvm->arch.sca_lock);
	/* start with basic SCA */
	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
	if (!kvm->arch.sca)
		goto out_err;
	/*
	 * Place the SCA at a per-VM offset inside its page; the offset
	 * advances by 16 bytes per VM and wraps before the block would
	 * cross the page boundary.
	 */
	mutex_lock(&kvm_lock);
	sca_offset += 16;
	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
		sca_offset = 0;
	kvm->arch.sca = (struct bsca_block *)
			((char *) kvm->arch.sca + sca_offset);
	mutex_unlock(&kvm_lock);

	sprintf(debug_name, "kvm-%u", current->pid);

	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
	if (!kvm->arch.dbf)
		goto out_err;

	BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
	kvm->arch.sie_page2 =
	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
	if (!kvm->arch.sie_page2)
		goto out_err;

	kvm->arch.sie_page2->kvm = kvm;
	/* The facility list lives inside sie_page2 */
	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;

	/*
	 * mask: host facilities that are in the base or extended set;
	 * list: host facilities that are in the base set only.
	 */
	for (i = 0; i < kvm_s390_fac_size(); i++) {
		kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
					      (kvm_s390_fac_base[i] |
					       kvm_s390_fac_ext[i]);
		kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
					      kvm_s390_fac_base[i];
	}
	kvm->arch.model.subfuncs = kvm_s390_available_subfunc;

	/* we are always in czam mode - even on pre z14 machines */
	set_kvm_facility(kvm->arch.model.fac_mask, 138);
	set_kvm_facility(kvm->arch.model.fac_list, 138);
	/* we emulate STHYI in kvm */
	set_kvm_facility(kvm->arch.model.fac_mask, 74);
	set_kvm_facility(kvm->arch.model.fac_list, 74);
	if (MACHINE_HAS_TLB_GUEST) {
		set_kvm_facility(kvm->arch.model.fac_mask, 147);
		set_kvm_facility(kvm->arch.model.fac_list, 147);
	}

	/* Facility 65 is only added to the mask, not to the default list */
	if (css_general_characteristics.aiv && test_facility(65))
		set_kvm_facility(kvm->arch.model.fac_mask, 65);

	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
	kvm->arch.model.ibc = sclp.ibc & 0x0fff;

	kvm_s390_crypto_init(kvm);

	mutex_init(&kvm->arch.float_int.ais_lock);
	spin_lock_init(&kvm->arch.float_int.lock);
	for (i = 0; i < FIRQ_LIST_COUNT; i++)
		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
	init_waitqueue_head(&kvm->arch.ipte_wq);
	mutex_init(&kvm->arch.ipte_mutex);

	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
	VM_EVENT(kvm, 3, "vm created with type %lu", type);

	if (type & KVM_VM_S390_UCONTROL) {
		/* User-controlled VMs get no VM-wide gmap and no memory limit */
		kvm->arch.gmap = NULL;
		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
	} else {
		/* Limit guest memory to what both the task and sclp.hamax allow */
		if (sclp.hamax == U64_MAX)
			kvm->arch.mem_limit = TASK_SIZE_MAX;
		else
			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
						    sclp.hamax + 1);
		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
		if (!kvm->arch.gmap)
			goto out_err;
		kvm->arch.gmap->private = kvm;
		kvm->arch.gmap->pfault_enabled = 0;
	}

	kvm->arch.use_pfmfi = sclp.has_pfmfi;
	kvm->arch.use_skf = sclp.has_skey;
	spin_lock_init(&kvm->arch.start_stop_lock);
	kvm_s390_vsie_init(kvm);
	kvm_s390_gisa_init(kvm);
	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);

	return 0;
out_err:
	/* Common unwind; runs regardless of how far initialisation got */
	free_page((unsigned long)kvm->arch.sie_page2);
	debug_unregister(kvm->arch.dbf);
	sca_dispose(kvm);
	KVM_EVENT(3, "creation of vm failed: %d", rc);
	return rc;
}
2518 
2519 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2520 {
2521 	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2522 	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2523 	kvm_s390_clear_local_irqs(vcpu);
2524 	kvm_clear_async_pf_completion_queue(vcpu);
2525 	if (!kvm_is_ucontrol(vcpu->kvm))
2526 		sca_del_vcpu(vcpu);
2527 
2528 	if (kvm_is_ucontrol(vcpu->kvm))
2529 		gmap_remove(vcpu->arch.gmap);
2530 
2531 	if (vcpu->kvm->arch.use_cmma)
2532 		kvm_s390_vcpu_unsetup_cmma(vcpu);
2533 	free_page((unsigned long)(vcpu->arch.sie_block));
2534 }
2535 
2536 static void kvm_free_vcpus(struct kvm *kvm)
2537 {
2538 	unsigned int i;
2539 	struct kvm_vcpu *vcpu;
2540 
2541 	kvm_for_each_vcpu(i, vcpu, kvm)
2542 		kvm_vcpu_destroy(vcpu);
2543 
2544 	mutex_lock(&kvm->lock);
2545 	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2546 		kvm->vcpus[i] = NULL;
2547 
2548 	atomic_set(&kvm->online_vcpus, 0);
2549 	mutex_unlock(&kvm->lock);
2550 }
2551 
/*
 * Architecture specific VM teardown. Releases, in this order: all vcpus, the
 * SCA, the debug feature area, the GISA, sie_page2, the VM-wide gmap (not
 * present for user-controlled VMs), the adapters, pending floating
 * interrupts and the VSIE state.
 */
void kvm_arch_destroy_vm(struct kvm *kvm)
{
	kvm_free_vcpus(kvm);
	sca_dispose(kvm);
	debug_unregister(kvm->arch.dbf);
	kvm_s390_gisa_destroy(kvm);
	free_page((unsigned long)kvm->arch.sie_page2);
	/* User-controlled VMs have per-vcpu gmaps, removed with each vcpu */
	if (!kvm_is_ucontrol(kvm))
		gmap_remove(kvm->arch.gmap);
	kvm_s390_destroy_adapters(kvm);
	kvm_s390_clear_float_irqs(kvm);
	kvm_s390_vsie_destroy(kvm);
	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
}
2566 
2567 /* Section: vcpu related */
2568 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2569 {
2570 	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2571 	if (!vcpu->arch.gmap)
2572 		return -ENOMEM;
2573 	vcpu->arch.gmap->private = vcpu->kvm;
2574 
2575 	return 0;
2576 }
2577 
/*
 * Remove the vcpu from the VM's SCA: clear its bit in the mcn mask and zero
 * the SIE block address (sda) of its entry. Does nothing when SCA entries
 * are not in use. The SCA is accessed under the read side of sca_lock; the
 * block layout (extended or basic) is chosen by kvm->arch.use_esca.
 */
static void sca_del_vcpu(struct kvm_vcpu *vcpu)
{
	if (!kvm_s390_use_sca_entries())
		return;
	read_lock(&vcpu->kvm->arch.sca_lock);
	if (vcpu->kvm->arch.use_esca) {
		struct esca_block *sca = vcpu->kvm->arch.sca;

		/* mcn is passed as-is here: an array in the extended block */
		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
		sca->cpu[vcpu->vcpu_id].sda = 0;
	} else {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		/* mcn needs its address taken in the basic block */
		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
		sca->cpu[vcpu->vcpu_id].sda = 0;
	}
	read_unlock(&vcpu->kvm->arch.sca_lock);
}
2596 
/*
 * Hook the vcpu up to the VM's SCA.
 *
 * The SCA address is always stored in the vcpu's SIE block, split into its
 * high (scaoh) and low (scaol) 32 bits. When SCA entries are in use, the
 * vcpu's entry additionally receives the address of the SIE block (sda) and
 * the vcpu's bit in the mcn mask is set, under the read side of sca_lock.
 */
static void sca_add_vcpu(struct kvm_vcpu *vcpu)
{
	if (!kvm_s390_use_sca_entries()) {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		/* we still need the basic sca for the ipte control */
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
		return;
	}
	read_lock(&vcpu->kvm->arch.sca_lock);
	if (vcpu->kvm->arch.use_esca) {
		struct esca_block *sca = vcpu->kvm->arch.sca;

		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		/* The low 6 address bits are masked off for the extended SCA */
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
	} else {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
	}
	read_unlock(&vcpu->kvm->arch.sca_lock);
}
2626 
2627 /* Basic SCA to Extended SCA data copy routines */
2628 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2629 {
2630 	d->sda = s->sda;
2631 	d->sigp_ctrl.c = s->sigp_ctrl.c;
2632 	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2633 }
2634 
2635 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2636 {
2637 	int i;
2638 
2639 	d->ipte_control = s->ipte_control;
2640 	d->mcn[0] = s->mcn;
2641 	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2642 		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2643 }
2644 
/*
 * Replace the VM's basic SCA with a newly allocated extended SCA.
 *
 * With all vcpus blocked and sca_lock held for writing, the contents of the
 * basic SCA are copied over, every vcpu's SIE block is pointed at the new
 * block and flagged with ECB2_ESCA, and the VM is switched to the new block.
 * The old page is freed afterwards.
 *
 * Returns 0 on success or -ENOMEM if the extended block cannot be allocated.
 */
static int sca_switch_to_extended(struct kvm *kvm)
{
	struct bsca_block *old_sca = kvm->arch.sca;
	struct esca_block *new_sca;
	struct kvm_vcpu *vcpu;
	unsigned int vcpu_idx;
	u32 scaol, scaoh;

	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
	if (!new_sca)
		return -ENOMEM;

	/* Split the new address as it is stored in the SIE blocks */
	scaoh = (u32)((u64)(new_sca) >> 32);
	scaol = (u32)(u64)(new_sca) & ~0x3fU;

	kvm_s390_vcpu_block_all(kvm);
	write_lock(&kvm->arch.sca_lock);

	sca_copy_b_to_e(new_sca, old_sca);

	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
		vcpu->arch.sie_block->scaoh = scaoh;
		vcpu->arch.sie_block->scaol = scaol;
		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
	}
	kvm->arch.sca = new_sca;
	kvm->arch.use_esca = 1;

	write_unlock(&kvm->arch.sca_lock);
	kvm_s390_vcpu_unblock_all(kvm);

	/* No SIE block refers to the old block any more */
	free_page((unsigned long)old_sca);

	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
		 old_sca, kvm->arch.sca);
	return 0;
}
2682 
2683 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2684 {
2685 	int rc;
2686 
2687 	if (!kvm_s390_use_sca_entries()) {
2688 		if (id < KVM_MAX_VCPUS)
2689 			return true;
2690 		return false;
2691 	}
2692 	if (id < KVM_S390_BSCA_CPU_SLOTS)
2693 		return true;
2694 	if (!sclp.has_esca || !sclp.has_64bscao)
2695 		return false;
2696 
2697 	mutex_lock(&kvm->lock);
2698 	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2699 	mutex_unlock(&kvm->lock);
2700 
2701 	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2702 }
2703 
/*
 * Record the current TOD clock as the start of a CPU timer accounting
 * interval. The update is wrapped in a cputm_seqcount write section so that
 * readers retry. Warns once if an interval is already open.
 *
 * needs disabled preemption to protect from TOD sync and vcpu_load/put
 */
static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	vcpu->arch.cputm_start = get_tod_clock_fast();
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
}
2712 
/*
 * Close the current CPU timer accounting interval: subtract the TOD time
 * elapsed since cputm_start from the SIE block's cputm and reset cputm_start
 * to 0, inside a cputm_seqcount write section. Warns once if no interval is
 * open.
 *
 * needs disabled preemption to protect from TOD sync and vcpu_load/put
 */
static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
	vcpu->arch.cputm_start = 0;
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
}
2722 
/*
 * Mark CPU timer accounting as enabled for the vcpu and open an accounting
 * interval. Warns once if accounting is already enabled.
 *
 * needs disabled preemption to protect from TOD sync and vcpu_load/put
 */
static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
	vcpu->arch.cputm_enabled = true;
	__start_cpu_timer_accounting(vcpu);
}
2730 
/*
 * Close the open accounting interval and mark CPU timer accounting as
 * disabled for the vcpu. Warns once if accounting is not enabled.
 *
 * needs disabled preemption to protect from TOD sync and vcpu_load/put
 */
static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
	__stop_cpu_timer_accounting(vcpu);
	vcpu->arch.cputm_enabled = false;
}
2738 
/* Preemption-safe wrapper around __enable_cpu_timer_accounting() */
static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	__enable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
2745 
/* Preemption-safe wrapper around __disable_cpu_timer_accounting() */
static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	__disable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
2752 
/*
 * set the cpu timer - may only be called from the VCPU thread itself
 *
 * Stores cputm in the SIE block inside a cputm_seqcount write section. If
 * accounting is enabled, the accounting interval is restarted at the current
 * TOD clock so that time already elapsed is not charged to the new value.
 */
void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	if (vcpu->arch.cputm_enabled)
		vcpu->arch.cputm_start = get_tod_clock_fast();
	vcpu->arch.sie_block->cputm = cputm;
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
	preempt_enable();
}
2764 
/*
 * update and get the cpu timer - can also be called from other VCPU threads
 *
 * With accounting disabled the SIE block's cputm is returned as is.
 * Otherwise the value is read under the cputm_seqcount read protocol and,
 * if an accounting interval is open, reduced by the TOD time elapsed since
 * the interval started. The stored value itself is not modified here.
 */
__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
{
	unsigned int seq;
	__u64 value;

	if (unlikely(!vcpu->arch.cputm_enabled))
		return vcpu->arch.sie_block->cputm;

	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	do {
		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
		/*
		 * If the writer would ever execute a read in the critical
		 * section, e.g. in irq context, we have a deadlock.
		 */
		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
		value = vcpu->arch.sie_block->cputm;
		/* if cputm_start is 0, accounting is being started/stopped */
		if (likely(vcpu->arch.cputm_start))
			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
		/* seq & ~1: an odd (write in progress) snapshot always retries */
	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
	preempt_enable();
	return value;
}
2790 
/*
 * Called when the vcpu is loaded onto a host CPU: re-enable the gmap that
 * was active at the last vcpu_put, flag the vcpu as running, resume CPU
 * timer accounting if it is enabled and the vcpu is not idle, and record
 * the host CPU number.
 */
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{

	gmap_enable(vcpu->arch.enabled_gmap);
	kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
		__start_cpu_timer_accounting(vcpu);
	vcpu->cpu = cpu;
}
2800 
/*
 * Called when the vcpu is taken off its host CPU; mirrors
 * kvm_arch_vcpu_load() in reverse order: forget the host CPU, pause CPU
 * timer accounting if it is enabled and the vcpu is not idle, clear the
 * running flag, and remember and disable the currently enabled gmap.
 */
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	vcpu->cpu = -1;
	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
		__stop_cpu_timer_accounting(vcpu);
	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
	/* Saved so that the next vcpu_load can re-enable the same gmap */
	vcpu->arch.enabled_gmap = gmap_get_enabled();
	gmap_disable(vcpu->arch.enabled_gmap);

}
2811 
/*
 * Final per-vcpu setup after creation: copy the VM's epoch and epoch
 * extension into the SIE block (under kvm->lock), attach regular
 * (non user-controlled) vcpus to the VM-wide gmap and the SCA, enable
 * operation exception interception if facility 74 is available to the
 * guest or user_instr0 is set, and preselect the gmap for the first
 * vcpu_load.
 */
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
	mutex_lock(&vcpu->kvm->lock);
	preempt_disable();
	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
	vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
	preempt_enable();
	mutex_unlock(&vcpu->kvm->lock);
	if (!kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
		sca_add_vcpu(vcpu);
	}
	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
	/* make vcpu_load load the right gmap on the first trigger */
	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
}
2829 
2830 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
2831 {
2832 	if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
2833 	    test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
2834 		return true;
2835 	return false;
2836 }
2837 
2838 static bool kvm_has_pckmo_ecc(struct kvm *kvm)
2839 {
2840 	/* At least one ECC subfunction must be present */
2841 	return kvm_has_pckmo_subfunc(kvm, 32) ||
2842 	       kvm_has_pckmo_subfunc(kvm, 33) ||
2843 	       kvm_has_pckmo_subfunc(kvm, 34) ||
2844 	       kvm_has_pckmo_subfunc(kvm, 40) ||
2845 	       kvm_has_pckmo_subfunc(kvm, 41);
2846 
2847 }
2848 
2849 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2850 {
2851 	/*
2852 	 * If the AP instructions are not being interpreted and the MSAX3
2853 	 * facility is not configured for the guest, there is nothing to set up.
2854 	 */
2855 	if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
2856 		return;
2857 
2858 	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2859 	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2860 	vcpu->arch.sie_block->eca &= ~ECA_APIE;
2861 	vcpu->arch.sie_block->ecd &= ~ECD_ECC;
2862 
2863 	if (vcpu->kvm->arch.crypto.apie)
2864 		vcpu->arch.sie_block->eca |= ECA_APIE;
2865 
2866 	/* Set up protected key support */
2867 	if (vcpu->kvm->arch.crypto.aes_kw) {
2868 		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2869 		/* ecc is also wrapped with AES key */
2870 		if (kvm_has_pckmo_ecc(vcpu->kvm))
2871 			vcpu->arch.sie_block->ecd |= ECD_ECC;
2872 	}
2873 
2874 	if (vcpu->kvm->arch.crypto.dea_kw)
2875 		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2876 }
2877 
2878 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2879 {
2880 	free_page(vcpu->arch.sie_block->cbrlo);
2881 	vcpu->arch.sie_block->cbrlo = 0;
2882 }
2883 
2884 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2885 {
2886 	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2887 	if (!vcpu->arch.sie_block->cbrlo)
2888 		return -ENOMEM;
2889 	return 0;
2890 }
2891 
2892 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2893 {
2894 	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2895 
2896 	vcpu->arch.sie_block->ibc = model->ibc;
2897 	if (test_kvm_facility(vcpu->kvm, 7))
2898 		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2899 }
2900 
2901 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
2902 {
2903 	int rc = 0;
2904 
2905 	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2906 						    CPUSTAT_SM |
2907 						    CPUSTAT_STOPPED);
2908 
2909 	if (test_kvm_facility(vcpu->kvm, 78))
2910 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
2911 	else if (test_kvm_facility(vcpu->kvm, 8))
2912 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
2913 
2914 	kvm_s390_vcpu_setup_model(vcpu);
2915 
2916 	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2917 	if (MACHINE_HAS_ESOP)
2918 		vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2919 	if (test_kvm_facility(vcpu->kvm, 9))
2920 		vcpu->arch.sie_block->ecb |= ECB_SRSI;
2921 	if (test_kvm_facility(vcpu->kvm, 73))
2922 		vcpu->arch.sie_block->ecb |= ECB_TE;
2923 
2924 	if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
2925 		vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2926 	if (test_kvm_facility(vcpu->kvm, 130))
2927 		vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2928 	vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2929 	if (sclp.has_cei)
2930 		vcpu->arch.sie_block->eca |= ECA_CEI;
2931 	if (sclp.has_ib)
2932 		vcpu->arch.sie_block->eca |= ECA_IB;
2933 	if (sclp.has_siif)
2934 		vcpu->arch.sie_block->eca |= ECA_SII;
2935 	if (sclp.has_sigpif)
2936 		vcpu->arch.sie_block->eca |= ECA_SIGPI;
2937 	if (test_kvm_facility(vcpu->kvm, 129)) {
2938 		vcpu->arch.sie_block->eca |= ECA_VX;
2939 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2940 	}
2941 	if (test_kvm_facility(vcpu->kvm, 139))
2942 		vcpu->arch.sie_block->ecd |= ECD_MEF;
2943 	if (test_kvm_facility(vcpu->kvm, 156))
2944 		vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
2945 	if (vcpu->arch.sie_block->gd) {
2946 		vcpu->arch.sie_block->eca |= ECA_AIV;
2947 		VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
2948 			   vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
2949 	}
2950 	vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2951 					| SDNXC;
2952 	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2953 
2954 	if (sclp.has_kss)
2955 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
2956 	else
2957 		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2958 
2959 	if (vcpu->kvm->arch.use_cmma) {
2960 		rc = kvm_s390_vcpu_setup_cmma(vcpu);
2961 		if (rc)
2962 			return rc;
2963 	}
2964 	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2965 	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2966 
2967 	vcpu->arch.sie_block->hpid = HPID_KVM;
2968 
2969 	kvm_s390_vcpu_crypto_setup(vcpu);
2970 
2971 	return rc;
2972 }
2973 
2974 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
2975 {
2976 	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2977 		return -EINVAL;
2978 	return 0;
2979 }
2980 
2981 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
2982 {
2983 	struct sie_page *sie_page;
2984 	int rc;
2985 
2986 	BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
2987 	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2988 	if (!sie_page)
2989 		return -ENOMEM;
2990 
2991 	vcpu->arch.sie_block = &sie_page->sie_block;
2992 	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2993 
2994 	/* the real guest size will always be smaller than msl */
2995 	vcpu->arch.sie_block->mso = 0;
2996 	vcpu->arch.sie_block->msl = sclp.hamax;
2997 
2998 	vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
2999 	spin_lock_init(&vcpu->arch.local_int.lock);
3000 	vcpu->arch.sie_block->gd = (u32)(u64)vcpu->kvm->arch.gisa_int.origin;
3001 	if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
3002 		vcpu->arch.sie_block->gd |= GISA_FORMAT1;
3003 	seqcount_init(&vcpu->arch.cputm_seqcount);
3004 
3005 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3006 	kvm_clear_async_pf_completion_queue(vcpu);
3007 	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
3008 				    KVM_SYNC_GPRS |
3009 				    KVM_SYNC_ACRS |
3010 				    KVM_SYNC_CRS |
3011 				    KVM_SYNC_ARCH0 |
3012 				    KVM_SYNC_PFAULT;
3013 	kvm_s390_set_prefix(vcpu, 0);
3014 	if (test_kvm_facility(vcpu->kvm, 64))
3015 		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
3016 	if (test_kvm_facility(vcpu->kvm, 82))
3017 		vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
3018 	if (test_kvm_facility(vcpu->kvm, 133))
3019 		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
3020 	if (test_kvm_facility(vcpu->kvm, 156))
3021 		vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
3022 	/* fprs can be synchronized via vrs, even if the guest has no vx. With
3023 	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
3024 	 */
3025 	if (MACHINE_HAS_VX)
3026 		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
3027 	else
3028 		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
3029 
3030 	if (kvm_is_ucontrol(vcpu->kvm)) {
3031 		rc = __kvm_ucontrol_vcpu_init(vcpu);
3032 		if (rc)
3033 			goto out_free_sie_block;
3034 	}
3035 
3036 	VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
3037 		 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3038 	trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3039 
3040 	rc = kvm_s390_vcpu_setup(vcpu);
3041 	if (rc)
3042 		goto out_ucontrol_uninit;
3043 	return 0;
3044 
3045 out_ucontrol_uninit:
3046 	if (kvm_is_ucontrol(vcpu->kvm))
3047 		gmap_remove(vcpu->arch.gmap);
3048 out_free_sie_block:
3049 	free_page((unsigned long)(vcpu->arch.sie_block));
3050 	return rc;
3051 }
3052 
/* A vcpu is reported runnable when kvm_s390_vcpu_has_irq(vcpu, 0) says so. */
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	int has_irq = kvm_s390_vcpu_has_irq(vcpu, 0);

	return has_irq;
}
3057 
3058 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3059 {
3060 	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3061 }
3062 
3063 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3064 {
3065 	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3066 	exit_sie(vcpu);
3067 }
3068 
3069 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3070 {
3071 	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3072 }
3073 
3074 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3075 {
3076 	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3077 	exit_sie(vcpu);
3078 }
3079 
3080 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3081 {
3082 	return atomic_read(&vcpu->arch.sie_block->prog20) &
3083 	       (PROG_BLOCK_SIE | PROG_REQUEST);
3084 }
3085 
3086 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3087 {
3088 	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3089 }
3090 
3091 /*
3092  * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3093  * If the CPU is not running (e.g. waiting as idle) the function will
3094  * return immediately. */
3095 void exit_sie(struct kvm_vcpu *vcpu)
3096 {
3097 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3098 	kvm_s390_vsie_kick(vcpu);
3099 	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3100 		cpu_relax();
3101 }
3102 
/*
 * Kick a guest cpu out of SIE to process a request synchronously:
 * the request @req is posted first, then the vcpu is forced out of SIE.
 */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
	kvm_make_request(req, vcpu);

	kvm_s390_vcpu_request(vcpu);
}
3109 
3110 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3111 			      unsigned long end)
3112 {
3113 	struct kvm *kvm = gmap->private;
3114 	struct kvm_vcpu *vcpu;
3115 	unsigned long prefix;
3116 	int i;
3117 
3118 	if (gmap_is_shadow(gmap))
3119 		return;
3120 	if (start >= 1UL << 31)
3121 		/* We are only interested in prefix pages */
3122 		return;
3123 	kvm_for_each_vcpu(i, vcpu, kvm) {
3124 		/* match against both prefix pages */
3125 		prefix = kvm_s390_get_prefix(vcpu);
3126 		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3127 			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3128 				   start, end);
3129 			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
3130 		}
3131 	}
3132 }
3133 
3134 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3135 {
3136 	/* do not poll with more than halt_poll_max_steal percent of steal time */
3137 	if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3138 	    halt_poll_max_steal) {
3139 		vcpu->stat.halt_no_poll_steal++;
3140 		return true;
3141 	}
3142 	return false;
3143 }
3144 
/*
 * Referenced by kvm common code but never expected to be called on s390;
 * reaching it is treated as a bug.
 */
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	BUG();

	/* not reached in practice, keeps the non-void signature satisfied */
	return 0;
}
3151 
3152 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3153 					   struct kvm_one_reg *reg)
3154 {
3155 	int r = -EINVAL;
3156 
3157 	switch (reg->id) {
3158 	case KVM_REG_S390_TODPR:
3159 		r = put_user(vcpu->arch.sie_block->todpr,
3160 			     (u32 __user *)reg->addr);
3161 		break;
3162 	case KVM_REG_S390_EPOCHDIFF:
3163 		r = put_user(vcpu->arch.sie_block->epoch,
3164 			     (u64 __user *)reg->addr);
3165 		break;
3166 	case KVM_REG_S390_CPU_TIMER:
3167 		r = put_user(kvm_s390_get_cpu_timer(vcpu),
3168 			     (u64 __user *)reg->addr);
3169 		break;
3170 	case KVM_REG_S390_CLOCK_COMP:
3171 		r = put_user(vcpu->arch.sie_block->ckc,
3172 			     (u64 __user *)reg->addr);
3173 		break;
3174 	case KVM_REG_S390_PFTOKEN:
3175 		r = put_user(vcpu->arch.pfault_token,
3176 			     (u64 __user *)reg->addr);
3177 		break;
3178 	case KVM_REG_S390_PFCOMPARE:
3179 		r = put_user(vcpu->arch.pfault_compare,
3180 			     (u64 __user *)reg->addr);
3181 		break;
3182 	case KVM_REG_S390_PFSELECT:
3183 		r = put_user(vcpu->arch.pfault_select,
3184 			     (u64 __user *)reg->addr);
3185 		break;
3186 	case KVM_REG_S390_PP:
3187 		r = put_user(vcpu->arch.sie_block->pp,
3188 			     (u64 __user *)reg->addr);
3189 		break;
3190 	case KVM_REG_S390_GBEA:
3191 		r = put_user(vcpu->arch.sie_block->gbea,
3192 			     (u64 __user *)reg->addr);
3193 		break;
3194 	default:
3195 		break;
3196 	}
3197 
3198 	return r;
3199 }
3200 
3201 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3202 					   struct kvm_one_reg *reg)
3203 {
3204 	int r = -EINVAL;
3205 	__u64 val;
3206 
3207 	switch (reg->id) {
3208 	case KVM_REG_S390_TODPR:
3209 		r = get_user(vcpu->arch.sie_block->todpr,
3210 			     (u32 __user *)reg->addr);
3211 		break;
3212 	case KVM_REG_S390_EPOCHDIFF:
3213 		r = get_user(vcpu->arch.sie_block->epoch,
3214 			     (u64 __user *)reg->addr);
3215 		break;
3216 	case KVM_REG_S390_CPU_TIMER:
3217 		r = get_user(val, (u64 __user *)reg->addr);
3218 		if (!r)
3219 			kvm_s390_set_cpu_timer(vcpu, val);
3220 		break;
3221 	case KVM_REG_S390_CLOCK_COMP:
3222 		r = get_user(vcpu->arch.sie_block->ckc,
3223 			     (u64 __user *)reg->addr);
3224 		break;
3225 	case KVM_REG_S390_PFTOKEN:
3226 		r = get_user(vcpu->arch.pfault_token,
3227 			     (u64 __user *)reg->addr);
3228 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3229 			kvm_clear_async_pf_completion_queue(vcpu);
3230 		break;
3231 	case KVM_REG_S390_PFCOMPARE:
3232 		r = get_user(vcpu->arch.pfault_compare,
3233 			     (u64 __user *)reg->addr);
3234 		break;
3235 	case KVM_REG_S390_PFSELECT:
3236 		r = get_user(vcpu->arch.pfault_select,
3237 			     (u64 __user *)reg->addr);
3238 		break;
3239 	case KVM_REG_S390_PP:
3240 		r = get_user(vcpu->arch.sie_block->pp,
3241 			     (u64 __user *)reg->addr);
3242 		break;
3243 	case KVM_REG_S390_GBEA:
3244 		r = get_user(vcpu->arch.sie_block->gbea,
3245 			     (u64 __user *)reg->addr);
3246 		break;
3247 	default:
3248 		break;
3249 	}
3250 
3251 	return r;
3252 }
3253 
3254 static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
3255 {
3256 	vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
3257 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3258 	memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
3259 
3260 	kvm_clear_async_pf_completion_queue(vcpu);
3261 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
3262 		kvm_s390_vcpu_stop(vcpu);
3263 	kvm_s390_clear_local_irqs(vcpu);
3264 }
3265 
3266 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3267 {
3268 	/* Initial reset is a superset of the normal reset */
3269 	kvm_arch_vcpu_ioctl_normal_reset(vcpu);
3270 
3271 	/* this equals initial cpu reset in pop, but we don't switch to ESA */
3272 	vcpu->arch.sie_block->gpsw.mask = 0;
3273 	vcpu->arch.sie_block->gpsw.addr = 0;
3274 	kvm_s390_set_prefix(vcpu, 0);
3275 	kvm_s390_set_cpu_timer(vcpu, 0);
3276 	vcpu->arch.sie_block->ckc = 0;
3277 	vcpu->arch.sie_block->todpr = 0;
3278 	memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
3279 	vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
3280 	vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
3281 	vcpu->run->s.regs.fpc = 0;
3282 	vcpu->arch.sie_block->gbea = 1;
3283 	vcpu->arch.sie_block->pp = 0;
3284 	vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3285 }
3286 
3287 static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
3288 {
3289 	struct kvm_sync_regs *regs = &vcpu->run->s.regs;
3290 
3291 	/* Clear reset is a superset of the initial reset */
3292 	kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3293 
3294 	memset(&regs->gprs, 0, sizeof(regs->gprs));
3295 	memset(&regs->vrs, 0, sizeof(regs->vrs));
3296 	memset(&regs->acrs, 0, sizeof(regs->acrs));
3297 	memset(&regs->gscb, 0, sizeof(regs->gscb));
3298 
3299 	regs->etoken = 0;
3300 	regs->etoken_extension = 0;
3301 }
3302 
3303 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3304 {
3305 	vcpu_load(vcpu);
3306 	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3307 	vcpu_put(vcpu);
3308 	return 0;
3309 }
3310 
3311 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3312 {
3313 	vcpu_load(vcpu);
3314 	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3315 	vcpu_put(vcpu);
3316 	return 0;
3317 }
3318 
3319 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3320 				  struct kvm_sregs *sregs)
3321 {
3322 	vcpu_load(vcpu);
3323 
3324 	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3325 	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3326 
3327 	vcpu_put(vcpu);
3328 	return 0;
3329 }
3330 
3331 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3332 				  struct kvm_sregs *sregs)
3333 {
3334 	vcpu_load(vcpu);
3335 
3336 	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3337 	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3338 
3339 	vcpu_put(vcpu);
3340 	return 0;
3341 }
3342 
3343 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3344 {
3345 	int ret = 0;
3346 
3347 	vcpu_load(vcpu);
3348 
3349 	if (test_fp_ctl(fpu->fpc)) {
3350 		ret = -EINVAL;
3351 		goto out;
3352 	}
3353 	vcpu->run->s.regs.fpc = fpu->fpc;
3354 	if (MACHINE_HAS_VX)
3355 		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3356 				 (freg_t *) fpu->fprs);
3357 	else
3358 		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3359 
3360 out:
3361 	vcpu_put(vcpu);
3362 	return ret;
3363 }
3364 
3365 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3366 {
3367 	vcpu_load(vcpu);
3368 
3369 	/* make sure we have the latest values */
3370 	save_fpu_regs();
3371 	if (MACHINE_HAS_VX)
3372 		convert_vx_to_fp((freg_t *) fpu->fprs,
3373 				 (__vector128 *) vcpu->run->s.regs.vrs);
3374 	else
3375 		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3376 	fpu->fpc = vcpu->run->s.regs.fpc;
3377 
3378 	vcpu_put(vcpu);
3379 	return 0;
3380 }
3381 
3382 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3383 {
3384 	int rc = 0;
3385 
3386 	if (!is_vcpu_stopped(vcpu))
3387 		rc = -EBUSY;
3388 	else {
3389 		vcpu->run->psw_mask = psw.mask;
3390 		vcpu->run->psw_addr = psw.addr;
3391 	}
3392 	return rc;
3393 }
3394 
3395 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3396 				  struct kvm_translation *tr)
3397 {
3398 	return -EINVAL; /* not implemented yet */
3399 }
3400 
3401 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3402 			      KVM_GUESTDBG_USE_HW_BP | \
3403 			      KVM_GUESTDBG_ENABLE)
3404 
3405 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3406 					struct kvm_guest_debug *dbg)
3407 {
3408 	int rc = 0;
3409 
3410 	vcpu_load(vcpu);
3411 
3412 	vcpu->guest_debug = 0;
3413 	kvm_s390_clear_bp_data(vcpu);
3414 
3415 	if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3416 		rc = -EINVAL;
3417 		goto out;
3418 	}
3419 	if (!sclp.has_gpere) {
3420 		rc = -EINVAL;
3421 		goto out;
3422 	}
3423 
3424 	if (dbg->control & KVM_GUESTDBG_ENABLE) {
3425 		vcpu->guest_debug = dbg->control;
3426 		/* enforce guest PER */
3427 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3428 
3429 		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3430 			rc = kvm_s390_import_bp_data(vcpu, dbg);
3431 	} else {
3432 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3433 		vcpu->arch.guestdbg.last_bp = 0;
3434 	}
3435 
3436 	if (rc) {
3437 		vcpu->guest_debug = 0;
3438 		kvm_s390_clear_bp_data(vcpu);
3439 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3440 	}
3441 
3442 out:
3443 	vcpu_put(vcpu);
3444 	return rc;
3445 }
3446 
3447 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3448 				    struct kvm_mp_state *mp_state)
3449 {
3450 	int ret;
3451 
3452 	vcpu_load(vcpu);
3453 
3454 	/* CHECK_STOP and LOAD are not supported yet */
3455 	ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3456 				      KVM_MP_STATE_OPERATING;
3457 
3458 	vcpu_put(vcpu);
3459 	return ret;
3460 }
3461 
3462 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3463 				    struct kvm_mp_state *mp_state)
3464 {
3465 	int rc = 0;
3466 
3467 	vcpu_load(vcpu);
3468 
3469 	/* user space knows about this interface - let it control the state */
3470 	vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3471 
3472 	switch (mp_state->mp_state) {
3473 	case KVM_MP_STATE_STOPPED:
3474 		kvm_s390_vcpu_stop(vcpu);
3475 		break;
3476 	case KVM_MP_STATE_OPERATING:
3477 		kvm_s390_vcpu_start(vcpu);
3478 		break;
3479 	case KVM_MP_STATE_LOAD:
3480 	case KVM_MP_STATE_CHECK_STOP:
3481 		/* fall through - CHECK_STOP and LOAD are not supported yet */
3482 	default:
3483 		rc = -ENXIO;
3484 	}
3485 
3486 	vcpu_put(vcpu);
3487 	return rc;
3488 }
3489 
3490 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3491 {
3492 	return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3493 }
3494 
3495 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3496 {
3497 retry:
3498 	kvm_s390_vcpu_request_handled(vcpu);
3499 	if (!kvm_request_pending(vcpu))
3500 		return 0;
3501 	/*
3502 	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
3503 	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3504 	 * This ensures that the ipte instruction for this request has
3505 	 * already finished. We might race against a second unmapper that
3506 	 * wants to set the blocking bit. Lets just retry the request loop.
3507 	 */
3508 	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3509 		int rc;
3510 		rc = gmap_mprotect_notify(vcpu->arch.gmap,
3511 					  kvm_s390_get_prefix(vcpu),
3512 					  PAGE_SIZE * 2, PROT_WRITE);
3513 		if (rc) {
3514 			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3515 			return rc;
3516 		}
3517 		goto retry;
3518 	}
3519 
3520 	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3521 		vcpu->arch.sie_block->ihcpu = 0xffff;
3522 		goto retry;
3523 	}
3524 
3525 	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3526 		if (!ibs_enabled(vcpu)) {
3527 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3528 			kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3529 		}
3530 		goto retry;
3531 	}
3532 
3533 	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3534 		if (ibs_enabled(vcpu)) {
3535 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3536 			kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3537 		}
3538 		goto retry;
3539 	}
3540 
3541 	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3542 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3543 		goto retry;
3544 	}
3545 
3546 	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3547 		/*
3548 		 * Disable CMM virtualization; we will emulate the ESSA
3549 		 * instruction manually, in order to provide additional
3550 		 * functionalities needed for live migration.
3551 		 */
3552 		vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3553 		goto retry;
3554 	}
3555 
3556 	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3557 		/*
3558 		 * Re-enable CMM virtualization if CMMA is available and
3559 		 * CMM has been used.
3560 		 */
3561 		if ((vcpu->kvm->arch.use_cmma) &&
3562 		    (vcpu->kvm->mm->context.uses_cmm))
3563 			vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3564 		goto retry;
3565 	}
3566 
3567 	/* nothing to do, just clear the request */
3568 	kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3569 	/* we left the vsie handler, nothing to do, just clear the request */
3570 	kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
3571 
3572 	return 0;
3573 }
3574 
3575 void kvm_s390_set_tod_clock(struct kvm *kvm,
3576 			    const struct kvm_s390_vm_tod_clock *gtod)
3577 {
3578 	struct kvm_vcpu *vcpu;
3579 	struct kvm_s390_tod_clock_ext htod;
3580 	int i;
3581 
3582 	mutex_lock(&kvm->lock);
3583 	preempt_disable();
3584 
3585 	get_tod_clock_ext((char *)&htod);
3586 
3587 	kvm->arch.epoch = gtod->tod - htod.tod;
3588 	kvm->arch.epdx = 0;
3589 	if (test_kvm_facility(kvm, 139)) {
3590 		kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
3591 		if (kvm->arch.epoch > gtod->tod)
3592 			kvm->arch.epdx -= 1;
3593 	}
3594 
3595 	kvm_s390_vcpu_block_all(kvm);
3596 	kvm_for_each_vcpu(i, vcpu, kvm) {
3597 		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3598 		vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
3599 	}
3600 
3601 	kvm_s390_vcpu_unblock_all(kvm);
3602 	preempt_enable();
3603 	mutex_unlock(&kvm->lock);
3604 }
3605 
3606 /**
3607  * kvm_arch_fault_in_page - fault-in guest page if necessary
3608  * @vcpu: The corresponding virtual cpu
3609  * @gpa: Guest physical address
3610  * @writable: Whether the page should be writable or not
3611  *
3612  * Make sure that a guest page has been faulted-in on the host.
3613  *
3614  * Return: Zero on success, negative error code otherwise.
3615  */
3616 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3617 {
3618 	return gmap_fault(vcpu->arch.gmap, gpa,
3619 			  writable ? FAULT_FLAG_WRITE : 0);
3620 }
3621 
3622 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3623 				      unsigned long token)
3624 {
3625 	struct kvm_s390_interrupt inti;
3626 	struct kvm_s390_irq irq;
3627 
3628 	if (start_token) {
3629 		irq.u.ext.ext_params2 = token;
3630 		irq.type = KVM_S390_INT_PFAULT_INIT;
3631 		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3632 	} else {
3633 		inti.type = KVM_S390_INT_PFAULT_DONE;
3634 		inti.parm64 = token;
3635 		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3636 	}
3637 }
3638 
3639 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3640 				     struct kvm_async_pf *work)
3641 {
3642 	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3643 	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3644 }
3645 
3646 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3647 				 struct kvm_async_pf *work)
3648 {
3649 	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3650 	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3651 }
3652 
/*
 * Intentionally empty: s390 will always inject the page directly, so there
 * is nothing left to do once the page is ready.
 */
void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
}
3658 
3659 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3660 {
3661 	/*
3662 	 * s390 will always inject the page directly,
3663 	 * but we still want check_async_completion to cleanup
3664 	 */
3665 	return true;
3666 }
3667 
3668 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3669 {
3670 	hva_t hva;
3671 	struct kvm_arch_async_pf arch;
3672 	int rc;
3673 
3674 	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3675 		return 0;
3676 	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3677 	    vcpu->arch.pfault_compare)
3678 		return 0;
3679 	if (psw_extint_disabled(vcpu))
3680 		return 0;
3681 	if (kvm_s390_vcpu_has_irq(vcpu, 0))
3682 		return 0;
3683 	if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
3684 		return 0;
3685 	if (!vcpu->arch.gmap->pfault_enabled)
3686 		return 0;
3687 
3688 	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3689 	hva += current->thread.gmap_addr & ~PAGE_MASK;
3690 	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3691 		return 0;
3692 
3693 	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3694 	return rc;
3695 }
3696 
3697 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3698 {
3699 	int rc, cpuflags;
3700 
3701 	/*
3702 	 * On s390 notifications for arriving pages will be delivered directly
3703 	 * to the guest but the house keeping for completed pfaults is
3704 	 * handled outside the worker.
3705 	 */
3706 	kvm_check_async_pf_completion(vcpu);
3707 
3708 	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3709 	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3710 
3711 	if (need_resched())
3712 		schedule();
3713 
3714 	if (test_cpu_flag(CIF_MCCK_PENDING))
3715 		s390_handle_mcck();
3716 
3717 	if (!kvm_is_ucontrol(vcpu->kvm)) {
3718 		rc = kvm_s390_deliver_pending_interrupts(vcpu);
3719 		if (rc)
3720 			return rc;
3721 	}
3722 
3723 	rc = kvm_s390_handle_requests(vcpu);
3724 	if (rc)
3725 		return rc;
3726 
3727 	if (guestdbg_enabled(vcpu)) {
3728 		kvm_s390_backup_guest_per_regs(vcpu);
3729 		kvm_s390_patch_guest_per_regs(vcpu);
3730 	}
3731 
3732 	clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.gisa_int.kicked_mask);
3733 
3734 	vcpu->arch.sie_block->icptcode = 0;
3735 	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3736 	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3737 	trace_kvm_s390_sie_enter(vcpu, cpuflags);
3738 
3739 	return 0;
3740 }
3741 
3742 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3743 {
3744 	struct kvm_s390_pgm_info pgm_info = {
3745 		.code = PGM_ADDRESSING,
3746 	};
3747 	u8 opcode, ilen;
3748 	int rc;
3749 
3750 	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3751 	trace_kvm_s390_sie_fault(vcpu);
3752 
3753 	/*
3754 	 * We want to inject an addressing exception, which is defined as a
3755 	 * suppressing or terminating exception. However, since we came here
3756 	 * by a DAT access exception, the PSW still points to the faulting
3757 	 * instruction since DAT exceptions are nullifying. So we've got
3758 	 * to look up the current opcode to get the length of the instruction
3759 	 * to be able to forward the PSW.
3760 	 */
3761 	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3762 	ilen = insn_length(opcode);
3763 	if (rc < 0) {
3764 		return rc;
3765 	} else if (rc) {
3766 		/* Instruction-Fetching Exceptions - we can't detect the ilen.
3767 		 * Forward by arbitrary ilc, injection will take care of
3768 		 * nullification if necessary.
3769 		 */
3770 		pgm_info = vcpu->arch.pgm;
3771 		ilen = 4;
3772 	}
3773 	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3774 	kvm_s390_forward_psw(vcpu, ilen);
3775 	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3776 }
3777 
3778 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3779 {
3780 	struct mcck_volatile_info *mcck_info;
3781 	struct sie_page *sie_page;
3782 
3783 	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3784 		   vcpu->arch.sie_block->icptcode);
3785 	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3786 
3787 	if (guestdbg_enabled(vcpu))
3788 		kvm_s390_restore_guest_per_regs(vcpu);
3789 
3790 	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3791 	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3792 
3793 	if (exit_reason == -EINTR) {
3794 		VCPU_EVENT(vcpu, 3, "%s", "machine check");
3795 		sie_page = container_of(vcpu->arch.sie_block,
3796 					struct sie_page, sie_block);
3797 		mcck_info = &sie_page->mcck_info;
3798 		kvm_s390_reinject_machine_check(vcpu, mcck_info);
3799 		return 0;
3800 	}
3801 
3802 	if (vcpu->arch.sie_block->icptcode > 0) {
3803 		int rc = kvm_handle_sie_intercept(vcpu);
3804 
3805 		if (rc != -EOPNOTSUPP)
3806 			return rc;
3807 		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3808 		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3809 		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3810 		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3811 		return -EREMOTE;
3812 	} else if (exit_reason != -EFAULT) {
3813 		vcpu->stat.exit_null++;
3814 		return 0;
3815 	} else if (kvm_is_ucontrol(vcpu->kvm)) {
3816 		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3817 		vcpu->run->s390_ucontrol.trans_exc_code =
3818 						current->thread.gmap_addr;
3819 		vcpu->run->s390_ucontrol.pgm_code = 0x10;
3820 		return -EREMOTE;
3821 	} else if (current->thread.gmap_pfault) {
3822 		trace_kvm_s390_major_guest_pfault(vcpu);
3823 		current->thread.gmap_pfault = 0;
3824 		if (kvm_arch_setup_async_pf(vcpu))
3825 			return 0;
3826 		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3827 	}
3828 	return vcpu_post_run_fault_in_sie(vcpu);
3829 }
3830 
3831 static int __vcpu_run(struct kvm_vcpu *vcpu)
3832 {
3833 	int rc, exit_reason;
3834 
3835 	/*
3836 	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
3837 	 * ning the guest), so that memslots (and other stuff) are protected
3838 	 */
3839 	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3840 
3841 	do {
3842 		rc = vcpu_pre_run(vcpu);
3843 		if (rc)
3844 			break;
3845 
3846 		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3847 		/*
3848 		 * As PF_VCPU will be used in fault handler, between
3849 		 * guest_enter and guest_exit should be no uaccess.
3850 		 */
3851 		local_irq_disable();
3852 		guest_enter_irqoff();
3853 		__disable_cpu_timer_accounting(vcpu);
3854 		local_irq_enable();
3855 		exit_reason = sie64a(vcpu->arch.sie_block,
3856 				     vcpu->run->s.regs.gprs);
3857 		local_irq_disable();
3858 		__enable_cpu_timer_accounting(vcpu);
3859 		guest_exit_irqoff();
3860 		local_irq_enable();
3861 		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3862 
3863 		rc = vcpu_post_run(vcpu, exit_reason);
3864 	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3865 
3866 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3867 	return rc;
3868 }
3869 
3870 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3871 {
3872 	struct runtime_instr_cb *riccb;
3873 	struct gs_cb *gscb;
3874 
3875 	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3876 	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3877 	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3878 	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3879 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3880 		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3881 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3882 		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3883 		/* some control register changes require a tlb flush */
3884 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3885 	}
3886 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3887 		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3888 		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3889 		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3890 		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3891 		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3892 	}
3893 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3894 		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3895 		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3896 		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3897 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3898 			kvm_clear_async_pf_completion_queue(vcpu);
3899 	}
3900 	/*
3901 	 * If userspace sets the riccb (e.g. after migration) to a valid state,
3902 	 * we should enable RI here instead of doing the lazy enablement.
3903 	 */
3904 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3905 	    test_kvm_facility(vcpu->kvm, 64) &&
3906 	    riccb->v &&
3907 	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3908 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3909 		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3910 	}
3911 	/*
3912 	 * If userspace sets the gscb (e.g. after migration) to non-zero,
3913 	 * we should enable GS here instead of doing the lazy enablement.
3914 	 */
3915 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3916 	    test_kvm_facility(vcpu->kvm, 133) &&
3917 	    gscb->gssm &&
3918 	    !vcpu->arch.gs_enabled) {
3919 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3920 		vcpu->arch.sie_block->ecb |= ECB_GS;
3921 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3922 		vcpu->arch.gs_enabled = 1;
3923 	}
3924 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
3925 	    test_kvm_facility(vcpu->kvm, 82)) {
3926 		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3927 		vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
3928 	}
3929 	save_access_regs(vcpu->arch.host_acrs);
3930 	restore_access_regs(vcpu->run->s.regs.acrs);
3931 	/* save host (userspace) fprs/vrs */
3932 	save_fpu_regs();
3933 	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3934 	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
3935 	if (MACHINE_HAS_VX)
3936 		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3937 	else
3938 		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3939 	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3940 	if (test_fp_ctl(current->thread.fpu.fpc))
3941 		/* User space provided an invalid FPC, let's clear it */
3942 		current->thread.fpu.fpc = 0;
3943 	if (MACHINE_HAS_GS) {
3944 		preempt_disable();
3945 		__ctl_set_bit(2, 4);
3946 		if (current->thread.gs_cb) {
3947 			vcpu->arch.host_gscb = current->thread.gs_cb;
3948 			save_gs_cb(vcpu->arch.host_gscb);
3949 		}
3950 		if (vcpu->arch.gs_enabled) {
3951 			current->thread.gs_cb = (struct gs_cb *)
3952 						&vcpu->run->s.regs.gscb;
3953 			restore_gs_cb(current->thread.gs_cb);
3954 		}
3955 		preempt_enable();
3956 	}
3957 	/* SIE will load etoken directly from SDNX and therefore kvm_run */
3958 
3959 	kvm_run->kvm_dirty_regs = 0;
3960 }
3961 
3962 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3963 {
3964 	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3965 	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3966 	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3967 	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3968 	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3969 	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3970 	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3971 	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3972 	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3973 	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3974 	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3975 	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3976 	kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
3977 	save_access_regs(vcpu->run->s.regs.acrs);
3978 	restore_access_regs(vcpu->arch.host_acrs);
3979 	/* Save guest register state */
3980 	save_fpu_regs();
3981 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3982 	/* Restore will be done lazily at return */
3983 	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3984 	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3985 	if (MACHINE_HAS_GS) {
3986 		__ctl_set_bit(2, 4);
3987 		if (vcpu->arch.gs_enabled)
3988 			save_gs_cb(current->thread.gs_cb);
3989 		preempt_disable();
3990 		current->thread.gs_cb = vcpu->arch.host_gscb;
3991 		restore_gs_cb(vcpu->arch.host_gscb);
3992 		preempt_enable();
3993 		if (!vcpu->arch.host_gscb)
3994 			__ctl_clear_bit(2, 4);
3995 		vcpu->arch.host_gscb = NULL;
3996 	}
3997 	/* SIE will save etoken directly into SDNX and therefore kvm_run */
3998 }
3999 
4000 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
4001 {
4002 	int rc;
4003 
4004 	if (kvm_run->immediate_exit)
4005 		return -EINTR;
4006 
4007 	if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
4008 	    kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
4009 		return -EINVAL;
4010 
4011 	vcpu_load(vcpu);
4012 
4013 	if (guestdbg_exit_pending(vcpu)) {
4014 		kvm_s390_prepare_debug_exit(vcpu);
4015 		rc = 0;
4016 		goto out;
4017 	}
4018 
4019 	kvm_sigset_activate(vcpu);
4020 
4021 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
4022 		kvm_s390_vcpu_start(vcpu);
4023 	} else if (is_vcpu_stopped(vcpu)) {
4024 		pr_err_ratelimited("can't run stopped vcpu %d\n",
4025 				   vcpu->vcpu_id);
4026 		rc = -EINVAL;
4027 		goto out;
4028 	}
4029 
4030 	sync_regs(vcpu, kvm_run);
4031 	enable_cpu_timer_accounting(vcpu);
4032 
4033 	might_fault();
4034 	rc = __vcpu_run(vcpu);
4035 
4036 	if (signal_pending(current) && !rc) {
4037 		kvm_run->exit_reason = KVM_EXIT_INTR;
4038 		rc = -EINTR;
4039 	}
4040 
4041 	if (guestdbg_exit_pending(vcpu) && !rc)  {
4042 		kvm_s390_prepare_debug_exit(vcpu);
4043 		rc = 0;
4044 	}
4045 
4046 	if (rc == -EREMOTE) {
4047 		/* userspace support is needed, kvm_run has been prepared */
4048 		rc = 0;
4049 	}
4050 
4051 	disable_cpu_timer_accounting(vcpu);
4052 	store_regs(vcpu, kvm_run);
4053 
4054 	kvm_sigset_deactivate(vcpu);
4055 
4056 	vcpu->stat.exit_userspace++;
4057 out:
4058 	vcpu_put(vcpu);
4059 	return rc;
4060 }
4061 
4062 /*
4063  * store status at address
4064  * we use have two special cases:
4065  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
4066  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
4067  */
4068 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4069 {
4070 	unsigned char archmode = 1;
4071 	freg_t fprs[NUM_FPRS];
4072 	unsigned int px;
4073 	u64 clkcomp, cputm;
4074 	int rc;
4075 
4076 	px = kvm_s390_get_prefix(vcpu);
4077 	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
4078 		if (write_guest_abs(vcpu, 163, &archmode, 1))
4079 			return -EFAULT;
4080 		gpa = 0;
4081 	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
4082 		if (write_guest_real(vcpu, 163, &archmode, 1))
4083 			return -EFAULT;
4084 		gpa = px;
4085 	} else
4086 		gpa -= __LC_FPREGS_SAVE_AREA;
4087 
4088 	/* manually convert vector registers if necessary */
4089 	if (MACHINE_HAS_VX) {
4090 		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
4091 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4092 				     fprs, 128);
4093 	} else {
4094 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4095 				     vcpu->run->s.regs.fprs, 128);
4096 	}
4097 	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
4098 			      vcpu->run->s.regs.gprs, 128);
4099 	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
4100 			      &vcpu->arch.sie_block->gpsw, 16);
4101 	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
4102 			      &px, 4);
4103 	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
4104 			      &vcpu->run->s.regs.fpc, 4);
4105 	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
4106 			      &vcpu->arch.sie_block->todpr, 4);
4107 	cputm = kvm_s390_get_cpu_timer(vcpu);
4108 	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
4109 			      &cputm, 8);
4110 	clkcomp = vcpu->arch.sie_block->ckc >> 8;
4111 	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
4112 			      &clkcomp, 8);
4113 	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
4114 			      &vcpu->run->s.regs.acrs, 64);
4115 	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
4116 			      &vcpu->arch.sie_block->gcr, 128);
4117 	return rc ? -EFAULT : 0;
4118 }
4119 
4120 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
4121 {
4122 	/*
4123 	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
4124 	 * switch in the run ioctl. Let's update our copies before we save
4125 	 * it into the save area
4126 	 */
4127 	save_fpu_regs();
4128 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4129 	save_access_regs(vcpu->run->s.regs.acrs);
4130 
4131 	return kvm_s390_store_status_unloaded(vcpu, addr);
4132 }
4133 
4134 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4135 {
4136 	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
4137 	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
4138 }
4139 
4140 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
4141 {
4142 	unsigned int i;
4143 	struct kvm_vcpu *vcpu;
4144 
4145 	kvm_for_each_vcpu(i, vcpu, kvm) {
4146 		__disable_ibs_on_vcpu(vcpu);
4147 	}
4148 }
4149 
4150 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4151 {
4152 	if (!sclp.has_ibs)
4153 		return;
4154 	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
4155 	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
4156 }
4157 
4158 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
4159 {
4160 	int i, online_vcpus, started_vcpus = 0;
4161 
4162 	if (!is_vcpu_stopped(vcpu))
4163 		return;
4164 
4165 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
4166 	/* Only one cpu at a time may enter/leave the STOPPED state. */
4167 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
4168 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4169 
4170 	for (i = 0; i < online_vcpus; i++) {
4171 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
4172 			started_vcpus++;
4173 	}
4174 
4175 	if (started_vcpus == 0) {
4176 		/* we're the only active VCPU -> speed it up */
4177 		__enable_ibs_on_vcpu(vcpu);
4178 	} else if (started_vcpus == 1) {
4179 		/*
4180 		 * As we are starting a second VCPU, we have to disable
4181 		 * the IBS facility on all VCPUs to remove potentially
4182 		 * oustanding ENABLE requests.
4183 		 */
4184 		__disable_ibs_on_all_vcpus(vcpu->kvm);
4185 	}
4186 
4187 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
4188 	/*
4189 	 * Another VCPU might have used IBS while we were offline.
4190 	 * Let's play safe and flush the VCPU at startup.
4191 	 */
4192 	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4193 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4194 	return;
4195 }
4196 
4197 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
4198 {
4199 	int i, online_vcpus, started_vcpus = 0;
4200 	struct kvm_vcpu *started_vcpu = NULL;
4201 
4202 	if (is_vcpu_stopped(vcpu))
4203 		return;
4204 
4205 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
4206 	/* Only one cpu at a time may enter/leave the STOPPED state. */
4207 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
4208 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4209 
4210 	/* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
4211 	kvm_s390_clear_stop_irq(vcpu);
4212 
4213 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
4214 	__disable_ibs_on_vcpu(vcpu);
4215 
4216 	for (i = 0; i < online_vcpus; i++) {
4217 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
4218 			started_vcpus++;
4219 			started_vcpu = vcpu->kvm->vcpus[i];
4220 		}
4221 	}
4222 
4223 	if (started_vcpus == 1) {
4224 		/*
4225 		 * As we only have one VCPU left, we want to enable the
4226 		 * IBS facility for that VCPU to speed it up.
4227 		 */
4228 		__enable_ibs_on_vcpu(started_vcpu);
4229 	}
4230 
4231 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4232 	return;
4233 }
4234 
4235 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4236 				     struct kvm_enable_cap *cap)
4237 {
4238 	int r;
4239 
4240 	if (cap->flags)
4241 		return -EINVAL;
4242 
4243 	switch (cap->cap) {
4244 	case KVM_CAP_S390_CSS_SUPPORT:
4245 		if (!vcpu->kvm->arch.css_support) {
4246 			vcpu->kvm->arch.css_support = 1;
4247 			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
4248 			trace_kvm_s390_enable_css(vcpu->kvm);
4249 		}
4250 		r = 0;
4251 		break;
4252 	default:
4253 		r = -EINVAL;
4254 		break;
4255 	}
4256 	return r;
4257 }
4258 
4259 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
4260 				  struct kvm_s390_mem_op *mop)
4261 {
4262 	void __user *uaddr = (void __user *)mop->buf;
4263 	void *tmpbuf = NULL;
4264 	int r, srcu_idx;
4265 	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
4266 				    | KVM_S390_MEMOP_F_CHECK_ONLY;
4267 
4268 	if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
4269 		return -EINVAL;
4270 
4271 	if (mop->size > MEM_OP_MAX_SIZE)
4272 		return -E2BIG;
4273 
4274 	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
4275 		tmpbuf = vmalloc(mop->size);
4276 		if (!tmpbuf)
4277 			return -ENOMEM;
4278 	}
4279 
4280 	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4281 
4282 	switch (mop->op) {
4283 	case KVM_S390_MEMOP_LOGICAL_READ:
4284 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4285 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4286 					    mop->size, GACC_FETCH);
4287 			break;
4288 		}
4289 		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4290 		if (r == 0) {
4291 			if (copy_to_user(uaddr, tmpbuf, mop->size))
4292 				r = -EFAULT;
4293 		}
4294 		break;
4295 	case KVM_S390_MEMOP_LOGICAL_WRITE:
4296 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4297 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4298 					    mop->size, GACC_STORE);
4299 			break;
4300 		}
4301 		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4302 			r = -EFAULT;
4303 			break;
4304 		}
4305 		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4306 		break;
4307 	default:
4308 		r = -EINVAL;
4309 	}
4310 
4311 	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
4312 
4313 	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4314 		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
4315 
4316 	vfree(tmpbuf);
4317 	return r;
4318 }
4319 
4320 long kvm_arch_vcpu_async_ioctl(struct file *filp,
4321 			       unsigned int ioctl, unsigned long arg)
4322 {
4323 	struct kvm_vcpu *vcpu = filp->private_data;
4324 	void __user *argp = (void __user *)arg;
4325 
4326 	switch (ioctl) {
4327 	case KVM_S390_IRQ: {
4328 		struct kvm_s390_irq s390irq;
4329 
4330 		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4331 			return -EFAULT;
4332 		return kvm_s390_inject_vcpu(vcpu, &s390irq);
4333 	}
4334 	case KVM_S390_INTERRUPT: {
4335 		struct kvm_s390_interrupt s390int;
4336 		struct kvm_s390_irq s390irq = {};
4337 
4338 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
4339 			return -EFAULT;
4340 		if (s390int_to_s390irq(&s390int, &s390irq))
4341 			return -EINVAL;
4342 		return kvm_s390_inject_vcpu(vcpu, &s390irq);
4343 	}
4344 	}
4345 	return -ENOIOCTLCMD;
4346 }
4347 
4348 long kvm_arch_vcpu_ioctl(struct file *filp,
4349 			 unsigned int ioctl, unsigned long arg)
4350 {
4351 	struct kvm_vcpu *vcpu = filp->private_data;
4352 	void __user *argp = (void __user *)arg;
4353 	int idx;
4354 	long r;
4355 
4356 	vcpu_load(vcpu);
4357 
4358 	switch (ioctl) {
4359 	case KVM_S390_STORE_STATUS:
4360 		idx = srcu_read_lock(&vcpu->kvm->srcu);
4361 		r = kvm_s390_store_status_unloaded(vcpu, arg);
4362 		srcu_read_unlock(&vcpu->kvm->srcu, idx);
4363 		break;
4364 	case KVM_S390_SET_INITIAL_PSW: {
4365 		psw_t psw;
4366 
4367 		r = -EFAULT;
4368 		if (copy_from_user(&psw, argp, sizeof(psw)))
4369 			break;
4370 		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4371 		break;
4372 	}
4373 	case KVM_S390_CLEAR_RESET:
4374 		r = 0;
4375 		kvm_arch_vcpu_ioctl_clear_reset(vcpu);
4376 		break;
4377 	case KVM_S390_INITIAL_RESET:
4378 		r = 0;
4379 		kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4380 		break;
4381 	case KVM_S390_NORMAL_RESET:
4382 		r = 0;
4383 		kvm_arch_vcpu_ioctl_normal_reset(vcpu);
4384 		break;
4385 	case KVM_SET_ONE_REG:
4386 	case KVM_GET_ONE_REG: {
4387 		struct kvm_one_reg reg;
4388 		r = -EFAULT;
4389 		if (copy_from_user(&reg, argp, sizeof(reg)))
4390 			break;
4391 		if (ioctl == KVM_SET_ONE_REG)
4392 			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4393 		else
4394 			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4395 		break;
4396 	}
4397 #ifdef CONFIG_KVM_S390_UCONTROL
4398 	case KVM_S390_UCAS_MAP: {
4399 		struct kvm_s390_ucas_mapping ucasmap;
4400 
4401 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4402 			r = -EFAULT;
4403 			break;
4404 		}
4405 
4406 		if (!kvm_is_ucontrol(vcpu->kvm)) {
4407 			r = -EINVAL;
4408 			break;
4409 		}
4410 
4411 		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4412 				     ucasmap.vcpu_addr, ucasmap.length);
4413 		break;
4414 	}
4415 	case KVM_S390_UCAS_UNMAP: {
4416 		struct kvm_s390_ucas_mapping ucasmap;
4417 
4418 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4419 			r = -EFAULT;
4420 			break;
4421 		}
4422 
4423 		if (!kvm_is_ucontrol(vcpu->kvm)) {
4424 			r = -EINVAL;
4425 			break;
4426 		}
4427 
4428 		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
4429 			ucasmap.length);
4430 		break;
4431 	}
4432 #endif
4433 	case KVM_S390_VCPU_FAULT: {
4434 		r = gmap_fault(vcpu->arch.gmap, arg, 0);
4435 		break;
4436 	}
4437 	case KVM_ENABLE_CAP:
4438 	{
4439 		struct kvm_enable_cap cap;
4440 		r = -EFAULT;
4441 		if (copy_from_user(&cap, argp, sizeof(cap)))
4442 			break;
4443 		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4444 		break;
4445 	}
4446 	case KVM_S390_MEM_OP: {
4447 		struct kvm_s390_mem_op mem_op;
4448 
4449 		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4450 			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
4451 		else
4452 			r = -EFAULT;
4453 		break;
4454 	}
4455 	case KVM_S390_SET_IRQ_STATE: {
4456 		struct kvm_s390_irq_state irq_state;
4457 
4458 		r = -EFAULT;
4459 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4460 			break;
4461 		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4462 		    irq_state.len == 0 ||
4463 		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4464 			r = -EINVAL;
4465 			break;
4466 		}
4467 		/* do not use irq_state.flags, it will break old QEMUs */
4468 		r = kvm_s390_set_irq_state(vcpu,
4469 					   (void __user *) irq_state.buf,
4470 					   irq_state.len);
4471 		break;
4472 	}
4473 	case KVM_S390_GET_IRQ_STATE: {
4474 		struct kvm_s390_irq_state irq_state;
4475 
4476 		r = -EFAULT;
4477 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4478 			break;
4479 		if (irq_state.len == 0) {
4480 			r = -EINVAL;
4481 			break;
4482 		}
4483 		/* do not use irq_state.flags, it will break old QEMUs */
4484 		r = kvm_s390_get_irq_state(vcpu,
4485 					   (__u8 __user *)  irq_state.buf,
4486 					   irq_state.len);
4487 		break;
4488 	}
4489 	default:
4490 		r = -ENOTTY;
4491 	}
4492 
4493 	vcpu_put(vcpu);
4494 	return r;
4495 }
4496 
4497 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
4498 {
4499 #ifdef CONFIG_KVM_S390_UCONTROL
4500 	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
4501 		 && (kvm_is_ucontrol(vcpu->kvm))) {
4502 		vmf->page = virt_to_page(vcpu->arch.sie_block);
4503 		get_page(vmf->page);
4504 		return 0;
4505 	}
4506 #endif
4507 	return VM_FAULT_SIGBUS;
4508 }
4509 
/* Nothing is set up per memslot here; always reports success (0). */
int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	/* all three arguments are intentionally unused */
	return 0;
}
4515 
4516 /* Section: memory related */
4517 int kvm_arch_prepare_memory_region(struct kvm *kvm,
4518 				   struct kvm_memory_slot *memslot,
4519 				   const struct kvm_userspace_memory_region *mem,
4520 				   enum kvm_mr_change change)
4521 {
4522 	/* A few sanity checks. We can have memory slots which have to be
4523 	   located/ended at a segment boundary (1MB). The memory in userland is
4524 	   ok to be fragmented into various different vmas. It is okay to mmap()
4525 	   and munmap() stuff in this slot after doing this call at any time */
4526 
4527 	if (mem->userspace_addr & 0xffffful)
4528 		return -EINVAL;
4529 
4530 	if (mem->memory_size & 0xffffful)
4531 		return -EINVAL;
4532 
4533 	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
4534 		return -EINVAL;
4535 
4536 	return 0;
4537 }
4538 
4539 void kvm_arch_commit_memory_region(struct kvm *kvm,
4540 				const struct kvm_userspace_memory_region *mem,
4541 				const struct kvm_memory_slot *old,
4542 				const struct kvm_memory_slot *new,
4543 				enum kvm_mr_change change)
4544 {
4545 	int rc = 0;
4546 
4547 	switch (change) {
4548 	case KVM_MR_DELETE:
4549 		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
4550 					old->npages * PAGE_SIZE);
4551 		break;
4552 	case KVM_MR_MOVE:
4553 		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
4554 					old->npages * PAGE_SIZE);
4555 		if (rc)
4556 			break;
4557 		/* FALLTHROUGH */
4558 	case KVM_MR_CREATE:
4559 		rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
4560 				      mem->guest_phys_addr, mem->memory_size);
4561 		break;
4562 	case KVM_MR_FLAGS_ONLY:
4563 		break;
4564 	default:
4565 		WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
4566 	}
4567 	if (rc)
4568 		pr_warn("failed to commit memory region\n");
4569 	return;
4570 }
4571 
4572 static inline unsigned long nonhyp_mask(int i)
4573 {
4574 	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
4575 
4576 	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
4577 }
4578 
4579 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
4580 {
4581 	vcpu->valid_wakeup = false;
4582 }
4583 
4584 static int __init kvm_s390_init(void)
4585 {
4586 	int i;
4587 
4588 	if (!sclp.has_sief2) {
4589 		pr_info("SIE is not available\n");
4590 		return -ENODEV;
4591 	}
4592 
4593 	if (nested && hpage) {
4594 		pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
4595 		return -EINVAL;
4596 	}
4597 
4598 	for (i = 0; i < 16; i++)
4599 		kvm_s390_fac_base[i] |=
4600 			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
4601 
4602 	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
4603 }
4604 
4605 static void __exit kvm_s390_exit(void)
4606 {
4607 	kvm_exit();
4608 }
4609 
/* Register kvm_s390_init()/kvm_s390_exit() as module entry and exit points. */
module_init(kvm_s390_init);
module_exit(kvm_s390_exit);

/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
/* included late, right next to the alias macros below */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");
4621