xref: /openbmc/linux/arch/s390/kvm/kvm-s390.c (revision 61bf3293)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * hosting IBM Z kernel virtual machines (s390x)
4  *
5  * Copyright IBM Corp. 2008, 2020
6  *
7  *    Author(s): Carsten Otte <cotte@de.ibm.com>
8  *               Christian Borntraeger <borntraeger@de.ibm.com>
9  *               Heiko Carstens <heiko.carstens@de.ibm.com>
10  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
11  *               Jason J. Herne <jjherne@us.ibm.com>
12  */
13 
14 #define KMSG_COMPONENT "kvm-s390"
15 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
16 
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/moduleparam.h>
27 #include <linux/random.h>
28 #include <linux/slab.h>
29 #include <linux/timer.h>
30 #include <linux/vmalloc.h>
31 #include <linux/bitmap.h>
32 #include <linux/sched/signal.h>
33 #include <linux/string.h>
34 #include <linux/pgtable.h>
35 
36 #include <asm/asm-offsets.h>
37 #include <asm/lowcore.h>
38 #include <asm/stp.h>
39 #include <asm/gmap.h>
40 #include <asm/nmi.h>
41 #include <asm/switch_to.h>
42 #include <asm/isc.h>
43 #include <asm/sclp.h>
44 #include <asm/cpacf.h>
45 #include <asm/timex.h>
46 #include <asm/ap.h>
47 #include <asm/uv.h>
48 #include "kvm-s390.h"
49 #include "gaccess.h"
50 
51 #define CREATE_TRACE_POINTS
52 #include "trace.h"
53 #include "trace-s390.h"
54 
/* Maximum transfer size, in bytes, for a single KVM_S390_MEM_OP */
#define MEM_OP_MAX_SIZE 65536
/* Extra irq slots added on top of KVM_MAX_VCPUS when sizing the buffer below */
#define LOCAL_IRQS 32
/* Bytes needed to hold (KVM_MAX_VCPUS + LOCAL_IRQS) struct kvm_s390_irq */
#define VCPU_IRQS_MAX_BUF \
	((KVM_MAX_VCPUS + LOCAL_IRQS) * sizeof(struct kvm_s390_irq))
59 
/*
 * Table of statistics counters. Each entry pairs a name string with a counter
 * field through VCPU_STAT() (per-vcpu) or VM_STAT() (per-vm); the table is
 * terminated by a { NULL } sentinel.
 * NOTE(review): judging by the element type these are presumably exported
 * through debugfs by common KVM code - confirm against the macro definitions.
 */
60 struct kvm_stats_debugfs_item debugfs_entries[] = {
61 	VCPU_STAT("userspace_handled", exit_userspace),
62 	VCPU_STAT("exit_null", exit_null),
63 	VCPU_STAT("exit_validity", exit_validity),
64 	VCPU_STAT("exit_stop_request", exit_stop_request),
65 	VCPU_STAT("exit_external_request", exit_external_request),
66 	VCPU_STAT("exit_io_request", exit_io_request),
67 	VCPU_STAT("exit_external_interrupt", exit_external_interrupt),
68 	VCPU_STAT("exit_instruction", exit_instruction),
69 	VCPU_STAT("exit_pei", exit_pei),
70 	VCPU_STAT("exit_program_interruption", exit_program_interruption),
71 	VCPU_STAT("exit_instr_and_program_int", exit_instr_and_program),
72 	VCPU_STAT("exit_operation_exception", exit_operation_exception),
73 	VCPU_STAT("halt_successful_poll", halt_successful_poll),
74 	VCPU_STAT("halt_attempted_poll", halt_attempted_poll),
75 	VCPU_STAT("halt_poll_invalid", halt_poll_invalid),
76 	VCPU_STAT("halt_no_poll_steal", halt_no_poll_steal),
77 	VCPU_STAT("halt_wakeup", halt_wakeup),
78 	VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns),
79 	VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns),
80 	VCPU_STAT("instruction_lctlg", instruction_lctlg),
81 	VCPU_STAT("instruction_lctl", instruction_lctl),
82 	VCPU_STAT("instruction_stctl", instruction_stctl),
83 	VCPU_STAT("instruction_stctg", instruction_stctg),
84 	VCPU_STAT("deliver_ckc", deliver_ckc),
85 	VCPU_STAT("deliver_cputm", deliver_cputm),
86 	VCPU_STAT("deliver_emergency_signal", deliver_emergency_signal),
87 	VCPU_STAT("deliver_external_call", deliver_external_call),
88 	VCPU_STAT("deliver_service_signal", deliver_service_signal),
89 	VCPU_STAT("deliver_virtio", deliver_virtio),
90 	VCPU_STAT("deliver_stop_signal", deliver_stop_signal),
91 	VCPU_STAT("deliver_prefix_signal", deliver_prefix_signal),
92 	VCPU_STAT("deliver_restart_signal", deliver_restart_signal),
93 	VCPU_STAT("deliver_program", deliver_program),
94 	VCPU_STAT("deliver_io", deliver_io),
95 	VCPU_STAT("deliver_machine_check", deliver_machine_check),
96 	VCPU_STAT("exit_wait_state", exit_wait_state),
97 	VCPU_STAT("inject_ckc", inject_ckc),
98 	VCPU_STAT("inject_cputm", inject_cputm),
99 	VCPU_STAT("inject_external_call", inject_external_call),
100 	VM_STAT("inject_float_mchk", inject_float_mchk),
101 	VCPU_STAT("inject_emergency_signal", inject_emergency_signal),
102 	VM_STAT("inject_io", inject_io),
103 	VCPU_STAT("inject_mchk", inject_mchk),
104 	VM_STAT("inject_pfault_done", inject_pfault_done),
105 	VCPU_STAT("inject_program", inject_program),
106 	VCPU_STAT("inject_restart", inject_restart),
107 	VM_STAT("inject_service_signal", inject_service_signal),
108 	VCPU_STAT("inject_set_prefix", inject_set_prefix),
109 	VCPU_STAT("inject_stop_signal", inject_stop_signal),
110 	VCPU_STAT("inject_pfault_init", inject_pfault_init),
111 	VM_STAT("inject_virtio", inject_virtio),
112 	VCPU_STAT("instruction_epsw", instruction_epsw),
113 	VCPU_STAT("instruction_gs", instruction_gs),
114 	VCPU_STAT("instruction_io_other", instruction_io_other),
115 	VCPU_STAT("instruction_lpsw", instruction_lpsw),
116 	VCPU_STAT("instruction_lpswe", instruction_lpswe),
117 	VCPU_STAT("instruction_pfmf", instruction_pfmf),
118 	VCPU_STAT("instruction_ptff", instruction_ptff),
119 	VCPU_STAT("instruction_stidp", instruction_stidp),
120 	VCPU_STAT("instruction_sck", instruction_sck),
121 	VCPU_STAT("instruction_sckpf", instruction_sckpf),
122 	VCPU_STAT("instruction_spx", instruction_spx),
123 	VCPU_STAT("instruction_stpx", instruction_stpx),
124 	VCPU_STAT("instruction_stap", instruction_stap),
125 	VCPU_STAT("instruction_iske", instruction_iske),
126 	VCPU_STAT("instruction_ri", instruction_ri),
127 	VCPU_STAT("instruction_rrbe", instruction_rrbe),
128 	VCPU_STAT("instruction_sske", instruction_sske),
129 	VCPU_STAT("instruction_ipte_interlock", instruction_ipte_interlock),
130 	VCPU_STAT("instruction_essa", instruction_essa),
131 	VCPU_STAT("instruction_stsi", instruction_stsi),
132 	VCPU_STAT("instruction_stfl", instruction_stfl),
133 	VCPU_STAT("instruction_tb", instruction_tb),
134 	VCPU_STAT("instruction_tpi", instruction_tpi),
135 	VCPU_STAT("instruction_tprot", instruction_tprot),
136 	VCPU_STAT("instruction_tsch", instruction_tsch),
137 	VCPU_STAT("instruction_sthyi", instruction_sthyi),
138 	VCPU_STAT("instruction_sie", instruction_sie),
139 	VCPU_STAT("instruction_sigp_sense", instruction_sigp_sense),
140 	VCPU_STAT("instruction_sigp_sense_running", instruction_sigp_sense_running),
141 	VCPU_STAT("instruction_sigp_external_call", instruction_sigp_external_call),
142 	VCPU_STAT("instruction_sigp_emergency", instruction_sigp_emergency),
143 	VCPU_STAT("instruction_sigp_cond_emergency", instruction_sigp_cond_emergency),
144 	VCPU_STAT("instruction_sigp_start", instruction_sigp_start),
145 	VCPU_STAT("instruction_sigp_stop", instruction_sigp_stop),
146 	VCPU_STAT("instruction_sigp_stop_store_status", instruction_sigp_stop_store_status),
147 	VCPU_STAT("instruction_sigp_store_status", instruction_sigp_store_status),
148 	VCPU_STAT("instruction_sigp_store_adtl_status", instruction_sigp_store_adtl_status),
149 	VCPU_STAT("instruction_sigp_set_arch", instruction_sigp_arch),
150 	VCPU_STAT("instruction_sigp_set_prefix", instruction_sigp_prefix),
151 	VCPU_STAT("instruction_sigp_restart", instruction_sigp_restart),
152 	VCPU_STAT("instruction_sigp_cpu_reset", instruction_sigp_cpu_reset),
153 	VCPU_STAT("instruction_sigp_init_cpu_reset", instruction_sigp_init_cpu_reset),
154 	VCPU_STAT("instruction_sigp_unknown", instruction_sigp_unknown),
155 	VCPU_STAT("instruction_diag_10", diagnose_10),
156 	VCPU_STAT("instruction_diag_44", diagnose_44),
157 	VCPU_STAT("instruction_diag_9c", diagnose_9c),
158 	VCPU_STAT("diag_9c_ignored", diagnose_9c_ignored),
159 	VCPU_STAT("instruction_diag_258", diagnose_258),
160 	VCPU_STAT("instruction_diag_308", diagnose_308),
161 	VCPU_STAT("instruction_diag_500", diagnose_500),
162 	VCPU_STAT("instruction_diag_other", diagnose_other),
163 	{ NULL }
164 };
165 
/*
 * Packed 16-byte record: a one-byte epoch index, an eight-byte TOD value and
 * seven reserved bytes. __packed keeps the members adjacent without padding.
 * NOTE(review): presumably mirrors a hardware/extended TOD clock format -
 * confirm against the users of this struct, which are not visible here.
 */
166 struct kvm_s390_tod_clock_ext {
167 	__u8 epoch_idx;
168 	__u64 tod;
169 	__u8 reserved[7];
170 } __packed;
171 
172 /* allow nested virtualization in KVM (if enabled by user space) */
173 static int nested;
174 module_param(nested, int, S_IRUGO);
175 MODULE_PARM_DESC(nested, "Nested virtualization support");
176 
177 /* allow 1m huge page guest backing, if !nested */
178 static int hpage;
179 module_param(hpage, int, 0444);
180 MODULE_PARM_DESC(hpage, "1m huge page backing support");
181 
182 /* maximum percentage of steal time for polling.  >100 is treated like 100 */
183 static u8 halt_poll_max_steal = 10;
184 module_param(halt_poll_max_steal, byte, 0644);
185 MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
186 
187 /* if set to true, the GISA will be initialized and used if available */
188 static bool use_gisa  = true;
189 module_param(use_gisa, bool, 0644);
190 MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");
191 
192 /*
193  * For now we handle at most 16 double words as this is what the s390 base
194  * kernel handles and stores in the prefix page. If we ever need to go beyond
195  * this, this requires changes to code, but the external uapi can stay.
196  */
197 #define SIZE_INTERNAL 16
198 
199 /*
200  * Base feature mask that defines default mask for facilities. Consists of the
201  * defines in FACILITIES_KVM and the non-hypervisor managed bits.
202  */
203 static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
204 /*
205  * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
206  * and defines the facilities that can be enabled via a cpu model.
207  */
208 static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
209 
210 static unsigned long kvm_s390_fac_size(void)
211 {
212 	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
213 	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
214 	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
215 		sizeof(S390_lowcore.stfle_fac_list));
216 
217 	return SIZE_INTERNAL;
218 }
219 
220 /* available cpu features supported by kvm */
221 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
222 /* available subfunctions indicated via query / "test bit" */
223 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
224 
225 static struct gmap_notifier gmap_notifier;
226 static struct gmap_notifier vsie_gmap_notifier;
227 debug_info_t *kvm_s390_dbf;
228 debug_info_t *kvm_s390_dbf_uv;
229 
230 /* Section: not file related */
/*
 * Nothing has to be switched on: every s390 is virtualization enabled.
 * Always returns 0.
 */
int kvm_arch_hardware_enable(void)
{
	const int rc = 0;

	return rc;
}
236 
/*
 * Processor compatibility check; no check is performed here.
 * @opaque: unused
 * Always returns 0.
 */
int kvm_arch_check_processor_compat(void *opaque)
{
	(void)opaque;

	return 0;
}
241 
242 /* forward declarations */
243 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
244 			      unsigned long end);
245 static int sca_switch_to_extended(struct kvm *kvm);
246 
247 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
248 {
249 	u8 delta_idx = 0;
250 
251 	/*
252 	 * The TOD jumps by delta, we have to compensate this by adding
253 	 * -delta to the epoch.
254 	 */
255 	delta = -delta;
256 
257 	/* sign-extension - we're adding to signed values below */
258 	if ((s64)delta < 0)
259 		delta_idx = -1;
260 
261 	scb->epoch += delta;
262 	if (scb->ecd & ECD_MEF) {
263 		scb->epdx += delta_idx;
264 		if (scb->epoch < delta)
265 			scb->epdx += 1;
266 	}
267 }
268 
269 /*
270  * This callback is executed during stop_machine(). All CPUs are therefore
271  * temporarily stopped. In order not to change guest behavior, we have to
272  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
273  * so a CPU won't be stopped while calculating with the epoch.
274  */
275 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
276 			  void *v)
277 {
278 	struct kvm *kvm;
279 	struct kvm_vcpu *vcpu;
280 	int i;
281 	unsigned long long *delta = v;
282 
283 	list_for_each_entry(kvm, &vm_list, vm_list) {
284 		kvm_for_each_vcpu(i, vcpu, kvm) {
285 			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
286 			if (i == 0) {
287 				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
288 				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
289 			}
290 			if (vcpu->arch.cputm_enabled)
291 				vcpu->arch.cputm_start += *delta;
292 			if (vcpu->arch.vsie_block)
293 				kvm_clock_sync_scb(vcpu->arch.vsie_block,
294 						   *delta);
295 		}
296 	}
297 	return NOTIFY_OK;
298 }
299 
300 static struct notifier_block kvm_clock_notifier = {
301 	.notifier_call = kvm_clock_sync,
302 };
303 
304 int kvm_arch_hardware_setup(void *opaque)
305 {
306 	gmap_notifier.notifier_call = kvm_gmap_notifier;
307 	gmap_register_pte_notifier(&gmap_notifier);
308 	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
309 	gmap_register_pte_notifier(&vsie_gmap_notifier);
310 	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
311 				       &kvm_clock_notifier);
312 	return 0;
313 }
314 
315 void kvm_arch_hardware_unsetup(void)
316 {
317 	gmap_unregister_pte_notifier(&gmap_notifier);
318 	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
319 	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
320 					 &kvm_clock_notifier);
321 }
322 
323 static void allow_cpu_feat(unsigned long nr)
324 {
325 	set_bit_inv(nr, kvm_s390_available_cpu_feat);
326 }
327 
328 static inline int plo_test_bit(unsigned char nr)
329 {
330 	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
331 	int cc;
332 
333 	asm volatile(
334 		/* Parameter registers are ignored for "test bit" */
335 		"	plo	0,0,0,0(0)\n"
336 		"	ipm	%0\n"
337 		"	srl	%0,28\n"
338 		: "=d" (cc)
339 		: "d" (r0)
340 		: "cc");
341 	return cc == 0;
342 }
343 
344 static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
345 {
346 	register unsigned long r0 asm("0") = 0;	/* query function */
347 	register unsigned long r1 asm("1") = (unsigned long) query;
348 
349 	asm volatile(
350 		/* Parameter regs are ignored */
351 		"	.insn	rrf,%[opc] << 16,2,4,6,0\n"
352 		:
353 		: "d" (r0), "a" (r1), [opc] "i" (opcode)
354 		: "cc", "memory");
355 }
356 
357 #define INSN_SORTL 0xb938
358 #define INSN_DFLTCC 0xb939
359 
360 static void kvm_s390_cpu_feat_init(void)
361 {
362 	int i;
363 
364 	for (i = 0; i < 256; ++i) {
365 		if (plo_test_bit(i))
366 			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
367 	}
368 
369 	if (test_facility(28)) /* TOD-clock steering */
370 		ptff(kvm_s390_available_subfunc.ptff,
371 		     sizeof(kvm_s390_available_subfunc.ptff),
372 		     PTFF_QAF);
373 
374 	if (test_facility(17)) { /* MSA */
375 		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
376 			      kvm_s390_available_subfunc.kmac);
377 		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
378 			      kvm_s390_available_subfunc.kmc);
379 		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
380 			      kvm_s390_available_subfunc.km);
381 		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
382 			      kvm_s390_available_subfunc.kimd);
383 		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
384 			      kvm_s390_available_subfunc.klmd);
385 	}
386 	if (test_facility(76)) /* MSA3 */
387 		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
388 			      kvm_s390_available_subfunc.pckmo);
389 	if (test_facility(77)) { /* MSA4 */
390 		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
391 			      kvm_s390_available_subfunc.kmctr);
392 		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
393 			      kvm_s390_available_subfunc.kmf);
394 		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
395 			      kvm_s390_available_subfunc.kmo);
396 		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
397 			      kvm_s390_available_subfunc.pcc);
398 	}
399 	if (test_facility(57)) /* MSA5 */
400 		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
401 			      kvm_s390_available_subfunc.ppno);
402 
403 	if (test_facility(146)) /* MSA8 */
404 		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
405 			      kvm_s390_available_subfunc.kma);
406 
407 	if (test_facility(155)) /* MSA9 */
408 		__cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
409 			      kvm_s390_available_subfunc.kdsa);
410 
411 	if (test_facility(150)) /* SORTL */
412 		__insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
413 
414 	if (test_facility(151)) /* DFLTCC */
415 		__insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
416 
417 	if (MACHINE_HAS_ESOP)
418 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
419 	/*
420 	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
421 	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
422 	 */
423 	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
424 	    !test_facility(3) || !nested)
425 		return;
426 	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
427 	if (sclp.has_64bscao)
428 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
429 	if (sclp.has_siif)
430 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
431 	if (sclp.has_gpere)
432 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
433 	if (sclp.has_gsls)
434 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
435 	if (sclp.has_ib)
436 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
437 	if (sclp.has_cei)
438 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
439 	if (sclp.has_ibs)
440 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
441 	if (sclp.has_kss)
442 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
443 	/*
444 	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
445 	 * all skey handling functions read/set the skey from the PGSTE
446 	 * instead of the real storage key.
447 	 *
448 	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
449 	 * pages being detected as preserved although they are resident.
450 	 *
451 	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
452 	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
453 	 *
454 	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
455 	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
456 	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
457 	 *
458 	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
459 	 * cannot easily shadow the SCA because of the ipte lock.
460 	 */
461 }
462 
463 int kvm_arch_init(void *opaque)
464 {
465 	int rc = -ENOMEM;
466 
467 	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
468 	if (!kvm_s390_dbf)
469 		return -ENOMEM;
470 
471 	kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
472 	if (!kvm_s390_dbf_uv)
473 		goto out;
474 
475 	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
476 	    debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
477 		goto out;
478 
479 	kvm_s390_cpu_feat_init();
480 
481 	/* Register floating interrupt controller interface. */
482 	rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
483 	if (rc) {
484 		pr_err("A FLIC registration call failed with rc=%d\n", rc);
485 		goto out;
486 	}
487 
488 	rc = kvm_s390_gib_init(GAL_ISC);
489 	if (rc)
490 		goto out;
491 
492 	return 0;
493 
494 out:
495 	kvm_arch_exit();
496 	return rc;
497 }
498 
499 void kvm_arch_exit(void)
500 {
501 	kvm_s390_gib_destroy();
502 	debug_unregister(kvm_s390_dbf);
503 	debug_unregister(kvm_s390_dbf_uv);
504 }
505 
506 /* Section: device related */
507 long kvm_arch_dev_ioctl(struct file *filp,
508 			unsigned int ioctl, unsigned long arg)
509 {
510 	if (ioctl == KVM_S390_ENABLE_SIE)
511 		return s390_enable_sie();
512 	return -EINVAL;
513 }
514 
515 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
516 {
517 	int r;
518 
519 	switch (ext) {
520 	case KVM_CAP_S390_PSW:
521 	case KVM_CAP_S390_GMAP:
522 	case KVM_CAP_SYNC_MMU:
523 #ifdef CONFIG_KVM_S390_UCONTROL
524 	case KVM_CAP_S390_UCONTROL:
525 #endif
526 	case KVM_CAP_ASYNC_PF:
527 	case KVM_CAP_SYNC_REGS:
528 	case KVM_CAP_ONE_REG:
529 	case KVM_CAP_ENABLE_CAP:
530 	case KVM_CAP_S390_CSS_SUPPORT:
531 	case KVM_CAP_IOEVENTFD:
532 	case KVM_CAP_DEVICE_CTRL:
533 	case KVM_CAP_S390_IRQCHIP:
534 	case KVM_CAP_VM_ATTRIBUTES:
535 	case KVM_CAP_MP_STATE:
536 	case KVM_CAP_IMMEDIATE_EXIT:
537 	case KVM_CAP_S390_INJECT_IRQ:
538 	case KVM_CAP_S390_USER_SIGP:
539 	case KVM_CAP_S390_USER_STSI:
540 	case KVM_CAP_S390_SKEYS:
541 	case KVM_CAP_S390_IRQ_STATE:
542 	case KVM_CAP_S390_USER_INSTR0:
543 	case KVM_CAP_S390_CMMA_MIGRATION:
544 	case KVM_CAP_S390_AIS:
545 	case KVM_CAP_S390_AIS_MIGRATION:
546 	case KVM_CAP_S390_VCPU_RESETS:
547 	case KVM_CAP_SET_GUEST_DEBUG:
548 		r = 1;
549 		break;
550 	case KVM_CAP_S390_HPAGE_1M:
551 		r = 0;
552 		if (hpage && !kvm_is_ucontrol(kvm))
553 			r = 1;
554 		break;
555 	case KVM_CAP_S390_MEM_OP:
556 		r = MEM_OP_MAX_SIZE;
557 		break;
558 	case KVM_CAP_NR_VCPUS:
559 	case KVM_CAP_MAX_VCPUS:
560 	case KVM_CAP_MAX_VCPU_ID:
561 		r = KVM_S390_BSCA_CPU_SLOTS;
562 		if (!kvm_s390_use_sca_entries())
563 			r = KVM_MAX_VCPUS;
564 		else if (sclp.has_esca && sclp.has_64bscao)
565 			r = KVM_S390_ESCA_CPU_SLOTS;
566 		break;
567 	case KVM_CAP_S390_COW:
568 		r = MACHINE_HAS_ESOP;
569 		break;
570 	case KVM_CAP_S390_VECTOR_REGISTERS:
571 		r = MACHINE_HAS_VX;
572 		break;
573 	case KVM_CAP_S390_RI:
574 		r = test_facility(64);
575 		break;
576 	case KVM_CAP_S390_GS:
577 		r = test_facility(133);
578 		break;
579 	case KVM_CAP_S390_BPB:
580 		r = test_facility(82);
581 		break;
582 	case KVM_CAP_S390_PROTECTED:
583 		r = is_prot_virt_host();
584 		break;
585 	default:
586 		r = 0;
587 	}
588 	return r;
589 }
590 
591 void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
592 {
593 	int i;
594 	gfn_t cur_gfn, last_gfn;
595 	unsigned long gaddr, vmaddr;
596 	struct gmap *gmap = kvm->arch.gmap;
597 	DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
598 
599 	/* Loop over all guest segments */
600 	cur_gfn = memslot->base_gfn;
601 	last_gfn = memslot->base_gfn + memslot->npages;
602 	for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
603 		gaddr = gfn_to_gpa(cur_gfn);
604 		vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
605 		if (kvm_is_error_hva(vmaddr))
606 			continue;
607 
608 		bitmap_zero(bitmap, _PAGE_ENTRIES);
609 		gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
610 		for (i = 0; i < _PAGE_ENTRIES; i++) {
611 			if (test_bit(i, bitmap))
612 				mark_page_dirty(kvm, cur_gfn + i);
613 		}
614 
615 		if (fatal_signal_pending(current))
616 			return;
617 		cond_resched();
618 	}
619 }
620 
621 /* Section: vm related */
622 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
623 
624 /*
625  * Get (and clear) the dirty memory log for a memory slot.
626  */
627 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
628 			       struct kvm_dirty_log *log)
629 {
630 	int r;
631 	unsigned long n;
632 	struct kvm_memory_slot *memslot;
633 	int is_dirty;
634 
635 	if (kvm_is_ucontrol(kvm))
636 		return -EINVAL;
637 
638 	mutex_lock(&kvm->slots_lock);
639 
640 	r = -EINVAL;
641 	if (log->slot >= KVM_USER_MEM_SLOTS)
642 		goto out;
643 
644 	r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
645 	if (r)
646 		goto out;
647 
648 	/* Clear the dirty log */
649 	if (is_dirty) {
650 		n = kvm_dirty_bitmap_bytes(memslot);
651 		memset(memslot->dirty_bitmap, 0, n);
652 	}
653 	r = 0;
654 out:
655 	mutex_unlock(&kvm->slots_lock);
656 	return r;
657 }
658 
659 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
660 {
661 	unsigned int i;
662 	struct kvm_vcpu *vcpu;
663 
664 	kvm_for_each_vcpu(i, vcpu, kvm) {
665 		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
666 	}
667 }
668 
669 int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
670 {
671 	int r;
672 
673 	if (cap->flags)
674 		return -EINVAL;
675 
676 	switch (cap->cap) {
677 	case KVM_CAP_S390_IRQCHIP:
678 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
679 		kvm->arch.use_irqchip = 1;
680 		r = 0;
681 		break;
682 	case KVM_CAP_S390_USER_SIGP:
683 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
684 		kvm->arch.user_sigp = 1;
685 		r = 0;
686 		break;
687 	case KVM_CAP_S390_VECTOR_REGISTERS:
688 		mutex_lock(&kvm->lock);
689 		if (kvm->created_vcpus) {
690 			r = -EBUSY;
691 		} else if (MACHINE_HAS_VX) {
692 			set_kvm_facility(kvm->arch.model.fac_mask, 129);
693 			set_kvm_facility(kvm->arch.model.fac_list, 129);
694 			if (test_facility(134)) {
695 				set_kvm_facility(kvm->arch.model.fac_mask, 134);
696 				set_kvm_facility(kvm->arch.model.fac_list, 134);
697 			}
698 			if (test_facility(135)) {
699 				set_kvm_facility(kvm->arch.model.fac_mask, 135);
700 				set_kvm_facility(kvm->arch.model.fac_list, 135);
701 			}
702 			if (test_facility(148)) {
703 				set_kvm_facility(kvm->arch.model.fac_mask, 148);
704 				set_kvm_facility(kvm->arch.model.fac_list, 148);
705 			}
706 			if (test_facility(152)) {
707 				set_kvm_facility(kvm->arch.model.fac_mask, 152);
708 				set_kvm_facility(kvm->arch.model.fac_list, 152);
709 			}
710 			r = 0;
711 		} else
712 			r = -EINVAL;
713 		mutex_unlock(&kvm->lock);
714 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
715 			 r ? "(not available)" : "(success)");
716 		break;
717 	case KVM_CAP_S390_RI:
718 		r = -EINVAL;
719 		mutex_lock(&kvm->lock);
720 		if (kvm->created_vcpus) {
721 			r = -EBUSY;
722 		} else if (test_facility(64)) {
723 			set_kvm_facility(kvm->arch.model.fac_mask, 64);
724 			set_kvm_facility(kvm->arch.model.fac_list, 64);
725 			r = 0;
726 		}
727 		mutex_unlock(&kvm->lock);
728 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
729 			 r ? "(not available)" : "(success)");
730 		break;
731 	case KVM_CAP_S390_AIS:
732 		mutex_lock(&kvm->lock);
733 		if (kvm->created_vcpus) {
734 			r = -EBUSY;
735 		} else {
736 			set_kvm_facility(kvm->arch.model.fac_mask, 72);
737 			set_kvm_facility(kvm->arch.model.fac_list, 72);
738 			r = 0;
739 		}
740 		mutex_unlock(&kvm->lock);
741 		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
742 			 r ? "(not available)" : "(success)");
743 		break;
744 	case KVM_CAP_S390_GS:
745 		r = -EINVAL;
746 		mutex_lock(&kvm->lock);
747 		if (kvm->created_vcpus) {
748 			r = -EBUSY;
749 		} else if (test_facility(133)) {
750 			set_kvm_facility(kvm->arch.model.fac_mask, 133);
751 			set_kvm_facility(kvm->arch.model.fac_list, 133);
752 			r = 0;
753 		}
754 		mutex_unlock(&kvm->lock);
755 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
756 			 r ? "(not available)" : "(success)");
757 		break;
758 	case KVM_CAP_S390_HPAGE_1M:
759 		mutex_lock(&kvm->lock);
760 		if (kvm->created_vcpus)
761 			r = -EBUSY;
762 		else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
763 			r = -EINVAL;
764 		else {
765 			r = 0;
766 			mmap_write_lock(kvm->mm);
767 			kvm->mm->context.allow_gmap_hpage_1m = 1;
768 			mmap_write_unlock(kvm->mm);
769 			/*
770 			 * We might have to create fake 4k page
771 			 * tables. To avoid that the hardware works on
772 			 * stale PGSTEs, we emulate these instructions.
773 			 */
774 			kvm->arch.use_skf = 0;
775 			kvm->arch.use_pfmfi = 0;
776 		}
777 		mutex_unlock(&kvm->lock);
778 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
779 			 r ? "(not available)" : "(success)");
780 		break;
781 	case KVM_CAP_S390_USER_STSI:
782 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
783 		kvm->arch.user_stsi = 1;
784 		r = 0;
785 		break;
786 	case KVM_CAP_S390_USER_INSTR0:
787 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
788 		kvm->arch.user_instr0 = 1;
789 		icpt_operexc_on_all_vcpus(kvm);
790 		r = 0;
791 		break;
792 	default:
793 		r = -EINVAL;
794 		break;
795 	}
796 	return r;
797 }
798 
799 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
800 {
801 	int ret;
802 
803 	switch (attr->attr) {
804 	case KVM_S390_VM_MEM_LIMIT_SIZE:
805 		ret = 0;
806 		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
807 			 kvm->arch.mem_limit);
808 		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
809 			ret = -EFAULT;
810 		break;
811 	default:
812 		ret = -ENXIO;
813 		break;
814 	}
815 	return ret;
816 }
817 
818 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
819 {
820 	int ret;
821 	unsigned int idx;
822 	switch (attr->attr) {
823 	case KVM_S390_VM_MEM_ENABLE_CMMA:
824 		ret = -ENXIO;
825 		if (!sclp.has_cmma)
826 			break;
827 
828 		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
829 		mutex_lock(&kvm->lock);
830 		if (kvm->created_vcpus)
831 			ret = -EBUSY;
832 		else if (kvm->mm->context.allow_gmap_hpage_1m)
833 			ret = -EINVAL;
834 		else {
835 			kvm->arch.use_cmma = 1;
836 			/* Not compatible with cmma. */
837 			kvm->arch.use_pfmfi = 0;
838 			ret = 0;
839 		}
840 		mutex_unlock(&kvm->lock);
841 		break;
842 	case KVM_S390_VM_MEM_CLR_CMMA:
843 		ret = -ENXIO;
844 		if (!sclp.has_cmma)
845 			break;
846 		ret = -EINVAL;
847 		if (!kvm->arch.use_cmma)
848 			break;
849 
850 		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
851 		mutex_lock(&kvm->lock);
852 		idx = srcu_read_lock(&kvm->srcu);
853 		s390_reset_cmma(kvm->arch.gmap->mm);
854 		srcu_read_unlock(&kvm->srcu, idx);
855 		mutex_unlock(&kvm->lock);
856 		ret = 0;
857 		break;
858 	case KVM_S390_VM_MEM_LIMIT_SIZE: {
859 		unsigned long new_limit;
860 
861 		if (kvm_is_ucontrol(kvm))
862 			return -EINVAL;
863 
864 		if (get_user(new_limit, (u64 __user *)attr->addr))
865 			return -EFAULT;
866 
867 		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
868 		    new_limit > kvm->arch.mem_limit)
869 			return -E2BIG;
870 
871 		if (!new_limit)
872 			return -EINVAL;
873 
874 		/* gmap_create takes last usable address */
875 		if (new_limit != KVM_S390_NO_MEM_LIMIT)
876 			new_limit -= 1;
877 
878 		ret = -EBUSY;
879 		mutex_lock(&kvm->lock);
880 		if (!kvm->created_vcpus) {
881 			/* gmap_create will round the limit up */
882 			struct gmap *new = gmap_create(current->mm, new_limit);
883 
884 			if (!new) {
885 				ret = -ENOMEM;
886 			} else {
887 				gmap_remove(kvm->arch.gmap);
888 				new->private = kvm;
889 				kvm->arch.gmap = new;
890 				ret = 0;
891 			}
892 		}
893 		mutex_unlock(&kvm->lock);
894 		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
895 		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
896 			 (void *) kvm->arch.gmap->asce);
897 		break;
898 	}
899 	default:
900 		ret = -ENXIO;
901 		break;
902 	}
903 	return ret;
904 }
905 
906 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
907 
908 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
909 {
910 	struct kvm_vcpu *vcpu;
911 	int i;
912 
913 	kvm_s390_vcpu_block_all(kvm);
914 
915 	kvm_for_each_vcpu(i, vcpu, kvm) {
916 		kvm_s390_vcpu_crypto_setup(vcpu);
917 		/* recreate the shadow crycb by leaving the VSIE handler */
918 		kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
919 	}
920 
921 	kvm_s390_vcpu_unblock_all(kvm);
922 }
923 
924 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
925 {
926 	mutex_lock(&kvm->lock);
927 	switch (attr->attr) {
928 	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
929 		if (!test_kvm_facility(kvm, 76)) {
930 			mutex_unlock(&kvm->lock);
931 			return -EINVAL;
932 		}
933 		get_random_bytes(
934 			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
935 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
936 		kvm->arch.crypto.aes_kw = 1;
937 		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
938 		break;
939 	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
940 		if (!test_kvm_facility(kvm, 76)) {
941 			mutex_unlock(&kvm->lock);
942 			return -EINVAL;
943 		}
944 		get_random_bytes(
945 			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
946 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
947 		kvm->arch.crypto.dea_kw = 1;
948 		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
949 		break;
950 	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
951 		if (!test_kvm_facility(kvm, 76)) {
952 			mutex_unlock(&kvm->lock);
953 			return -EINVAL;
954 		}
955 		kvm->arch.crypto.aes_kw = 0;
956 		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
957 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
958 		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
959 		break;
960 	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
961 		if (!test_kvm_facility(kvm, 76)) {
962 			mutex_unlock(&kvm->lock);
963 			return -EINVAL;
964 		}
965 		kvm->arch.crypto.dea_kw = 0;
966 		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
967 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
968 		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
969 		break;
970 	case KVM_S390_VM_CRYPTO_ENABLE_APIE:
971 		if (!ap_instructions_available()) {
972 			mutex_unlock(&kvm->lock);
973 			return -EOPNOTSUPP;
974 		}
975 		kvm->arch.crypto.apie = 1;
976 		break;
977 	case KVM_S390_VM_CRYPTO_DISABLE_APIE:
978 		if (!ap_instructions_available()) {
979 			mutex_unlock(&kvm->lock);
980 			return -EOPNOTSUPP;
981 		}
982 		kvm->arch.crypto.apie = 0;
983 		break;
984 	default:
985 		mutex_unlock(&kvm->lock);
986 		return -ENXIO;
987 	}
988 
989 	kvm_s390_vcpu_crypto_reset_all(kvm);
990 	mutex_unlock(&kvm->lock);
991 	return 0;
992 }
993 
994 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
995 {
996 	int cx;
997 	struct kvm_vcpu *vcpu;
998 
999 	kvm_for_each_vcpu(cx, vcpu, kvm)
1000 		kvm_s390_sync_request(req, vcpu);
1001 }
1002 
1003 /*
1004  * Must be called with kvm->srcu held to avoid races on memslots, and with
1005  * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
1006  */
1007 static int kvm_s390_vm_start_migration(struct kvm *kvm)
1008 {
1009 	struct kvm_memory_slot *ms;
1010 	struct kvm_memslots *slots;
1011 	unsigned long ram_pages = 0;
1012 	int slotnr;
1013 
1014 	/* migration mode already enabled */
1015 	if (kvm->arch.migration_mode)
1016 		return 0;
1017 	slots = kvm_memslots(kvm);
1018 	if (!slots || !slots->used_slots)
1019 		return -EINVAL;
1020 
1021 	if (!kvm->arch.use_cmma) {
1022 		kvm->arch.migration_mode = 1;
1023 		return 0;
1024 	}
1025 	/* mark all the pages in active slots as dirty */
1026 	for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
1027 		ms = slots->memslots + slotnr;
1028 		if (!ms->dirty_bitmap)
1029 			return -EINVAL;
1030 		/*
1031 		 * The second half of the bitmap is only used on x86,
1032 		 * and would be wasted otherwise, so we put it to good
1033 		 * use here to keep track of the state of the storage
1034 		 * attributes.
1035 		 */
1036 		memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
1037 		ram_pages += ms->npages;
1038 	}
1039 	atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
1040 	kvm->arch.migration_mode = 1;
1041 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
1042 	return 0;
1043 }
1044 
1045 /*
1046  * Must be called with kvm->slots_lock to avoid races with ourselves and
1047  * kvm_s390_vm_start_migration.
1048  */
1049 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
1050 {
1051 	/* migration mode already disabled */
1052 	if (!kvm->arch.migration_mode)
1053 		return 0;
1054 	kvm->arch.migration_mode = 0;
1055 	if (kvm->arch.use_cmma)
1056 		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
1057 	return 0;
1058 }
1059 
1060 static int kvm_s390_vm_set_migration(struct kvm *kvm,
1061 				     struct kvm_device_attr *attr)
1062 {
1063 	int res = -ENXIO;
1064 
1065 	mutex_lock(&kvm->slots_lock);
1066 	switch (attr->attr) {
1067 	case KVM_S390_VM_MIGRATION_START:
1068 		res = kvm_s390_vm_start_migration(kvm);
1069 		break;
1070 	case KVM_S390_VM_MIGRATION_STOP:
1071 		res = kvm_s390_vm_stop_migration(kvm);
1072 		break;
1073 	default:
1074 		break;
1075 	}
1076 	mutex_unlock(&kvm->slots_lock);
1077 
1078 	return res;
1079 }
1080 
1081 static int kvm_s390_vm_get_migration(struct kvm *kvm,
1082 				     struct kvm_device_attr *attr)
1083 {
1084 	u64 mig = kvm->arch.migration_mode;
1085 
1086 	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1087 		return -ENXIO;
1088 
1089 	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
1090 		return -EFAULT;
1091 	return 0;
1092 }
1093 
1094 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1095 {
1096 	struct kvm_s390_vm_tod_clock gtod;
1097 
1098 	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1099 		return -EFAULT;
1100 
1101 	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1102 		return -EINVAL;
1103 	kvm_s390_set_tod_clock(kvm, &gtod);
1104 
1105 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1106 		gtod.epoch_idx, gtod.tod);
1107 
1108 	return 0;
1109 }
1110 
1111 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1112 {
1113 	u8 gtod_high;
1114 
1115 	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1116 					   sizeof(gtod_high)))
1117 		return -EFAULT;
1118 
1119 	if (gtod_high != 0)
1120 		return -EINVAL;
1121 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1122 
1123 	return 0;
1124 }
1125 
1126 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1127 {
1128 	struct kvm_s390_vm_tod_clock gtod = { 0 };
1129 
1130 	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1131 			   sizeof(gtod.tod)))
1132 		return -EFAULT;
1133 
1134 	kvm_s390_set_tod_clock(kvm, &gtod);
1135 	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1136 	return 0;
1137 }
1138 
1139 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1140 {
1141 	int ret;
1142 
1143 	if (attr->flags)
1144 		return -EINVAL;
1145 
1146 	switch (attr->attr) {
1147 	case KVM_S390_VM_TOD_EXT:
1148 		ret = kvm_s390_set_tod_ext(kvm, attr);
1149 		break;
1150 	case KVM_S390_VM_TOD_HIGH:
1151 		ret = kvm_s390_set_tod_high(kvm, attr);
1152 		break;
1153 	case KVM_S390_VM_TOD_LOW:
1154 		ret = kvm_s390_set_tod_low(kvm, attr);
1155 		break;
1156 	default:
1157 		ret = -ENXIO;
1158 		break;
1159 	}
1160 	return ret;
1161 }
1162 
1163 static void kvm_s390_get_tod_clock(struct kvm *kvm,
1164 				   struct kvm_s390_vm_tod_clock *gtod)
1165 {
1166 	struct kvm_s390_tod_clock_ext htod;
1167 
1168 	preempt_disable();
1169 
1170 	get_tod_clock_ext((char *)&htod);
1171 
1172 	gtod->tod = htod.tod + kvm->arch.epoch;
1173 	gtod->epoch_idx = 0;
1174 	if (test_kvm_facility(kvm, 139)) {
1175 		gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
1176 		if (gtod->tod < htod.tod)
1177 			gtod->epoch_idx += 1;
1178 	}
1179 
1180 	preempt_enable();
1181 }
1182 
1183 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1184 {
1185 	struct kvm_s390_vm_tod_clock gtod;
1186 
1187 	memset(&gtod, 0, sizeof(gtod));
1188 	kvm_s390_get_tod_clock(kvm, &gtod);
1189 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1190 		return -EFAULT;
1191 
1192 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1193 		gtod.epoch_idx, gtod.tod);
1194 	return 0;
1195 }
1196 
1197 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1198 {
1199 	u8 gtod_high = 0;
1200 
1201 	if (copy_to_user((void __user *)attr->addr, &gtod_high,
1202 					 sizeof(gtod_high)))
1203 		return -EFAULT;
1204 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1205 
1206 	return 0;
1207 }
1208 
1209 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1210 {
1211 	u64 gtod;
1212 
1213 	gtod = kvm_s390_get_tod_clock_fast(kvm);
1214 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1215 		return -EFAULT;
1216 	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1217 
1218 	return 0;
1219 }
1220 
1221 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1222 {
1223 	int ret;
1224 
1225 	if (attr->flags)
1226 		return -EINVAL;
1227 
1228 	switch (attr->attr) {
1229 	case KVM_S390_VM_TOD_EXT:
1230 		ret = kvm_s390_get_tod_ext(kvm, attr);
1231 		break;
1232 	case KVM_S390_VM_TOD_HIGH:
1233 		ret = kvm_s390_get_tod_high(kvm, attr);
1234 		break;
1235 	case KVM_S390_VM_TOD_LOW:
1236 		ret = kvm_s390_get_tod_low(kvm, attr);
1237 		break;
1238 	default:
1239 		ret = -ENXIO;
1240 		break;
1241 	}
1242 	return ret;
1243 }
1244 
1245 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1246 {
1247 	struct kvm_s390_vm_cpu_processor *proc;
1248 	u16 lowest_ibc, unblocked_ibc;
1249 	int ret = 0;
1250 
1251 	mutex_lock(&kvm->lock);
1252 	if (kvm->created_vcpus) {
1253 		ret = -EBUSY;
1254 		goto out;
1255 	}
1256 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1257 	if (!proc) {
1258 		ret = -ENOMEM;
1259 		goto out;
1260 	}
1261 	if (!copy_from_user(proc, (void __user *)attr->addr,
1262 			    sizeof(*proc))) {
1263 		kvm->arch.model.cpuid = proc->cpuid;
1264 		lowest_ibc = sclp.ibc >> 16 & 0xfff;
1265 		unblocked_ibc = sclp.ibc & 0xfff;
1266 		if (lowest_ibc && proc->ibc) {
1267 			if (proc->ibc > unblocked_ibc)
1268 				kvm->arch.model.ibc = unblocked_ibc;
1269 			else if (proc->ibc < lowest_ibc)
1270 				kvm->arch.model.ibc = lowest_ibc;
1271 			else
1272 				kvm->arch.model.ibc = proc->ibc;
1273 		}
1274 		memcpy(kvm->arch.model.fac_list, proc->fac_list,
1275 		       S390_ARCH_FAC_LIST_SIZE_BYTE);
1276 		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1277 			 kvm->arch.model.ibc,
1278 			 kvm->arch.model.cpuid);
1279 		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1280 			 kvm->arch.model.fac_list[0],
1281 			 kvm->arch.model.fac_list[1],
1282 			 kvm->arch.model.fac_list[2]);
1283 	} else
1284 		ret = -EFAULT;
1285 	kfree(proc);
1286 out:
1287 	mutex_unlock(&kvm->lock);
1288 	return ret;
1289 }
1290 
1291 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1292 				       struct kvm_device_attr *attr)
1293 {
1294 	struct kvm_s390_vm_cpu_feat data;
1295 
1296 	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1297 		return -EFAULT;
1298 	if (!bitmap_subset((unsigned long *) data.feat,
1299 			   kvm_s390_available_cpu_feat,
1300 			   KVM_S390_VM_CPU_FEAT_NR_BITS))
1301 		return -EINVAL;
1302 
1303 	mutex_lock(&kvm->lock);
1304 	if (kvm->created_vcpus) {
1305 		mutex_unlock(&kvm->lock);
1306 		return -EBUSY;
1307 	}
1308 	bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1309 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1310 	mutex_unlock(&kvm->lock);
1311 	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1312 			 data.feat[0],
1313 			 data.feat[1],
1314 			 data.feat[2]);
1315 	return 0;
1316 }
1317 
1318 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1319 					  struct kvm_device_attr *attr)
1320 {
1321 	mutex_lock(&kvm->lock);
1322 	if (kvm->created_vcpus) {
1323 		mutex_unlock(&kvm->lock);
1324 		return -EBUSY;
1325 	}
1326 
1327 	if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1328 			   sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1329 		mutex_unlock(&kvm->lock);
1330 		return -EFAULT;
1331 	}
1332 	mutex_unlock(&kvm->lock);
1333 
1334 	VM_EVENT(kvm, 3, "SET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1335 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1336 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1337 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1338 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1339 	VM_EVENT(kvm, 3, "SET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1340 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1341 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1342 	VM_EVENT(kvm, 3, "SET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1343 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1344 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1345 	VM_EVENT(kvm, 3, "SET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1346 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1347 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1348 	VM_EVENT(kvm, 3, "SET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1349 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1350 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1351 	VM_EVENT(kvm, 3, "SET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1352 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1353 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1354 	VM_EVENT(kvm, 3, "SET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1355 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1356 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1357 	VM_EVENT(kvm, 3, "SET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1358 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1359 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1360 	VM_EVENT(kvm, 3, "SET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1361 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1362 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1363 	VM_EVENT(kvm, 3, "SET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1364 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1365 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1366 	VM_EVENT(kvm, 3, "SET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1367 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1368 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1369 	VM_EVENT(kvm, 3, "SET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1370 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1371 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1372 	VM_EVENT(kvm, 3, "SET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1373 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1374 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1375 	VM_EVENT(kvm, 3, "SET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1376 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1377 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1378 	VM_EVENT(kvm, 3, "SET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1379 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1380 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1381 	VM_EVENT(kvm, 3, "SET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1382 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1383 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1384 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1385 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1386 	VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1387 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1388 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1389 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1390 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1391 
1392 	return 0;
1393 }
1394 
1395 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1396 {
1397 	int ret = -ENXIO;
1398 
1399 	switch (attr->attr) {
1400 	case KVM_S390_VM_CPU_PROCESSOR:
1401 		ret = kvm_s390_set_processor(kvm, attr);
1402 		break;
1403 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1404 		ret = kvm_s390_set_processor_feat(kvm, attr);
1405 		break;
1406 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1407 		ret = kvm_s390_set_processor_subfunc(kvm, attr);
1408 		break;
1409 	}
1410 	return ret;
1411 }
1412 
1413 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1414 {
1415 	struct kvm_s390_vm_cpu_processor *proc;
1416 	int ret = 0;
1417 
1418 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1419 	if (!proc) {
1420 		ret = -ENOMEM;
1421 		goto out;
1422 	}
1423 	proc->cpuid = kvm->arch.model.cpuid;
1424 	proc->ibc = kvm->arch.model.ibc;
1425 	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1426 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1427 	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1428 		 kvm->arch.model.ibc,
1429 		 kvm->arch.model.cpuid);
1430 	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1431 		 kvm->arch.model.fac_list[0],
1432 		 kvm->arch.model.fac_list[1],
1433 		 kvm->arch.model.fac_list[2]);
1434 	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1435 		ret = -EFAULT;
1436 	kfree(proc);
1437 out:
1438 	return ret;
1439 }
1440 
1441 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1442 {
1443 	struct kvm_s390_vm_cpu_machine *mach;
1444 	int ret = 0;
1445 
1446 	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1447 	if (!mach) {
1448 		ret = -ENOMEM;
1449 		goto out;
1450 	}
1451 	get_cpu_id((struct cpuid *) &mach->cpuid);
1452 	mach->ibc = sclp.ibc;
1453 	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1454 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1455 	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1456 	       sizeof(S390_lowcore.stfle_fac_list));
1457 	VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1458 		 kvm->arch.model.ibc,
1459 		 kvm->arch.model.cpuid);
1460 	VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1461 		 mach->fac_mask[0],
1462 		 mach->fac_mask[1],
1463 		 mach->fac_mask[2]);
1464 	VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1465 		 mach->fac_list[0],
1466 		 mach->fac_list[1],
1467 		 mach->fac_list[2]);
1468 	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1469 		ret = -EFAULT;
1470 	kfree(mach);
1471 out:
1472 	return ret;
1473 }
1474 
1475 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1476 				       struct kvm_device_attr *attr)
1477 {
1478 	struct kvm_s390_vm_cpu_feat data;
1479 
1480 	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1481 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1482 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1483 		return -EFAULT;
1484 	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1485 			 data.feat[0],
1486 			 data.feat[1],
1487 			 data.feat[2]);
1488 	return 0;
1489 }
1490 
1491 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1492 				     struct kvm_device_attr *attr)
1493 {
1494 	struct kvm_s390_vm_cpu_feat data;
1495 
1496 	bitmap_copy((unsigned long *) data.feat,
1497 		    kvm_s390_available_cpu_feat,
1498 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1499 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1500 		return -EFAULT;
1501 	VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1502 			 data.feat[0],
1503 			 data.feat[1],
1504 			 data.feat[2]);
1505 	return 0;
1506 }
1507 
1508 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1509 					  struct kvm_device_attr *attr)
1510 {
1511 	if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1512 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1513 		return -EFAULT;
1514 
1515 	VM_EVENT(kvm, 3, "GET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1516 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1517 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1518 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1519 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1520 	VM_EVENT(kvm, 3, "GET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1521 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1522 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1523 	VM_EVENT(kvm, 3, "GET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1524 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1525 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1526 	VM_EVENT(kvm, 3, "GET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1527 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1528 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1529 	VM_EVENT(kvm, 3, "GET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1530 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1531 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1532 	VM_EVENT(kvm, 3, "GET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1533 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1534 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1535 	VM_EVENT(kvm, 3, "GET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1536 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1537 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1538 	VM_EVENT(kvm, 3, "GET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1539 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1540 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1541 	VM_EVENT(kvm, 3, "GET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1542 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1543 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1544 	VM_EVENT(kvm, 3, "GET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1545 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1546 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1547 	VM_EVENT(kvm, 3, "GET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1548 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1549 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1550 	VM_EVENT(kvm, 3, "GET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1551 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1552 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1553 	VM_EVENT(kvm, 3, "GET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1554 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1555 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1556 	VM_EVENT(kvm, 3, "GET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1557 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1558 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1559 	VM_EVENT(kvm, 3, "GET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1560 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1561 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1562 	VM_EVENT(kvm, 3, "GET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1563 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1564 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1565 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1566 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1567 	VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1568 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1569 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1570 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1571 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1572 
1573 	return 0;
1574 }
1575 
1576 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1577 					struct kvm_device_attr *attr)
1578 {
1579 	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1580 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1581 		return -EFAULT;
1582 
1583 	VM_EVENT(kvm, 3, "GET: host  PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1584 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1585 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1586 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1587 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1588 	VM_EVENT(kvm, 3, "GET: host  PTFF   subfunc 0x%16.16lx.%16.16lx",
1589 		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1590 		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1591 	VM_EVENT(kvm, 3, "GET: host  KMAC   subfunc 0x%16.16lx.%16.16lx",
1592 		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1593 		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1594 	VM_EVENT(kvm, 3, "GET: host  KMC    subfunc 0x%16.16lx.%16.16lx",
1595 		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1596 		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1597 	VM_EVENT(kvm, 3, "GET: host  KM     subfunc 0x%16.16lx.%16.16lx",
1598 		 ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1599 		 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1600 	VM_EVENT(kvm, 3, "GET: host  KIMD   subfunc 0x%16.16lx.%16.16lx",
1601 		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1602 		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1603 	VM_EVENT(kvm, 3, "GET: host  KLMD   subfunc 0x%16.16lx.%16.16lx",
1604 		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1605 		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1606 	VM_EVENT(kvm, 3, "GET: host  PCKMO  subfunc 0x%16.16lx.%16.16lx",
1607 		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1608 		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1609 	VM_EVENT(kvm, 3, "GET: host  KMCTR  subfunc 0x%16.16lx.%16.16lx",
1610 		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1611 		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1612 	VM_EVENT(kvm, 3, "GET: host  KMF    subfunc 0x%16.16lx.%16.16lx",
1613 		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1614 		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1615 	VM_EVENT(kvm, 3, "GET: host  KMO    subfunc 0x%16.16lx.%16.16lx",
1616 		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1617 		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1618 	VM_EVENT(kvm, 3, "GET: host  PCC    subfunc 0x%16.16lx.%16.16lx",
1619 		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1620 		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1621 	VM_EVENT(kvm, 3, "GET: host  PPNO   subfunc 0x%16.16lx.%16.16lx",
1622 		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1623 		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1624 	VM_EVENT(kvm, 3, "GET: host  KMA    subfunc 0x%16.16lx.%16.16lx",
1625 		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1626 		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1627 	VM_EVENT(kvm, 3, "GET: host  KDSA   subfunc 0x%16.16lx.%16.16lx",
1628 		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1629 		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1630 	VM_EVENT(kvm, 3, "GET: host  SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1631 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1632 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1633 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1634 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1635 	VM_EVENT(kvm, 3, "GET: host  DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1636 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1637 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1638 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1639 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1640 
1641 	return 0;
1642 }
1643 
1644 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1645 {
1646 	int ret = -ENXIO;
1647 
1648 	switch (attr->attr) {
1649 	case KVM_S390_VM_CPU_PROCESSOR:
1650 		ret = kvm_s390_get_processor(kvm, attr);
1651 		break;
1652 	case KVM_S390_VM_CPU_MACHINE:
1653 		ret = kvm_s390_get_machine(kvm, attr);
1654 		break;
1655 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1656 		ret = kvm_s390_get_processor_feat(kvm, attr);
1657 		break;
1658 	case KVM_S390_VM_CPU_MACHINE_FEAT:
1659 		ret = kvm_s390_get_machine_feat(kvm, attr);
1660 		break;
1661 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1662 		ret = kvm_s390_get_processor_subfunc(kvm, attr);
1663 		break;
1664 	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1665 		ret = kvm_s390_get_machine_subfunc(kvm, attr);
1666 		break;
1667 	}
1668 	return ret;
1669 }
1670 
1671 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1672 {
1673 	int ret;
1674 
1675 	switch (attr->group) {
1676 	case KVM_S390_VM_MEM_CTRL:
1677 		ret = kvm_s390_set_mem_control(kvm, attr);
1678 		break;
1679 	case KVM_S390_VM_TOD:
1680 		ret = kvm_s390_set_tod(kvm, attr);
1681 		break;
1682 	case KVM_S390_VM_CPU_MODEL:
1683 		ret = kvm_s390_set_cpu_model(kvm, attr);
1684 		break;
1685 	case KVM_S390_VM_CRYPTO:
1686 		ret = kvm_s390_vm_set_crypto(kvm, attr);
1687 		break;
1688 	case KVM_S390_VM_MIGRATION:
1689 		ret = kvm_s390_vm_set_migration(kvm, attr);
1690 		break;
1691 	default:
1692 		ret = -ENXIO;
1693 		break;
1694 	}
1695 
1696 	return ret;
1697 }
1698 
1699 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1700 {
1701 	int ret;
1702 
1703 	switch (attr->group) {
1704 	case KVM_S390_VM_MEM_CTRL:
1705 		ret = kvm_s390_get_mem_control(kvm, attr);
1706 		break;
1707 	case KVM_S390_VM_TOD:
1708 		ret = kvm_s390_get_tod(kvm, attr);
1709 		break;
1710 	case KVM_S390_VM_CPU_MODEL:
1711 		ret = kvm_s390_get_cpu_model(kvm, attr);
1712 		break;
1713 	case KVM_S390_VM_MIGRATION:
1714 		ret = kvm_s390_vm_get_migration(kvm, attr);
1715 		break;
1716 	default:
1717 		ret = -ENXIO;
1718 		break;
1719 	}
1720 
1721 	return ret;
1722 }
1723 
1724 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1725 {
1726 	int ret;
1727 
1728 	switch (attr->group) {
1729 	case KVM_S390_VM_MEM_CTRL:
1730 		switch (attr->attr) {
1731 		case KVM_S390_VM_MEM_ENABLE_CMMA:
1732 		case KVM_S390_VM_MEM_CLR_CMMA:
1733 			ret = sclp.has_cmma ? 0 : -ENXIO;
1734 			break;
1735 		case KVM_S390_VM_MEM_LIMIT_SIZE:
1736 			ret = 0;
1737 			break;
1738 		default:
1739 			ret = -ENXIO;
1740 			break;
1741 		}
1742 		break;
1743 	case KVM_S390_VM_TOD:
1744 		switch (attr->attr) {
1745 		case KVM_S390_VM_TOD_LOW:
1746 		case KVM_S390_VM_TOD_HIGH:
1747 			ret = 0;
1748 			break;
1749 		default:
1750 			ret = -ENXIO;
1751 			break;
1752 		}
1753 		break;
1754 	case KVM_S390_VM_CPU_MODEL:
1755 		switch (attr->attr) {
1756 		case KVM_S390_VM_CPU_PROCESSOR:
1757 		case KVM_S390_VM_CPU_MACHINE:
1758 		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1759 		case KVM_S390_VM_CPU_MACHINE_FEAT:
1760 		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1761 		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1762 			ret = 0;
1763 			break;
1764 		default:
1765 			ret = -ENXIO;
1766 			break;
1767 		}
1768 		break;
1769 	case KVM_S390_VM_CRYPTO:
1770 		switch (attr->attr) {
1771 		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1772 		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1773 		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1774 		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1775 			ret = 0;
1776 			break;
1777 		case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1778 		case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1779 			ret = ap_instructions_available() ? 0 : -ENXIO;
1780 			break;
1781 		default:
1782 			ret = -ENXIO;
1783 			break;
1784 		}
1785 		break;
1786 	case KVM_S390_VM_MIGRATION:
1787 		ret = 0;
1788 		break;
1789 	default:
1790 		ret = -ENXIO;
1791 		break;
1792 	}
1793 
1794 	return ret;
1795 }
1796 
1797 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1798 {
1799 	uint8_t *keys;
1800 	uint64_t hva;
1801 	int srcu_idx, i, r = 0;
1802 
1803 	if (args->flags != 0)
1804 		return -EINVAL;
1805 
1806 	/* Is this guest using storage keys? */
1807 	if (!mm_uses_skeys(current->mm))
1808 		return KVM_S390_GET_SKEYS_NONE;
1809 
1810 	/* Enforce sane limit on memory allocation */
1811 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1812 		return -EINVAL;
1813 
1814 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1815 	if (!keys)
1816 		return -ENOMEM;
1817 
1818 	mmap_read_lock(current->mm);
1819 	srcu_idx = srcu_read_lock(&kvm->srcu);
1820 	for (i = 0; i < args->count; i++) {
1821 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1822 		if (kvm_is_error_hva(hva)) {
1823 			r = -EFAULT;
1824 			break;
1825 		}
1826 
1827 		r = get_guest_storage_key(current->mm, hva, &keys[i]);
1828 		if (r)
1829 			break;
1830 	}
1831 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1832 	mmap_read_unlock(current->mm);
1833 
1834 	if (!r) {
1835 		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1836 				 sizeof(uint8_t) * args->count);
1837 		if (r)
1838 			r = -EFAULT;
1839 	}
1840 
1841 	kvfree(keys);
1842 	return r;
1843 }
1844 
1845 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1846 {
1847 	uint8_t *keys;
1848 	uint64_t hva;
1849 	int srcu_idx, i, r = 0;
1850 	bool unlocked;
1851 
1852 	if (args->flags != 0)
1853 		return -EINVAL;
1854 
1855 	/* Enforce sane limit on memory allocation */
1856 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1857 		return -EINVAL;
1858 
1859 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1860 	if (!keys)
1861 		return -ENOMEM;
1862 
1863 	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1864 			   sizeof(uint8_t) * args->count);
1865 	if (r) {
1866 		r = -EFAULT;
1867 		goto out;
1868 	}
1869 
1870 	/* Enable storage key handling for the guest */
1871 	r = s390_enable_skey();
1872 	if (r)
1873 		goto out;
1874 
1875 	i = 0;
1876 	mmap_read_lock(current->mm);
1877 	srcu_idx = srcu_read_lock(&kvm->srcu);
1878         while (i < args->count) {
1879 		unlocked = false;
1880 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1881 		if (kvm_is_error_hva(hva)) {
1882 			r = -EFAULT;
1883 			break;
1884 		}
1885 
1886 		/* Lowest order bit is reserved */
1887 		if (keys[i] & 0x01) {
1888 			r = -EINVAL;
1889 			break;
1890 		}
1891 
1892 		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1893 		if (r) {
1894 			r = fixup_user_fault(current, current->mm, hva,
1895 					     FAULT_FLAG_WRITE, &unlocked);
1896 			if (r)
1897 				break;
1898 		}
1899 		if (!r)
1900 			i++;
1901 	}
1902 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1903 	mmap_read_unlock(current->mm);
1904 out:
1905 	kvfree(keys);
1906 	return r;
1907 }
1908 
1909 /*
1910  * Base address and length must be sent at the start of each block, therefore
1911  * it's cheaper to send some clean data, as long as it's less than the size of
1912  * two longs.
1913  */
1914 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1915 /* for consistency */
1916 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1917 
1918 /*
1919  * Similar to gfn_to_memslot, but returns the index of a memslot also when the
1920  * address falls in a hole. In that case the index of one of the memslots
1921  * bordering the hole is returned.
1922  */
1923 static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1924 {
1925 	int start = 0, end = slots->used_slots;
1926 	int slot = atomic_read(&slots->lru_slot);
1927 	struct kvm_memory_slot *memslots = slots->memslots;
1928 
1929 	if (gfn >= memslots[slot].base_gfn &&
1930 	    gfn < memslots[slot].base_gfn + memslots[slot].npages)
1931 		return slot;
1932 
1933 	while (start < end) {
1934 		slot = start + (end - start) / 2;
1935 
1936 		if (gfn >= memslots[slot].base_gfn)
1937 			end = slot;
1938 		else
1939 			start = slot + 1;
1940 	}
1941 
1942 	if (start >= slots->used_slots)
1943 		return slots->used_slots - 1;
1944 
1945 	if (gfn >= memslots[start].base_gfn &&
1946 	    gfn < memslots[start].base_gfn + memslots[start].npages) {
1947 		atomic_set(&slots->lru_slot, start);
1948 	}
1949 
1950 	return start;
1951 }
1952 
1953 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1954 			      u8 *res, unsigned long bufsize)
1955 {
1956 	unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1957 
1958 	args->count = 0;
1959 	while (args->count < bufsize) {
1960 		hva = gfn_to_hva(kvm, cur_gfn);
1961 		/*
1962 		 * We return an error if the first value was invalid, but we
1963 		 * return successfully if at least one value was copied.
1964 		 */
1965 		if (kvm_is_error_hva(hva))
1966 			return args->count ? 0 : -EFAULT;
1967 		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1968 			pgstev = 0;
1969 		res[args->count++] = (pgstev >> 24) & 0x43;
1970 		cur_gfn++;
1971 	}
1972 
1973 	return 0;
1974 }
1975 
/*
 * Return the gfn of the next page, searching from @cur_gfn, whose bit is set
 * in the bitmap returned by kvm_second_dirty_bitmap() for its memslot.
 *
 * The search starts in the memslot reported by gfn_to_memslot_approx() and
 * proceeds through the memslot array towards index 0. If no set bit is
 * found, the value returned is base_gfn + npages of the last slot examined,
 * i.e. the first gfn past that slot.
 */
static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
					      unsigned long cur_gfn)
{
	int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
	struct kvm_memory_slot *ms = slots->memslots + slotidx;
	unsigned long ofs = cur_gfn - ms->base_gfn;

	/*
	 * cur_gfn is at or past the end of the approximated slot: continue
	 * with the slot at the next lower index, scanning from its first page.
	 */
	if (ms->base_gfn + ms->npages <= cur_gfn) {
		slotidx--;
		/* If we are above the highest slot, wrap around */
		if (slotidx < 0)
			slotidx = slots->used_slots - 1;

		ms = slots->memslots + slotidx;
		ofs = 0;
	}
	ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
	/* No bit found in this slot (ofs >= npages): try lower-indexed slots. */
	while ((slotidx > 0) && (ofs >= ms->npages)) {
		slotidx--;
		ms = slots->memslots + slotidx;
		ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
	}
	return ms->base_gfn + ofs;
}
2000 
2001 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
2002 			     u8 *res, unsigned long bufsize)
2003 {
2004 	unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
2005 	struct kvm_memslots *slots = kvm_memslots(kvm);
2006 	struct kvm_memory_slot *ms;
2007 
2008 	if (unlikely(!slots->used_slots))
2009 		return 0;
2010 
2011 	cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
2012 	ms = gfn_to_memslot(kvm, cur_gfn);
2013 	args->count = 0;
2014 	args->start_gfn = cur_gfn;
2015 	if (!ms)
2016 		return 0;
2017 	next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2018 	mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
2019 
2020 	while (args->count < bufsize) {
2021 		hva = gfn_to_hva(kvm, cur_gfn);
2022 		if (kvm_is_error_hva(hva))
2023 			return 0;
2024 		/* Decrement only if we actually flipped the bit to 0 */
2025 		if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2026 			atomic64_dec(&kvm->arch.cmma_dirty_pages);
2027 		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2028 			pgstev = 0;
2029 		/* Save the value */
2030 		res[args->count++] = (pgstev >> 24) & 0x43;
2031 		/* If the next bit is too far away, stop. */
2032 		if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2033 			return 0;
2034 		/* If we reached the previous "next", find the next one */
2035 		if (cur_gfn == next_gfn)
2036 			next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2037 		/* Reached the end of memory or of the buffer, stop */
2038 		if ((next_gfn >= mem_end) ||
2039 		    (next_gfn - args->start_gfn >= bufsize))
2040 			return 0;
2041 		cur_gfn++;
2042 		/* Reached the end of the current memslot, take the next one. */
2043 		if (cur_gfn - ms->base_gfn >= ms->npages) {
2044 			ms = gfn_to_memslot(kvm, cur_gfn);
2045 			if (!ms)
2046 				return 0;
2047 		}
2048 	}
2049 	return 0;
2050 }
2051 
2052 /*
2053  * This function searches for the next page with dirty CMMA attributes, and
2054  * saves the attributes in the buffer up to either the end of the buffer or
2055  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2056  * no trailing clean bytes are saved.
2057  * In case no dirty bits were found, or if CMMA was not enabled or used, the
2058  * output buffer will indicate 0 as length.
2059  */
2060 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2061 				  struct kvm_s390_cmma_log *args)
2062 {
2063 	unsigned long bufsize;
2064 	int srcu_idx, peek, ret;
2065 	u8 *values;
2066 
2067 	if (!kvm->arch.use_cmma)
2068 		return -ENXIO;
2069 	/* Invalid/unsupported flags were specified */
2070 	if (args->flags & ~KVM_S390_CMMA_PEEK)
2071 		return -EINVAL;
2072 	/* Migration mode query, and we are not doing a migration */
2073 	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2074 	if (!peek && !kvm->arch.migration_mode)
2075 		return -EINVAL;
2076 	/* CMMA is disabled or was not used, or the buffer has length zero */
2077 	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2078 	if (!bufsize || !kvm->mm->context.uses_cmm) {
2079 		memset(args, 0, sizeof(*args));
2080 		return 0;
2081 	}
2082 	/* We are not peeking, and there are no dirty pages */
2083 	if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2084 		memset(args, 0, sizeof(*args));
2085 		return 0;
2086 	}
2087 
2088 	values = vmalloc(bufsize);
2089 	if (!values)
2090 		return -ENOMEM;
2091 
2092 	mmap_read_lock(kvm->mm);
2093 	srcu_idx = srcu_read_lock(&kvm->srcu);
2094 	if (peek)
2095 		ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2096 	else
2097 		ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2098 	srcu_read_unlock(&kvm->srcu, srcu_idx);
2099 	mmap_read_unlock(kvm->mm);
2100 
2101 	if (kvm->arch.migration_mode)
2102 		args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2103 	else
2104 		args->remaining = 0;
2105 
2106 	if (copy_to_user((void __user *)args->values, values, args->count))
2107 		ret = -EFAULT;
2108 
2109 	vfree(values);
2110 	return ret;
2111 }
2112 
2113 /*
2114  * This function sets the CMMA attributes for the given pages. If the input
2115  * buffer has zero length, no action is taken, otherwise the attributes are
2116  * set and the mm->context.uses_cmm flag is set.
2117  */
2118 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2119 				  const struct kvm_s390_cmma_log *args)
2120 {
2121 	unsigned long hva, mask, pgstev, i;
2122 	uint8_t *bits;
2123 	int srcu_idx, r = 0;
2124 
2125 	mask = args->mask;
2126 
2127 	if (!kvm->arch.use_cmma)
2128 		return -ENXIO;
2129 	/* invalid/unsupported flags */
2130 	if (args->flags != 0)
2131 		return -EINVAL;
2132 	/* Enforce sane limit on memory allocation */
2133 	if (args->count > KVM_S390_CMMA_SIZE_MAX)
2134 		return -EINVAL;
2135 	/* Nothing to do */
2136 	if (args->count == 0)
2137 		return 0;
2138 
2139 	bits = vmalloc(array_size(sizeof(*bits), args->count));
2140 	if (!bits)
2141 		return -ENOMEM;
2142 
2143 	r = copy_from_user(bits, (void __user *)args->values, args->count);
2144 	if (r) {
2145 		r = -EFAULT;
2146 		goto out;
2147 	}
2148 
2149 	mmap_read_lock(kvm->mm);
2150 	srcu_idx = srcu_read_lock(&kvm->srcu);
2151 	for (i = 0; i < args->count; i++) {
2152 		hva = gfn_to_hva(kvm, args->start_gfn + i);
2153 		if (kvm_is_error_hva(hva)) {
2154 			r = -EFAULT;
2155 			break;
2156 		}
2157 
2158 		pgstev = bits[i];
2159 		pgstev = pgstev << 24;
2160 		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2161 		set_pgste_bits(kvm->mm, hva, mask, pgstev);
2162 	}
2163 	srcu_read_unlock(&kvm->srcu, srcu_idx);
2164 	mmap_read_unlock(kvm->mm);
2165 
2166 	if (!kvm->mm->context.uses_cmm) {
2167 		mmap_write_lock(kvm->mm);
2168 		kvm->mm->context.uses_cmm = 1;
2169 		mmap_write_unlock(kvm->mm);
2170 	}
2171 out:
2172 	vfree(bits);
2173 	return r;
2174 }
2175 
2176 static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
2177 {
2178 	struct kvm_vcpu *vcpu;
2179 	u16 rc, rrc;
2180 	int ret = 0;
2181 	int i;
2182 
2183 	/*
2184 	 * We ignore failures and try to destroy as many CPUs as possible.
2185 	 * At the same time we must not free the assigned resources when
2186 	 * this fails, as the ultravisor has still access to that memory.
2187 	 * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
2188 	 * behind.
2189 	 * We want to return the first failure rc and rrc, though.
2190 	 */
2191 	kvm_for_each_vcpu(i, vcpu, kvm) {
2192 		mutex_lock(&vcpu->mutex);
2193 		if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) {
2194 			*rcp = rc;
2195 			*rrcp = rrc;
2196 			ret = -EIO;
2197 		}
2198 		mutex_unlock(&vcpu->mutex);
2199 	}
2200 	return ret;
2201 }
2202 
2203 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2204 {
2205 	int i, r = 0;
2206 	u16 dummy;
2207 
2208 	struct kvm_vcpu *vcpu;
2209 
2210 	kvm_for_each_vcpu(i, vcpu, kvm) {
2211 		mutex_lock(&vcpu->mutex);
2212 		r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
2213 		mutex_unlock(&vcpu->mutex);
2214 		if (r)
2215 			break;
2216 	}
2217 	if (r)
2218 		kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
2219 	return r;
2220 }
2221 
/*
 * Execute one protected-virtualization sub-command (cmd->cmd) for @kvm.
 *
 * Every sub-command except KVM_PV_ENABLE requires the VM to be protected
 * already and fails with -EINVAL otherwise; KVM_PV_ENABLE fails with -EINVAL
 * if it already is. Ultravisor return codes are passed back through
 * cmd->rc and cmd->rrc. Unknown sub-commands yield -ENOTTY.
 */
static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
{
	int r = 0;
	u16 dummy;
	void __user *argp = (void __user *)cmd->data;

	switch (cmd->cmd) {
	case KVM_PV_ENABLE: {
		r = -EINVAL;
		if (kvm_s390_pv_is_protected(kvm))
			break;

		/*
		 *  FMT 4 SIE needs esca. As we never switch back to bsca from
		 *  esca, we need no cleanup in the error cases below
		 */
		r = sca_switch_to_extended(kvm);
		if (r)
			break;

		mmap_write_lock(current->mm);
		r = gmap_mark_unmergeable();
		mmap_write_unlock(current->mm);
		if (r)
			break;

		r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
		if (r)
			break;

		/* If converting the vcpus fails, undo the VM initialization. */
		r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
		if (r)
			kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);

		/*
		 * we need to block service interrupts from now on
		 * NOTE(review): the bit is also set when the vcpu conversion
		 * above failed -- confirm this is intended.
		 */
		set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
		break;
	}
	case KVM_PV_DISABLE: {
		r = -EINVAL;
		if (!kvm_s390_pv_is_protected(kvm))
			break;

		r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
		/*
		 * If a CPU could not be destroyed, destroy VM will also fail.
		 * There is no point in trying to destroy it. Instead return
		 * the rc and rrc from the first CPU that failed destroying.
		 */
		if (r)
			break;
		r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);

		/* no need to block service interrupts any more */
		clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
		break;
	}
	case KVM_PV_SET_SEC_PARMS: {
		struct kvm_s390_pv_sec_parm parms = {};
		void *hdr;

		r = -EINVAL;
		if (!kvm_s390_pv_is_protected(kvm))
			break;

		r = -EFAULT;
		if (copy_from_user(&parms, argp, sizeof(parms)))
			break;

		/* Currently restricted to 8KB */
		r = -EINVAL;
		if (parms.length > PAGE_SIZE * 2)
			break;

		r = -ENOMEM;
		hdr = vmalloc(parms.length);
		if (!hdr)
			break;

		/* r stays -EFAULT unless the header was copied in completely. */
		r = -EFAULT;
		if (!copy_from_user(hdr, (void __user *)parms.origin,
				    parms.length))
			r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
						      &cmd->rc, &cmd->rrc);

		vfree(hdr);
		break;
	}
	case KVM_PV_UNPACK: {
		struct kvm_s390_pv_unp unp = {};

		r = -EINVAL;
		if (!kvm_s390_pv_is_protected(kvm))
			break;

		r = -EFAULT;
		if (copy_from_user(&unp, argp, sizeof(unp)))
			break;

		r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
				       &cmd->rc, &cmd->rrc);
		break;
	}
	case KVM_PV_VERIFY: {
		r = -EINVAL;
		if (!kvm_s390_pv_is_protected(kvm))
			break;

		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
				  UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
		KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
			     cmd->rrc);
		break;
	}
	case KVM_PV_PREP_RESET: {
		r = -EINVAL;
		if (!kvm_s390_pv_is_protected(kvm))
			break;

		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
				  UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
		KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
			     cmd->rc, cmd->rrc);
		break;
	}
	case KVM_PV_UNSHARE_ALL: {
		r = -EINVAL;
		if (!kvm_s390_pv_is_protected(kvm))
			break;

		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
				  UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
		KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
			     cmd->rc, cmd->rrc);
		break;
	}
	default:
		r = -ENOTTY;
	}
	return r;
}
2363 
/*
 * Architecture-specific VM ioctl dispatcher.
 *
 * @filp:  file whose private_data is the struct kvm of the VM
 * @ioctl: ioctl number
 * @arg:   ioctl argument, used as a user-space pointer by every case below
 *
 * Each case copies its argument structure in from user space (-EFAULT on
 * failure) and forwards to the matching handler. Returns the handler's
 * result, or -ENOTTY for an ioctl that is not handled here.
 */
long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
{
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;
	struct kvm_device_attr attr;
	int r;

	switch (ioctl) {
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		r = kvm_s390_inject_vm(kvm, &s390int);
		break;
	}
	case KVM_CREATE_IRQCHIP: {
		struct kvm_irq_routing_entry routing;

		/* Fails with -EINVAL unless use_irqchip is set for this VM. */
		r = -EINVAL;
		if (kvm->arch.use_irqchip) {
			/* Set up dummy routing. */
			memset(&routing, 0, sizeof(routing));
			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
		}
		break;
	}
	case KVM_SET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_set_attr(kvm, &attr);
		break;
	}
	case KVM_GET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_get_attr(kvm, &attr);
		break;
	}
	case KVM_HAS_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_has_attr(kvm, &attr);
		break;
	}
	case KVM_S390_GET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_get_skeys(kvm, &args);
		break;
	}
	case KVM_S390_SET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_set_skeys(kvm, &args);
		break;
	}
	case KVM_S390_GET_CMMA_BITS: {
		struct kvm_s390_cmma_log args;

		r = -EFAULT;
		if (copy_from_user(&args, argp, sizeof(args)))
			break;
		/* The handler runs with kvm->slots_lock held. */
		mutex_lock(&kvm->slots_lock);
		r = kvm_s390_get_cmma_bits(kvm, &args);
		mutex_unlock(&kvm->slots_lock);
		/* The updated header is written back only on success. */
		if (!r) {
			r = copy_to_user(argp, &args, sizeof(args));
			if (r)
				r = -EFAULT;
		}
		break;
	}
	case KVM_S390_SET_CMMA_BITS: {
		struct kvm_s390_cmma_log args;

		r = -EFAULT;
		if (copy_from_user(&args, argp, sizeof(args)))
			break;
		mutex_lock(&kvm->slots_lock);
		r = kvm_s390_set_cmma_bits(kvm, &args);
		mutex_unlock(&kvm->slots_lock);
		break;
	}
	case KVM_S390_PV_COMMAND: {
		struct kvm_pv_cmd args;

		/*
		 * protvirt means user sigp
		 * (set before any of the validity checks below)
		 */
		kvm->arch.user_cpu_state_ctrl = 1;
		r = 0;
		if (!is_prot_virt_host()) {
			r = -EINVAL;
			break;
		}
		if (copy_from_user(&args, argp, sizeof(args))) {
			r = -EFAULT;
			break;
		}
		if (args.flags) {
			r = -EINVAL;
			break;
		}
		mutex_lock(&kvm->lock);
		r = kvm_s390_handle_pv(kvm, &args);
		mutex_unlock(&kvm->lock);
		/*
		 * args is copied back regardless of r; a failing copy
		 * replaces the handler's result with -EFAULT.
		 */
		if (copy_to_user(argp, &args, sizeof(args))) {
			r = -EFAULT;
			break;
		}
		break;
	}
	default:
		r = -ENOTTY;
	}

	return r;
}
2494 
2495 static int kvm_s390_apxa_installed(void)
2496 {
2497 	struct ap_config_info info;
2498 
2499 	if (ap_instructions_available()) {
2500 		if (ap_qci(&info) == 0)
2501 			return info.apxa;
2502 	}
2503 
2504 	return 0;
2505 }
2506 
2507 /*
2508  * The format of the crypto control block (CRYCB) is specified in the 3 low
2509  * order bits of the CRYCB designation (CRYCBD) field as follows:
2510  * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2511  *	     AP extended addressing (APXA) facility are installed.
2512  * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2513  * Format 2: Both the APXA and MSAX3 facilities are installed
2514  */
2515 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2516 {
2517 	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2518 
2519 	/* Clear the CRYCB format bits - i.e., set format 0 by default */
2520 	kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2521 
2522 	/* Check whether MSAX3 is installed */
2523 	if (!test_kvm_facility(kvm, 76))
2524 		return;
2525 
2526 	if (kvm_s390_apxa_installed())
2527 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2528 	else
2529 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2530 }
2531 
2532 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2533 			       unsigned long *aqm, unsigned long *adm)
2534 {
2535 	struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2536 
2537 	mutex_lock(&kvm->lock);
2538 	kvm_s390_vcpu_block_all(kvm);
2539 
2540 	switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2541 	case CRYCB_FORMAT2: /* APCB1 use 256 bits */
2542 		memcpy(crycb->apcb1.apm, apm, 32);
2543 		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2544 			 apm[0], apm[1], apm[2], apm[3]);
2545 		memcpy(crycb->apcb1.aqm, aqm, 32);
2546 		VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2547 			 aqm[0], aqm[1], aqm[2], aqm[3]);
2548 		memcpy(crycb->apcb1.adm, adm, 32);
2549 		VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2550 			 adm[0], adm[1], adm[2], adm[3]);
2551 		break;
2552 	case CRYCB_FORMAT1:
2553 	case CRYCB_FORMAT0: /* Fall through both use APCB0 */
2554 		memcpy(crycb->apcb0.apm, apm, 8);
2555 		memcpy(crycb->apcb0.aqm, aqm, 2);
2556 		memcpy(crycb->apcb0.adm, adm, 2);
2557 		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2558 			 apm[0], *((unsigned short *)aqm),
2559 			 *((unsigned short *)adm));
2560 		break;
2561 	default:	/* Can not happen */
2562 		break;
2563 	}
2564 
2565 	/* recreate the shadow crycb for each vcpu */
2566 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2567 	kvm_s390_vcpu_unblock_all(kvm);
2568 	mutex_unlock(&kvm->lock);
2569 }
2570 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
2571 
2572 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2573 {
2574 	mutex_lock(&kvm->lock);
2575 	kvm_s390_vcpu_block_all(kvm);
2576 
2577 	memset(&kvm->arch.crypto.crycb->apcb0, 0,
2578 	       sizeof(kvm->arch.crypto.crycb->apcb0));
2579 	memset(&kvm->arch.crypto.crycb->apcb1, 0,
2580 	       sizeof(kvm->arch.crypto.crycb->apcb1));
2581 
2582 	VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2583 	/* recreate the shadow crycb for each vcpu */
2584 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2585 	kvm_s390_vcpu_unblock_all(kvm);
2586 	mutex_unlock(&kvm->lock);
2587 }
2588 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2589 
2590 static u64 kvm_s390_get_initial_cpuid(void)
2591 {
2592 	struct cpuid cpuid;
2593 
2594 	get_cpu_id(&cpuid);
2595 	cpuid.version = 0xff;
2596 	return *((u64 *) &cpuid);
2597 }
2598 
2599 static void kvm_s390_crypto_init(struct kvm *kvm)
2600 {
2601 	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2602 	kvm_s390_set_crycb_format(kvm);
2603 
2604 	if (!test_kvm_facility(kvm, 76))
2605 		return;
2606 
2607 	/* Enable AES/DEA protected key functions by default */
2608 	kvm->arch.crypto.aes_kw = 1;
2609 	kvm->arch.crypto.dea_kw = 1;
2610 	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2611 			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2612 	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2613 			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2614 }
2615 
2616 static void sca_dispose(struct kvm *kvm)
2617 {
2618 	if (kvm->arch.use_esca)
2619 		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2620 	else
2621 		free_page((unsigned long)(kvm->arch.sca));
2622 	kvm->arch.sca = NULL;
2623 }
2624 
2625 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2626 {
2627 	gfp_t alloc_flags = GFP_KERNEL;
2628 	int i, rc;
2629 	char debug_name[16];
2630 	static unsigned long sca_offset;
2631 
2632 	rc = -EINVAL;
2633 #ifdef CONFIG_KVM_S390_UCONTROL
2634 	if (type & ~KVM_VM_S390_UCONTROL)
2635 		goto out_err;
2636 	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2637 		goto out_err;
2638 #else
2639 	if (type)
2640 		goto out_err;
2641 #endif
2642 
2643 	rc = s390_enable_sie();
2644 	if (rc)
2645 		goto out_err;
2646 
2647 	rc = -ENOMEM;
2648 
2649 	if (!sclp.has_64bscao)
2650 		alloc_flags |= GFP_DMA;
2651 	rwlock_init(&kvm->arch.sca_lock);
2652 	/* start with basic SCA */
2653 	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2654 	if (!kvm->arch.sca)
2655 		goto out_err;
2656 	mutex_lock(&kvm_lock);
2657 	sca_offset += 16;
2658 	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2659 		sca_offset = 0;
2660 	kvm->arch.sca = (struct bsca_block *)
2661 			((char *) kvm->arch.sca + sca_offset);
2662 	mutex_unlock(&kvm_lock);
2663 
2664 	sprintf(debug_name, "kvm-%u", current->pid);
2665 
2666 	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2667 	if (!kvm->arch.dbf)
2668 		goto out_err;
2669 
2670 	BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2671 	kvm->arch.sie_page2 =
2672 	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
2673 	if (!kvm->arch.sie_page2)
2674 		goto out_err;
2675 
2676 	kvm->arch.sie_page2->kvm = kvm;
2677 	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2678 
2679 	for (i = 0; i < kvm_s390_fac_size(); i++) {
2680 		kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
2681 					      (kvm_s390_fac_base[i] |
2682 					       kvm_s390_fac_ext[i]);
2683 		kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
2684 					      kvm_s390_fac_base[i];
2685 	}
2686 	kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2687 
2688 	/* we are always in czam mode - even on pre z14 machines */
2689 	set_kvm_facility(kvm->arch.model.fac_mask, 138);
2690 	set_kvm_facility(kvm->arch.model.fac_list, 138);
2691 	/* we emulate STHYI in kvm */
2692 	set_kvm_facility(kvm->arch.model.fac_mask, 74);
2693 	set_kvm_facility(kvm->arch.model.fac_list, 74);
2694 	if (MACHINE_HAS_TLB_GUEST) {
2695 		set_kvm_facility(kvm->arch.model.fac_mask, 147);
2696 		set_kvm_facility(kvm->arch.model.fac_list, 147);
2697 	}
2698 
2699 	if (css_general_characteristics.aiv && test_facility(65))
2700 		set_kvm_facility(kvm->arch.model.fac_mask, 65);
2701 
2702 	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2703 	kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2704 
2705 	kvm_s390_crypto_init(kvm);
2706 
2707 	mutex_init(&kvm->arch.float_int.ais_lock);
2708 	spin_lock_init(&kvm->arch.float_int.lock);
2709 	for (i = 0; i < FIRQ_LIST_COUNT; i++)
2710 		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2711 	init_waitqueue_head(&kvm->arch.ipte_wq);
2712 	mutex_init(&kvm->arch.ipte_mutex);
2713 
2714 	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2715 	VM_EVENT(kvm, 3, "vm created with type %lu", type);
2716 
2717 	if (type & KVM_VM_S390_UCONTROL) {
2718 		kvm->arch.gmap = NULL;
2719 		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2720 	} else {
2721 		if (sclp.hamax == U64_MAX)
2722 			kvm->arch.mem_limit = TASK_SIZE_MAX;
2723 		else
2724 			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2725 						    sclp.hamax + 1);
2726 		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2727 		if (!kvm->arch.gmap)
2728 			goto out_err;
2729 		kvm->arch.gmap->private = kvm;
2730 		kvm->arch.gmap->pfault_enabled = 0;
2731 	}
2732 
2733 	kvm->arch.use_pfmfi = sclp.has_pfmfi;
2734 	kvm->arch.use_skf = sclp.has_skey;
2735 	spin_lock_init(&kvm->arch.start_stop_lock);
2736 	kvm_s390_vsie_init(kvm);
2737 	if (use_gisa)
2738 		kvm_s390_gisa_init(kvm);
2739 	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2740 
2741 	return 0;
2742 out_err:
2743 	free_page((unsigned long)kvm->arch.sie_page2);
2744 	debug_unregister(kvm->arch.dbf);
2745 	sca_dispose(kvm);
2746 	KVM_EVENT(3, "creation of vm failed: %d", rc);
2747 	return rc;
2748 }
2749 
2750 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2751 {
2752 	u16 rc, rrc;
2753 
2754 	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2755 	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2756 	kvm_s390_clear_local_irqs(vcpu);
2757 	kvm_clear_async_pf_completion_queue(vcpu);
2758 	if (!kvm_is_ucontrol(vcpu->kvm))
2759 		sca_del_vcpu(vcpu);
2760 
2761 	if (kvm_is_ucontrol(vcpu->kvm))
2762 		gmap_remove(vcpu->arch.gmap);
2763 
2764 	if (vcpu->kvm->arch.use_cmma)
2765 		kvm_s390_vcpu_unsetup_cmma(vcpu);
2766 	/* We can not hold the vcpu mutex here, we are already dying */
2767 	if (kvm_s390_pv_cpu_get_handle(vcpu))
2768 		kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
2769 	free_page((unsigned long)(vcpu->arch.sie_block));
2770 }
2771 
2772 static void kvm_free_vcpus(struct kvm *kvm)
2773 {
2774 	unsigned int i;
2775 	struct kvm_vcpu *vcpu;
2776 
2777 	kvm_for_each_vcpu(i, vcpu, kvm)
2778 		kvm_vcpu_destroy(vcpu);
2779 
2780 	mutex_lock(&kvm->lock);
2781 	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2782 		kvm->vcpus[i] = NULL;
2783 
2784 	atomic_set(&kvm->online_vcpus, 0);
2785 	mutex_unlock(&kvm->lock);
2786 }
2787 
/*
 * Architecture-specific VM teardown: frees the vcpus, the SCA, the GISA,
 * the protected-virtualization VM state (if a handle exists), the debug
 * feature, sie_page2, the gmap (non-ucontrol VMs only), adapters, floating
 * interrupts and the vsie state, in that order.
 */
void kvm_arch_destroy_vm(struct kvm *kvm)
{
	u16 rc, rrc;

	kvm_free_vcpus(kvm);
	sca_dispose(kvm);
	kvm_s390_gisa_destroy(kvm);
	/*
	 * We are already at the end of life and kvm->lock is not taken.
	 * This is ok as the file descriptor is closed by now and nobody
	 * can mess with the pv state. To avoid lockdep_assert_held from
	 * complaining we do not use kvm_s390_pv_is_protected.
	 */
	if (kvm_s390_pv_get_handle(kvm))
		kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
	debug_unregister(kvm->arch.dbf);
	free_page((unsigned long)kvm->arch.sie_page2);
	/* ucontrol VMs have no VM-wide gmap to remove */
	if (!kvm_is_ucontrol(kvm))
		gmap_remove(kvm->arch.gmap);
	kvm_s390_destroy_adapters(kvm);
	kvm_s390_clear_float_irqs(kvm);
	kvm_s390_vsie_destroy(kvm);
	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
}
2812 
2813 /* Section: vcpu related */
2814 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2815 {
2816 	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2817 	if (!vcpu->arch.gmap)
2818 		return -ENOMEM;
2819 	vcpu->arch.gmap->private = vcpu->kvm;
2820 
2821 	return 0;
2822 }
2823 
2824 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2825 {
2826 	if (!kvm_s390_use_sca_entries())
2827 		return;
2828 	read_lock(&vcpu->kvm->arch.sca_lock);
2829 	if (vcpu->kvm->arch.use_esca) {
2830 		struct esca_block *sca = vcpu->kvm->arch.sca;
2831 
2832 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2833 		sca->cpu[vcpu->vcpu_id].sda = 0;
2834 	} else {
2835 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2836 
2837 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2838 		sca->cpu[vcpu->vcpu_id].sda = 0;
2839 	}
2840 	read_unlock(&vcpu->kvm->arch.sca_lock);
2841 }
2842 
2843 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2844 {
2845 	if (!kvm_s390_use_sca_entries()) {
2846 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2847 
2848 		/* we still need the basic sca for the ipte control */
2849 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2850 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2851 		return;
2852 	}
2853 	read_lock(&vcpu->kvm->arch.sca_lock);
2854 	if (vcpu->kvm->arch.use_esca) {
2855 		struct esca_block *sca = vcpu->kvm->arch.sca;
2856 
2857 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2858 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2859 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2860 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2861 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2862 	} else {
2863 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2864 
2865 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2866 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2867 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2868 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2869 	}
2870 	read_unlock(&vcpu->kvm->arch.sca_lock);
2871 }
2872 
2873 /* Basic SCA to Extended SCA data copy routines */
2874 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2875 {
2876 	d->sda = s->sda;
2877 	d->sigp_ctrl.c = s->sigp_ctrl.c;
2878 	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2879 }
2880 
2881 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2882 {
2883 	int i;
2884 
2885 	d->ipte_control = s->ipte_control;
2886 	d->mcn[0] = s->mcn;
2887 	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2888 		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2889 }
2890 
/*
 * Replace the VM's basic SCA by a freshly allocated extended SCA.
 *
 * Returns 0 if the VM already uses the ESCA or if the switch succeeded, and
 * -ENOMEM if the new block could not be allocated. While the old contents
 * are copied and every vcpu's SIE block is repointed, all vcpus are blocked
 * and sca_lock is held for writing. The old block is freed afterwards.
 */
static int sca_switch_to_extended(struct kvm *kvm)
{
	struct bsca_block *old_sca = kvm->arch.sca;
	struct esca_block *new_sca;
	struct kvm_vcpu *vcpu;
	unsigned int vcpu_idx;
	u32 scaol, scaoh;

	if (kvm->arch.use_esca)
		return 0;

	/* allocate outside of the blocked/locked section */
	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
	if (!new_sca)
		return -ENOMEM;

	/* split the new origin into the two 32-bit SIE block fields */
	scaoh = (u32)((u64)(new_sca) >> 32);
	scaol = (u32)(u64)(new_sca) & ~0x3fU;

	kvm_s390_vcpu_block_all(kvm);
	write_lock(&kvm->arch.sca_lock);

	sca_copy_b_to_e(new_sca, old_sca);

	/* repoint every existing vcpu at the new block and flag ESCA format */
	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
		vcpu->arch.sie_block->scaoh = scaoh;
		vcpu->arch.sie_block->scaol = scaol;
		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
	}
	kvm->arch.sca = new_sca;
	kvm->arch.use_esca = 1;

	write_unlock(&kvm->arch.sca_lock);
	kvm_s390_vcpu_unblock_all(kvm);

	/* NOTE(review): assumes the basic SCA was a single page - confirm at its allocation site */
	free_page((unsigned long)old_sca);

	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
		 old_sca, kvm->arch.sca);
	return 0;
}
2931 
2932 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2933 {
2934 	int rc;
2935 
2936 	if (!kvm_s390_use_sca_entries()) {
2937 		if (id < KVM_MAX_VCPUS)
2938 			return true;
2939 		return false;
2940 	}
2941 	if (id < KVM_S390_BSCA_CPU_SLOTS)
2942 		return true;
2943 	if (!sclp.has_esca || !sclp.has_64bscao)
2944 		return false;
2945 
2946 	mutex_lock(&kvm->lock);
2947 	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2948 	mutex_unlock(&kvm->lock);
2949 
2950 	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2951 }
2952 
/*
 * Begin a cpu timer accounting interval by recording the current TOD clock
 * value in cputm_start. The store is wrapped in a cputm_seqcount write
 * section; warns once if an interval is already open (cputm_start != 0).
 *
 * needs disabled preemption to protect from TOD sync and vcpu_load/put
 */
static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	vcpu->arch.cputm_start = get_tod_clock_fast();
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
}
2961 
/*
 * Close the current cpu timer accounting interval: subtract the TOD time
 * elapsed since cputm_start from the SIE block's cputm and reset cputm_start
 * to 0. Both updates happen inside one cputm_seqcount write section; warns
 * once if no interval is open (cputm_start == 0).
 *
 * needs disabled preemption to protect from TOD sync and vcpu_load/put
 */
static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
	vcpu->arch.cputm_start = 0;
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
}
2971 
/*
 * Mark cpu timer accounting as enabled for this vcpu and start an accounting
 * interval right away. Warns once if accounting is already enabled.
 *
 * needs disabled preemption to protect from TOD sync and vcpu_load/put
 */
static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
	/* set the flag first, then open the interval */
	vcpu->arch.cputm_enabled = true;
	__start_cpu_timer_accounting(vcpu);
}
2979 
/*
 * Close the running accounting interval and mark cpu timer accounting as
 * disabled for this vcpu. Warns once if accounting is not enabled.
 *
 * needs disabled preemption to protect from TOD sync and vcpu_load/put
 */
static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
	/* fold the elapsed time into cputm before clearing the flag */
	__stop_cpu_timer_accounting(vcpu);
	vcpu->arch.cputm_enabled = false;
}
2987 
/* Preemption-safe wrapper around __enable_cpu_timer_accounting(). */
static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	__enable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
2994 
/* Preemption-safe wrapper around __disable_cpu_timer_accounting(). */
static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	__disable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
3001 
/*
 * set the cpu timer - may only be called from the VCPU thread itself
 *
 * Stores @cputm into the SIE block. If accounting is enabled, the accounting
 * interval is restarted at the current TOD clock value. Both stores are done
 * inside one cputm_seqcount write section with preemption disabled.
 */
void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	if (vcpu->arch.cputm_enabled)
		vcpu->arch.cputm_start = get_tod_clock_fast();
	vcpu->arch.sie_block->cputm = cputm;
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
	preempt_enable();
}
3013 
/*
 * update and get the cpu timer - can also be called from other VCPU threads
 *
 * With accounting disabled, the value stored in the SIE block is returned
 * as is. Otherwise the stored value is reduced by the TOD time elapsed since
 * cputm_start; the read is repeated until it did not overlap with a
 * cputm_seqcount write section.
 */
__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
{
	unsigned int seq;
	__u64 value;

	if (unlikely(!vcpu->arch.cputm_enabled))
		return vcpu->arch.sie_block->cputm;

	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	do {
		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
		/*
		 * If the writer would ever execute a read in the critical
		 * section, e.g. in irq context, we have a deadlock.
		 */
		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
		value = vcpu->arch.sie_block->cputm;
		/* if cputm_start is 0, accounting is being started/stopped */
		if (likely(vcpu->arch.cputm_start))
			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
		/* an odd seq has its low bit cleared, so the retry check cannot match it */
	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
	preempt_enable();
	return value;
}
3039 
/*
 * Arch hook for loading a vcpu on host cpu @cpu: enable the vcpu's saved
 * gmap, set CPUSTAT_RUNNING, resume cpu timer accounting (if enabled and the
 * vcpu is not idle) and record @cpu in vcpu->cpu.
 */
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{

	gmap_enable(vcpu->arch.enabled_gmap);
	kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
		__start_cpu_timer_accounting(vcpu);
	vcpu->cpu = cpu;
}
3049 
/*
 * Arch hook for unloading a vcpu; undoes kvm_arch_vcpu_load() in reverse
 * order: invalidate vcpu->cpu, pause cpu timer accounting (if enabled and
 * the vcpu is not idle), clear CPUSTAT_RUNNING, then remember the currently
 * enabled gmap for the next load and disable it.
 */
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	vcpu->cpu = -1;
	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
		__stop_cpu_timer_accounting(vcpu);
	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
	vcpu->arch.enabled_gmap = gmap_get_enabled();
	gmap_disable(vcpu->arch.enabled_gmap);

}
3060 
3061 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
3062 {
3063 	mutex_lock(&vcpu->kvm->lock);
3064 	preempt_disable();
3065 	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
3066 	vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
3067 	preempt_enable();
3068 	mutex_unlock(&vcpu->kvm->lock);
3069 	if (!kvm_is_ucontrol(vcpu->kvm)) {
3070 		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
3071 		sca_add_vcpu(vcpu);
3072 	}
3073 	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
3074 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3075 	/* make vcpu_load load the right gmap on the first trigger */
3076 	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
3077 }
3078 
3079 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
3080 {
3081 	if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
3082 	    test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
3083 		return true;
3084 	return false;
3085 }
3086 
3087 static bool kvm_has_pckmo_ecc(struct kvm *kvm)
3088 {
3089 	/* At least one ECC subfunction must be present */
3090 	return kvm_has_pckmo_subfunc(kvm, 32) ||
3091 	       kvm_has_pckmo_subfunc(kvm, 33) ||
3092 	       kvm_has_pckmo_subfunc(kvm, 34) ||
3093 	       kvm_has_pckmo_subfunc(kvm, 40) ||
3094 	       kvm_has_pckmo_subfunc(kvm, 41);
3095 
3096 }
3097 
3098 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
3099 {
3100 	/*
3101 	 * If the AP instructions are not being interpreted and the MSAX3
3102 	 * facility is not configured for the guest, there is nothing to set up.
3103 	 */
3104 	if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
3105 		return;
3106 
3107 	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
3108 	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
3109 	vcpu->arch.sie_block->eca &= ~ECA_APIE;
3110 	vcpu->arch.sie_block->ecd &= ~ECD_ECC;
3111 
3112 	if (vcpu->kvm->arch.crypto.apie)
3113 		vcpu->arch.sie_block->eca |= ECA_APIE;
3114 
3115 	/* Set up protected key support */
3116 	if (vcpu->kvm->arch.crypto.aes_kw) {
3117 		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
3118 		/* ecc is also wrapped with AES key */
3119 		if (kvm_has_pckmo_ecc(vcpu->kvm))
3120 			vcpu->arch.sie_block->ecd |= ECD_ECC;
3121 	}
3122 
3123 	if (vcpu->kvm->arch.crypto.dea_kw)
3124 		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
3125 }
3126 
/*
 * Release the page referenced by the SIE block's cbrlo field (allocated by
 * kvm_s390_vcpu_setup_cmma()) and reset the field to 0.
 */
void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
{
	free_page(vcpu->arch.sie_block->cbrlo);
	vcpu->arch.sie_block->cbrlo = 0;
}
3132 
3133 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
3134 {
3135 	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
3136 	if (!vcpu->arch.sie_block->cbrlo)
3137 		return -ENOMEM;
3138 	return 0;
3139 }
3140 
3141 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
3142 {
3143 	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
3144 
3145 	vcpu->arch.sie_block->ibc = model->ibc;
3146 	if (test_kvm_facility(vcpu->kvm, 7))
3147 		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
3148 }
3149 
/*
 * Program the vcpu's SIE block according to the VM's facilities, the
 * machine's SCLP features and the VM configuration, set up the clock
 * comparator timer and - for protected VMs - create the protected cpu.
 *
 * Returns 0 on success, otherwise the error returned by
 * kvm_s390_vcpu_setup_cmma() or kvm_s390_pv_create_cpu().
 */
static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
{
	int rc = 0;
	u16 uvrc, uvrrc;

	/* every vcpu starts out in the stopped state */
	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
						    CPUSTAT_SM |
						    CPUSTAT_STOPPED);

	/* facility 78 takes precedence over facility 8 */
	if (test_kvm_facility(vcpu->kvm, 78))
		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
	else if (test_kvm_facility(vcpu->kvm, 8))
		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);

	kvm_s390_vcpu_setup_model(vcpu);

	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
	if (MACHINE_HAS_ESOP)
		vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
	if (test_kvm_facility(vcpu->kvm, 9))
		vcpu->arch.sie_block->ecb |= ECB_SRSI;
	if (test_kvm_facility(vcpu->kvm, 73))
		vcpu->arch.sie_block->ecb |= ECB_TE;

	if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
		vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
	if (test_kvm_facility(vcpu->kvm, 130))
		vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
	/* eca is assigned here, so earlier contents are discarded */
	vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
	if (sclp.has_cei)
		vcpu->arch.sie_block->eca |= ECA_CEI;
	if (sclp.has_ib)
		vcpu->arch.sie_block->eca |= ECA_IB;
	if (sclp.has_siif)
		vcpu->arch.sie_block->eca |= ECA_SII;
	if (sclp.has_sigpif)
		vcpu->arch.sie_block->eca |= ECA_SIGPI;
	if (test_kvm_facility(vcpu->kvm, 129)) {
		vcpu->arch.sie_block->eca |= ECA_VX;
		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
	}
	if (test_kvm_facility(vcpu->kvm, 139))
		vcpu->arch.sie_block->ecd |= ECD_MEF;
	if (test_kvm_facility(vcpu->kvm, 156))
		vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
	/* a non-zero gd was set up by kvm_arch_vcpu_create() */
	if (vcpu->arch.sie_block->gd) {
		vcpu->arch.sie_block->eca |= ECA_AIV;
		VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
			   vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
	}
	/* point the SIE block at the sdnx and riccb areas of the kvm_run sync regs */
	vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
					| SDNXC;
	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;

	/* without KSS, fall back to intercepting ISKE, SSKE and RRBE */
	if (sclp.has_kss)
		kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
	else
		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;

	if (vcpu->kvm->arch.use_cmma) {
		rc = kvm_s390_vcpu_setup_cmma(vcpu);
		if (rc)
			return rc;
	}
	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;

	vcpu->arch.sie_block->hpid = HPID_KVM;

	kvm_s390_vcpu_crypto_setup(vcpu);

	/* the protected state is checked and acted upon under kvm->lock */
	mutex_lock(&vcpu->kvm->lock);
	if (kvm_s390_pv_is_protected(vcpu->kvm)) {
		rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
		/* on failure, release the cbrlo page again */
		if (rc)
			kvm_s390_vcpu_unsetup_cmma(vcpu);
	}
	mutex_unlock(&vcpu->kvm->lock);

	return rc;
}
3231 
3232 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3233 {
3234 	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3235 		return -EINVAL;
3236 	return 0;
3237 }
3238 
/*
 * Arch part of vcpu creation: allocate the vcpu's SIE page, initialise the
 * SIE block and the set of valid sync registers, and run
 * kvm_s390_vcpu_setup().
 *
 * Returns 0 on success, -ENOMEM if the SIE page cannot be allocated, or the
 * error returned by __kvm_ucontrol_vcpu_init() or kvm_s390_vcpu_setup(). On
 * failure the SIE page is freed again.
 */
int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
{
	struct sie_page *sie_page;
	int rc;

	/* struct sie_page must be exactly 4096 bytes; one zeroed page backs it */
	BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
	if (!sie_page)
		return -ENOMEM;

	vcpu->arch.sie_block = &sie_page->sie_block;
	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;

	/* the real guest size will always be smaller than msl */
	vcpu->arch.sie_block->mso = 0;
	vcpu->arch.sie_block->msl = sclp.hamax;

	vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
	spin_lock_init(&vcpu->arch.local_int.lock);
	/* gd holds the 32-bit truncated gisa origin; 0 if there is none */
	vcpu->arch.sie_block->gd = (u32)(u64)vcpu->kvm->arch.gisa_int.origin;
	if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
		vcpu->arch.sie_block->gd |= GISA_FORMAT1;
	seqcount_init(&vcpu->arch.cputm_seqcount);

	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	/* registers that are always valid in the sync area */
	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
				    KVM_SYNC_GPRS |
				    KVM_SYNC_ACRS |
				    KVM_SYNC_CRS |
				    KVM_SYNC_ARCH0 |
				    KVM_SYNC_PFAULT;
	kvm_s390_set_prefix(vcpu, 0);
	/* further sync registers depend on the facilities of the guest */
	if (test_kvm_facility(vcpu->kvm, 64))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
	if (test_kvm_facility(vcpu->kvm, 82))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
	if (test_kvm_facility(vcpu->kvm, 133))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
	if (test_kvm_facility(vcpu->kvm, 156))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
	/* fprs can be synchronized via vrs, even if the guest has no vx. With
	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
	 */
	if (MACHINE_HAS_VX)
		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
	else
		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;

	if (kvm_is_ucontrol(vcpu->kvm)) {
		rc = __kvm_ucontrol_vcpu_init(vcpu);
		if (rc)
			goto out_free_sie_block;
	}

	VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
		 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
	trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);

	rc = kvm_s390_vcpu_setup(vcpu);
	if (rc)
		goto out_ucontrol_uninit;
	return 0;

out_ucontrol_uninit:
	/* presumably undoes the gmap set up by __kvm_ucontrol_vcpu_init() - verify */
	if (kvm_is_ucontrol(vcpu->kvm))
		gmap_remove(vcpu->arch.gmap);
out_free_sie_block:
	/* NOTE(review): frees via sie_block, i.e. relies on it sitting at the start of the SIE page - confirm */
	free_page((unsigned long)(vcpu->arch.sie_block));
	return rc;
}
3310 
/*
 * Report whether the vcpu is runnable; this is simply the result of
 * kvm_s390_vcpu_has_irq() called with 0 as its second argument.
 */
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	return kvm_s390_vcpu_has_irq(vcpu, 0);
}
3315 
/*
 * Return true if the guest PSW mask does not have PSW_MASK_PSTATE set,
 * false otherwise.
 */
bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
{
	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
}
3320 
/*
 * Set PROG_BLOCK_SIE in the SIE block's prog20 field and kick the vcpu out
 * of SIE (see exit_sie()). The bit stays set until kvm_s390_vcpu_unblock().
 */
void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
{
	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}
3326 
/* Clear PROG_BLOCK_SIE again, undoing kvm_s390_vcpu_block(). */
void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
{
	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
}
3331 
/*
 * Set PROG_REQUEST in the SIE block's prog20 field and kick the vcpu out of
 * SIE. The bit is cleared by kvm_s390_vcpu_request_handled().
 */
static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
{
	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}
3337 
/*
 * Return true if either PROG_BLOCK_SIE or PROG_REQUEST is currently set in
 * the SIE block's prog20 field.
 */
bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
{
	return atomic_read(&vcpu->arch.sie_block->prog20) &
	       (PROG_BLOCK_SIE | PROG_REQUEST);
}
3343 
/* Clear PROG_REQUEST, the bit set by kvm_s390_vcpu_request(). */
static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
{
	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
}
3348 
/*
 * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
 * If the CPU is not running (e.g. waiting as idle) the function will
 * return immediately.
 *
 * The kick consists of setting CPUSTAT_STOP_INT and calling
 * kvm_s390_vsie_kick(); afterwards this busy-waits until PROG_IN_SIE is no
 * longer set in the SIE block's prog0c field.
 */
void exit_sie(struct kvm_vcpu *vcpu)
{
	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
	kvm_s390_vsie_kick(vcpu);
	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
		cpu_relax();
}
3360 
/*
 * Kick a guest cpu out of SIE to process a request synchronously
 *
 * The request @req is made pending first; then the vcpu is flagged with
 * PROG_REQUEST and forced out of SIE via kvm_s390_vcpu_request().
 */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
	kvm_make_request(req, vcpu);
	kvm_s390_vcpu_request(vcpu);
}
3367 
3368 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3369 			      unsigned long end)
3370 {
3371 	struct kvm *kvm = gmap->private;
3372 	struct kvm_vcpu *vcpu;
3373 	unsigned long prefix;
3374 	int i;
3375 
3376 	if (gmap_is_shadow(gmap))
3377 		return;
3378 	if (start >= 1UL << 31)
3379 		/* We are only interested in prefix pages */
3380 		return;
3381 	kvm_for_each_vcpu(i, vcpu, kvm) {
3382 		/* match against both prefix pages */
3383 		prefix = kvm_s390_get_prefix(vcpu);
3384 		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3385 			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3386 				   start, end);
3387 			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
3388 		}
3389 	}
3390 }
3391 
3392 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3393 {
3394 	/* do not poll with more than halt_poll_max_steal percent of steal time */
3395 	if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3396 	    halt_poll_max_steal) {
3397 		vcpu->stat.halt_no_poll_steal++;
3398 		return true;
3399 	}
3400 	return false;
3401 }
3402 
/*
 * Stub that must never run on s390: it hits BUG() unconditionally. The
 * return statement only satisfies the function signature.
 */
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	/* kvm common code refers to this, but never calls it */
	BUG();
	return 0;
}
3409 
3410 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3411 					   struct kvm_one_reg *reg)
3412 {
3413 	int r = -EINVAL;
3414 
3415 	switch (reg->id) {
3416 	case KVM_REG_S390_TODPR:
3417 		r = put_user(vcpu->arch.sie_block->todpr,
3418 			     (u32 __user *)reg->addr);
3419 		break;
3420 	case KVM_REG_S390_EPOCHDIFF:
3421 		r = put_user(vcpu->arch.sie_block->epoch,
3422 			     (u64 __user *)reg->addr);
3423 		break;
3424 	case KVM_REG_S390_CPU_TIMER:
3425 		r = put_user(kvm_s390_get_cpu_timer(vcpu),
3426 			     (u64 __user *)reg->addr);
3427 		break;
3428 	case KVM_REG_S390_CLOCK_COMP:
3429 		r = put_user(vcpu->arch.sie_block->ckc,
3430 			     (u64 __user *)reg->addr);
3431 		break;
3432 	case KVM_REG_S390_PFTOKEN:
3433 		r = put_user(vcpu->arch.pfault_token,
3434 			     (u64 __user *)reg->addr);
3435 		break;
3436 	case KVM_REG_S390_PFCOMPARE:
3437 		r = put_user(vcpu->arch.pfault_compare,
3438 			     (u64 __user *)reg->addr);
3439 		break;
3440 	case KVM_REG_S390_PFSELECT:
3441 		r = put_user(vcpu->arch.pfault_select,
3442 			     (u64 __user *)reg->addr);
3443 		break;
3444 	case KVM_REG_S390_PP:
3445 		r = put_user(vcpu->arch.sie_block->pp,
3446 			     (u64 __user *)reg->addr);
3447 		break;
3448 	case KVM_REG_S390_GBEA:
3449 		r = put_user(vcpu->arch.sie_block->gbea,
3450 			     (u64 __user *)reg->addr);
3451 		break;
3452 	default:
3453 		break;
3454 	}
3455 
3456 	return r;
3457 }
3458 
3459 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3460 					   struct kvm_one_reg *reg)
3461 {
3462 	int r = -EINVAL;
3463 	__u64 val;
3464 
3465 	switch (reg->id) {
3466 	case KVM_REG_S390_TODPR:
3467 		r = get_user(vcpu->arch.sie_block->todpr,
3468 			     (u32 __user *)reg->addr);
3469 		break;
3470 	case KVM_REG_S390_EPOCHDIFF:
3471 		r = get_user(vcpu->arch.sie_block->epoch,
3472 			     (u64 __user *)reg->addr);
3473 		break;
3474 	case KVM_REG_S390_CPU_TIMER:
3475 		r = get_user(val, (u64 __user *)reg->addr);
3476 		if (!r)
3477 			kvm_s390_set_cpu_timer(vcpu, val);
3478 		break;
3479 	case KVM_REG_S390_CLOCK_COMP:
3480 		r = get_user(vcpu->arch.sie_block->ckc,
3481 			     (u64 __user *)reg->addr);
3482 		break;
3483 	case KVM_REG_S390_PFTOKEN:
3484 		r = get_user(vcpu->arch.pfault_token,
3485 			     (u64 __user *)reg->addr);
3486 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3487 			kvm_clear_async_pf_completion_queue(vcpu);
3488 		break;
3489 	case KVM_REG_S390_PFCOMPARE:
3490 		r = get_user(vcpu->arch.pfault_compare,
3491 			     (u64 __user *)reg->addr);
3492 		break;
3493 	case KVM_REG_S390_PFSELECT:
3494 		r = get_user(vcpu->arch.pfault_select,
3495 			     (u64 __user *)reg->addr);
3496 		break;
3497 	case KVM_REG_S390_PP:
3498 		r = get_user(vcpu->arch.sie_block->pp,
3499 			     (u64 __user *)reg->addr);
3500 		break;
3501 	case KVM_REG_S390_GBEA:
3502 		r = get_user(vcpu->arch.sie_block->gbea,
3503 			     (u64 __user *)reg->addr);
3504 		break;
3505 	default:
3506 		break;
3507 	}
3508 
3509 	return r;
3510 }
3511 
/*
 * Normal cpu reset: clear PSW_MASK_RI in the guest PSW, invalidate the
 * pfault token, zero the riccb sync area, empty the async pf completion
 * queue and clear the vcpu's local interrupts. The vcpu is also stopped,
 * unless user space controls the cpu state.
 */
static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
{
	vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));

	kvm_clear_async_pf_completion_queue(vcpu);
	/* with user space cpu state control, stopping is left to user space */
	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
		kvm_s390_vcpu_stop(vcpu);
	kvm_s390_clear_local_irqs(vcpu);
}
3523 
3524 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3525 {
3526 	/* Initial reset is a superset of the normal reset */
3527 	kvm_arch_vcpu_ioctl_normal_reset(vcpu);
3528 
3529 	/*
3530 	 * This equals initial cpu reset in pop, but we don't switch to ESA.
3531 	 * We do not only reset the internal data, but also ...
3532 	 */
3533 	vcpu->arch.sie_block->gpsw.mask = 0;
3534 	vcpu->arch.sie_block->gpsw.addr = 0;
3535 	kvm_s390_set_prefix(vcpu, 0);
3536 	kvm_s390_set_cpu_timer(vcpu, 0);
3537 	vcpu->arch.sie_block->ckc = 0;
3538 	memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
3539 	vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
3540 	vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
3541 
3542 	/* ... the data in sync regs */
3543 	memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
3544 	vcpu->run->s.regs.ckc = 0;
3545 	vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
3546 	vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
3547 	vcpu->run->psw_addr = 0;
3548 	vcpu->run->psw_mask = 0;
3549 	vcpu->run->s.regs.todpr = 0;
3550 	vcpu->run->s.regs.cputm = 0;
3551 	vcpu->run->s.regs.ckc = 0;
3552 	vcpu->run->s.regs.pp = 0;
3553 	vcpu->run->s.regs.gbea = 1;
3554 	vcpu->run->s.regs.fpc = 0;
3555 	/*
3556 	 * Do not reset these registers in the protected case, as some of
3557 	 * them are overlayed and they are not accessible in this case
3558 	 * anyway.
3559 	 */
3560 	if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3561 		vcpu->arch.sie_block->gbea = 1;
3562 		vcpu->arch.sie_block->pp = 0;
3563 		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3564 		vcpu->arch.sie_block->todpr = 0;
3565 	}
3566 }
3567 
3568 static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
3569 {
3570 	struct kvm_sync_regs *regs = &vcpu->run->s.regs;
3571 
3572 	/* Clear reset is a superset of the initial reset */
3573 	kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3574 
3575 	memset(&regs->gprs, 0, sizeof(regs->gprs));
3576 	memset(&regs->vrs, 0, sizeof(regs->vrs));
3577 	memset(&regs->acrs, 0, sizeof(regs->acrs));
3578 	memset(&regs->gscb, 0, sizeof(regs->gscb));
3579 
3580 	regs->etoken = 0;
3581 	regs->etoken_extension = 0;
3582 }
3583 
/*
 * Copy the general purpose registers from @regs into the vcpu's sync
 * register area, bracketed by vcpu_load()/vcpu_put(). Always returns 0.
 */
int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	vcpu_load(vcpu);
	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
	vcpu_put(vcpu);
	return 0;
}
3591 
/*
 * Copy the general purpose registers from the vcpu's sync register area
 * into @regs, bracketed by vcpu_load()/vcpu_put(). Always returns 0.
 */
int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	vcpu_load(vcpu);
	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
	vcpu_put(vcpu);
	return 0;
}
3599 
/*
 * Set the special registers from @sregs: the access registers go to the sync
 * register area, the control registers directly into the SIE block.
 * Always returns 0.
 */
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	vcpu_load(vcpu);

	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));

	vcpu_put(vcpu);
	return 0;
}
3611 
/*
 * Read the special registers into @sregs: the access registers come from the
 * sync register area, the control registers from the SIE block.
 * Always returns 0.
 */
int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	vcpu_load(vcpu);

	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));

	vcpu_put(vcpu);
	return 0;
}
3623 
3624 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3625 {
3626 	int ret = 0;
3627 
3628 	vcpu_load(vcpu);
3629 
3630 	if (test_fp_ctl(fpu->fpc)) {
3631 		ret = -EINVAL;
3632 		goto out;
3633 	}
3634 	vcpu->run->s.regs.fpc = fpu->fpc;
3635 	if (MACHINE_HAS_VX)
3636 		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3637 				 (freg_t *) fpu->fprs);
3638 	else
3639 		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3640 
3641 out:
3642 	vcpu_put(vcpu);
3643 	return ret;
3644 }
3645 
/*
 * Read the floating point registers and control word into @fpu. On machines
 * with vector support the fprs are converted from the vrs sync area, else
 * they are copied from the fprs area. Always returns 0.
 */
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	vcpu_load(vcpu);

	/* make sure we have the latest values */
	save_fpu_regs();
	if (MACHINE_HAS_VX)
		convert_vx_to_fp((freg_t *) fpu->fprs,
				 (__vector128 *) vcpu->run->s.regs.vrs);
	else
		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
	fpu->fpc = vcpu->run->s.regs.fpc;

	vcpu_put(vcpu);
	return 0;
}
3662 
3663 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3664 {
3665 	int rc = 0;
3666 
3667 	if (!is_vcpu_stopped(vcpu))
3668 		rc = -EBUSY;
3669 	else {
3670 		vcpu->run->psw_mask = psw.mask;
3671 		vcpu->run->psw_addr = psw.addr;
3672 	}
3673 	return rc;
3674 }
3675 
/* Address translation ioctl: unconditionally fails with -EINVAL on s390. */
int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return -EINVAL; /* not implemented yet */
}
3681 
3682 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3683 			      KVM_GUESTDBG_USE_HW_BP | \
3684 			      KVM_GUESTDBG_ENABLE)
3685 
3686 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3687 					struct kvm_guest_debug *dbg)
3688 {
3689 	int rc = 0;
3690 
3691 	vcpu_load(vcpu);
3692 
3693 	vcpu->guest_debug = 0;
3694 	kvm_s390_clear_bp_data(vcpu);
3695 
3696 	if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3697 		rc = -EINVAL;
3698 		goto out;
3699 	}
3700 	if (!sclp.has_gpere) {
3701 		rc = -EINVAL;
3702 		goto out;
3703 	}
3704 
3705 	if (dbg->control & KVM_GUESTDBG_ENABLE) {
3706 		vcpu->guest_debug = dbg->control;
3707 		/* enforce guest PER */
3708 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3709 
3710 		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3711 			rc = kvm_s390_import_bp_data(vcpu, dbg);
3712 	} else {
3713 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3714 		vcpu->arch.guestdbg.last_bp = 0;
3715 	}
3716 
3717 	if (rc) {
3718 		vcpu->guest_debug = 0;
3719 		kvm_s390_clear_bp_data(vcpu);
3720 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3721 	}
3722 
3723 out:
3724 	vcpu_put(vcpu);
3725 	return rc;
3726 }
3727 
3728 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3729 				    struct kvm_mp_state *mp_state)
3730 {
3731 	int ret;
3732 
3733 	vcpu_load(vcpu);
3734 
3735 	/* CHECK_STOP and LOAD are not supported yet */
3736 	ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3737 				      KVM_MP_STATE_OPERATING;
3738 
3739 	vcpu_put(vcpu);
3740 	return ret;
3741 }
3742 
3743 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3744 				    struct kvm_mp_state *mp_state)
3745 {
3746 	int rc = 0;
3747 
3748 	vcpu_load(vcpu);
3749 
3750 	/* user space knows about this interface - let it control the state */
3751 	vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3752 
3753 	switch (mp_state->mp_state) {
3754 	case KVM_MP_STATE_STOPPED:
3755 		rc = kvm_s390_vcpu_stop(vcpu);
3756 		break;
3757 	case KVM_MP_STATE_OPERATING:
3758 		rc = kvm_s390_vcpu_start(vcpu);
3759 		break;
3760 	case KVM_MP_STATE_LOAD:
3761 		if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3762 			rc = -ENXIO;
3763 			break;
3764 		}
3765 		rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
3766 		break;
3767 	case KVM_MP_STATE_CHECK_STOP:
3768 		fallthrough;	/* CHECK_STOP and LOAD are not supported yet */
3769 	default:
3770 		rc = -ENXIO;
3771 	}
3772 
3773 	vcpu_put(vcpu);
3774 	return rc;
3775 }
3776 
/* Return true if CPUSTAT_IBS is set in the vcpu's cpuflags. */
static bool ibs_enabled(struct kvm_vcpu *vcpu)
{
	return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
}
3781 
/*
 * Process all pending requests of a vcpu.
 *
 * After each handled request the function jumps back to "retry", which
 * clears PROG_REQUEST again and re-checks for pending requests, so requests
 * arriving in the meantime are picked up as well.
 *
 * Returns 0 once nothing is pending, or the error of gmap_mprotect_notify()
 * (the MMU_RELOAD request is re-queued in that case).
 */
static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
retry:
	kvm_s390_vcpu_request_handled(vcpu);
	if (!kvm_request_pending(vcpu))
		return 0;
	/*
	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
	 * This ensures that the ipte instruction for this request has
	 * already finished. We might race against a second unmapper that
	 * wants to set the blocking bit. Lets just retry the request loop.
	 */
	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
		int rc;
		rc = gmap_mprotect_notify(vcpu->arch.gmap,
					  kvm_s390_get_prefix(vcpu),
					  PAGE_SIZE * 2, PROT_WRITE);
		if (rc) {
			/* keep the request pending so it is tried again later */
			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
			return rc;
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
		vcpu->arch.sie_block->ihcpu = 0xffff;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
		/* trace and set the flag only on an actual state change */
		if (!ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
			kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
		if (ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
			kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
		/*
		 * Disable CMM virtualization; we will emulate the ESSA
		 * instruction manually, in order to provide additional
		 * functionalities needed for live migration.
		 */
		vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
		/*
		 * Re-enable CMM virtualization if CMMA is available and
		 * CMM has been used.
		 */
		if ((vcpu->kvm->arch.use_cmma) &&
		    (vcpu->kvm->mm->context.uses_cmm))
			vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
		goto retry;
	}

	/* nothing to do, just clear the request */
	kvm_clear_request(KVM_REQ_UNHALT, vcpu);
	/* we left the vsie handler, nothing to do, just clear the request */
	kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);

	return 0;
}
3861 
/*
 * Set the guest TOD clock of a VM to @gtod.
 *
 * The guest clock is kept as an offset (epoch, plus epoch index with
 * facility 139) relative to the host TOD clock. The new offset is computed
 * under kvm->lock with preemption disabled and then copied into the SIE
 * block of every vcpu while all vcpus are blocked.
 */
void kvm_s390_set_tod_clock(struct kvm *kvm,
			    const struct kvm_s390_vm_tod_clock *gtod)
{
	struct kvm_vcpu *vcpu;
	struct kvm_s390_tod_clock_ext htod;
	int i;

	mutex_lock(&kvm->lock);
	preempt_disable();

	/* sample the host clock in its extended format */
	get_tod_clock_ext((char *)&htod);

	kvm->arch.epoch = gtod->tod - htod.tod;
	kvm->arch.epdx = 0;
	if (test_kvm_facility(kvm, 139)) {
		kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
		/*
		 * A difference larger than the minuend means the subtraction
		 * above wrapped (presumably unsigned fields - verify), so
		 * borrow one from the epoch index.
		 */
		if (kvm->arch.epoch > gtod->tod)
			kvm->arch.epdx -= 1;
	}

	kvm_s390_vcpu_block_all(kvm);
	kvm_for_each_vcpu(i, vcpu, kvm) {
		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
		vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
	}

	kvm_s390_vcpu_unblock_all(kvm);
	preempt_enable();
	mutex_unlock(&kvm->lock);
}
3892 
3893 /**
3894  * kvm_arch_fault_in_page - fault-in guest page if necessary
3895  * @vcpu: The corresponding virtual cpu
3896  * @gpa: Guest physical address
3897  * @writable: Whether the page should be writable or not
3898  *
3899  * Make sure that a guest page has been faulted-in on the host.
3900  *
3901  * Return: Zero on success, negative error code otherwise.
3902  */
3903 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3904 {
3905 	return gmap_fault(vcpu->arch.gmap, gpa,
3906 			  writable ? FAULT_FLAG_WRITE : 0);
3907 }
3908 
3909 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3910 				      unsigned long token)
3911 {
3912 	struct kvm_s390_interrupt inti;
3913 	struct kvm_s390_irq irq;
3914 
3915 	if (start_token) {
3916 		irq.u.ext.ext_params2 = token;
3917 		irq.type = KVM_S390_INT_PFAULT_INIT;
3918 		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3919 	} else {
3920 		inti.type = KVM_S390_INT_PFAULT_DONE;
3921 		inti.parm64 = token;
3922 		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3923 	}
3924 }
3925 
3926 bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3927 				     struct kvm_async_pf *work)
3928 {
3929 	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3930 	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3931 
3932 	return true;
3933 }
3934 
3935 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3936 				 struct kvm_async_pf *work)
3937 {
3938 	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3939 	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3940 }
3941 
/*
 * Async-pf hook, intentionally empty: on s390 the page is always injected
 * directly, so there is nothing left to do here.
 */
void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
}
3947 
3948 bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
3949 {
3950 	/*
3951 	 * s390 will always inject the page directly,
3952 	 * but we still want check_async_completion to cleanup
3953 	 */
3954 	return true;
3955 }
3956 
3957 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3958 {
3959 	hva_t hva;
3960 	struct kvm_arch_async_pf arch;
3961 	int rc;
3962 
3963 	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3964 		return 0;
3965 	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3966 	    vcpu->arch.pfault_compare)
3967 		return 0;
3968 	if (psw_extint_disabled(vcpu))
3969 		return 0;
3970 	if (kvm_s390_vcpu_has_irq(vcpu, 0))
3971 		return 0;
3972 	if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
3973 		return 0;
3974 	if (!vcpu->arch.gmap->pfault_enabled)
3975 		return 0;
3976 
3977 	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3978 	hva += current->thread.gmap_addr & ~PAGE_MASK;
3979 	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3980 		return 0;
3981 
3982 	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3983 	return rc;
3984 }
3985 
3986 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3987 {
3988 	int rc, cpuflags;
3989 
3990 	/*
3991 	 * On s390 notifications for arriving pages will be delivered directly
3992 	 * to the guest but the house keeping for completed pfaults is
3993 	 * handled outside the worker.
3994 	 */
3995 	kvm_check_async_pf_completion(vcpu);
3996 
3997 	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3998 	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3999 
4000 	if (need_resched())
4001 		schedule();
4002 
4003 	if (!kvm_is_ucontrol(vcpu->kvm)) {
4004 		rc = kvm_s390_deliver_pending_interrupts(vcpu);
4005 		if (rc)
4006 			return rc;
4007 	}
4008 
4009 	rc = kvm_s390_handle_requests(vcpu);
4010 	if (rc)
4011 		return rc;
4012 
4013 	if (guestdbg_enabled(vcpu)) {
4014 		kvm_s390_backup_guest_per_regs(vcpu);
4015 		kvm_s390_patch_guest_per_regs(vcpu);
4016 	}
4017 
4018 	clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.gisa_int.kicked_mask);
4019 
4020 	vcpu->arch.sie_block->icptcode = 0;
4021 	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
4022 	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
4023 	trace_kvm_s390_sie_enter(vcpu, cpuflags);
4024 
4025 	return 0;
4026 }
4027 
4028 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
4029 {
4030 	struct kvm_s390_pgm_info pgm_info = {
4031 		.code = PGM_ADDRESSING,
4032 	};
4033 	u8 opcode, ilen;
4034 	int rc;
4035 
4036 	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
4037 	trace_kvm_s390_sie_fault(vcpu);
4038 
4039 	/*
4040 	 * We want to inject an addressing exception, which is defined as a
4041 	 * suppressing or terminating exception. However, since we came here
4042 	 * by a DAT access exception, the PSW still points to the faulting
4043 	 * instruction since DAT exceptions are nullifying. So we've got
4044 	 * to look up the current opcode to get the length of the instruction
4045 	 * to be able to forward the PSW.
4046 	 */
4047 	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
4048 	ilen = insn_length(opcode);
4049 	if (rc < 0) {
4050 		return rc;
4051 	} else if (rc) {
4052 		/* Instruction-Fetching Exceptions - we can't detect the ilen.
4053 		 * Forward by arbitrary ilc, injection will take care of
4054 		 * nullification if necessary.
4055 		 */
4056 		pgm_info = vcpu->arch.pgm;
4057 		ilen = 4;
4058 	}
4059 	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
4060 	kvm_s390_forward_psw(vcpu, ilen);
4061 	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
4062 }
4063 
4064 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
4065 {
4066 	struct mcck_volatile_info *mcck_info;
4067 	struct sie_page *sie_page;
4068 
4069 	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
4070 		   vcpu->arch.sie_block->icptcode);
4071 	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
4072 
4073 	if (guestdbg_enabled(vcpu))
4074 		kvm_s390_restore_guest_per_regs(vcpu);
4075 
4076 	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
4077 	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
4078 
4079 	if (exit_reason == -EINTR) {
4080 		VCPU_EVENT(vcpu, 3, "%s", "machine check");
4081 		sie_page = container_of(vcpu->arch.sie_block,
4082 					struct sie_page, sie_block);
4083 		mcck_info = &sie_page->mcck_info;
4084 		kvm_s390_reinject_machine_check(vcpu, mcck_info);
4085 		return 0;
4086 	}
4087 
4088 	if (vcpu->arch.sie_block->icptcode > 0) {
4089 		int rc = kvm_handle_sie_intercept(vcpu);
4090 
4091 		if (rc != -EOPNOTSUPP)
4092 			return rc;
4093 		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
4094 		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
4095 		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
4096 		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
4097 		return -EREMOTE;
4098 	} else if (exit_reason != -EFAULT) {
4099 		vcpu->stat.exit_null++;
4100 		return 0;
4101 	} else if (kvm_is_ucontrol(vcpu->kvm)) {
4102 		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
4103 		vcpu->run->s390_ucontrol.trans_exc_code =
4104 						current->thread.gmap_addr;
4105 		vcpu->run->s390_ucontrol.pgm_code = 0x10;
4106 		return -EREMOTE;
4107 	} else if (current->thread.gmap_pfault) {
4108 		trace_kvm_s390_major_guest_pfault(vcpu);
4109 		current->thread.gmap_pfault = 0;
4110 		if (kvm_arch_setup_async_pf(vcpu))
4111 			return 0;
4112 		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
4113 	}
4114 	return vcpu_post_run_fault_in_sie(vcpu);
4115 }
4116 
4117 #define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
4118 static int __vcpu_run(struct kvm_vcpu *vcpu)
4119 {
4120 	int rc, exit_reason;
4121 	struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
4122 
4123 	/*
4124 	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
4125 	 * ning the guest), so that memslots (and other stuff) are protected
4126 	 */
4127 	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4128 
4129 	do {
4130 		rc = vcpu_pre_run(vcpu);
4131 		if (rc)
4132 			break;
4133 
4134 		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4135 		/*
4136 		 * As PF_VCPU will be used in fault handler, between
4137 		 * guest_enter and guest_exit should be no uaccess.
4138 		 */
4139 		local_irq_disable();
4140 		guest_enter_irqoff();
4141 		__disable_cpu_timer_accounting(vcpu);
4142 		local_irq_enable();
4143 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4144 			memcpy(sie_page->pv_grregs,
4145 			       vcpu->run->s.regs.gprs,
4146 			       sizeof(sie_page->pv_grregs));
4147 		}
4148 		exit_reason = sie64a(vcpu->arch.sie_block,
4149 				     vcpu->run->s.regs.gprs);
4150 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4151 			memcpy(vcpu->run->s.regs.gprs,
4152 			       sie_page->pv_grregs,
4153 			       sizeof(sie_page->pv_grregs));
4154 			/*
4155 			 * We're not allowed to inject interrupts on intercepts
4156 			 * that leave the guest state in an "in-between" state
4157 			 * where the next SIE entry will do a continuation.
4158 			 * Fence interrupts in our "internal" PSW.
4159 			 */
4160 			if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
4161 			    vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
4162 				vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4163 			}
4164 		}
4165 		local_irq_disable();
4166 		__enable_cpu_timer_accounting(vcpu);
4167 		guest_exit_irqoff();
4168 		local_irq_enable();
4169 		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4170 
4171 		rc = vcpu_post_run(vcpu, exit_reason);
4172 	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
4173 
4174 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4175 	return rc;
4176 }
4177 
4178 static void sync_regs_fmt2(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
4179 {
4180 	struct runtime_instr_cb *riccb;
4181 	struct gs_cb *gscb;
4182 
4183 	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
4184 	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
4185 	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
4186 	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
4187 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4188 		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
4189 		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
4190 		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
4191 	}
4192 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
4193 		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
4194 		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
4195 		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
4196 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4197 			kvm_clear_async_pf_completion_queue(vcpu);
4198 	}
4199 	/*
4200 	 * If userspace sets the riccb (e.g. after migration) to a valid state,
4201 	 * we should enable RI here instead of doing the lazy enablement.
4202 	 */
4203 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
4204 	    test_kvm_facility(vcpu->kvm, 64) &&
4205 	    riccb->v &&
4206 	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
4207 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
4208 		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
4209 	}
4210 	/*
4211 	 * If userspace sets the gscb (e.g. after migration) to non-zero,
4212 	 * we should enable GS here instead of doing the lazy enablement.
4213 	 */
4214 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
4215 	    test_kvm_facility(vcpu->kvm, 133) &&
4216 	    gscb->gssm &&
4217 	    !vcpu->arch.gs_enabled) {
4218 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
4219 		vcpu->arch.sie_block->ecb |= ECB_GS;
4220 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
4221 		vcpu->arch.gs_enabled = 1;
4222 	}
4223 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
4224 	    test_kvm_facility(vcpu->kvm, 82)) {
4225 		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
4226 		vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
4227 	}
4228 	if (MACHINE_HAS_GS) {
4229 		preempt_disable();
4230 		__ctl_set_bit(2, 4);
4231 		if (current->thread.gs_cb) {
4232 			vcpu->arch.host_gscb = current->thread.gs_cb;
4233 			save_gs_cb(vcpu->arch.host_gscb);
4234 		}
4235 		if (vcpu->arch.gs_enabled) {
4236 			current->thread.gs_cb = (struct gs_cb *)
4237 						&vcpu->run->s.regs.gscb;
4238 			restore_gs_cb(current->thread.gs_cb);
4239 		}
4240 		preempt_enable();
4241 	}
4242 	/* SIE will load etoken directly from SDNX and therefore kvm_run */
4243 }
4244 
4245 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
4246 {
4247 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
4248 		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
4249 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
4250 		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
4251 		/* some control register changes require a tlb flush */
4252 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4253 	}
4254 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4255 		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
4256 		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
4257 	}
4258 	save_access_regs(vcpu->arch.host_acrs);
4259 	restore_access_regs(vcpu->run->s.regs.acrs);
4260 	/* save host (userspace) fprs/vrs */
4261 	save_fpu_regs();
4262 	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
4263 	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
4264 	if (MACHINE_HAS_VX)
4265 		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
4266 	else
4267 		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
4268 	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
4269 	if (test_fp_ctl(current->thread.fpu.fpc))
4270 		/* User space provided an invalid FPC, let's clear it */
4271 		current->thread.fpu.fpc = 0;
4272 
4273 	/* Sync fmt2 only data */
4274 	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
4275 		sync_regs_fmt2(vcpu, kvm_run);
4276 	} else {
4277 		/*
4278 		 * In several places we have to modify our internal view to
4279 		 * not do things that are disallowed by the ultravisor. For
4280 		 * example we must not inject interrupts after specific exits
4281 		 * (e.g. 112 prefix page not secure). We do this by turning
4282 		 * off the machine check, external and I/O interrupt bits
4283 		 * of our PSW copy. To avoid getting validity intercepts, we
4284 		 * do only accept the condition code from userspace.
4285 		 */
4286 		vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
4287 		vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
4288 						   PSW_MASK_CC;
4289 	}
4290 
4291 	kvm_run->kvm_dirty_regs = 0;
4292 }
4293 
4294 static void store_regs_fmt2(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
4295 {
4296 	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
4297 	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
4298 	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
4299 	kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
4300 	if (MACHINE_HAS_GS) {
4301 		__ctl_set_bit(2, 4);
4302 		if (vcpu->arch.gs_enabled)
4303 			save_gs_cb(current->thread.gs_cb);
4304 		preempt_disable();
4305 		current->thread.gs_cb = vcpu->arch.host_gscb;
4306 		restore_gs_cb(vcpu->arch.host_gscb);
4307 		preempt_enable();
4308 		if (!vcpu->arch.host_gscb)
4309 			__ctl_clear_bit(2, 4);
4310 		vcpu->arch.host_gscb = NULL;
4311 	}
4312 	/* SIE will save etoken directly into SDNX and therefore kvm_run */
4313 }
4314 
4315 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
4316 {
4317 	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
4318 	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
4319 	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
4320 	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
4321 	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
4322 	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
4323 	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
4324 	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
4325 	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
4326 	save_access_regs(vcpu->run->s.regs.acrs);
4327 	restore_access_regs(vcpu->arch.host_acrs);
4328 	/* Save guest register state */
4329 	save_fpu_regs();
4330 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4331 	/* Restore will be done lazily at return */
4332 	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
4333 	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
4334 	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
4335 		store_regs_fmt2(vcpu, kvm_run);
4336 }
4337 
4338 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
4339 {
4340 	struct kvm_run *kvm_run = vcpu->run;
4341 	int rc;
4342 
4343 	if (kvm_run->immediate_exit)
4344 		return -EINTR;
4345 
4346 	if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
4347 	    kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
4348 		return -EINVAL;
4349 
4350 	vcpu_load(vcpu);
4351 
4352 	if (guestdbg_exit_pending(vcpu)) {
4353 		kvm_s390_prepare_debug_exit(vcpu);
4354 		rc = 0;
4355 		goto out;
4356 	}
4357 
4358 	kvm_sigset_activate(vcpu);
4359 
4360 	/*
4361 	 * no need to check the return value of vcpu_start as it can only have
4362 	 * an error for protvirt, but protvirt means user cpu state
4363 	 */
4364 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
4365 		kvm_s390_vcpu_start(vcpu);
4366 	} else if (is_vcpu_stopped(vcpu)) {
4367 		pr_err_ratelimited("can't run stopped vcpu %d\n",
4368 				   vcpu->vcpu_id);
4369 		rc = -EINVAL;
4370 		goto out;
4371 	}
4372 
4373 	sync_regs(vcpu, kvm_run);
4374 	enable_cpu_timer_accounting(vcpu);
4375 
4376 	might_fault();
4377 	rc = __vcpu_run(vcpu);
4378 
4379 	if (signal_pending(current) && !rc) {
4380 		kvm_run->exit_reason = KVM_EXIT_INTR;
4381 		rc = -EINTR;
4382 	}
4383 
4384 	if (guestdbg_exit_pending(vcpu) && !rc)  {
4385 		kvm_s390_prepare_debug_exit(vcpu);
4386 		rc = 0;
4387 	}
4388 
4389 	if (rc == -EREMOTE) {
4390 		/* userspace support is needed, kvm_run has been prepared */
4391 		rc = 0;
4392 	}
4393 
4394 	disable_cpu_timer_accounting(vcpu);
4395 	store_regs(vcpu, kvm_run);
4396 
4397 	kvm_sigset_deactivate(vcpu);
4398 
4399 	vcpu->stat.exit_userspace++;
4400 out:
4401 	vcpu_put(vcpu);
4402 	return rc;
4403 }
4404 
4405 /*
4406  * store status at address
4407  * we use have two special cases:
4408  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
4409  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
4410  */
4411 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4412 {
4413 	unsigned char archmode = 1;
4414 	freg_t fprs[NUM_FPRS];
4415 	unsigned int px;
4416 	u64 clkcomp, cputm;
4417 	int rc;
4418 
4419 	px = kvm_s390_get_prefix(vcpu);
4420 	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
4421 		if (write_guest_abs(vcpu, 163, &archmode, 1))
4422 			return -EFAULT;
4423 		gpa = 0;
4424 	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
4425 		if (write_guest_real(vcpu, 163, &archmode, 1))
4426 			return -EFAULT;
4427 		gpa = px;
4428 	} else
4429 		gpa -= __LC_FPREGS_SAVE_AREA;
4430 
4431 	/* manually convert vector registers if necessary */
4432 	if (MACHINE_HAS_VX) {
4433 		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
4434 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4435 				     fprs, 128);
4436 	} else {
4437 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4438 				     vcpu->run->s.regs.fprs, 128);
4439 	}
4440 	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
4441 			      vcpu->run->s.regs.gprs, 128);
4442 	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
4443 			      &vcpu->arch.sie_block->gpsw, 16);
4444 	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
4445 			      &px, 4);
4446 	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
4447 			      &vcpu->run->s.regs.fpc, 4);
4448 	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
4449 			      &vcpu->arch.sie_block->todpr, 4);
4450 	cputm = kvm_s390_get_cpu_timer(vcpu);
4451 	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
4452 			      &cputm, 8);
4453 	clkcomp = vcpu->arch.sie_block->ckc >> 8;
4454 	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
4455 			      &clkcomp, 8);
4456 	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
4457 			      &vcpu->run->s.regs.acrs, 64);
4458 	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
4459 			      &vcpu->arch.sie_block->gcr, 128);
4460 	return rc ? -EFAULT : 0;
4461 }
4462 
4463 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
4464 {
4465 	/*
4466 	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
4467 	 * switch in the run ioctl. Let's update our copies before we save
4468 	 * it into the save area
4469 	 */
4470 	save_fpu_regs();
4471 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4472 	save_access_regs(vcpu->run->s.regs.acrs);
4473 
4474 	return kvm_s390_store_status_unloaded(vcpu, addr);
4475 }
4476 
4477 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4478 {
4479 	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
4480 	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
4481 }
4482 
4483 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
4484 {
4485 	unsigned int i;
4486 	struct kvm_vcpu *vcpu;
4487 
4488 	kvm_for_each_vcpu(i, vcpu, kvm) {
4489 		__disable_ibs_on_vcpu(vcpu);
4490 	}
4491 }
4492 
4493 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4494 {
4495 	if (!sclp.has_ibs)
4496 		return;
4497 	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
4498 	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
4499 }
4500 
4501 int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
4502 {
4503 	int i, online_vcpus, r = 0, started_vcpus = 0;
4504 
4505 	if (!is_vcpu_stopped(vcpu))
4506 		return 0;
4507 
4508 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
4509 	/* Only one cpu at a time may enter/leave the STOPPED state. */
4510 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
4511 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4512 
4513 	/* Let's tell the UV that we want to change into the operating state */
4514 	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4515 		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
4516 		if (r) {
4517 			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4518 			return r;
4519 		}
4520 	}
4521 
4522 	for (i = 0; i < online_vcpus; i++) {
4523 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
4524 			started_vcpus++;
4525 	}
4526 
4527 	if (started_vcpus == 0) {
4528 		/* we're the only active VCPU -> speed it up */
4529 		__enable_ibs_on_vcpu(vcpu);
4530 	} else if (started_vcpus == 1) {
4531 		/*
4532 		 * As we are starting a second VCPU, we have to disable
4533 		 * the IBS facility on all VCPUs to remove potentially
4534 		 * oustanding ENABLE requests.
4535 		 */
4536 		__disable_ibs_on_all_vcpus(vcpu->kvm);
4537 	}
4538 
4539 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
4540 	/*
4541 	 * The real PSW might have changed due to a RESTART interpreted by the
4542 	 * ultravisor. We block all interrupts and let the next sie exit
4543 	 * refresh our view.
4544 	 */
4545 	if (kvm_s390_pv_cpu_is_protected(vcpu))
4546 		vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4547 	/*
4548 	 * Another VCPU might have used IBS while we were offline.
4549 	 * Let's play safe and flush the VCPU at startup.
4550 	 */
4551 	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4552 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4553 	return 0;
4554 }
4555 
4556 int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
4557 {
4558 	int i, online_vcpus, r = 0, started_vcpus = 0;
4559 	struct kvm_vcpu *started_vcpu = NULL;
4560 
4561 	if (is_vcpu_stopped(vcpu))
4562 		return 0;
4563 
4564 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
4565 	/* Only one cpu at a time may enter/leave the STOPPED state. */
4566 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
4567 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4568 
4569 	/* Let's tell the UV that we want to change into the stopped state */
4570 	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4571 		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
4572 		if (r) {
4573 			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4574 			return r;
4575 		}
4576 	}
4577 
4578 	/* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
4579 	kvm_s390_clear_stop_irq(vcpu);
4580 
4581 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
4582 	__disable_ibs_on_vcpu(vcpu);
4583 
4584 	for (i = 0; i < online_vcpus; i++) {
4585 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
4586 			started_vcpus++;
4587 			started_vcpu = vcpu->kvm->vcpus[i];
4588 		}
4589 	}
4590 
4591 	if (started_vcpus == 1) {
4592 		/*
4593 		 * As we only have one VCPU left, we want to enable the
4594 		 * IBS facility for that VCPU to speed it up.
4595 		 */
4596 		__enable_ibs_on_vcpu(started_vcpu);
4597 	}
4598 
4599 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4600 	return 0;
4601 }
4602 
4603 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4604 				     struct kvm_enable_cap *cap)
4605 {
4606 	int r;
4607 
4608 	if (cap->flags)
4609 		return -EINVAL;
4610 
4611 	switch (cap->cap) {
4612 	case KVM_CAP_S390_CSS_SUPPORT:
4613 		if (!vcpu->kvm->arch.css_support) {
4614 			vcpu->kvm->arch.css_support = 1;
4615 			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
4616 			trace_kvm_s390_enable_css(vcpu->kvm);
4617 		}
4618 		r = 0;
4619 		break;
4620 	default:
4621 		r = -EINVAL;
4622 		break;
4623 	}
4624 	return r;
4625 }
4626 
4627 static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu,
4628 				   struct kvm_s390_mem_op *mop)
4629 {
4630 	void __user *uaddr = (void __user *)mop->buf;
4631 	int r = 0;
4632 
4633 	if (mop->flags || !mop->size)
4634 		return -EINVAL;
4635 	if (mop->size + mop->sida_offset < mop->size)
4636 		return -EINVAL;
4637 	if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
4638 		return -E2BIG;
4639 
4640 	switch (mop->op) {
4641 	case KVM_S390_MEMOP_SIDA_READ:
4642 		if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
4643 				 mop->sida_offset), mop->size))
4644 			r = -EFAULT;
4645 
4646 		break;
4647 	case KVM_S390_MEMOP_SIDA_WRITE:
4648 		if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
4649 				   mop->sida_offset), uaddr, mop->size))
4650 			r = -EFAULT;
4651 		break;
4652 	}
4653 	return r;
4654 }
4655 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
4656 				  struct kvm_s390_mem_op *mop)
4657 {
4658 	void __user *uaddr = (void __user *)mop->buf;
4659 	void *tmpbuf = NULL;
4660 	int r = 0;
4661 	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
4662 				    | KVM_S390_MEMOP_F_CHECK_ONLY;
4663 
4664 	if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
4665 		return -EINVAL;
4666 
4667 	if (mop->size > MEM_OP_MAX_SIZE)
4668 		return -E2BIG;
4669 
4670 	if (kvm_s390_pv_cpu_is_protected(vcpu))
4671 		return -EINVAL;
4672 
4673 	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
4674 		tmpbuf = vmalloc(mop->size);
4675 		if (!tmpbuf)
4676 			return -ENOMEM;
4677 	}
4678 
4679 	switch (mop->op) {
4680 	case KVM_S390_MEMOP_LOGICAL_READ:
4681 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4682 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4683 					    mop->size, GACC_FETCH);
4684 			break;
4685 		}
4686 		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4687 		if (r == 0) {
4688 			if (copy_to_user(uaddr, tmpbuf, mop->size))
4689 				r = -EFAULT;
4690 		}
4691 		break;
4692 	case KVM_S390_MEMOP_LOGICAL_WRITE:
4693 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4694 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4695 					    mop->size, GACC_STORE);
4696 			break;
4697 		}
4698 		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4699 			r = -EFAULT;
4700 			break;
4701 		}
4702 		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4703 		break;
4704 	}
4705 
4706 	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4707 		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
4708 
4709 	vfree(tmpbuf);
4710 	return r;
4711 }
4712 
4713 static long kvm_s390_guest_memsida_op(struct kvm_vcpu *vcpu,
4714 				      struct kvm_s390_mem_op *mop)
4715 {
4716 	int r, srcu_idx;
4717 
4718 	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4719 
4720 	switch (mop->op) {
4721 	case KVM_S390_MEMOP_LOGICAL_READ:
4722 	case KVM_S390_MEMOP_LOGICAL_WRITE:
4723 		r = kvm_s390_guest_mem_op(vcpu, mop);
4724 		break;
4725 	case KVM_S390_MEMOP_SIDA_READ:
4726 	case KVM_S390_MEMOP_SIDA_WRITE:
4727 		/* we are locked against sida going away by the vcpu->mutex */
4728 		r = kvm_s390_guest_sida_op(vcpu, mop);
4729 		break;
4730 	default:
4731 		r = -EINVAL;
4732 	}
4733 
4734 	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
4735 	return r;
4736 }
4737 
4738 long kvm_arch_vcpu_async_ioctl(struct file *filp,
4739 			       unsigned int ioctl, unsigned long arg)
4740 {
4741 	struct kvm_vcpu *vcpu = filp->private_data;
4742 	void __user *argp = (void __user *)arg;
4743 
4744 	switch (ioctl) {
4745 	case KVM_S390_IRQ: {
4746 		struct kvm_s390_irq s390irq;
4747 
4748 		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4749 			return -EFAULT;
4750 		return kvm_s390_inject_vcpu(vcpu, &s390irq);
4751 	}
4752 	case KVM_S390_INTERRUPT: {
4753 		struct kvm_s390_interrupt s390int;
4754 		struct kvm_s390_irq s390irq = {};
4755 
4756 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
4757 			return -EFAULT;
4758 		if (s390int_to_s390irq(&s390int, &s390irq))
4759 			return -EINVAL;
4760 		return kvm_s390_inject_vcpu(vcpu, &s390irq);
4761 	}
4762 	}
4763 	return -ENOIOCTLCMD;
4764 }
4765 
4766 long kvm_arch_vcpu_ioctl(struct file *filp,
4767 			 unsigned int ioctl, unsigned long arg)
4768 {
4769 	struct kvm_vcpu *vcpu = filp->private_data;
4770 	void __user *argp = (void __user *)arg;
4771 	int idx;
4772 	long r;
4773 	u16 rc, rrc;
4774 
4775 	vcpu_load(vcpu);
4776 
4777 	switch (ioctl) {
4778 	case KVM_S390_STORE_STATUS:
4779 		idx = srcu_read_lock(&vcpu->kvm->srcu);
4780 		r = kvm_s390_store_status_unloaded(vcpu, arg);
4781 		srcu_read_unlock(&vcpu->kvm->srcu, idx);
4782 		break;
4783 	case KVM_S390_SET_INITIAL_PSW: {
4784 		psw_t psw;
4785 
4786 		r = -EFAULT;
4787 		if (copy_from_user(&psw, argp, sizeof(psw)))
4788 			break;
4789 		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4790 		break;
4791 	}
4792 	case KVM_S390_CLEAR_RESET:
4793 		r = 0;
4794 		kvm_arch_vcpu_ioctl_clear_reset(vcpu);
4795 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4796 			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4797 					  UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
4798 			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
4799 				   rc, rrc);
4800 		}
4801 		break;
4802 	case KVM_S390_INITIAL_RESET:
4803 		r = 0;
4804 		kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4805 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4806 			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4807 					  UVC_CMD_CPU_RESET_INITIAL,
4808 					  &rc, &rrc);
4809 			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
4810 				   rc, rrc);
4811 		}
4812 		break;
4813 	case KVM_S390_NORMAL_RESET:
4814 		r = 0;
4815 		kvm_arch_vcpu_ioctl_normal_reset(vcpu);
4816 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4817 			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4818 					  UVC_CMD_CPU_RESET, &rc, &rrc);
4819 			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
4820 				   rc, rrc);
4821 		}
4822 		break;
4823 	case KVM_SET_ONE_REG:
4824 	case KVM_GET_ONE_REG: {
4825 		struct kvm_one_reg reg;
4826 		r = -EINVAL;
4827 		if (kvm_s390_pv_cpu_is_protected(vcpu))
4828 			break;
4829 		r = -EFAULT;
4830 		if (copy_from_user(&reg, argp, sizeof(reg)))
4831 			break;
4832 		if (ioctl == KVM_SET_ONE_REG)
4833 			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4834 		else
4835 			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4836 		break;
4837 	}
4838 #ifdef CONFIG_KVM_S390_UCONTROL
4839 	case KVM_S390_UCAS_MAP: {
4840 		struct kvm_s390_ucas_mapping ucasmap;
4841 
4842 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4843 			r = -EFAULT;
4844 			break;
4845 		}
4846 
4847 		if (!kvm_is_ucontrol(vcpu->kvm)) {
4848 			r = -EINVAL;
4849 			break;
4850 		}
4851 
4852 		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4853 				     ucasmap.vcpu_addr, ucasmap.length);
4854 		break;
4855 	}
4856 	case KVM_S390_UCAS_UNMAP: {
4857 		struct kvm_s390_ucas_mapping ucasmap;
4858 
4859 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4860 			r = -EFAULT;
4861 			break;
4862 		}
4863 
4864 		if (!kvm_is_ucontrol(vcpu->kvm)) {
4865 			r = -EINVAL;
4866 			break;
4867 		}
4868 
4869 		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
4870 			ucasmap.length);
4871 		break;
4872 	}
4873 #endif
4874 	case KVM_S390_VCPU_FAULT: {
4875 		r = gmap_fault(vcpu->arch.gmap, arg, 0);
4876 		break;
4877 	}
4878 	case KVM_ENABLE_CAP:
4879 	{
4880 		struct kvm_enable_cap cap;
4881 		r = -EFAULT;
4882 		if (copy_from_user(&cap, argp, sizeof(cap)))
4883 			break;
4884 		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4885 		break;
4886 	}
4887 	case KVM_S390_MEM_OP: {
4888 		struct kvm_s390_mem_op mem_op;
4889 
4890 		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4891 			r = kvm_s390_guest_memsida_op(vcpu, &mem_op);
4892 		else
4893 			r = -EFAULT;
4894 		break;
4895 	}
4896 	case KVM_S390_SET_IRQ_STATE: {
4897 		struct kvm_s390_irq_state irq_state;
4898 
4899 		r = -EFAULT;
4900 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4901 			break;
4902 		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4903 		    irq_state.len == 0 ||
4904 		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4905 			r = -EINVAL;
4906 			break;
4907 		}
4908 		/* do not use irq_state.flags, it will break old QEMUs */
4909 		r = kvm_s390_set_irq_state(vcpu,
4910 					   (void __user *) irq_state.buf,
4911 					   irq_state.len);
4912 		break;
4913 	}
4914 	case KVM_S390_GET_IRQ_STATE: {
4915 		struct kvm_s390_irq_state irq_state;
4916 
4917 		r = -EFAULT;
4918 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4919 			break;
4920 		if (irq_state.len == 0) {
4921 			r = -EINVAL;
4922 			break;
4923 		}
4924 		/* do not use irq_state.flags, it will break old QEMUs */
4925 		r = kvm_s390_get_irq_state(vcpu,
4926 					   (__u8 __user *)  irq_state.buf,
4927 					   irq_state.len);
4928 		break;
4929 	}
4930 	default:
4931 		r = -ENOTTY;
4932 	}
4933 
4934 	vcpu_put(vcpu);
4935 	return r;
4936 }
4937 
4938 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
4939 {
4940 #ifdef CONFIG_KVM_S390_UCONTROL
4941 	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
4942 		 && (kvm_is_ucontrol(vcpu->kvm))) {
4943 		vmf->page = virt_to_page(vcpu->arch.sie_block);
4944 		get_page(vmf->page);
4945 		return 0;
4946 	}
4947 #endif
4948 	return VM_FAULT_SIGBUS;
4949 }
4950 
4951 /* Section: memory related */
4952 int kvm_arch_prepare_memory_region(struct kvm *kvm,
4953 				   struct kvm_memory_slot *memslot,
4954 				   const struct kvm_userspace_memory_region *mem,
4955 				   enum kvm_mr_change change)
4956 {
4957 	/* A few sanity checks. We can have memory slots which have to be
4958 	   located/ended at a segment boundary (1MB). The memory in userland is
4959 	   ok to be fragmented into various different vmas. It is okay to mmap()
4960 	   and munmap() stuff in this slot after doing this call at any time */
4961 
4962 	if (mem->userspace_addr & 0xffffful)
4963 		return -EINVAL;
4964 
4965 	if (mem->memory_size & 0xffffful)
4966 		return -EINVAL;
4967 
4968 	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
4969 		return -EINVAL;
4970 
4971 	/* When we are protected, we should not change the memory slots */
4972 	if (kvm_s390_pv_get_handle(kvm))
4973 		return -EINVAL;
4974 	return 0;
4975 }
4976 
4977 void kvm_arch_commit_memory_region(struct kvm *kvm,
4978 				const struct kvm_userspace_memory_region *mem,
4979 				struct kvm_memory_slot *old,
4980 				const struct kvm_memory_slot *new,
4981 				enum kvm_mr_change change)
4982 {
4983 	int rc = 0;
4984 
4985 	switch (change) {
4986 	case KVM_MR_DELETE:
4987 		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
4988 					old->npages * PAGE_SIZE);
4989 		break;
4990 	case KVM_MR_MOVE:
4991 		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
4992 					old->npages * PAGE_SIZE);
4993 		if (rc)
4994 			break;
4995 		fallthrough;
4996 	case KVM_MR_CREATE:
4997 		rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
4998 				      mem->guest_phys_addr, mem->memory_size);
4999 		break;
5000 	case KVM_MR_FLAGS_ONLY:
5001 		break;
5002 	default:
5003 		WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
5004 	}
5005 	if (rc)
5006 		pr_warn("failed to commit memory region\n");
5007 	return;
5008 }
5009 
5010 static inline unsigned long nonhyp_mask(int i)
5011 {
5012 	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
5013 
5014 	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
5015 }
5016 
5017 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
5018 {
5019 	vcpu->valid_wakeup = false;
5020 }
5021 
5022 static int __init kvm_s390_init(void)
5023 {
5024 	int i;
5025 
5026 	if (!sclp.has_sief2) {
5027 		pr_info("SIE is not available\n");
5028 		return -ENODEV;
5029 	}
5030 
5031 	if (nested && hpage) {
5032 		pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
5033 		return -EINVAL;
5034 	}
5035 
5036 	for (i = 0; i < 16; i++)
5037 		kvm_s390_fac_base[i] |=
5038 			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
5039 
5040 	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
5041 }
5042 
5043 static void __exit kvm_s390_exit(void)
5044 {
5045 	kvm_exit();
5046 }
5047 
/* Register the init and exit routines of this module. */
module_init(kvm_s390_init);
module_exit(kvm_s390_exit);
5050 
5051 /*
5052  * Enable autoloading of the kvm module.
5053  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
5054  * since x86 takes a different approach.
5055  */
5056 #include <linux/miscdevice.h>
5057 MODULE_ALIAS_MISCDEV(KVM_MINOR);
5058 MODULE_ALIAS("devname:kvm");
5059