xref: /openbmc/linux/arch/s390/kvm/kvm-s390.c (revision bdaedca74d6293b6ac643a8ebe8231b52bf1171b)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * hosting IBM Z kernel virtual machines (s390x)
4  *
5  * Copyright IBM Corp. 2008, 2020
6  *
7  *    Author(s): Carsten Otte <cotte@de.ibm.com>
8  *               Christian Borntraeger <borntraeger@de.ibm.com>
9  *               Heiko Carstens <heiko.carstens@de.ibm.com>
10  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
11  *               Jason J. Herne <jjherne@us.ibm.com>
12  */
13 
14 #define KMSG_COMPONENT "kvm-s390"
15 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
16 
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/moduleparam.h>
27 #include <linux/random.h>
28 #include <linux/slab.h>
29 #include <linux/timer.h>
30 #include <linux/vmalloc.h>
31 #include <linux/bitmap.h>
32 #include <linux/sched/signal.h>
33 #include <linux/string.h>
34 #include <linux/pgtable.h>
35 
36 #include <asm/asm-offsets.h>
37 #include <asm/lowcore.h>
38 #include <asm/stp.h>
39 #include <asm/gmap.h>
40 #include <asm/nmi.h>
41 #include <asm/switch_to.h>
42 #include <asm/isc.h>
43 #include <asm/sclp.h>
44 #include <asm/cpacf.h>
45 #include <asm/timex.h>
46 #include <asm/ap.h>
47 #include <asm/uv.h>
48 #include <asm/fpu/api.h>
49 #include "kvm-s390.h"
50 #include "gaccess.h"
51 
52 #define CREATE_TRACE_POINTS
53 #include "trace.h"
54 #include "trace-s390.h"
55 
#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
/* NOTE(review): presumably the number of local interrupt slots per vcpu - confirm */
#define LOCAL_IRQS 32
/* Size in bytes of a buffer holding (KVM_MAX_VCPUS + LOCAL_IRQS) kvm_s390_irq entries */
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))
60 
/*
 * Table of statistics counters. Every entry pairs a name string with a
 * counter field, declared either through VCPU_STAT() or VM_STAT()
 * (NOTE(review): presumably per-vcpu vs. per-VM counters, exported via
 * debugfs as the element type suggests - confirm against the macro
 * definitions). The table is terminated by a { NULL } entry.
 */
struct kvm_stats_debugfs_item debugfs_entries[] = {
	VCPU_STAT("userspace_handled", exit_userspace),
	VCPU_STAT("exit_null", exit_null),
	VCPU_STAT("pfault_sync", pfault_sync),
	VCPU_STAT("exit_validity", exit_validity),
	VCPU_STAT("exit_stop_request", exit_stop_request),
	VCPU_STAT("exit_external_request", exit_external_request),
	VCPU_STAT("exit_io_request", exit_io_request),
	VCPU_STAT("exit_external_interrupt", exit_external_interrupt),
	VCPU_STAT("exit_instruction", exit_instruction),
	VCPU_STAT("exit_pei", exit_pei),
	VCPU_STAT("exit_program_interruption", exit_program_interruption),
	VCPU_STAT("exit_instr_and_program_int", exit_instr_and_program),
	VCPU_STAT("exit_operation_exception", exit_operation_exception),
	VCPU_STAT("halt_successful_poll", halt_successful_poll),
	VCPU_STAT("halt_attempted_poll", halt_attempted_poll),
	VCPU_STAT("halt_poll_invalid", halt_poll_invalid),
	VCPU_STAT("halt_no_poll_steal", halt_no_poll_steal),
	VCPU_STAT("halt_wakeup", halt_wakeup),
	VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns),
	VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns),
	VCPU_STAT("instruction_lctlg", instruction_lctlg),
	VCPU_STAT("instruction_lctl", instruction_lctl),
	VCPU_STAT("instruction_stctl", instruction_stctl),
	VCPU_STAT("instruction_stctg", instruction_stctg),
	VCPU_STAT("deliver_ckc", deliver_ckc),
	VCPU_STAT("deliver_cputm", deliver_cputm),
	VCPU_STAT("deliver_emergency_signal", deliver_emergency_signal),
	VCPU_STAT("deliver_external_call", deliver_external_call),
	VCPU_STAT("deliver_service_signal", deliver_service_signal),
	VCPU_STAT("deliver_virtio", deliver_virtio),
	VCPU_STAT("deliver_stop_signal", deliver_stop_signal),
	VCPU_STAT("deliver_prefix_signal", deliver_prefix_signal),
	VCPU_STAT("deliver_restart_signal", deliver_restart_signal),
	VCPU_STAT("deliver_program", deliver_program),
	VCPU_STAT("deliver_io", deliver_io),
	VCPU_STAT("deliver_machine_check", deliver_machine_check),
	VCPU_STAT("exit_wait_state", exit_wait_state),
	VCPU_STAT("inject_ckc", inject_ckc),
	VCPU_STAT("inject_cputm", inject_cputm),
	VCPU_STAT("inject_external_call", inject_external_call),
	VM_STAT("inject_float_mchk", inject_float_mchk),
	VCPU_STAT("inject_emergency_signal", inject_emergency_signal),
	VM_STAT("inject_io", inject_io),
	VCPU_STAT("inject_mchk", inject_mchk),
	VM_STAT("inject_pfault_done", inject_pfault_done),
	VCPU_STAT("inject_program", inject_program),
	VCPU_STAT("inject_restart", inject_restart),
	VM_STAT("inject_service_signal", inject_service_signal),
	VCPU_STAT("inject_set_prefix", inject_set_prefix),
	VCPU_STAT("inject_stop_signal", inject_stop_signal),
	VCPU_STAT("inject_pfault_init", inject_pfault_init),
	VM_STAT("inject_virtio", inject_virtio),
	VCPU_STAT("instruction_epsw", instruction_epsw),
	VCPU_STAT("instruction_gs", instruction_gs),
	VCPU_STAT("instruction_io_other", instruction_io_other),
	VCPU_STAT("instruction_lpsw", instruction_lpsw),
	VCPU_STAT("instruction_lpswe", instruction_lpswe),
	VCPU_STAT("instruction_pfmf", instruction_pfmf),
	VCPU_STAT("instruction_ptff", instruction_ptff),
	VCPU_STAT("instruction_stidp", instruction_stidp),
	VCPU_STAT("instruction_sck", instruction_sck),
	VCPU_STAT("instruction_sckpf", instruction_sckpf),
	VCPU_STAT("instruction_spx", instruction_spx),
	VCPU_STAT("instruction_stpx", instruction_stpx),
	VCPU_STAT("instruction_stap", instruction_stap),
	VCPU_STAT("instruction_iske", instruction_iske),
	VCPU_STAT("instruction_ri", instruction_ri),
	VCPU_STAT("instruction_rrbe", instruction_rrbe),
	VCPU_STAT("instruction_sske", instruction_sske),
	VCPU_STAT("instruction_ipte_interlock", instruction_ipte_interlock),
	VCPU_STAT("instruction_essa", instruction_essa),
	VCPU_STAT("instruction_stsi", instruction_stsi),
	VCPU_STAT("instruction_stfl", instruction_stfl),
	VCPU_STAT("instruction_tb", instruction_tb),
	VCPU_STAT("instruction_tpi", instruction_tpi),
	VCPU_STAT("instruction_tprot", instruction_tprot),
	VCPU_STAT("instruction_tsch", instruction_tsch),
	VCPU_STAT("instruction_sthyi", instruction_sthyi),
	VCPU_STAT("instruction_sie", instruction_sie),
	VCPU_STAT("instruction_sigp_sense", instruction_sigp_sense),
	VCPU_STAT("instruction_sigp_sense_running", instruction_sigp_sense_running),
	VCPU_STAT("instruction_sigp_external_call", instruction_sigp_external_call),
	VCPU_STAT("instruction_sigp_emergency", instruction_sigp_emergency),
	VCPU_STAT("instruction_sigp_cond_emergency", instruction_sigp_cond_emergency),
	VCPU_STAT("instruction_sigp_start", instruction_sigp_start),
	VCPU_STAT("instruction_sigp_stop", instruction_sigp_stop),
	VCPU_STAT("instruction_sigp_stop_store_status", instruction_sigp_stop_store_status),
	VCPU_STAT("instruction_sigp_store_status", instruction_sigp_store_status),
	VCPU_STAT("instruction_sigp_store_adtl_status", instruction_sigp_store_adtl_status),
	VCPU_STAT("instruction_sigp_set_arch", instruction_sigp_arch),
	VCPU_STAT("instruction_sigp_set_prefix", instruction_sigp_prefix),
	VCPU_STAT("instruction_sigp_restart", instruction_sigp_restart),
	VCPU_STAT("instruction_sigp_cpu_reset", instruction_sigp_cpu_reset),
	VCPU_STAT("instruction_sigp_init_cpu_reset", instruction_sigp_init_cpu_reset),
	VCPU_STAT("instruction_sigp_unknown", instruction_sigp_unknown),
	VCPU_STAT("instruction_diag_10", diagnose_10),
	VCPU_STAT("instruction_diag_44", diagnose_44),
	VCPU_STAT("instruction_diag_9c", diagnose_9c),
	VCPU_STAT("diag_9c_ignored", diagnose_9c_ignored),
	VCPU_STAT("diag_9c_forward", diagnose_9c_forward),
	VCPU_STAT("instruction_diag_258", diagnose_258),
	VCPU_STAT("instruction_diag_308", diagnose_308),
	VCPU_STAT("instruction_diag_500", diagnose_500),
	VCPU_STAT("instruction_diag_other", diagnose_other),
	{ NULL }
};
168 
169 /* allow nested virtualization in KVM (if enabled by user space) */
170 static int nested;
171 module_param(nested, int, S_IRUGO);
172 MODULE_PARM_DESC(nested, "Nested virtualization support");
173 
174 /* allow 1m huge page guest backing, if !nested */
175 static int hpage;
176 module_param(hpage, int, 0444);
177 MODULE_PARM_DESC(hpage, "1m huge page backing support");
178 
179 /* maximum percentage of steal time for polling.  >100 is treated like 100 */
180 static u8 halt_poll_max_steal = 10;
181 module_param(halt_poll_max_steal, byte, 0644);
182 MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
183 
184 /* if set to true, the GISA will be initialized and used if available */
185 static bool use_gisa  = true;
186 module_param(use_gisa, bool, 0644);
187 MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");
188 
189 /* maximum diag9c forwarding per second */
190 unsigned int diag9c_forwarding_hz;
191 module_param(diag9c_forwarding_hz, uint, 0644);
192 MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off");
193 
/*
 * For now we handle at most 16 double words as this is what the s390 base
 * kernel handles and stores in the prefix page. If we ever need to go beyond
 * this, this requires changes to code, but the external uapi can stay.
 *
 * SIZE_INTERNAL is the element count of the two facility arrays below and
 * the value returned by kvm_s390_fac_size().
 */
#define SIZE_INTERNAL 16

/*
 * Base feature mask that defines default mask for facilities. Consists of the
 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
 */
static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
/*
 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
 * and defines the facilities that can be enabled via a cpu model.
 */
static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
211 
212 static unsigned long kvm_s390_fac_size(void)
213 {
214 	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
215 	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
216 	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
217 		sizeof(S390_lowcore.stfle_fac_list));
218 
219 	return SIZE_INTERNAL;
220 }
221 
/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

/* gmap pte notifiers; registered in kvm_arch_hardware_setup() */
static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
/* debug feature handles ("kvm-trace" and "kvm-uv"); created in kvm_arch_init() */
debug_info_t *kvm_s390_dbf;
debug_info_t *kvm_s390_dbf_uv;
231 
232 /* Section: not file related */
/*
 * Arch hook for enabling virtualization support.
 *
 * Nothing has to be switched on for s390, so this always reports success.
 */
int kvm_arch_hardware_enable(void)
{
	return 0;	/* every s390 is virtualization enabled ;-) */
}
238 
/*
 * Arch hook for the processor compatibility check.
 *
 * @opaque is not looked at; the check unconditionally succeeds (returns 0).
 */
int kvm_arch_check_processor_compat(void *opaque)
{
	(void)opaque;

	return 0;
}
243 
244 /* forward declarations */
245 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
246 			      unsigned long end);
247 static int sca_switch_to_extended(struct kvm *kvm);
248 
249 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
250 {
251 	u8 delta_idx = 0;
252 
253 	/*
254 	 * The TOD jumps by delta, we have to compensate this by adding
255 	 * -delta to the epoch.
256 	 */
257 	delta = -delta;
258 
259 	/* sign-extension - we're adding to signed values below */
260 	if ((s64)delta < 0)
261 		delta_idx = -1;
262 
263 	scb->epoch += delta;
264 	if (scb->ecd & ECD_MEF) {
265 		scb->epdx += delta_idx;
266 		if (scb->epoch < delta)
267 			scb->epdx += 1;
268 	}
269 }
270 
271 /*
272  * This callback is executed during stop_machine(). All CPUs are therefore
273  * temporarily stopped. In order not to change guest behavior, we have to
274  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
275  * so a CPU won't be stopped while calculating with the epoch.
276  */
277 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
278 			  void *v)
279 {
280 	struct kvm *kvm;
281 	struct kvm_vcpu *vcpu;
282 	int i;
283 	unsigned long long *delta = v;
284 
285 	list_for_each_entry(kvm, &vm_list, vm_list) {
286 		kvm_for_each_vcpu(i, vcpu, kvm) {
287 			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
288 			if (i == 0) {
289 				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
290 				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
291 			}
292 			if (vcpu->arch.cputm_enabled)
293 				vcpu->arch.cputm_start += *delta;
294 			if (vcpu->arch.vsie_block)
295 				kvm_clock_sync_scb(vcpu->arch.vsie_block,
296 						   *delta);
297 		}
298 	}
299 	return NOTIFY_OK;
300 }
301 
/*
 * Notifier block that routes epoch delta notifications to kvm_clock_sync();
 * registered on s390_epoch_delta_notifier in kvm_arch_hardware_setup().
 */
static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};
305 
306 int kvm_arch_hardware_setup(void *opaque)
307 {
308 	gmap_notifier.notifier_call = kvm_gmap_notifier;
309 	gmap_register_pte_notifier(&gmap_notifier);
310 	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
311 	gmap_register_pte_notifier(&vsie_gmap_notifier);
312 	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
313 				       &kvm_clock_notifier);
314 	return 0;
315 }
316 
317 void kvm_arch_hardware_unsetup(void)
318 {
319 	gmap_unregister_pte_notifier(&gmap_notifier);
320 	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
321 	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
322 					 &kvm_clock_notifier);
323 }
324 
325 static void allow_cpu_feat(unsigned long nr)
326 {
327 	set_bit_inv(nr, kvm_s390_available_cpu_feat);
328 }
329 
/*
 * Ask the PLO instruction whether function code @nr is available.
 *
 * General register 0 is loaded with @nr with bit 0x100 set, which selects
 * the "test bit" mode mentioned below. Returns 1 when the resulting
 * condition code is 0, otherwise 0.
 */
static inline int plo_test_bit(unsigned char nr)
{
	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
	int cc;

	asm volatile(
		/* Parameter registers are ignored for "test bit" */
		"	plo	0,0,0,0(0)\n"
		/* fetch the condition code and shift it down to bits 0-1 */
		"	ipm	%0\n"
		"	srl	%0,28\n"
		: "=d" (cc)
		: "d" (r0)
		: "cc");
	return cc == 0;
}
345 
/*
 * Issue the query function (function code 0 in general register 0) of the
 * 32-bit RRF-format instruction @opcode, with general register 1 holding
 * the address of @query.
 *
 * @opcode has to be a compile-time constant ("i" constraint), which is why
 * the helper is __always_inline.
 * NOTE(review): presumably the instruction stores its result block at
 * @query, hence the "memory" clobber; the required size of the buffer is
 * not visible here - confirm against the callers' buffers.
 */
static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
{
	register unsigned long r0 asm("0") = 0;	/* query function */
	register unsigned long r1 asm("1") = (unsigned long) query;

	asm volatile(
		/* Parameter regs are ignored */
		"	.insn	rrf,%[opc] << 16,2,4,6,0\n"
		:
		: "d" (r0), "a" (r1), [opc] "i" (opcode)
		: "cc", "memory");
}
358 
359 #define INSN_SORTL 0xb938
360 #define INSN_DFLTCC 0xb939
361 
362 static void kvm_s390_cpu_feat_init(void)
363 {
364 	int i;
365 
366 	for (i = 0; i < 256; ++i) {
367 		if (plo_test_bit(i))
368 			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
369 	}
370 
371 	if (test_facility(28)) /* TOD-clock steering */
372 		ptff(kvm_s390_available_subfunc.ptff,
373 		     sizeof(kvm_s390_available_subfunc.ptff),
374 		     PTFF_QAF);
375 
376 	if (test_facility(17)) { /* MSA */
377 		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
378 			      kvm_s390_available_subfunc.kmac);
379 		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
380 			      kvm_s390_available_subfunc.kmc);
381 		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
382 			      kvm_s390_available_subfunc.km);
383 		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
384 			      kvm_s390_available_subfunc.kimd);
385 		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
386 			      kvm_s390_available_subfunc.klmd);
387 	}
388 	if (test_facility(76)) /* MSA3 */
389 		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
390 			      kvm_s390_available_subfunc.pckmo);
391 	if (test_facility(77)) { /* MSA4 */
392 		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
393 			      kvm_s390_available_subfunc.kmctr);
394 		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
395 			      kvm_s390_available_subfunc.kmf);
396 		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
397 			      kvm_s390_available_subfunc.kmo);
398 		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
399 			      kvm_s390_available_subfunc.pcc);
400 	}
401 	if (test_facility(57)) /* MSA5 */
402 		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
403 			      kvm_s390_available_subfunc.ppno);
404 
405 	if (test_facility(146)) /* MSA8 */
406 		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
407 			      kvm_s390_available_subfunc.kma);
408 
409 	if (test_facility(155)) /* MSA9 */
410 		__cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
411 			      kvm_s390_available_subfunc.kdsa);
412 
413 	if (test_facility(150)) /* SORTL */
414 		__insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
415 
416 	if (test_facility(151)) /* DFLTCC */
417 		__insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
418 
419 	if (MACHINE_HAS_ESOP)
420 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
421 	/*
422 	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
423 	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
424 	 */
425 	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
426 	    !test_facility(3) || !nested)
427 		return;
428 	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
429 	if (sclp.has_64bscao)
430 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
431 	if (sclp.has_siif)
432 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
433 	if (sclp.has_gpere)
434 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
435 	if (sclp.has_gsls)
436 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
437 	if (sclp.has_ib)
438 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
439 	if (sclp.has_cei)
440 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
441 	if (sclp.has_ibs)
442 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
443 	if (sclp.has_kss)
444 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
445 	/*
446 	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
447 	 * all skey handling functions read/set the skey from the PGSTE
448 	 * instead of the real storage key.
449 	 *
450 	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
451 	 * pages being detected as preserved although they are resident.
452 	 *
453 	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
454 	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
455 	 *
456 	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
457 	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
458 	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
459 	 *
460 	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
461 	 * cannot easily shadow the SCA because of the ipte lock.
462 	 */
463 }
464 
465 int kvm_arch_init(void *opaque)
466 {
467 	int rc = -ENOMEM;
468 
469 	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
470 	if (!kvm_s390_dbf)
471 		return -ENOMEM;
472 
473 	kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
474 	if (!kvm_s390_dbf_uv)
475 		goto out;
476 
477 	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
478 	    debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
479 		goto out;
480 
481 	kvm_s390_cpu_feat_init();
482 
483 	/* Register floating interrupt controller interface. */
484 	rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
485 	if (rc) {
486 		pr_err("A FLIC registration call failed with rc=%d\n", rc);
487 		goto out;
488 	}
489 
490 	rc = kvm_s390_gib_init(GAL_ISC);
491 	if (rc)
492 		goto out;
493 
494 	return 0;
495 
496 out:
497 	kvm_arch_exit();
498 	return rc;
499 }
500 
501 void kvm_arch_exit(void)
502 {
503 	kvm_s390_gib_destroy();
504 	debug_unregister(kvm_s390_dbf);
505 	debug_unregister(kvm_s390_dbf_uv);
506 }
507 
508 /* Section: device related */
509 long kvm_arch_dev_ioctl(struct file *filp,
510 			unsigned int ioctl, unsigned long arg)
511 {
512 	if (ioctl == KVM_S390_ENABLE_SIE)
513 		return s390_enable_sie();
514 	return -EINVAL;
515 }
516 
517 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
518 {
519 	int r;
520 
521 	switch (ext) {
522 	case KVM_CAP_S390_PSW:
523 	case KVM_CAP_S390_GMAP:
524 	case KVM_CAP_SYNC_MMU:
525 #ifdef CONFIG_KVM_S390_UCONTROL
526 	case KVM_CAP_S390_UCONTROL:
527 #endif
528 	case KVM_CAP_ASYNC_PF:
529 	case KVM_CAP_SYNC_REGS:
530 	case KVM_CAP_ONE_REG:
531 	case KVM_CAP_ENABLE_CAP:
532 	case KVM_CAP_S390_CSS_SUPPORT:
533 	case KVM_CAP_IOEVENTFD:
534 	case KVM_CAP_DEVICE_CTRL:
535 	case KVM_CAP_S390_IRQCHIP:
536 	case KVM_CAP_VM_ATTRIBUTES:
537 	case KVM_CAP_MP_STATE:
538 	case KVM_CAP_IMMEDIATE_EXIT:
539 	case KVM_CAP_S390_INJECT_IRQ:
540 	case KVM_CAP_S390_USER_SIGP:
541 	case KVM_CAP_S390_USER_STSI:
542 	case KVM_CAP_S390_SKEYS:
543 	case KVM_CAP_S390_IRQ_STATE:
544 	case KVM_CAP_S390_USER_INSTR0:
545 	case KVM_CAP_S390_CMMA_MIGRATION:
546 	case KVM_CAP_S390_AIS:
547 	case KVM_CAP_S390_AIS_MIGRATION:
548 	case KVM_CAP_S390_VCPU_RESETS:
549 	case KVM_CAP_SET_GUEST_DEBUG:
550 	case KVM_CAP_S390_DIAG318:
551 		r = 1;
552 		break;
553 	case KVM_CAP_SET_GUEST_DEBUG2:
554 		r = KVM_GUESTDBG_VALID_MASK;
555 		break;
556 	case KVM_CAP_S390_HPAGE_1M:
557 		r = 0;
558 		if (hpage && !kvm_is_ucontrol(kvm))
559 			r = 1;
560 		break;
561 	case KVM_CAP_S390_MEM_OP:
562 		r = MEM_OP_MAX_SIZE;
563 		break;
564 	case KVM_CAP_NR_VCPUS:
565 	case KVM_CAP_MAX_VCPUS:
566 	case KVM_CAP_MAX_VCPU_ID:
567 		r = KVM_S390_BSCA_CPU_SLOTS;
568 		if (!kvm_s390_use_sca_entries())
569 			r = KVM_MAX_VCPUS;
570 		else if (sclp.has_esca && sclp.has_64bscao)
571 			r = KVM_S390_ESCA_CPU_SLOTS;
572 		break;
573 	case KVM_CAP_S390_COW:
574 		r = MACHINE_HAS_ESOP;
575 		break;
576 	case KVM_CAP_S390_VECTOR_REGISTERS:
577 		r = MACHINE_HAS_VX;
578 		break;
579 	case KVM_CAP_S390_RI:
580 		r = test_facility(64);
581 		break;
582 	case KVM_CAP_S390_GS:
583 		r = test_facility(133);
584 		break;
585 	case KVM_CAP_S390_BPB:
586 		r = test_facility(82);
587 		break;
588 	case KVM_CAP_S390_PROTECTED:
589 		r = is_prot_virt_host();
590 		break;
591 	default:
592 		r = 0;
593 	}
594 	return r;
595 }
596 
597 void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
598 {
599 	int i;
600 	gfn_t cur_gfn, last_gfn;
601 	unsigned long gaddr, vmaddr;
602 	struct gmap *gmap = kvm->arch.gmap;
603 	DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
604 
605 	/* Loop over all guest segments */
606 	cur_gfn = memslot->base_gfn;
607 	last_gfn = memslot->base_gfn + memslot->npages;
608 	for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
609 		gaddr = gfn_to_gpa(cur_gfn);
610 		vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
611 		if (kvm_is_error_hva(vmaddr))
612 			continue;
613 
614 		bitmap_zero(bitmap, _PAGE_ENTRIES);
615 		gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
616 		for (i = 0; i < _PAGE_ENTRIES; i++) {
617 			if (test_bit(i, bitmap))
618 				mark_page_dirty(kvm, cur_gfn + i);
619 		}
620 
621 		if (fatal_signal_pending(current))
622 			return;
623 		cond_resched();
624 	}
625 }
626 
627 /* Section: vm related */
628 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
629 
630 /*
631  * Get (and clear) the dirty memory log for a memory slot.
632  */
633 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
634 			       struct kvm_dirty_log *log)
635 {
636 	int r;
637 	unsigned long n;
638 	struct kvm_memory_slot *memslot;
639 	int is_dirty;
640 
641 	if (kvm_is_ucontrol(kvm))
642 		return -EINVAL;
643 
644 	mutex_lock(&kvm->slots_lock);
645 
646 	r = -EINVAL;
647 	if (log->slot >= KVM_USER_MEM_SLOTS)
648 		goto out;
649 
650 	r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
651 	if (r)
652 		goto out;
653 
654 	/* Clear the dirty log */
655 	if (is_dirty) {
656 		n = kvm_dirty_bitmap_bytes(memslot);
657 		memset(memslot->dirty_bitmap, 0, n);
658 	}
659 	r = 0;
660 out:
661 	mutex_unlock(&kvm->slots_lock);
662 	return r;
663 }
664 
665 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
666 {
667 	unsigned int i;
668 	struct kvm_vcpu *vcpu;
669 
670 	kvm_for_each_vcpu(i, vcpu, kvm) {
671 		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
672 	}
673 }
674 
675 int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
676 {
677 	int r;
678 
679 	if (cap->flags)
680 		return -EINVAL;
681 
682 	switch (cap->cap) {
683 	case KVM_CAP_S390_IRQCHIP:
684 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
685 		kvm->arch.use_irqchip = 1;
686 		r = 0;
687 		break;
688 	case KVM_CAP_S390_USER_SIGP:
689 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
690 		kvm->arch.user_sigp = 1;
691 		r = 0;
692 		break;
693 	case KVM_CAP_S390_VECTOR_REGISTERS:
694 		mutex_lock(&kvm->lock);
695 		if (kvm->created_vcpus) {
696 			r = -EBUSY;
697 		} else if (MACHINE_HAS_VX) {
698 			set_kvm_facility(kvm->arch.model.fac_mask, 129);
699 			set_kvm_facility(kvm->arch.model.fac_list, 129);
700 			if (test_facility(134)) {
701 				set_kvm_facility(kvm->arch.model.fac_mask, 134);
702 				set_kvm_facility(kvm->arch.model.fac_list, 134);
703 			}
704 			if (test_facility(135)) {
705 				set_kvm_facility(kvm->arch.model.fac_mask, 135);
706 				set_kvm_facility(kvm->arch.model.fac_list, 135);
707 			}
708 			if (test_facility(148)) {
709 				set_kvm_facility(kvm->arch.model.fac_mask, 148);
710 				set_kvm_facility(kvm->arch.model.fac_list, 148);
711 			}
712 			if (test_facility(152)) {
713 				set_kvm_facility(kvm->arch.model.fac_mask, 152);
714 				set_kvm_facility(kvm->arch.model.fac_list, 152);
715 			}
716 			r = 0;
717 		} else
718 			r = -EINVAL;
719 		mutex_unlock(&kvm->lock);
720 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
721 			 r ? "(not available)" : "(success)");
722 		break;
723 	case KVM_CAP_S390_RI:
724 		r = -EINVAL;
725 		mutex_lock(&kvm->lock);
726 		if (kvm->created_vcpus) {
727 			r = -EBUSY;
728 		} else if (test_facility(64)) {
729 			set_kvm_facility(kvm->arch.model.fac_mask, 64);
730 			set_kvm_facility(kvm->arch.model.fac_list, 64);
731 			r = 0;
732 		}
733 		mutex_unlock(&kvm->lock);
734 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
735 			 r ? "(not available)" : "(success)");
736 		break;
737 	case KVM_CAP_S390_AIS:
738 		mutex_lock(&kvm->lock);
739 		if (kvm->created_vcpus) {
740 			r = -EBUSY;
741 		} else {
742 			set_kvm_facility(kvm->arch.model.fac_mask, 72);
743 			set_kvm_facility(kvm->arch.model.fac_list, 72);
744 			r = 0;
745 		}
746 		mutex_unlock(&kvm->lock);
747 		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
748 			 r ? "(not available)" : "(success)");
749 		break;
750 	case KVM_CAP_S390_GS:
751 		r = -EINVAL;
752 		mutex_lock(&kvm->lock);
753 		if (kvm->created_vcpus) {
754 			r = -EBUSY;
755 		} else if (test_facility(133)) {
756 			set_kvm_facility(kvm->arch.model.fac_mask, 133);
757 			set_kvm_facility(kvm->arch.model.fac_list, 133);
758 			r = 0;
759 		}
760 		mutex_unlock(&kvm->lock);
761 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
762 			 r ? "(not available)" : "(success)");
763 		break;
764 	case KVM_CAP_S390_HPAGE_1M:
765 		mutex_lock(&kvm->lock);
766 		if (kvm->created_vcpus)
767 			r = -EBUSY;
768 		else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
769 			r = -EINVAL;
770 		else {
771 			r = 0;
772 			mmap_write_lock(kvm->mm);
773 			kvm->mm->context.allow_gmap_hpage_1m = 1;
774 			mmap_write_unlock(kvm->mm);
775 			/*
776 			 * We might have to create fake 4k page
777 			 * tables. To avoid that the hardware works on
778 			 * stale PGSTEs, we emulate these instructions.
779 			 */
780 			kvm->arch.use_skf = 0;
781 			kvm->arch.use_pfmfi = 0;
782 		}
783 		mutex_unlock(&kvm->lock);
784 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
785 			 r ? "(not available)" : "(success)");
786 		break;
787 	case KVM_CAP_S390_USER_STSI:
788 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
789 		kvm->arch.user_stsi = 1;
790 		r = 0;
791 		break;
792 	case KVM_CAP_S390_USER_INSTR0:
793 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
794 		kvm->arch.user_instr0 = 1;
795 		icpt_operexc_on_all_vcpus(kvm);
796 		r = 0;
797 		break;
798 	default:
799 		r = -EINVAL;
800 		break;
801 	}
802 	return r;
803 }
804 
805 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
806 {
807 	int ret;
808 
809 	switch (attr->attr) {
810 	case KVM_S390_VM_MEM_LIMIT_SIZE:
811 		ret = 0;
812 		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
813 			 kvm->arch.mem_limit);
814 		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
815 			ret = -EFAULT;
816 		break;
817 	default:
818 		ret = -ENXIO;
819 		break;
820 	}
821 	return ret;
822 }
823 
824 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
825 {
826 	int ret;
827 	unsigned int idx;
828 	switch (attr->attr) {
829 	case KVM_S390_VM_MEM_ENABLE_CMMA:
830 		ret = -ENXIO;
831 		if (!sclp.has_cmma)
832 			break;
833 
834 		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
835 		mutex_lock(&kvm->lock);
836 		if (kvm->created_vcpus)
837 			ret = -EBUSY;
838 		else if (kvm->mm->context.allow_gmap_hpage_1m)
839 			ret = -EINVAL;
840 		else {
841 			kvm->arch.use_cmma = 1;
842 			/* Not compatible with cmma. */
843 			kvm->arch.use_pfmfi = 0;
844 			ret = 0;
845 		}
846 		mutex_unlock(&kvm->lock);
847 		break;
848 	case KVM_S390_VM_MEM_CLR_CMMA:
849 		ret = -ENXIO;
850 		if (!sclp.has_cmma)
851 			break;
852 		ret = -EINVAL;
853 		if (!kvm->arch.use_cmma)
854 			break;
855 
856 		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
857 		mutex_lock(&kvm->lock);
858 		idx = srcu_read_lock(&kvm->srcu);
859 		s390_reset_cmma(kvm->arch.gmap->mm);
860 		srcu_read_unlock(&kvm->srcu, idx);
861 		mutex_unlock(&kvm->lock);
862 		ret = 0;
863 		break;
864 	case KVM_S390_VM_MEM_LIMIT_SIZE: {
865 		unsigned long new_limit;
866 
867 		if (kvm_is_ucontrol(kvm))
868 			return -EINVAL;
869 
870 		if (get_user(new_limit, (u64 __user *)attr->addr))
871 			return -EFAULT;
872 
873 		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
874 		    new_limit > kvm->arch.mem_limit)
875 			return -E2BIG;
876 
877 		if (!new_limit)
878 			return -EINVAL;
879 
880 		/* gmap_create takes last usable address */
881 		if (new_limit != KVM_S390_NO_MEM_LIMIT)
882 			new_limit -= 1;
883 
884 		ret = -EBUSY;
885 		mutex_lock(&kvm->lock);
886 		if (!kvm->created_vcpus) {
887 			/* gmap_create will round the limit up */
888 			struct gmap *new = gmap_create(current->mm, new_limit);
889 
890 			if (!new) {
891 				ret = -ENOMEM;
892 			} else {
893 				gmap_remove(kvm->arch.gmap);
894 				new->private = kvm;
895 				kvm->arch.gmap = new;
896 				ret = 0;
897 			}
898 		}
899 		mutex_unlock(&kvm->lock);
900 		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
901 		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
902 			 (void *) kvm->arch.gmap->asce);
903 		break;
904 	}
905 	default:
906 		ret = -ENXIO;
907 		break;
908 	}
909 	return ret;
910 }
911 
912 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
913 
914 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
915 {
916 	struct kvm_vcpu *vcpu;
917 	int i;
918 
919 	kvm_s390_vcpu_block_all(kvm);
920 
921 	kvm_for_each_vcpu(i, vcpu, kvm) {
922 		kvm_s390_vcpu_crypto_setup(vcpu);
923 		/* recreate the shadow crycb by leaving the VSIE handler */
924 		kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
925 	}
926 
927 	kvm_s390_vcpu_unblock_all(kvm);
928 }
929 
930 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
931 {
932 	mutex_lock(&kvm->lock);
933 	switch (attr->attr) {
934 	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
935 		if (!test_kvm_facility(kvm, 76)) {
936 			mutex_unlock(&kvm->lock);
937 			return -EINVAL;
938 		}
939 		get_random_bytes(
940 			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
941 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
942 		kvm->arch.crypto.aes_kw = 1;
943 		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
944 		break;
945 	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
946 		if (!test_kvm_facility(kvm, 76)) {
947 			mutex_unlock(&kvm->lock);
948 			return -EINVAL;
949 		}
950 		get_random_bytes(
951 			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
952 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
953 		kvm->arch.crypto.dea_kw = 1;
954 		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
955 		break;
956 	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
957 		if (!test_kvm_facility(kvm, 76)) {
958 			mutex_unlock(&kvm->lock);
959 			return -EINVAL;
960 		}
961 		kvm->arch.crypto.aes_kw = 0;
962 		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
963 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
964 		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
965 		break;
966 	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
967 		if (!test_kvm_facility(kvm, 76)) {
968 			mutex_unlock(&kvm->lock);
969 			return -EINVAL;
970 		}
971 		kvm->arch.crypto.dea_kw = 0;
972 		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
973 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
974 		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
975 		break;
976 	case KVM_S390_VM_CRYPTO_ENABLE_APIE:
977 		if (!ap_instructions_available()) {
978 			mutex_unlock(&kvm->lock);
979 			return -EOPNOTSUPP;
980 		}
981 		kvm->arch.crypto.apie = 1;
982 		break;
983 	case KVM_S390_VM_CRYPTO_DISABLE_APIE:
984 		if (!ap_instructions_available()) {
985 			mutex_unlock(&kvm->lock);
986 			return -EOPNOTSUPP;
987 		}
988 		kvm->arch.crypto.apie = 0;
989 		break;
990 	default:
991 		mutex_unlock(&kvm->lock);
992 		return -ENXIO;
993 	}
994 
995 	kvm_s390_vcpu_crypto_reset_all(kvm);
996 	mutex_unlock(&kvm->lock);
997 	return 0;
998 }
999 
1000 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
1001 {
1002 	int cx;
1003 	struct kvm_vcpu *vcpu;
1004 
1005 	kvm_for_each_vcpu(cx, vcpu, kvm)
1006 		kvm_s390_sync_request(req, vcpu);
1007 }
1008 
1009 /*
1010  * Must be called with kvm->srcu held to avoid races on memslots, and with
1011  * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
1012  */
1013 static int kvm_s390_vm_start_migration(struct kvm *kvm)
1014 {
1015 	struct kvm_memory_slot *ms;
1016 	struct kvm_memslots *slots;
1017 	unsigned long ram_pages = 0;
1018 	int slotnr;
1019 
1020 	/* migration mode already enabled */
1021 	if (kvm->arch.migration_mode)
1022 		return 0;
1023 	slots = kvm_memslots(kvm);
1024 	if (!slots || !slots->used_slots)
1025 		return -EINVAL;
1026 
1027 	if (!kvm->arch.use_cmma) {
1028 		kvm->arch.migration_mode = 1;
1029 		return 0;
1030 	}
1031 	/* mark all the pages in active slots as dirty */
1032 	for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
1033 		ms = slots->memslots + slotnr;
1034 		if (!ms->dirty_bitmap)
1035 			return -EINVAL;
1036 		/*
1037 		 * The second half of the bitmap is only used on x86,
1038 		 * and would be wasted otherwise, so we put it to good
1039 		 * use here to keep track of the state of the storage
1040 		 * attributes.
1041 		 */
1042 		memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
1043 		ram_pages += ms->npages;
1044 	}
1045 	atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
1046 	kvm->arch.migration_mode = 1;
1047 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
1048 	return 0;
1049 }
1050 
1051 /*
1052  * Must be called with kvm->slots_lock to avoid races with ourselves and
1053  * kvm_s390_vm_start_migration.
1054  */
1055 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
1056 {
1057 	/* migration mode already disabled */
1058 	if (!kvm->arch.migration_mode)
1059 		return 0;
1060 	kvm->arch.migration_mode = 0;
1061 	if (kvm->arch.use_cmma)
1062 		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
1063 	return 0;
1064 }
1065 
1066 static int kvm_s390_vm_set_migration(struct kvm *kvm,
1067 				     struct kvm_device_attr *attr)
1068 {
1069 	int res = -ENXIO;
1070 
1071 	mutex_lock(&kvm->slots_lock);
1072 	switch (attr->attr) {
1073 	case KVM_S390_VM_MIGRATION_START:
1074 		res = kvm_s390_vm_start_migration(kvm);
1075 		break;
1076 	case KVM_S390_VM_MIGRATION_STOP:
1077 		res = kvm_s390_vm_stop_migration(kvm);
1078 		break;
1079 	default:
1080 		break;
1081 	}
1082 	mutex_unlock(&kvm->slots_lock);
1083 
1084 	return res;
1085 }
1086 
1087 static int kvm_s390_vm_get_migration(struct kvm *kvm,
1088 				     struct kvm_device_attr *attr)
1089 {
1090 	u64 mig = kvm->arch.migration_mode;
1091 
1092 	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1093 		return -ENXIO;
1094 
1095 	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
1096 		return -EFAULT;
1097 	return 0;
1098 }
1099 
1100 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1101 {
1102 	struct kvm_s390_vm_tod_clock gtod;
1103 
1104 	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1105 		return -EFAULT;
1106 
1107 	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1108 		return -EINVAL;
1109 	kvm_s390_set_tod_clock(kvm, &gtod);
1110 
1111 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1112 		gtod.epoch_idx, gtod.tod);
1113 
1114 	return 0;
1115 }
1116 
1117 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1118 {
1119 	u8 gtod_high;
1120 
1121 	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1122 					   sizeof(gtod_high)))
1123 		return -EFAULT;
1124 
1125 	if (gtod_high != 0)
1126 		return -EINVAL;
1127 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1128 
1129 	return 0;
1130 }
1131 
1132 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1133 {
1134 	struct kvm_s390_vm_tod_clock gtod = { 0 };
1135 
1136 	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1137 			   sizeof(gtod.tod)))
1138 		return -EFAULT;
1139 
1140 	kvm_s390_set_tod_clock(kvm, &gtod);
1141 	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1142 	return 0;
1143 }
1144 
1145 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1146 {
1147 	int ret;
1148 
1149 	if (attr->flags)
1150 		return -EINVAL;
1151 
1152 	switch (attr->attr) {
1153 	case KVM_S390_VM_TOD_EXT:
1154 		ret = kvm_s390_set_tod_ext(kvm, attr);
1155 		break;
1156 	case KVM_S390_VM_TOD_HIGH:
1157 		ret = kvm_s390_set_tod_high(kvm, attr);
1158 		break;
1159 	case KVM_S390_VM_TOD_LOW:
1160 		ret = kvm_s390_set_tod_low(kvm, attr);
1161 		break;
1162 	default:
1163 		ret = -ENXIO;
1164 		break;
1165 	}
1166 	return ret;
1167 }
1168 
1169 static void kvm_s390_get_tod_clock(struct kvm *kvm,
1170 				   struct kvm_s390_vm_tod_clock *gtod)
1171 {
1172 	union tod_clock clk;
1173 
1174 	preempt_disable();
1175 
1176 	store_tod_clock_ext(&clk);
1177 
1178 	gtod->tod = clk.tod + kvm->arch.epoch;
1179 	gtod->epoch_idx = 0;
1180 	if (test_kvm_facility(kvm, 139)) {
1181 		gtod->epoch_idx = clk.ei + kvm->arch.epdx;
1182 		if (gtod->tod < clk.tod)
1183 			gtod->epoch_idx += 1;
1184 	}
1185 
1186 	preempt_enable();
1187 }
1188 
1189 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1190 {
1191 	struct kvm_s390_vm_tod_clock gtod;
1192 
1193 	memset(&gtod, 0, sizeof(gtod));
1194 	kvm_s390_get_tod_clock(kvm, &gtod);
1195 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1196 		return -EFAULT;
1197 
1198 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1199 		gtod.epoch_idx, gtod.tod);
1200 	return 0;
1201 }
1202 
1203 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1204 {
1205 	u8 gtod_high = 0;
1206 
1207 	if (copy_to_user((void __user *)attr->addr, &gtod_high,
1208 					 sizeof(gtod_high)))
1209 		return -EFAULT;
1210 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1211 
1212 	return 0;
1213 }
1214 
1215 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1216 {
1217 	u64 gtod;
1218 
1219 	gtod = kvm_s390_get_tod_clock_fast(kvm);
1220 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1221 		return -EFAULT;
1222 	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1223 
1224 	return 0;
1225 }
1226 
1227 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1228 {
1229 	int ret;
1230 
1231 	if (attr->flags)
1232 		return -EINVAL;
1233 
1234 	switch (attr->attr) {
1235 	case KVM_S390_VM_TOD_EXT:
1236 		ret = kvm_s390_get_tod_ext(kvm, attr);
1237 		break;
1238 	case KVM_S390_VM_TOD_HIGH:
1239 		ret = kvm_s390_get_tod_high(kvm, attr);
1240 		break;
1241 	case KVM_S390_VM_TOD_LOW:
1242 		ret = kvm_s390_get_tod_low(kvm, attr);
1243 		break;
1244 	default:
1245 		ret = -ENXIO;
1246 		break;
1247 	}
1248 	return ret;
1249 }
1250 
1251 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1252 {
1253 	struct kvm_s390_vm_cpu_processor *proc;
1254 	u16 lowest_ibc, unblocked_ibc;
1255 	int ret = 0;
1256 
1257 	mutex_lock(&kvm->lock);
1258 	if (kvm->created_vcpus) {
1259 		ret = -EBUSY;
1260 		goto out;
1261 	}
1262 	proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1263 	if (!proc) {
1264 		ret = -ENOMEM;
1265 		goto out;
1266 	}
1267 	if (!copy_from_user(proc, (void __user *)attr->addr,
1268 			    sizeof(*proc))) {
1269 		kvm->arch.model.cpuid = proc->cpuid;
1270 		lowest_ibc = sclp.ibc >> 16 & 0xfff;
1271 		unblocked_ibc = sclp.ibc & 0xfff;
1272 		if (lowest_ibc && proc->ibc) {
1273 			if (proc->ibc > unblocked_ibc)
1274 				kvm->arch.model.ibc = unblocked_ibc;
1275 			else if (proc->ibc < lowest_ibc)
1276 				kvm->arch.model.ibc = lowest_ibc;
1277 			else
1278 				kvm->arch.model.ibc = proc->ibc;
1279 		}
1280 		memcpy(kvm->arch.model.fac_list, proc->fac_list,
1281 		       S390_ARCH_FAC_LIST_SIZE_BYTE);
1282 		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1283 			 kvm->arch.model.ibc,
1284 			 kvm->arch.model.cpuid);
1285 		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1286 			 kvm->arch.model.fac_list[0],
1287 			 kvm->arch.model.fac_list[1],
1288 			 kvm->arch.model.fac_list[2]);
1289 	} else
1290 		ret = -EFAULT;
1291 	kfree(proc);
1292 out:
1293 	mutex_unlock(&kvm->lock);
1294 	return ret;
1295 }
1296 
1297 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1298 				       struct kvm_device_attr *attr)
1299 {
1300 	struct kvm_s390_vm_cpu_feat data;
1301 
1302 	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1303 		return -EFAULT;
1304 	if (!bitmap_subset((unsigned long *) data.feat,
1305 			   kvm_s390_available_cpu_feat,
1306 			   KVM_S390_VM_CPU_FEAT_NR_BITS))
1307 		return -EINVAL;
1308 
1309 	mutex_lock(&kvm->lock);
1310 	if (kvm->created_vcpus) {
1311 		mutex_unlock(&kvm->lock);
1312 		return -EBUSY;
1313 	}
1314 	bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1315 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1316 	mutex_unlock(&kvm->lock);
1317 	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1318 			 data.feat[0],
1319 			 data.feat[1],
1320 			 data.feat[2]);
1321 	return 0;
1322 }
1323 
1324 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1325 					  struct kvm_device_attr *attr)
1326 {
1327 	mutex_lock(&kvm->lock);
1328 	if (kvm->created_vcpus) {
1329 		mutex_unlock(&kvm->lock);
1330 		return -EBUSY;
1331 	}
1332 
1333 	if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1334 			   sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1335 		mutex_unlock(&kvm->lock);
1336 		return -EFAULT;
1337 	}
1338 	mutex_unlock(&kvm->lock);
1339 
1340 	VM_EVENT(kvm, 3, "SET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1341 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1342 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1343 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1344 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1345 	VM_EVENT(kvm, 3, "SET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1346 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1347 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1348 	VM_EVENT(kvm, 3, "SET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1349 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1350 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1351 	VM_EVENT(kvm, 3, "SET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1352 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1353 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1354 	VM_EVENT(kvm, 3, "SET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1355 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1356 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1357 	VM_EVENT(kvm, 3, "SET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1358 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1359 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1360 	VM_EVENT(kvm, 3, "SET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1361 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1362 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1363 	VM_EVENT(kvm, 3, "SET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1364 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1365 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1366 	VM_EVENT(kvm, 3, "SET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1367 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1368 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1369 	VM_EVENT(kvm, 3, "SET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1370 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1371 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1372 	VM_EVENT(kvm, 3, "SET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1373 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1374 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1375 	VM_EVENT(kvm, 3, "SET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1376 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1377 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1378 	VM_EVENT(kvm, 3, "SET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1379 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1380 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1381 	VM_EVENT(kvm, 3, "SET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1382 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1383 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1384 	VM_EVENT(kvm, 3, "SET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1385 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1386 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1387 	VM_EVENT(kvm, 3, "SET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1388 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1389 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1390 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1391 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1392 	VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1393 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1394 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1395 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1396 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1397 
1398 	return 0;
1399 }
1400 
1401 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1402 {
1403 	int ret = -ENXIO;
1404 
1405 	switch (attr->attr) {
1406 	case KVM_S390_VM_CPU_PROCESSOR:
1407 		ret = kvm_s390_set_processor(kvm, attr);
1408 		break;
1409 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1410 		ret = kvm_s390_set_processor_feat(kvm, attr);
1411 		break;
1412 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1413 		ret = kvm_s390_set_processor_subfunc(kvm, attr);
1414 		break;
1415 	}
1416 	return ret;
1417 }
1418 
1419 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1420 {
1421 	struct kvm_s390_vm_cpu_processor *proc;
1422 	int ret = 0;
1423 
1424 	proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1425 	if (!proc) {
1426 		ret = -ENOMEM;
1427 		goto out;
1428 	}
1429 	proc->cpuid = kvm->arch.model.cpuid;
1430 	proc->ibc = kvm->arch.model.ibc;
1431 	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1432 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1433 	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1434 		 kvm->arch.model.ibc,
1435 		 kvm->arch.model.cpuid);
1436 	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1437 		 kvm->arch.model.fac_list[0],
1438 		 kvm->arch.model.fac_list[1],
1439 		 kvm->arch.model.fac_list[2]);
1440 	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1441 		ret = -EFAULT;
1442 	kfree(proc);
1443 out:
1444 	return ret;
1445 }
1446 
1447 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1448 {
1449 	struct kvm_s390_vm_cpu_machine *mach;
1450 	int ret = 0;
1451 
1452 	mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT);
1453 	if (!mach) {
1454 		ret = -ENOMEM;
1455 		goto out;
1456 	}
1457 	get_cpu_id((struct cpuid *) &mach->cpuid);
1458 	mach->ibc = sclp.ibc;
1459 	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1460 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1461 	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1462 	       sizeof(S390_lowcore.stfle_fac_list));
1463 	VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1464 		 kvm->arch.model.ibc,
1465 		 kvm->arch.model.cpuid);
1466 	VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1467 		 mach->fac_mask[0],
1468 		 mach->fac_mask[1],
1469 		 mach->fac_mask[2]);
1470 	VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1471 		 mach->fac_list[0],
1472 		 mach->fac_list[1],
1473 		 mach->fac_list[2]);
1474 	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1475 		ret = -EFAULT;
1476 	kfree(mach);
1477 out:
1478 	return ret;
1479 }
1480 
1481 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1482 				       struct kvm_device_attr *attr)
1483 {
1484 	struct kvm_s390_vm_cpu_feat data;
1485 
1486 	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1487 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1488 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1489 		return -EFAULT;
1490 	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1491 			 data.feat[0],
1492 			 data.feat[1],
1493 			 data.feat[2]);
1494 	return 0;
1495 }
1496 
1497 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1498 				     struct kvm_device_attr *attr)
1499 {
1500 	struct kvm_s390_vm_cpu_feat data;
1501 
1502 	bitmap_copy((unsigned long *) data.feat,
1503 		    kvm_s390_available_cpu_feat,
1504 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1505 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1506 		return -EFAULT;
1507 	VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1508 			 data.feat[0],
1509 			 data.feat[1],
1510 			 data.feat[2]);
1511 	return 0;
1512 }
1513 
1514 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1515 					  struct kvm_device_attr *attr)
1516 {
1517 	if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1518 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1519 		return -EFAULT;
1520 
1521 	VM_EVENT(kvm, 3, "GET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1522 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1523 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1524 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1525 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1526 	VM_EVENT(kvm, 3, "GET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1527 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1528 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1529 	VM_EVENT(kvm, 3, "GET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1530 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1531 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1532 	VM_EVENT(kvm, 3, "GET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1533 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1534 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1535 	VM_EVENT(kvm, 3, "GET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1536 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1537 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1538 	VM_EVENT(kvm, 3, "GET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1539 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1540 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1541 	VM_EVENT(kvm, 3, "GET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1542 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1543 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1544 	VM_EVENT(kvm, 3, "GET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1545 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1546 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1547 	VM_EVENT(kvm, 3, "GET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1548 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1549 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1550 	VM_EVENT(kvm, 3, "GET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1551 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1552 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1553 	VM_EVENT(kvm, 3, "GET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1554 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1555 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1556 	VM_EVENT(kvm, 3, "GET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1557 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1558 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1559 	VM_EVENT(kvm, 3, "GET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1560 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1561 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1562 	VM_EVENT(kvm, 3, "GET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1563 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1564 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1565 	VM_EVENT(kvm, 3, "GET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1566 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1567 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1568 	VM_EVENT(kvm, 3, "GET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1569 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1570 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1571 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1572 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1573 	VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1574 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1575 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1576 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1577 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1578 
1579 	return 0;
1580 }
1581 
1582 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1583 					struct kvm_device_attr *attr)
1584 {
1585 	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1586 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1587 		return -EFAULT;
1588 
1589 	VM_EVENT(kvm, 3, "GET: host  PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1590 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1591 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1592 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1593 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1594 	VM_EVENT(kvm, 3, "GET: host  PTFF   subfunc 0x%16.16lx.%16.16lx",
1595 		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1596 		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1597 	VM_EVENT(kvm, 3, "GET: host  KMAC   subfunc 0x%16.16lx.%16.16lx",
1598 		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1599 		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1600 	VM_EVENT(kvm, 3, "GET: host  KMC    subfunc 0x%16.16lx.%16.16lx",
1601 		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1602 		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1603 	VM_EVENT(kvm, 3, "GET: host  KM     subfunc 0x%16.16lx.%16.16lx",
1604 		 ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1605 		 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1606 	VM_EVENT(kvm, 3, "GET: host  KIMD   subfunc 0x%16.16lx.%16.16lx",
1607 		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1608 		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1609 	VM_EVENT(kvm, 3, "GET: host  KLMD   subfunc 0x%16.16lx.%16.16lx",
1610 		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1611 		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1612 	VM_EVENT(kvm, 3, "GET: host  PCKMO  subfunc 0x%16.16lx.%16.16lx",
1613 		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1614 		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1615 	VM_EVENT(kvm, 3, "GET: host  KMCTR  subfunc 0x%16.16lx.%16.16lx",
1616 		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1617 		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1618 	VM_EVENT(kvm, 3, "GET: host  KMF    subfunc 0x%16.16lx.%16.16lx",
1619 		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1620 		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1621 	VM_EVENT(kvm, 3, "GET: host  KMO    subfunc 0x%16.16lx.%16.16lx",
1622 		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1623 		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1624 	VM_EVENT(kvm, 3, "GET: host  PCC    subfunc 0x%16.16lx.%16.16lx",
1625 		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1626 		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1627 	VM_EVENT(kvm, 3, "GET: host  PPNO   subfunc 0x%16.16lx.%16.16lx",
1628 		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1629 		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1630 	VM_EVENT(kvm, 3, "GET: host  KMA    subfunc 0x%16.16lx.%16.16lx",
1631 		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1632 		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1633 	VM_EVENT(kvm, 3, "GET: host  KDSA   subfunc 0x%16.16lx.%16.16lx",
1634 		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1635 		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1636 	VM_EVENT(kvm, 3, "GET: host  SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1637 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1638 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1639 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1640 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1641 	VM_EVENT(kvm, 3, "GET: host  DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1642 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1643 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1644 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1645 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1646 
1647 	return 0;
1648 }
1649 
1650 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1651 {
1652 	int ret = -ENXIO;
1653 
1654 	switch (attr->attr) {
1655 	case KVM_S390_VM_CPU_PROCESSOR:
1656 		ret = kvm_s390_get_processor(kvm, attr);
1657 		break;
1658 	case KVM_S390_VM_CPU_MACHINE:
1659 		ret = kvm_s390_get_machine(kvm, attr);
1660 		break;
1661 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1662 		ret = kvm_s390_get_processor_feat(kvm, attr);
1663 		break;
1664 	case KVM_S390_VM_CPU_MACHINE_FEAT:
1665 		ret = kvm_s390_get_machine_feat(kvm, attr);
1666 		break;
1667 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1668 		ret = kvm_s390_get_processor_subfunc(kvm, attr);
1669 		break;
1670 	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1671 		ret = kvm_s390_get_machine_subfunc(kvm, attr);
1672 		break;
1673 	}
1674 	return ret;
1675 }
1676 
1677 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1678 {
1679 	int ret;
1680 
1681 	switch (attr->group) {
1682 	case KVM_S390_VM_MEM_CTRL:
1683 		ret = kvm_s390_set_mem_control(kvm, attr);
1684 		break;
1685 	case KVM_S390_VM_TOD:
1686 		ret = kvm_s390_set_tod(kvm, attr);
1687 		break;
1688 	case KVM_S390_VM_CPU_MODEL:
1689 		ret = kvm_s390_set_cpu_model(kvm, attr);
1690 		break;
1691 	case KVM_S390_VM_CRYPTO:
1692 		ret = kvm_s390_vm_set_crypto(kvm, attr);
1693 		break;
1694 	case KVM_S390_VM_MIGRATION:
1695 		ret = kvm_s390_vm_set_migration(kvm, attr);
1696 		break;
1697 	default:
1698 		ret = -ENXIO;
1699 		break;
1700 	}
1701 
1702 	return ret;
1703 }
1704 
1705 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1706 {
1707 	int ret;
1708 
1709 	switch (attr->group) {
1710 	case KVM_S390_VM_MEM_CTRL:
1711 		ret = kvm_s390_get_mem_control(kvm, attr);
1712 		break;
1713 	case KVM_S390_VM_TOD:
1714 		ret = kvm_s390_get_tod(kvm, attr);
1715 		break;
1716 	case KVM_S390_VM_CPU_MODEL:
1717 		ret = kvm_s390_get_cpu_model(kvm, attr);
1718 		break;
1719 	case KVM_S390_VM_MIGRATION:
1720 		ret = kvm_s390_vm_get_migration(kvm, attr);
1721 		break;
1722 	default:
1723 		ret = -ENXIO;
1724 		break;
1725 	}
1726 
1727 	return ret;
1728 }
1729 
1730 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1731 {
1732 	int ret;
1733 
1734 	switch (attr->group) {
1735 	case KVM_S390_VM_MEM_CTRL:
1736 		switch (attr->attr) {
1737 		case KVM_S390_VM_MEM_ENABLE_CMMA:
1738 		case KVM_S390_VM_MEM_CLR_CMMA:
1739 			ret = sclp.has_cmma ? 0 : -ENXIO;
1740 			break;
1741 		case KVM_S390_VM_MEM_LIMIT_SIZE:
1742 			ret = 0;
1743 			break;
1744 		default:
1745 			ret = -ENXIO;
1746 			break;
1747 		}
1748 		break;
1749 	case KVM_S390_VM_TOD:
1750 		switch (attr->attr) {
1751 		case KVM_S390_VM_TOD_LOW:
1752 		case KVM_S390_VM_TOD_HIGH:
1753 			ret = 0;
1754 			break;
1755 		default:
1756 			ret = -ENXIO;
1757 			break;
1758 		}
1759 		break;
1760 	case KVM_S390_VM_CPU_MODEL:
1761 		switch (attr->attr) {
1762 		case KVM_S390_VM_CPU_PROCESSOR:
1763 		case KVM_S390_VM_CPU_MACHINE:
1764 		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1765 		case KVM_S390_VM_CPU_MACHINE_FEAT:
1766 		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1767 		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1768 			ret = 0;
1769 			break;
1770 		default:
1771 			ret = -ENXIO;
1772 			break;
1773 		}
1774 		break;
1775 	case KVM_S390_VM_CRYPTO:
1776 		switch (attr->attr) {
1777 		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1778 		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1779 		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1780 		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1781 			ret = 0;
1782 			break;
1783 		case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1784 		case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1785 			ret = ap_instructions_available() ? 0 : -ENXIO;
1786 			break;
1787 		default:
1788 			ret = -ENXIO;
1789 			break;
1790 		}
1791 		break;
1792 	case KVM_S390_VM_MIGRATION:
1793 		ret = 0;
1794 		break;
1795 	default:
1796 		ret = -ENXIO;
1797 		break;
1798 	}
1799 
1800 	return ret;
1801 }
1802 
1803 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1804 {
1805 	uint8_t *keys;
1806 	uint64_t hva;
1807 	int srcu_idx, i, r = 0;
1808 
1809 	if (args->flags != 0)
1810 		return -EINVAL;
1811 
1812 	/* Is this guest using storage keys? */
1813 	if (!mm_uses_skeys(current->mm))
1814 		return KVM_S390_GET_SKEYS_NONE;
1815 
1816 	/* Enforce sane limit on memory allocation */
1817 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1818 		return -EINVAL;
1819 
1820 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
1821 	if (!keys)
1822 		return -ENOMEM;
1823 
1824 	mmap_read_lock(current->mm);
1825 	srcu_idx = srcu_read_lock(&kvm->srcu);
1826 	for (i = 0; i < args->count; i++) {
1827 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1828 		if (kvm_is_error_hva(hva)) {
1829 			r = -EFAULT;
1830 			break;
1831 		}
1832 
1833 		r = get_guest_storage_key(current->mm, hva, &keys[i]);
1834 		if (r)
1835 			break;
1836 	}
1837 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1838 	mmap_read_unlock(current->mm);
1839 
1840 	if (!r) {
1841 		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1842 				 sizeof(uint8_t) * args->count);
1843 		if (r)
1844 			r = -EFAULT;
1845 	}
1846 
1847 	kvfree(keys);
1848 	return r;
1849 }
1850 
1851 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1852 {
1853 	uint8_t *keys;
1854 	uint64_t hva;
1855 	int srcu_idx, i, r = 0;
1856 	bool unlocked;
1857 
1858 	if (args->flags != 0)
1859 		return -EINVAL;
1860 
1861 	/* Enforce sane limit on memory allocation */
1862 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1863 		return -EINVAL;
1864 
1865 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
1866 	if (!keys)
1867 		return -ENOMEM;
1868 
1869 	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1870 			   sizeof(uint8_t) * args->count);
1871 	if (r) {
1872 		r = -EFAULT;
1873 		goto out;
1874 	}
1875 
1876 	/* Enable storage key handling for the guest */
1877 	r = s390_enable_skey();
1878 	if (r)
1879 		goto out;
1880 
1881 	i = 0;
1882 	mmap_read_lock(current->mm);
1883 	srcu_idx = srcu_read_lock(&kvm->srcu);
1884         while (i < args->count) {
1885 		unlocked = false;
1886 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1887 		if (kvm_is_error_hva(hva)) {
1888 			r = -EFAULT;
1889 			break;
1890 		}
1891 
1892 		/* Lowest order bit is reserved */
1893 		if (keys[i] & 0x01) {
1894 			r = -EINVAL;
1895 			break;
1896 		}
1897 
1898 		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1899 		if (r) {
1900 			r = fixup_user_fault(current->mm, hva,
1901 					     FAULT_FLAG_WRITE, &unlocked);
1902 			if (r)
1903 				break;
1904 		}
1905 		if (!r)
1906 			i++;
1907 	}
1908 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1909 	mmap_read_unlock(current->mm);
1910 out:
1911 	kvfree(keys);
1912 	return r;
1913 }
1914 
/*
 * Base address and length must be sent at the start of each block, therefore
 * it's cheaper to send some clean data, as long as it's less than the size of
 * two longs.
 *
 * kvm_s390_get_cmma() stops collecting values once the next dirty page is
 * more than this many frames beyond the current one.
 */
#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
/*
 * Upper bound on the number of CMMA values handled by a single get/set CMMA
 * bits request; the storage key limit is reused for consistency.
 */
#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1923 
1924 /*
1925  * Similar to gfn_to_memslot, but returns the index of a memslot also when the
1926  * address falls in a hole. In that case the index of one of the memslots
1927  * bordering the hole is returned.
1928  */
1929 static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1930 {
1931 	int start = 0, end = slots->used_slots;
1932 	int slot = atomic_read(&slots->lru_slot);
1933 	struct kvm_memory_slot *memslots = slots->memslots;
1934 
1935 	if (gfn >= memslots[slot].base_gfn &&
1936 	    gfn < memslots[slot].base_gfn + memslots[slot].npages)
1937 		return slot;
1938 
1939 	while (start < end) {
1940 		slot = start + (end - start) / 2;
1941 
1942 		if (gfn >= memslots[slot].base_gfn)
1943 			end = slot;
1944 		else
1945 			start = slot + 1;
1946 	}
1947 
1948 	if (start >= slots->used_slots)
1949 		return slots->used_slots - 1;
1950 
1951 	if (gfn >= memslots[start].base_gfn &&
1952 	    gfn < memslots[start].base_gfn + memslots[start].npages) {
1953 		atomic_set(&slots->lru_slot, start);
1954 	}
1955 
1956 	return start;
1957 }
1958 
1959 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1960 			      u8 *res, unsigned long bufsize)
1961 {
1962 	unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1963 
1964 	args->count = 0;
1965 	while (args->count < bufsize) {
1966 		hva = gfn_to_hva(kvm, cur_gfn);
1967 		/*
1968 		 * We return an error if the first value was invalid, but we
1969 		 * return successfully if at least one value was copied.
1970 		 */
1971 		if (kvm_is_error_hva(hva))
1972 			return args->count ? 0 : -EFAULT;
1973 		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1974 			pgstev = 0;
1975 		res[args->count++] = (pgstev >> 24) & 0x43;
1976 		cur_gfn++;
1977 	}
1978 
1979 	return 0;
1980 }
1981 
1982 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
1983 					      unsigned long cur_gfn)
1984 {
1985 	int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
1986 	struct kvm_memory_slot *ms = slots->memslots + slotidx;
1987 	unsigned long ofs = cur_gfn - ms->base_gfn;
1988 
1989 	if (ms->base_gfn + ms->npages <= cur_gfn) {
1990 		slotidx--;
1991 		/* If we are above the highest slot, wrap around */
1992 		if (slotidx < 0)
1993 			slotidx = slots->used_slots - 1;
1994 
1995 		ms = slots->memslots + slotidx;
1996 		ofs = 0;
1997 	}
1998 	ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
1999 	while ((slotidx > 0) && (ofs >= ms->npages)) {
2000 		slotidx--;
2001 		ms = slots->memslots + slotidx;
2002 		ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
2003 	}
2004 	return ms->base_gfn + ofs;
2005 }
2006 
2007 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
2008 			     u8 *res, unsigned long bufsize)
2009 {
2010 	unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
2011 	struct kvm_memslots *slots = kvm_memslots(kvm);
2012 	struct kvm_memory_slot *ms;
2013 
2014 	if (unlikely(!slots->used_slots))
2015 		return 0;
2016 
2017 	cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
2018 	ms = gfn_to_memslot(kvm, cur_gfn);
2019 	args->count = 0;
2020 	args->start_gfn = cur_gfn;
2021 	if (!ms)
2022 		return 0;
2023 	next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2024 	mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
2025 
2026 	while (args->count < bufsize) {
2027 		hva = gfn_to_hva(kvm, cur_gfn);
2028 		if (kvm_is_error_hva(hva))
2029 			return 0;
2030 		/* Decrement only if we actually flipped the bit to 0 */
2031 		if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2032 			atomic64_dec(&kvm->arch.cmma_dirty_pages);
2033 		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2034 			pgstev = 0;
2035 		/* Save the value */
2036 		res[args->count++] = (pgstev >> 24) & 0x43;
2037 		/* If the next bit is too far away, stop. */
2038 		if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2039 			return 0;
2040 		/* If we reached the previous "next", find the next one */
2041 		if (cur_gfn == next_gfn)
2042 			next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2043 		/* Reached the end of memory or of the buffer, stop */
2044 		if ((next_gfn >= mem_end) ||
2045 		    (next_gfn - args->start_gfn >= bufsize))
2046 			return 0;
2047 		cur_gfn++;
2048 		/* Reached the end of the current memslot, take the next one. */
2049 		if (cur_gfn - ms->base_gfn >= ms->npages) {
2050 			ms = gfn_to_memslot(kvm, cur_gfn);
2051 			if (!ms)
2052 				return 0;
2053 		}
2054 	}
2055 	return 0;
2056 }
2057 
2058 /*
2059  * This function searches for the next page with dirty CMMA attributes, and
2060  * saves the attributes in the buffer up to either the end of the buffer or
2061  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2062  * no trailing clean bytes are saved.
2063  * In case no dirty bits were found, or if CMMA was not enabled or used, the
2064  * output buffer will indicate 0 as length.
2065  */
2066 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2067 				  struct kvm_s390_cmma_log *args)
2068 {
2069 	unsigned long bufsize;
2070 	int srcu_idx, peek, ret;
2071 	u8 *values;
2072 
2073 	if (!kvm->arch.use_cmma)
2074 		return -ENXIO;
2075 	/* Invalid/unsupported flags were specified */
2076 	if (args->flags & ~KVM_S390_CMMA_PEEK)
2077 		return -EINVAL;
2078 	/* Migration mode query, and we are not doing a migration */
2079 	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2080 	if (!peek && !kvm->arch.migration_mode)
2081 		return -EINVAL;
2082 	/* CMMA is disabled or was not used, or the buffer has length zero */
2083 	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2084 	if (!bufsize || !kvm->mm->context.uses_cmm) {
2085 		memset(args, 0, sizeof(*args));
2086 		return 0;
2087 	}
2088 	/* We are not peeking, and there are no dirty pages */
2089 	if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2090 		memset(args, 0, sizeof(*args));
2091 		return 0;
2092 	}
2093 
2094 	values = vmalloc(bufsize);
2095 	if (!values)
2096 		return -ENOMEM;
2097 
2098 	mmap_read_lock(kvm->mm);
2099 	srcu_idx = srcu_read_lock(&kvm->srcu);
2100 	if (peek)
2101 		ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2102 	else
2103 		ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2104 	srcu_read_unlock(&kvm->srcu, srcu_idx);
2105 	mmap_read_unlock(kvm->mm);
2106 
2107 	if (kvm->arch.migration_mode)
2108 		args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2109 	else
2110 		args->remaining = 0;
2111 
2112 	if (copy_to_user((void __user *)args->values, values, args->count))
2113 		ret = -EFAULT;
2114 
2115 	vfree(values);
2116 	return ret;
2117 }
2118 
2119 /*
2120  * This function sets the CMMA attributes for the given pages. If the input
2121  * buffer has zero length, no action is taken, otherwise the attributes are
2122  * set and the mm->context.uses_cmm flag is set.
2123  */
2124 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2125 				  const struct kvm_s390_cmma_log *args)
2126 {
2127 	unsigned long hva, mask, pgstev, i;
2128 	uint8_t *bits;
2129 	int srcu_idx, r = 0;
2130 
2131 	mask = args->mask;
2132 
2133 	if (!kvm->arch.use_cmma)
2134 		return -ENXIO;
2135 	/* invalid/unsupported flags */
2136 	if (args->flags != 0)
2137 		return -EINVAL;
2138 	/* Enforce sane limit on memory allocation */
2139 	if (args->count > KVM_S390_CMMA_SIZE_MAX)
2140 		return -EINVAL;
2141 	/* Nothing to do */
2142 	if (args->count == 0)
2143 		return 0;
2144 
2145 	bits = vmalloc(array_size(sizeof(*bits), args->count));
2146 	if (!bits)
2147 		return -ENOMEM;
2148 
2149 	r = copy_from_user(bits, (void __user *)args->values, args->count);
2150 	if (r) {
2151 		r = -EFAULT;
2152 		goto out;
2153 	}
2154 
2155 	mmap_read_lock(kvm->mm);
2156 	srcu_idx = srcu_read_lock(&kvm->srcu);
2157 	for (i = 0; i < args->count; i++) {
2158 		hva = gfn_to_hva(kvm, args->start_gfn + i);
2159 		if (kvm_is_error_hva(hva)) {
2160 			r = -EFAULT;
2161 			break;
2162 		}
2163 
2164 		pgstev = bits[i];
2165 		pgstev = pgstev << 24;
2166 		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2167 		set_pgste_bits(kvm->mm, hva, mask, pgstev);
2168 	}
2169 	srcu_read_unlock(&kvm->srcu, srcu_idx);
2170 	mmap_read_unlock(kvm->mm);
2171 
2172 	if (!kvm->mm->context.uses_cmm) {
2173 		mmap_write_lock(kvm->mm);
2174 		kvm->mm->context.uses_cmm = 1;
2175 		mmap_write_unlock(kvm->mm);
2176 	}
2177 out:
2178 	vfree(bits);
2179 	return r;
2180 }
2181 
2182 static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
2183 {
2184 	struct kvm_vcpu *vcpu;
2185 	u16 rc, rrc;
2186 	int ret = 0;
2187 	int i;
2188 
2189 	/*
2190 	 * We ignore failures and try to destroy as many CPUs as possible.
2191 	 * At the same time we must not free the assigned resources when
2192 	 * this fails, as the ultravisor has still access to that memory.
2193 	 * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
2194 	 * behind.
2195 	 * We want to return the first failure rc and rrc, though.
2196 	 */
2197 	kvm_for_each_vcpu(i, vcpu, kvm) {
2198 		mutex_lock(&vcpu->mutex);
2199 		if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) {
2200 			*rcp = rc;
2201 			*rrcp = rrc;
2202 			ret = -EIO;
2203 		}
2204 		mutex_unlock(&vcpu->mutex);
2205 	}
2206 	return ret;
2207 }
2208 
2209 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2210 {
2211 	int i, r = 0;
2212 	u16 dummy;
2213 
2214 	struct kvm_vcpu *vcpu;
2215 
2216 	kvm_for_each_vcpu(i, vcpu, kvm) {
2217 		mutex_lock(&vcpu->mutex);
2218 		r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
2219 		mutex_unlock(&vcpu->mutex);
2220 		if (r)
2221 			break;
2222 	}
2223 	if (r)
2224 		kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
2225 	return r;
2226 }
2227 
2228 static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
2229 {
2230 	int r = 0;
2231 	u16 dummy;
2232 	void __user *argp = (void __user *)cmd->data;
2233 
2234 	switch (cmd->cmd) {
2235 	case KVM_PV_ENABLE: {
2236 		r = -EINVAL;
2237 		if (kvm_s390_pv_is_protected(kvm))
2238 			break;
2239 
2240 		/*
2241 		 *  FMT 4 SIE needs esca. As we never switch back to bsca from
2242 		 *  esca, we need no cleanup in the error cases below
2243 		 */
2244 		r = sca_switch_to_extended(kvm);
2245 		if (r)
2246 			break;
2247 
2248 		mmap_write_lock(current->mm);
2249 		r = gmap_mark_unmergeable();
2250 		mmap_write_unlock(current->mm);
2251 		if (r)
2252 			break;
2253 
2254 		r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
2255 		if (r)
2256 			break;
2257 
2258 		r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
2259 		if (r)
2260 			kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
2261 
2262 		/* we need to block service interrupts from now on */
2263 		set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2264 		break;
2265 	}
2266 	case KVM_PV_DISABLE: {
2267 		r = -EINVAL;
2268 		if (!kvm_s390_pv_is_protected(kvm))
2269 			break;
2270 
2271 		r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2272 		/*
2273 		 * If a CPU could not be destroyed, destroy VM will also fail.
2274 		 * There is no point in trying to destroy it. Instead return
2275 		 * the rc and rrc from the first CPU that failed destroying.
2276 		 */
2277 		if (r)
2278 			break;
2279 		r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);
2280 
2281 		/* no need to block service interrupts any more */
2282 		clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2283 		break;
2284 	}
2285 	case KVM_PV_SET_SEC_PARMS: {
2286 		struct kvm_s390_pv_sec_parm parms = {};
2287 		void *hdr;
2288 
2289 		r = -EINVAL;
2290 		if (!kvm_s390_pv_is_protected(kvm))
2291 			break;
2292 
2293 		r = -EFAULT;
2294 		if (copy_from_user(&parms, argp, sizeof(parms)))
2295 			break;
2296 
2297 		/* Currently restricted to 8KB */
2298 		r = -EINVAL;
2299 		if (parms.length > PAGE_SIZE * 2)
2300 			break;
2301 
2302 		r = -ENOMEM;
2303 		hdr = vmalloc(parms.length);
2304 		if (!hdr)
2305 			break;
2306 
2307 		r = -EFAULT;
2308 		if (!copy_from_user(hdr, (void __user *)parms.origin,
2309 				    parms.length))
2310 			r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
2311 						      &cmd->rc, &cmd->rrc);
2312 
2313 		vfree(hdr);
2314 		break;
2315 	}
2316 	case KVM_PV_UNPACK: {
2317 		struct kvm_s390_pv_unp unp = {};
2318 
2319 		r = -EINVAL;
2320 		if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm))
2321 			break;
2322 
2323 		r = -EFAULT;
2324 		if (copy_from_user(&unp, argp, sizeof(unp)))
2325 			break;
2326 
2327 		r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
2328 				       &cmd->rc, &cmd->rrc);
2329 		break;
2330 	}
2331 	case KVM_PV_VERIFY: {
2332 		r = -EINVAL;
2333 		if (!kvm_s390_pv_is_protected(kvm))
2334 			break;
2335 
2336 		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2337 				  UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
2338 		KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
2339 			     cmd->rrc);
2340 		break;
2341 	}
2342 	case KVM_PV_PREP_RESET: {
2343 		r = -EINVAL;
2344 		if (!kvm_s390_pv_is_protected(kvm))
2345 			break;
2346 
2347 		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2348 				  UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
2349 		KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
2350 			     cmd->rc, cmd->rrc);
2351 		break;
2352 	}
2353 	case KVM_PV_UNSHARE_ALL: {
2354 		r = -EINVAL;
2355 		if (!kvm_s390_pv_is_protected(kvm))
2356 			break;
2357 
2358 		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2359 				  UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
2360 		KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
2361 			     cmd->rc, cmd->rrc);
2362 		break;
2363 	}
2364 	default:
2365 		r = -ENOTTY;
2366 	}
2367 	return r;
2368 }
2369 
2370 long kvm_arch_vm_ioctl(struct file *filp,
2371 		       unsigned int ioctl, unsigned long arg)
2372 {
2373 	struct kvm *kvm = filp->private_data;
2374 	void __user *argp = (void __user *)arg;
2375 	struct kvm_device_attr attr;
2376 	int r;
2377 
2378 	switch (ioctl) {
2379 	case KVM_S390_INTERRUPT: {
2380 		struct kvm_s390_interrupt s390int;
2381 
2382 		r = -EFAULT;
2383 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
2384 			break;
2385 		r = kvm_s390_inject_vm(kvm, &s390int);
2386 		break;
2387 	}
2388 	case KVM_CREATE_IRQCHIP: {
2389 		struct kvm_irq_routing_entry routing;
2390 
2391 		r = -EINVAL;
2392 		if (kvm->arch.use_irqchip) {
2393 			/* Set up dummy routing. */
2394 			memset(&routing, 0, sizeof(routing));
2395 			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2396 		}
2397 		break;
2398 	}
2399 	case KVM_SET_DEVICE_ATTR: {
2400 		r = -EFAULT;
2401 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2402 			break;
2403 		r = kvm_s390_vm_set_attr(kvm, &attr);
2404 		break;
2405 	}
2406 	case KVM_GET_DEVICE_ATTR: {
2407 		r = -EFAULT;
2408 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2409 			break;
2410 		r = kvm_s390_vm_get_attr(kvm, &attr);
2411 		break;
2412 	}
2413 	case KVM_HAS_DEVICE_ATTR: {
2414 		r = -EFAULT;
2415 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2416 			break;
2417 		r = kvm_s390_vm_has_attr(kvm, &attr);
2418 		break;
2419 	}
2420 	case KVM_S390_GET_SKEYS: {
2421 		struct kvm_s390_skeys args;
2422 
2423 		r = -EFAULT;
2424 		if (copy_from_user(&args, argp,
2425 				   sizeof(struct kvm_s390_skeys)))
2426 			break;
2427 		r = kvm_s390_get_skeys(kvm, &args);
2428 		break;
2429 	}
2430 	case KVM_S390_SET_SKEYS: {
2431 		struct kvm_s390_skeys args;
2432 
2433 		r = -EFAULT;
2434 		if (copy_from_user(&args, argp,
2435 				   sizeof(struct kvm_s390_skeys)))
2436 			break;
2437 		r = kvm_s390_set_skeys(kvm, &args);
2438 		break;
2439 	}
2440 	case KVM_S390_GET_CMMA_BITS: {
2441 		struct kvm_s390_cmma_log args;
2442 
2443 		r = -EFAULT;
2444 		if (copy_from_user(&args, argp, sizeof(args)))
2445 			break;
2446 		mutex_lock(&kvm->slots_lock);
2447 		r = kvm_s390_get_cmma_bits(kvm, &args);
2448 		mutex_unlock(&kvm->slots_lock);
2449 		if (!r) {
2450 			r = copy_to_user(argp, &args, sizeof(args));
2451 			if (r)
2452 				r = -EFAULT;
2453 		}
2454 		break;
2455 	}
2456 	case KVM_S390_SET_CMMA_BITS: {
2457 		struct kvm_s390_cmma_log args;
2458 
2459 		r = -EFAULT;
2460 		if (copy_from_user(&args, argp, sizeof(args)))
2461 			break;
2462 		mutex_lock(&kvm->slots_lock);
2463 		r = kvm_s390_set_cmma_bits(kvm, &args);
2464 		mutex_unlock(&kvm->slots_lock);
2465 		break;
2466 	}
2467 	case KVM_S390_PV_COMMAND: {
2468 		struct kvm_pv_cmd args;
2469 
2470 		/* protvirt means user sigp */
2471 		kvm->arch.user_cpu_state_ctrl = 1;
2472 		r = 0;
2473 		if (!is_prot_virt_host()) {
2474 			r = -EINVAL;
2475 			break;
2476 		}
2477 		if (copy_from_user(&args, argp, sizeof(args))) {
2478 			r = -EFAULT;
2479 			break;
2480 		}
2481 		if (args.flags) {
2482 			r = -EINVAL;
2483 			break;
2484 		}
2485 		mutex_lock(&kvm->lock);
2486 		r = kvm_s390_handle_pv(kvm, &args);
2487 		mutex_unlock(&kvm->lock);
2488 		if (copy_to_user(argp, &args, sizeof(args))) {
2489 			r = -EFAULT;
2490 			break;
2491 		}
2492 		break;
2493 	}
2494 	default:
2495 		r = -ENOTTY;
2496 	}
2497 
2498 	return r;
2499 }
2500 
2501 static int kvm_s390_apxa_installed(void)
2502 {
2503 	struct ap_config_info info;
2504 
2505 	if (ap_instructions_available()) {
2506 		if (ap_qci(&info) == 0)
2507 			return info.apxa;
2508 	}
2509 
2510 	return 0;
2511 }
2512 
2513 /*
2514  * The format of the crypto control block (CRYCB) is specified in the 3 low
2515  * order bits of the CRYCB designation (CRYCBD) field as follows:
2516  * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2517  *	     AP extended addressing (APXA) facility are installed.
2518  * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2519  * Format 2: Both the APXA and MSAX3 facilities are installed
2520  */
2521 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2522 {
2523 	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2524 
2525 	/* Clear the CRYCB format bits - i.e., set format 0 by default */
2526 	kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2527 
2528 	/* Check whether MSAX3 is installed */
2529 	if (!test_kvm_facility(kvm, 76))
2530 		return;
2531 
2532 	if (kvm_s390_apxa_installed())
2533 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2534 	else
2535 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2536 }
2537 
2538 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2539 			       unsigned long *aqm, unsigned long *adm)
2540 {
2541 	struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2542 
2543 	mutex_lock(&kvm->lock);
2544 	kvm_s390_vcpu_block_all(kvm);
2545 
2546 	switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2547 	case CRYCB_FORMAT2: /* APCB1 use 256 bits */
2548 		memcpy(crycb->apcb1.apm, apm, 32);
2549 		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2550 			 apm[0], apm[1], apm[2], apm[3]);
2551 		memcpy(crycb->apcb1.aqm, aqm, 32);
2552 		VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2553 			 aqm[0], aqm[1], aqm[2], aqm[3]);
2554 		memcpy(crycb->apcb1.adm, adm, 32);
2555 		VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2556 			 adm[0], adm[1], adm[2], adm[3]);
2557 		break;
2558 	case CRYCB_FORMAT1:
2559 	case CRYCB_FORMAT0: /* Fall through both use APCB0 */
2560 		memcpy(crycb->apcb0.apm, apm, 8);
2561 		memcpy(crycb->apcb0.aqm, aqm, 2);
2562 		memcpy(crycb->apcb0.adm, adm, 2);
2563 		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2564 			 apm[0], *((unsigned short *)aqm),
2565 			 *((unsigned short *)adm));
2566 		break;
2567 	default:	/* Can not happen */
2568 		break;
2569 	}
2570 
2571 	/* recreate the shadow crycb for each vcpu */
2572 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2573 	kvm_s390_vcpu_unblock_all(kvm);
2574 	mutex_unlock(&kvm->lock);
2575 }
2576 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
2577 
2578 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2579 {
2580 	mutex_lock(&kvm->lock);
2581 	kvm_s390_vcpu_block_all(kvm);
2582 
2583 	memset(&kvm->arch.crypto.crycb->apcb0, 0,
2584 	       sizeof(kvm->arch.crypto.crycb->apcb0));
2585 	memset(&kvm->arch.crypto.crycb->apcb1, 0,
2586 	       sizeof(kvm->arch.crypto.crycb->apcb1));
2587 
2588 	VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2589 	/* recreate the shadow crycb for each vcpu */
2590 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2591 	kvm_s390_vcpu_unblock_all(kvm);
2592 	mutex_unlock(&kvm->lock);
2593 }
2594 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2595 
2596 static u64 kvm_s390_get_initial_cpuid(void)
2597 {
2598 	struct cpuid cpuid;
2599 
2600 	get_cpu_id(&cpuid);
2601 	cpuid.version = 0xff;
2602 	return *((u64 *) &cpuid);
2603 }
2604 
2605 static void kvm_s390_crypto_init(struct kvm *kvm)
2606 {
2607 	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2608 	kvm_s390_set_crycb_format(kvm);
2609 
2610 	if (!test_kvm_facility(kvm, 76))
2611 		return;
2612 
2613 	/* Enable AES/DEA protected key functions by default */
2614 	kvm->arch.crypto.aes_kw = 1;
2615 	kvm->arch.crypto.dea_kw = 1;
2616 	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2617 			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2618 	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2619 			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2620 }
2621 
2622 static void sca_dispose(struct kvm *kvm)
2623 {
2624 	if (kvm->arch.use_esca)
2625 		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2626 	else
2627 		free_page((unsigned long)(kvm->arch.sca));
2628 	kvm->arch.sca = NULL;
2629 }
2630 
2631 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2632 {
2633 	gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
2634 	int i, rc;
2635 	char debug_name[16];
2636 	static unsigned long sca_offset;
2637 
2638 	rc = -EINVAL;
2639 #ifdef CONFIG_KVM_S390_UCONTROL
2640 	if (type & ~KVM_VM_S390_UCONTROL)
2641 		goto out_err;
2642 	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2643 		goto out_err;
2644 #else
2645 	if (type)
2646 		goto out_err;
2647 #endif
2648 
2649 	rc = s390_enable_sie();
2650 	if (rc)
2651 		goto out_err;
2652 
2653 	rc = -ENOMEM;
2654 
2655 	if (!sclp.has_64bscao)
2656 		alloc_flags |= GFP_DMA;
2657 	rwlock_init(&kvm->arch.sca_lock);
2658 	/* start with basic SCA */
2659 	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2660 	if (!kvm->arch.sca)
2661 		goto out_err;
2662 	mutex_lock(&kvm_lock);
2663 	sca_offset += 16;
2664 	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2665 		sca_offset = 0;
2666 	kvm->arch.sca = (struct bsca_block *)
2667 			((char *) kvm->arch.sca + sca_offset);
2668 	mutex_unlock(&kvm_lock);
2669 
2670 	sprintf(debug_name, "kvm-%u", current->pid);
2671 
2672 	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2673 	if (!kvm->arch.dbf)
2674 		goto out_err;
2675 
2676 	BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2677 	kvm->arch.sie_page2 =
2678 	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA);
2679 	if (!kvm->arch.sie_page2)
2680 		goto out_err;
2681 
2682 	kvm->arch.sie_page2->kvm = kvm;
2683 	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2684 
2685 	for (i = 0; i < kvm_s390_fac_size(); i++) {
2686 		kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
2687 					      (kvm_s390_fac_base[i] |
2688 					       kvm_s390_fac_ext[i]);
2689 		kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
2690 					      kvm_s390_fac_base[i];
2691 	}
2692 	kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2693 
2694 	/* we are always in czam mode - even on pre z14 machines */
2695 	set_kvm_facility(kvm->arch.model.fac_mask, 138);
2696 	set_kvm_facility(kvm->arch.model.fac_list, 138);
2697 	/* we emulate STHYI in kvm */
2698 	set_kvm_facility(kvm->arch.model.fac_mask, 74);
2699 	set_kvm_facility(kvm->arch.model.fac_list, 74);
2700 	if (MACHINE_HAS_TLB_GUEST) {
2701 		set_kvm_facility(kvm->arch.model.fac_mask, 147);
2702 		set_kvm_facility(kvm->arch.model.fac_list, 147);
2703 	}
2704 
2705 	if (css_general_characteristics.aiv && test_facility(65))
2706 		set_kvm_facility(kvm->arch.model.fac_mask, 65);
2707 
2708 	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2709 	kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2710 
2711 	kvm_s390_crypto_init(kvm);
2712 
2713 	mutex_init(&kvm->arch.float_int.ais_lock);
2714 	spin_lock_init(&kvm->arch.float_int.lock);
2715 	for (i = 0; i < FIRQ_LIST_COUNT; i++)
2716 		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2717 	init_waitqueue_head(&kvm->arch.ipte_wq);
2718 	mutex_init(&kvm->arch.ipte_mutex);
2719 
2720 	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2721 	VM_EVENT(kvm, 3, "vm created with type %lu", type);
2722 
2723 	if (type & KVM_VM_S390_UCONTROL) {
2724 		kvm->arch.gmap = NULL;
2725 		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2726 	} else {
2727 		if (sclp.hamax == U64_MAX)
2728 			kvm->arch.mem_limit = TASK_SIZE_MAX;
2729 		else
2730 			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2731 						    sclp.hamax + 1);
2732 		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2733 		if (!kvm->arch.gmap)
2734 			goto out_err;
2735 		kvm->arch.gmap->private = kvm;
2736 		kvm->arch.gmap->pfault_enabled = 0;
2737 	}
2738 
2739 	kvm->arch.use_pfmfi = sclp.has_pfmfi;
2740 	kvm->arch.use_skf = sclp.has_skey;
2741 	spin_lock_init(&kvm->arch.start_stop_lock);
2742 	kvm_s390_vsie_init(kvm);
2743 	if (use_gisa)
2744 		kvm_s390_gisa_init(kvm);
2745 	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2746 
2747 	return 0;
2748 out_err:
2749 	free_page((unsigned long)kvm->arch.sie_page2);
2750 	debug_unregister(kvm->arch.dbf);
2751 	sca_dispose(kvm);
2752 	KVM_EVENT(3, "creation of vm failed: %d", rc);
2753 	return rc;
2754 }
2755 
2756 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2757 {
2758 	u16 rc, rrc;
2759 
2760 	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2761 	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2762 	kvm_s390_clear_local_irqs(vcpu);
2763 	kvm_clear_async_pf_completion_queue(vcpu);
2764 	if (!kvm_is_ucontrol(vcpu->kvm))
2765 		sca_del_vcpu(vcpu);
2766 
2767 	if (kvm_is_ucontrol(vcpu->kvm))
2768 		gmap_remove(vcpu->arch.gmap);
2769 
2770 	if (vcpu->kvm->arch.use_cmma)
2771 		kvm_s390_vcpu_unsetup_cmma(vcpu);
2772 	/* We can not hold the vcpu mutex here, we are already dying */
2773 	if (kvm_s390_pv_cpu_get_handle(vcpu))
2774 		kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
2775 	free_page((unsigned long)(vcpu->arch.sie_block));
2776 }
2777 
2778 static void kvm_free_vcpus(struct kvm *kvm)
2779 {
2780 	unsigned int i;
2781 	struct kvm_vcpu *vcpu;
2782 
2783 	kvm_for_each_vcpu(i, vcpu, kvm)
2784 		kvm_vcpu_destroy(vcpu);
2785 
2786 	mutex_lock(&kvm->lock);
2787 	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2788 		kvm->vcpus[i] = NULL;
2789 
2790 	atomic_set(&kvm->online_vcpus, 0);
2791 	mutex_unlock(&kvm->lock);
2792 }
2793 
/*
 * Architecture-specific VM teardown: frees the vcpus, the SCA and the GISA,
 * deinitialises the protected VM if a handle exists, and releases the debug
 * area, sie_page2, the gmap (non-ucontrol VMs only), the adapters, pending
 * floating interrupts and the vSIE state.
 */
void kvm_arch_destroy_vm(struct kvm *kvm)
{
	/* rc/rrc of the PV deinit are collected but not evaluated here */
	u16 rc, rrc;

	kvm_free_vcpus(kvm);
	sca_dispose(kvm);
	kvm_s390_gisa_destroy(kvm);
	/*
	 * We are already at the end of life and kvm->lock is not taken.
	 * This is ok as the file descriptor is closed by now and nobody
	 * can mess with the pv state. To avoid lockdep_assert_held from
	 * complaining we do not use kvm_s390_pv_is_protected.
	 */
	if (kvm_s390_pv_get_handle(kvm))
		kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
	debug_unregister(kvm->arch.dbf);
	free_page((unsigned long)kvm->arch.sie_page2);
	/* ucontrol VMs have no VM-wide gmap; theirs are removed per vcpu */
	if (!kvm_is_ucontrol(kvm))
		gmap_remove(kvm->arch.gmap);
	kvm_s390_destroy_adapters(kvm);
	kvm_s390_clear_float_irqs(kvm);
	kvm_s390_vsie_destroy(kvm);
	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
}
2818 
2819 /* Section: vcpu related */
2820 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2821 {
2822 	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2823 	if (!vcpu->arch.gmap)
2824 		return -ENOMEM;
2825 	vcpu->arch.gmap->private = vcpu->kvm;
2826 
2827 	return 0;
2828 }
2829 
2830 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2831 {
2832 	if (!kvm_s390_use_sca_entries())
2833 		return;
2834 	read_lock(&vcpu->kvm->arch.sca_lock);
2835 	if (vcpu->kvm->arch.use_esca) {
2836 		struct esca_block *sca = vcpu->kvm->arch.sca;
2837 
2838 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2839 		sca->cpu[vcpu->vcpu_id].sda = 0;
2840 	} else {
2841 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2842 
2843 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2844 		sca->cpu[vcpu->vcpu_id].sda = 0;
2845 	}
2846 	read_unlock(&vcpu->kvm->arch.sca_lock);
2847 }
2848 
2849 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2850 {
2851 	if (!kvm_s390_use_sca_entries()) {
2852 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2853 
2854 		/* we still need the basic sca for the ipte control */
2855 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2856 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2857 		return;
2858 	}
2859 	read_lock(&vcpu->kvm->arch.sca_lock);
2860 	if (vcpu->kvm->arch.use_esca) {
2861 		struct esca_block *sca = vcpu->kvm->arch.sca;
2862 
2863 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2864 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2865 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2866 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2867 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2868 	} else {
2869 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2870 
2871 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2872 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2873 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2874 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2875 	}
2876 	read_unlock(&vcpu->kvm->arch.sca_lock);
2877 }
2878 
2879 /* Basic SCA to Extended SCA data copy routines */
2880 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2881 {
2882 	d->sda = s->sda;
2883 	d->sigp_ctrl.c = s->sigp_ctrl.c;
2884 	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2885 }
2886 
2887 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2888 {
2889 	int i;
2890 
2891 	d->ipte_control = s->ipte_control;
2892 	d->mcn[0] = s->mcn;
2893 	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2894 		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2895 }
2896 
2897 static int sca_switch_to_extended(struct kvm *kvm)
2898 {
2899 	struct bsca_block *old_sca = kvm->arch.sca;
2900 	struct esca_block *new_sca;
2901 	struct kvm_vcpu *vcpu;
2902 	unsigned int vcpu_idx;
2903 	u32 scaol, scaoh;
2904 
2905 	if (kvm->arch.use_esca)
2906 		return 0;
2907 
2908 	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO);
2909 	if (!new_sca)
2910 		return -ENOMEM;
2911 
2912 	scaoh = (u32)((u64)(new_sca) >> 32);
2913 	scaol = (u32)(u64)(new_sca) & ~0x3fU;
2914 
2915 	kvm_s390_vcpu_block_all(kvm);
2916 	write_lock(&kvm->arch.sca_lock);
2917 
2918 	sca_copy_b_to_e(new_sca, old_sca);
2919 
2920 	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2921 		vcpu->arch.sie_block->scaoh = scaoh;
2922 		vcpu->arch.sie_block->scaol = scaol;
2923 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2924 	}
2925 	kvm->arch.sca = new_sca;
2926 	kvm->arch.use_esca = 1;
2927 
2928 	write_unlock(&kvm->arch.sca_lock);
2929 	kvm_s390_vcpu_unblock_all(kvm);
2930 
2931 	free_page((unsigned long)old_sca);
2932 
2933 	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2934 		 old_sca, kvm->arch.sca);
2935 	return 0;
2936 }
2937 
2938 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2939 {
2940 	int rc;
2941 
2942 	if (!kvm_s390_use_sca_entries()) {
2943 		if (id < KVM_MAX_VCPUS)
2944 			return true;
2945 		return false;
2946 	}
2947 	if (id < KVM_S390_BSCA_CPU_SLOTS)
2948 		return true;
2949 	if (!sclp.has_esca || !sclp.has_64bscao)
2950 		return false;
2951 
2952 	mutex_lock(&kvm->lock);
2953 	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2954 	mutex_unlock(&kvm->lock);
2955 
2956 	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2957 }
2958 
/*
 * Start accounting guest cpu timer consumption: record the current TOD value
 * in cputm_start inside a cputm_seqcount write section. Warns once if
 * accounting was already started (cputm_start != 0).
 *
 * needs disabled preemption to protect from TOD sync and vcpu_load/put
 */
static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	vcpu->arch.cputm_start = get_tod_clock_fast();
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
}
2967 
2968 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2969 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2970 {
2971 	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2972 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2973 	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2974 	vcpu->arch.cputm_start = 0;
2975 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2976 }
2977 
/*
 * Mark cpu timer accounting as enabled for this vcpu and start it right away.
 * Warns once if accounting was already enabled.
 *
 * needs disabled preemption to protect from TOD sync and vcpu_load/put
 */
static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
	vcpu->arch.cputm_enabled = true;
	__start_cpu_timer_accounting(vcpu);
}
2985 
/*
 * Stop cpu timer accounting for this vcpu and mark it as disabled.
 * Warns once if accounting was not enabled.
 *
 * needs disabled preemption to protect from TOD sync and vcpu_load/put
 */
static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
	__stop_cpu_timer_accounting(vcpu);
	vcpu->arch.cputm_enabled = false;
}
2993 
/* Preemption-safe wrapper around __enable_cpu_timer_accounting(). */
static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	__enable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
3000 
/* Preemption-safe wrapper around __disable_cpu_timer_accounting(). */
static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	__disable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
3007 
/*
 * set the cpu timer - may only be called from the VCPU thread itself
 *
 * The new value is stored in the SIE block inside a cputm_seqcount write
 * section. If accounting is enabled, cputm_start is re-based to the current
 * TOD value in the same section, so that time consumed before the update is
 * not subtracted from the new value later on.
 */
void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	if (vcpu->arch.cputm_enabled)
		vcpu->arch.cputm_start = get_tod_clock_fast();
	vcpu->arch.sie_block->cputm = cputm;
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
	preempt_enable();
}
3019 
/*
 * update and get the cpu timer - can also be called from other VCPU threads
 *
 * While accounting is disabled, the value stored in the SIE block is returned
 * as is. Otherwise the stored value is reduced by the TOD time that passed
 * since cputm_start, and the read is repeated until it was not disturbed by
 * a writer of cputm_seqcount.
 */
__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
{
	unsigned int seq;
	__u64 value;

	if (unlikely(!vcpu->arch.cputm_enabled))
		return vcpu->arch.sie_block->cputm;

	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	do {
		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
		/*
		 * If the writer would ever execute a read in the critical
		 * section, e.g. in irq context, we have a deadlock.
		 */
		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
		value = vcpu->arch.sie_block->cputm;
		/* if cputm_start is 0, accounting is being started/stopped */
		if (likely(vcpu->arch.cputm_start))
			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
		/*
		 * The low bit of seq is masked off for the retry check, so a
		 * sequence that was odd when sampled (write in progress) never
		 * compares equal and the loop runs again.
		 */
	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
	preempt_enable();
	return value;
}
3045 
/*
 * Called when the vcpu is loaded onto host cpu @cpu: enable the gmap that was
 * remembered in enabled_gmap, flag the vcpu as running, resume cpu timer
 * accounting (if enabled and the vcpu is not idle) and record the host cpu.
 */
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{

	gmap_enable(vcpu->arch.enabled_gmap);
	kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
		__start_cpu_timer_accounting(vcpu);
	vcpu->cpu = cpu;
}
3055 
/*
 * Counterpart of kvm_arch_vcpu_load(): forget the host cpu, pause cpu timer
 * accounting (if enabled and the vcpu is not idle), clear the running flag,
 * and remember the currently enabled gmap before disabling it so that the
 * next load can re-enable the same one.
 */
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	vcpu->cpu = -1;
	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
		__stop_cpu_timer_accounting(vcpu);
	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
	vcpu->arch.enabled_gmap = gmap_get_enabled();
	gmap_disable(vcpu->arch.enabled_gmap);

}
3066 
3067 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
3068 {
3069 	mutex_lock(&vcpu->kvm->lock);
3070 	preempt_disable();
3071 	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
3072 	vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
3073 	preempt_enable();
3074 	mutex_unlock(&vcpu->kvm->lock);
3075 	if (!kvm_is_ucontrol(vcpu->kvm)) {
3076 		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
3077 		sca_add_vcpu(vcpu);
3078 	}
3079 	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
3080 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3081 	/* make vcpu_load load the right gmap on the first trigger */
3082 	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
3083 }
3084 
3085 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
3086 {
3087 	if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
3088 	    test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
3089 		return true;
3090 	return false;
3091 }
3092 
3093 static bool kvm_has_pckmo_ecc(struct kvm *kvm)
3094 {
3095 	/* At least one ECC subfunction must be present */
3096 	return kvm_has_pckmo_subfunc(kvm, 32) ||
3097 	       kvm_has_pckmo_subfunc(kvm, 33) ||
3098 	       kvm_has_pckmo_subfunc(kvm, 34) ||
3099 	       kvm_has_pckmo_subfunc(kvm, 40) ||
3100 	       kvm_has_pckmo_subfunc(kvm, 41);
3101 
3102 }
3103 
3104 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
3105 {
3106 	/*
3107 	 * If the AP instructions are not being interpreted and the MSAX3
3108 	 * facility is not configured for the guest, there is nothing to set up.
3109 	 */
3110 	if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
3111 		return;
3112 
3113 	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
3114 	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
3115 	vcpu->arch.sie_block->eca &= ~ECA_APIE;
3116 	vcpu->arch.sie_block->ecd &= ~ECD_ECC;
3117 
3118 	if (vcpu->kvm->arch.crypto.apie)
3119 		vcpu->arch.sie_block->eca |= ECA_APIE;
3120 
3121 	/* Set up protected key support */
3122 	if (vcpu->kvm->arch.crypto.aes_kw) {
3123 		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
3124 		/* ecc is also wrapped with AES key */
3125 		if (kvm_has_pckmo_ecc(vcpu->kvm))
3126 			vcpu->arch.sie_block->ecd |= ECD_ECC;
3127 	}
3128 
3129 	if (vcpu->kvm->arch.crypto.dea_kw)
3130 		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
3131 }
3132 
/*
 * Undo kvm_s390_vcpu_setup_cmma(): free the page referenced by cbrlo in the
 * SIE block and reset the field to 0.
 */
void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
{
	free_page(vcpu->arch.sie_block->cbrlo);
	vcpu->arch.sie_block->cbrlo = 0;
}
3138 
3139 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
3140 {
3141 	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL_ACCOUNT);
3142 	if (!vcpu->arch.sie_block->cbrlo)
3143 		return -ENOMEM;
3144 	return 0;
3145 }
3146 
3147 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
3148 {
3149 	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
3150 
3151 	vcpu->arch.sie_block->ibc = model->ibc;
3152 	if (test_kvm_facility(vcpu->kvm, 7))
3153 		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
3154 }
3155 
/*
 * Initialize the SIE control block of a newly created vcpu.
 *
 * The vcpu starts out stopped. Execution controls (ecb, ecb2, eca, ecd, ictl)
 * are enabled depending on the facilities offered to the guest and on what
 * sclp reports for the host. Beyond that the function sets up CMMA (if used
 * by the VM), the clock comparator hrtimer, the crypto controls and - for a
 * protected VM - creates the protected cpu.
 *
 * Return: 0 on success, otherwise the error returned by
 * kvm_s390_vcpu_setup_cmma() or kvm_s390_pv_create_cpu().
 */
static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
{
	int rc = 0;
	u16 uvrc, uvrrc;

	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
						    CPUSTAT_SM |
						    CPUSTAT_STOPPED);

	/* facility 78 takes precedence over facility 8 here */
	if (test_kvm_facility(vcpu->kvm, 78))
		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
	else if (test_kvm_facility(vcpu->kvm, 8))
		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);

	kvm_s390_vcpu_setup_model(vcpu);

	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
	if (MACHINE_HAS_ESOP)
		vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
	if (test_kvm_facility(vcpu->kvm, 9))
		vcpu->arch.sie_block->ecb |= ECB_SRSI;
	if (test_kvm_facility(vcpu->kvm, 73))
		vcpu->arch.sie_block->ecb |= ECB_TE;

	if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
		vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
	if (test_kvm_facility(vcpu->kvm, 130))
		vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
	/* eca is assigned, not or-ed: anything set in it before is dropped */
	vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
	if (sclp.has_cei)
		vcpu->arch.sie_block->eca |= ECA_CEI;
	if (sclp.has_ib)
		vcpu->arch.sie_block->eca |= ECA_IB;
	if (sclp.has_siif)
		vcpu->arch.sie_block->eca |= ECA_SII;
	if (sclp.has_sigpif)
		vcpu->arch.sie_block->eca |= ECA_SIGPI;
	if (test_kvm_facility(vcpu->kvm, 129)) {
		vcpu->arch.sie_block->eca |= ECA_VX;
		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
	}
	if (test_kvm_facility(vcpu->kvm, 139))
		vcpu->arch.sie_block->ecd |= ECD_MEF;
	if (test_kvm_facility(vcpu->kvm, 156))
		vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
	/* a non-zero gd means a gisa origin was stored in the SIE block */
	if (vcpu->arch.sie_block->gd) {
		vcpu->arch.sie_block->eca |= ECA_AIV;
		VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
			   vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
	}
	/* sdnx and riccb live in the sync regs area of the kvm_run structure */
	vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
					| SDNXC;
	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;

	/* without sclp.has_kss, ISKE/SSKE/RRBE are intercepted instead */
	if (sclp.has_kss)
		kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
	else
		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;

	if (vcpu->kvm->arch.use_cmma) {
		rc = kvm_s390_vcpu_setup_cmma(vcpu);
		if (rc)
			return rc;
	}
	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;

	vcpu->arch.sie_block->hpid = HPID_KVM;

	kvm_s390_vcpu_crypto_setup(vcpu);

	mutex_lock(&vcpu->kvm->lock);
	if (kvm_s390_pv_is_protected(vcpu->kvm)) {
		rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
		/* on failure release the CMMA page set up above (cbrlo) */
		if (rc)
			kvm_s390_vcpu_unsetup_cmma(vcpu);
	}
	mutex_unlock(&vcpu->kvm->lock);

	return rc;
}
3237 
3238 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3239 {
3240 	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3241 		return -EINVAL;
3242 	return 0;
3243 }
3244 
/*
 * Architecture specific part of vcpu creation.
 *
 * Allocates the zeroed page holding the SIE block, fills in the basic SIE
 * block fields, announces the sync regs that are valid for this vcpu in
 * run->kvm_valid_regs, creates a private gmap for ucontrol VMs and finally
 * runs kvm_s390_vcpu_setup().
 *
 * Return: 0 on success, -ENOMEM if the SIE page could not be allocated,
 * otherwise the error of __kvm_ucontrol_vcpu_init() or kvm_s390_vcpu_setup().
 * On failure everything allocated here is released again.
 */
int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
{
	struct sie_page *sie_page;
	int rc;

	/* one sie_page has to fill exactly the page allocated below */
	BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT);
	if (!sie_page)
		return -ENOMEM;

	vcpu->arch.sie_block = &sie_page->sie_block;
	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;

	/* the real guest size will always be smaller than msl */
	vcpu->arch.sie_block->mso = 0;
	vcpu->arch.sie_block->msl = sclp.hamax;

	vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
	spin_lock_init(&vcpu->arch.local_int.lock);
	/* gd stays 0 when the VM has no gisa origin */
	vcpu->arch.sie_block->gd = (u32)(u64)vcpu->kvm->arch.gisa_int.origin;
	if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
		vcpu->arch.sie_block->gd |= GISA_FORMAT1;
	seqcount_init(&vcpu->arch.cputm_seqcount);

	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	/* sync regs offered unconditionally; facility dependent ones follow */
	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
				    KVM_SYNC_GPRS |
				    KVM_SYNC_ACRS |
				    KVM_SYNC_CRS |
				    KVM_SYNC_ARCH0 |
				    KVM_SYNC_PFAULT |
				    KVM_SYNC_DIAG318;
	kvm_s390_set_prefix(vcpu, 0);
	if (test_kvm_facility(vcpu->kvm, 64))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
	if (test_kvm_facility(vcpu->kvm, 82))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
	if (test_kvm_facility(vcpu->kvm, 133))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
	if (test_kvm_facility(vcpu->kvm, 156))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
	/* fprs can be synchronized via vrs, even if the guest has no vx. With
	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
	 */
	if (MACHINE_HAS_VX)
		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
	else
		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;

	if (kvm_is_ucontrol(vcpu->kvm)) {
		rc = __kvm_ucontrol_vcpu_init(vcpu);
		if (rc)
			goto out_free_sie_block;
	}

	VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
		 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
	trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);

	rc = kvm_s390_vcpu_setup(vcpu);
	if (rc)
		goto out_ucontrol_uninit;
	return 0;

	/* error unwinding, in reverse order of the setup above */
out_ucontrol_uninit:
	if (kvm_is_ucontrol(vcpu->kvm))
		gmap_remove(vcpu->arch.gmap);
out_free_sie_block:
	free_page((unsigned long)(vcpu->arch.sie_block));
	return rc;
}
3317 
/*
 * A vcpu counts as runnable when kvm_s390_vcpu_has_irq() reports a pending
 * interrupt for it (called with 0 as second argument).
 */
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	return kvm_s390_vcpu_has_irq(vcpu, 0);
}
3322 
3323 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3324 {
3325 	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3326 }
3327 
/*
 * Set PROG_BLOCK_SIE in prog20 of the SIE block and kick the vcpu out of SIE.
 * The bit stays set until kvm_s390_vcpu_unblock() is called.
 */
void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
{
	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}
3333 
/* Clear the PROG_BLOCK_SIE bit set by kvm_s390_vcpu_block(). */
void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
{
	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
}
3338 
/*
 * Set PROG_REQUEST in prog20 of the SIE block and kick the vcpu out of SIE.
 * The bit is cleared again by kvm_s390_vcpu_request_handled().
 */
static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
{
	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}
3344 
3345 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3346 {
3347 	return atomic_read(&vcpu->arch.sie_block->prog20) &
3348 	       (PROG_BLOCK_SIE | PROG_REQUEST);
3349 }
3350 
/* Clear the PROG_REQUEST bit set by kvm_s390_vcpu_request(). */
static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
{
	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
}
3355 
3356 /*
3357  * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3358  * If the CPU is not running (e.g. waiting as idle) the function will
3359  * return immediately. */
3360 void exit_sie(struct kvm_vcpu *vcpu)
3361 {
3362 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3363 	kvm_s390_vsie_kick(vcpu);
3364 	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3365 		cpu_relax();
3366 }
3367 
/*
 * Kick a guest cpu out of SIE to process a request synchronously
 *
 * The request is queued first; kvm_s390_vcpu_request() then sets
 * PROG_REQUEST and calls exit_sie(), which waits until the vcpu has left SIE.
 */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
	kvm_make_request(req, vcpu);
	kvm_s390_vcpu_request(vcpu);
}
3374 
3375 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3376 			      unsigned long end)
3377 {
3378 	struct kvm *kvm = gmap->private;
3379 	struct kvm_vcpu *vcpu;
3380 	unsigned long prefix;
3381 	int i;
3382 
3383 	if (gmap_is_shadow(gmap))
3384 		return;
3385 	if (start >= 1UL << 31)
3386 		/* We are only interested in prefix pages */
3387 		return;
3388 	kvm_for_each_vcpu(i, vcpu, kvm) {
3389 		/* match against both prefix pages */
3390 		prefix = kvm_s390_get_prefix(vcpu);
3391 		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3392 			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3393 				   start, end);
3394 			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
3395 		}
3396 	}
3397 }
3398 
3399 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3400 {
3401 	/* do not poll with more than halt_poll_max_steal percent of steal time */
3402 	if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3403 	    halt_poll_max_steal) {
3404 		vcpu->stat.halt_no_poll_steal++;
3405 		return true;
3406 	}
3407 	return false;
3408 }
3409 
/*
 * Only present to satisfy the reference from kvm common code; actually
 * calling it is treated as a bug.
 */
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	/* kvm common code refers to this, but never calls it */
	BUG();
	return 0;
}
3416 
3417 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3418 					   struct kvm_one_reg *reg)
3419 {
3420 	int r = -EINVAL;
3421 
3422 	switch (reg->id) {
3423 	case KVM_REG_S390_TODPR:
3424 		r = put_user(vcpu->arch.sie_block->todpr,
3425 			     (u32 __user *)reg->addr);
3426 		break;
3427 	case KVM_REG_S390_EPOCHDIFF:
3428 		r = put_user(vcpu->arch.sie_block->epoch,
3429 			     (u64 __user *)reg->addr);
3430 		break;
3431 	case KVM_REG_S390_CPU_TIMER:
3432 		r = put_user(kvm_s390_get_cpu_timer(vcpu),
3433 			     (u64 __user *)reg->addr);
3434 		break;
3435 	case KVM_REG_S390_CLOCK_COMP:
3436 		r = put_user(vcpu->arch.sie_block->ckc,
3437 			     (u64 __user *)reg->addr);
3438 		break;
3439 	case KVM_REG_S390_PFTOKEN:
3440 		r = put_user(vcpu->arch.pfault_token,
3441 			     (u64 __user *)reg->addr);
3442 		break;
3443 	case KVM_REG_S390_PFCOMPARE:
3444 		r = put_user(vcpu->arch.pfault_compare,
3445 			     (u64 __user *)reg->addr);
3446 		break;
3447 	case KVM_REG_S390_PFSELECT:
3448 		r = put_user(vcpu->arch.pfault_select,
3449 			     (u64 __user *)reg->addr);
3450 		break;
3451 	case KVM_REG_S390_PP:
3452 		r = put_user(vcpu->arch.sie_block->pp,
3453 			     (u64 __user *)reg->addr);
3454 		break;
3455 	case KVM_REG_S390_GBEA:
3456 		r = put_user(vcpu->arch.sie_block->gbea,
3457 			     (u64 __user *)reg->addr);
3458 		break;
3459 	default:
3460 		break;
3461 	}
3462 
3463 	return r;
3464 }
3465 
3466 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3467 					   struct kvm_one_reg *reg)
3468 {
3469 	int r = -EINVAL;
3470 	__u64 val;
3471 
3472 	switch (reg->id) {
3473 	case KVM_REG_S390_TODPR:
3474 		r = get_user(vcpu->arch.sie_block->todpr,
3475 			     (u32 __user *)reg->addr);
3476 		break;
3477 	case KVM_REG_S390_EPOCHDIFF:
3478 		r = get_user(vcpu->arch.sie_block->epoch,
3479 			     (u64 __user *)reg->addr);
3480 		break;
3481 	case KVM_REG_S390_CPU_TIMER:
3482 		r = get_user(val, (u64 __user *)reg->addr);
3483 		if (!r)
3484 			kvm_s390_set_cpu_timer(vcpu, val);
3485 		break;
3486 	case KVM_REG_S390_CLOCK_COMP:
3487 		r = get_user(vcpu->arch.sie_block->ckc,
3488 			     (u64 __user *)reg->addr);
3489 		break;
3490 	case KVM_REG_S390_PFTOKEN:
3491 		r = get_user(vcpu->arch.pfault_token,
3492 			     (u64 __user *)reg->addr);
3493 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3494 			kvm_clear_async_pf_completion_queue(vcpu);
3495 		break;
3496 	case KVM_REG_S390_PFCOMPARE:
3497 		r = get_user(vcpu->arch.pfault_compare,
3498 			     (u64 __user *)reg->addr);
3499 		break;
3500 	case KVM_REG_S390_PFSELECT:
3501 		r = get_user(vcpu->arch.pfault_select,
3502 			     (u64 __user *)reg->addr);
3503 		break;
3504 	case KVM_REG_S390_PP:
3505 		r = get_user(vcpu->arch.sie_block->pp,
3506 			     (u64 __user *)reg->addr);
3507 		break;
3508 	case KVM_REG_S390_GBEA:
3509 		r = get_user(vcpu->arch.sie_block->gbea,
3510 			     (u64 __user *)reg->addr);
3511 		break;
3512 	default:
3513 		break;
3514 	}
3515 
3516 	return r;
3517 }
3518 
3519 static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
3520 {
3521 	vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
3522 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3523 	memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
3524 
3525 	kvm_clear_async_pf_completion_queue(vcpu);
3526 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
3527 		kvm_s390_vcpu_stop(vcpu);
3528 	kvm_s390_clear_local_irqs(vcpu);
3529 }
3530 
3531 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3532 {
3533 	/* Initial reset is a superset of the normal reset */
3534 	kvm_arch_vcpu_ioctl_normal_reset(vcpu);
3535 
3536 	/*
3537 	 * This equals initial cpu reset in pop, but we don't switch to ESA.
3538 	 * We do not only reset the internal data, but also ...
3539 	 */
3540 	vcpu->arch.sie_block->gpsw.mask = 0;
3541 	vcpu->arch.sie_block->gpsw.addr = 0;
3542 	kvm_s390_set_prefix(vcpu, 0);
3543 	kvm_s390_set_cpu_timer(vcpu, 0);
3544 	vcpu->arch.sie_block->ckc = 0;
3545 	memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
3546 	vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
3547 	vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
3548 
3549 	/* ... the data in sync regs */
3550 	memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
3551 	vcpu->run->s.regs.ckc = 0;
3552 	vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
3553 	vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
3554 	vcpu->run->psw_addr = 0;
3555 	vcpu->run->psw_mask = 0;
3556 	vcpu->run->s.regs.todpr = 0;
3557 	vcpu->run->s.regs.cputm = 0;
3558 	vcpu->run->s.regs.ckc = 0;
3559 	vcpu->run->s.regs.pp = 0;
3560 	vcpu->run->s.regs.gbea = 1;
3561 	vcpu->run->s.regs.fpc = 0;
3562 	/*
3563 	 * Do not reset these registers in the protected case, as some of
3564 	 * them are overlayed and they are not accessible in this case
3565 	 * anyway.
3566 	 */
3567 	if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3568 		vcpu->arch.sie_block->gbea = 1;
3569 		vcpu->arch.sie_block->pp = 0;
3570 		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3571 		vcpu->arch.sie_block->todpr = 0;
3572 	}
3573 }
3574 
3575 static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
3576 {
3577 	struct kvm_sync_regs *regs = &vcpu->run->s.regs;
3578 
3579 	/* Clear reset is a superset of the initial reset */
3580 	kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3581 
3582 	memset(&regs->gprs, 0, sizeof(regs->gprs));
3583 	memset(&regs->vrs, 0, sizeof(regs->vrs));
3584 	memset(&regs->acrs, 0, sizeof(regs->acrs));
3585 	memset(&regs->gscb, 0, sizeof(regs->gscb));
3586 
3587 	regs->etoken = 0;
3588 	regs->etoken_extension = 0;
3589 }
3590 
3591 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3592 {
3593 	vcpu_load(vcpu);
3594 	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3595 	vcpu_put(vcpu);
3596 	return 0;
3597 }
3598 
3599 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3600 {
3601 	vcpu_load(vcpu);
3602 	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3603 	vcpu_put(vcpu);
3604 	return 0;
3605 }
3606 
3607 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3608 				  struct kvm_sregs *sregs)
3609 {
3610 	vcpu_load(vcpu);
3611 
3612 	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3613 	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3614 
3615 	vcpu_put(vcpu);
3616 	return 0;
3617 }
3618 
3619 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3620 				  struct kvm_sregs *sregs)
3621 {
3622 	vcpu_load(vcpu);
3623 
3624 	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3625 	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3626 
3627 	vcpu_put(vcpu);
3628 	return 0;
3629 }
3630 
3631 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3632 {
3633 	int ret = 0;
3634 
3635 	vcpu_load(vcpu);
3636 
3637 	if (test_fp_ctl(fpu->fpc)) {
3638 		ret = -EINVAL;
3639 		goto out;
3640 	}
3641 	vcpu->run->s.regs.fpc = fpu->fpc;
3642 	if (MACHINE_HAS_VX)
3643 		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3644 				 (freg_t *) fpu->fprs);
3645 	else
3646 		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3647 
3648 out:
3649 	vcpu_put(vcpu);
3650 	return ret;
3651 }
3652 
3653 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3654 {
3655 	vcpu_load(vcpu);
3656 
3657 	/* make sure we have the latest values */
3658 	save_fpu_regs();
3659 	if (MACHINE_HAS_VX)
3660 		convert_vx_to_fp((freg_t *) fpu->fprs,
3661 				 (__vector128 *) vcpu->run->s.regs.vrs);
3662 	else
3663 		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3664 	fpu->fpc = vcpu->run->s.regs.fpc;
3665 
3666 	vcpu_put(vcpu);
3667 	return 0;
3668 }
3669 
3670 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3671 {
3672 	int rc = 0;
3673 
3674 	if (!is_vcpu_stopped(vcpu))
3675 		rc = -EBUSY;
3676 	else {
3677 		vcpu->run->psw_mask = psw.mask;
3678 		vcpu->run->psw_addr = psw.addr;
3679 	}
3680 	return rc;
3681 }
3682 
/*
 * Address translation ioctl; not implemented here, both arguments are
 * ignored.
 *
 * Return: always -EINVAL.
 */
int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return -EINVAL; /* not implemented yet */
}
3688 
3689 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3690 			      KVM_GUESTDBG_USE_HW_BP | \
3691 			      KVM_GUESTDBG_ENABLE)
3692 
3693 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3694 					struct kvm_guest_debug *dbg)
3695 {
3696 	int rc = 0;
3697 
3698 	vcpu_load(vcpu);
3699 
3700 	vcpu->guest_debug = 0;
3701 	kvm_s390_clear_bp_data(vcpu);
3702 
3703 	if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3704 		rc = -EINVAL;
3705 		goto out;
3706 	}
3707 	if (!sclp.has_gpere) {
3708 		rc = -EINVAL;
3709 		goto out;
3710 	}
3711 
3712 	if (dbg->control & KVM_GUESTDBG_ENABLE) {
3713 		vcpu->guest_debug = dbg->control;
3714 		/* enforce guest PER */
3715 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3716 
3717 		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3718 			rc = kvm_s390_import_bp_data(vcpu, dbg);
3719 	} else {
3720 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3721 		vcpu->arch.guestdbg.last_bp = 0;
3722 	}
3723 
3724 	if (rc) {
3725 		vcpu->guest_debug = 0;
3726 		kvm_s390_clear_bp_data(vcpu);
3727 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3728 	}
3729 
3730 out:
3731 	vcpu_put(vcpu);
3732 	return rc;
3733 }
3734 
3735 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3736 				    struct kvm_mp_state *mp_state)
3737 {
3738 	int ret;
3739 
3740 	vcpu_load(vcpu);
3741 
3742 	/* CHECK_STOP and LOAD are not supported yet */
3743 	ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3744 				      KVM_MP_STATE_OPERATING;
3745 
3746 	vcpu_put(vcpu);
3747 	return ret;
3748 }
3749 
3750 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3751 				    struct kvm_mp_state *mp_state)
3752 {
3753 	int rc = 0;
3754 
3755 	vcpu_load(vcpu);
3756 
3757 	/* user space knows about this interface - let it control the state */
3758 	vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3759 
3760 	switch (mp_state->mp_state) {
3761 	case KVM_MP_STATE_STOPPED:
3762 		rc = kvm_s390_vcpu_stop(vcpu);
3763 		break;
3764 	case KVM_MP_STATE_OPERATING:
3765 		rc = kvm_s390_vcpu_start(vcpu);
3766 		break;
3767 	case KVM_MP_STATE_LOAD:
3768 		if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3769 			rc = -ENXIO;
3770 			break;
3771 		}
3772 		rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
3773 		break;
3774 	case KVM_MP_STATE_CHECK_STOP:
3775 		fallthrough;	/* CHECK_STOP and LOAD are not supported yet */
3776 	default:
3777 		rc = -ENXIO;
3778 	}
3779 
3780 	vcpu_put(vcpu);
3781 	return rc;
3782 }
3783 
/* Report whether CPUSTAT_IBS is currently set in the vcpu's cpuflags. */
static bool ibs_enabled(struct kvm_vcpu *vcpu)
{
	return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
}
3788 
/*
 * Process all pending requests of a vcpu.
 *
 * Every handled request jumps back to the retry label, so PROG_REQUEST is
 * cleared and the pending requests are re-checked each time; the function
 * only falls through to the end once none of the explicitly handled requests
 * is pending any more.
 *
 * Return: 0 on success, or the error of gmap_mprotect_notify() (in which
 * case KVM_REQ_MMU_RELOAD is queued again).
 */
static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
retry:
	kvm_s390_vcpu_request_handled(vcpu);
	if (!kvm_request_pending(vcpu))
		return 0;
	/*
	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
	 * This ensures that the ipte instruction for this request has
	 * already finished. We might race against a second unmapper that
	 * wants to set the blocking bit. Lets just retry the request loop.
	 */
	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
		int rc;
		rc = gmap_mprotect_notify(vcpu->arch.gmap,
					  kvm_s390_get_prefix(vcpu),
					  PAGE_SIZE * 2, PROT_WRITE);
		if (rc) {
			/* keep the request pending so it is tried again later */
			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
			return rc;
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
		/* NOTE(review): 0xffff presumably invalidates ihcpu - confirm */
		vcpu->arch.sie_block->ihcpu = 0xffff;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
		if (!ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
			kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
		if (ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
			kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
		/*
		 * Disable CMM virtualization; we will emulate the ESSA
		 * instruction manually, in order to provide additional
		 * functionalities needed for live migration.
		 */
		vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
		/*
		 * Re-enable CMM virtualization if CMMA is available and
		 * CMM has been used.
		 */
		if ((vcpu->kvm->arch.use_cmma) &&
		    (vcpu->kvm->mm->context.uses_cmm))
			vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
		goto retry;
	}

	/* nothing to do, just clear the request */
	kvm_clear_request(KVM_REQ_UNHALT, vcpu);
	/* we left the vsie handler, nothing to do, just clear the request */
	kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);

	return 0;
}
3868 
3869 void kvm_s390_set_tod_clock(struct kvm *kvm,
3870 			    const struct kvm_s390_vm_tod_clock *gtod)
3871 {
3872 	struct kvm_vcpu *vcpu;
3873 	union tod_clock clk;
3874 	int i;
3875 
3876 	mutex_lock(&kvm->lock);
3877 	preempt_disable();
3878 
3879 	store_tod_clock_ext(&clk);
3880 
3881 	kvm->arch.epoch = gtod->tod - clk.tod;
3882 	kvm->arch.epdx = 0;
3883 	if (test_kvm_facility(kvm, 139)) {
3884 		kvm->arch.epdx = gtod->epoch_idx - clk.ei;
3885 		if (kvm->arch.epoch > gtod->tod)
3886 			kvm->arch.epdx -= 1;
3887 	}
3888 
3889 	kvm_s390_vcpu_block_all(kvm);
3890 	kvm_for_each_vcpu(i, vcpu, kvm) {
3891 		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3892 		vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
3893 	}
3894 
3895 	kvm_s390_vcpu_unblock_all(kvm);
3896 	preempt_enable();
3897 	mutex_unlock(&kvm->lock);
3898 }
3899 
3900 /**
3901  * kvm_arch_fault_in_page - fault-in guest page if necessary
3902  * @vcpu: The corresponding virtual cpu
3903  * @gpa: Guest physical address
3904  * @writable: Whether the page should be writable or not
3905  *
3906  * Make sure that a guest page has been faulted-in on the host.
3907  *
3908  * Return: Zero on success, negative error code otherwise.
3909  */
3910 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3911 {
3912 	return gmap_fault(vcpu->arch.gmap, gpa,
3913 			  writable ? FAULT_FLAG_WRITE : 0);
3914 }
3915 
3916 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3917 				      unsigned long token)
3918 {
3919 	struct kvm_s390_interrupt inti;
3920 	struct kvm_s390_irq irq;
3921 
3922 	if (start_token) {
3923 		irq.u.ext.ext_params2 = token;
3924 		irq.type = KVM_S390_INT_PFAULT_INIT;
3925 		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3926 	} else {
3927 		inti.type = KVM_S390_INT_PFAULT_DONE;
3928 		inti.parm64 = token;
3929 		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3930 	}
3931 }
3932 
3933 bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3934 				     struct kvm_async_pf *work)
3935 {
3936 	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3937 	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3938 
3939 	return true;
3940 }
3941 
3942 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3943 				 struct kvm_async_pf *work)
3944 {
3945 	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3946 	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3947 }
3948 
/*
 * Async-pf hook, intentionally a no-op: s390 always injects the page
 * directly, so there is nothing left to do here.
 */
void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
}
3954 
3955 bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
3956 {
3957 	/*
3958 	 * s390 will always inject the page directly,
3959 	 * but we still want check_async_completion to cleanup
3960 	 */
3961 	return true;
3962 }
3963 
3964 static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3965 {
3966 	hva_t hva;
3967 	struct kvm_arch_async_pf arch;
3968 
3969 	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3970 		return false;
3971 	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3972 	    vcpu->arch.pfault_compare)
3973 		return false;
3974 	if (psw_extint_disabled(vcpu))
3975 		return false;
3976 	if (kvm_s390_vcpu_has_irq(vcpu, 0))
3977 		return false;
3978 	if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
3979 		return false;
3980 	if (!vcpu->arch.gmap->pfault_enabled)
3981 		return false;
3982 
3983 	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3984 	hva += current->thread.gmap_addr & ~PAGE_MASK;
3985 	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3986 		return false;
3987 
3988 	return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3989 }
3990 
3991 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3992 {
3993 	int rc, cpuflags;
3994 
3995 	/*
3996 	 * On s390 notifications for arriving pages will be delivered directly
3997 	 * to the guest but the house keeping for completed pfaults is
3998 	 * handled outside the worker.
3999 	 */
4000 	kvm_check_async_pf_completion(vcpu);
4001 
4002 	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
4003 	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
4004 
4005 	if (need_resched())
4006 		schedule();
4007 
4008 	if (!kvm_is_ucontrol(vcpu->kvm)) {
4009 		rc = kvm_s390_deliver_pending_interrupts(vcpu);
4010 		if (rc)
4011 			return rc;
4012 	}
4013 
4014 	rc = kvm_s390_handle_requests(vcpu);
4015 	if (rc)
4016 		return rc;
4017 
4018 	if (guestdbg_enabled(vcpu)) {
4019 		kvm_s390_backup_guest_per_regs(vcpu);
4020 		kvm_s390_patch_guest_per_regs(vcpu);
4021 	}
4022 
4023 	clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.gisa_int.kicked_mask);
4024 
4025 	vcpu->arch.sie_block->icptcode = 0;
4026 	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
4027 	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
4028 	trace_kvm_s390_sie_enter(vcpu, cpuflags);
4029 
4030 	return 0;
4031 }
4032 
4033 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
4034 {
4035 	struct kvm_s390_pgm_info pgm_info = {
4036 		.code = PGM_ADDRESSING,
4037 	};
4038 	u8 opcode, ilen;
4039 	int rc;
4040 
4041 	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
4042 	trace_kvm_s390_sie_fault(vcpu);
4043 
4044 	/*
4045 	 * We want to inject an addressing exception, which is defined as a
4046 	 * suppressing or terminating exception. However, since we came here
4047 	 * by a DAT access exception, the PSW still points to the faulting
4048 	 * instruction since DAT exceptions are nullifying. So we've got
4049 	 * to look up the current opcode to get the length of the instruction
4050 	 * to be able to forward the PSW.
4051 	 */
4052 	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
4053 	ilen = insn_length(opcode);
4054 	if (rc < 0) {
4055 		return rc;
4056 	} else if (rc) {
4057 		/* Instruction-Fetching Exceptions - we can't detect the ilen.
4058 		 * Forward by arbitrary ilc, injection will take care of
4059 		 * nullification if necessary.
4060 		 */
4061 		pgm_info = vcpu->arch.pgm;
4062 		ilen = 4;
4063 	}
4064 	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
4065 	kvm_s390_forward_psw(vcpu, ilen);
4066 	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
4067 }
4068 
4069 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
4070 {
4071 	struct mcck_volatile_info *mcck_info;
4072 	struct sie_page *sie_page;
4073 
4074 	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
4075 		   vcpu->arch.sie_block->icptcode);
4076 	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
4077 
4078 	if (guestdbg_enabled(vcpu))
4079 		kvm_s390_restore_guest_per_regs(vcpu);
4080 
4081 	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
4082 	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
4083 
4084 	if (exit_reason == -EINTR) {
4085 		VCPU_EVENT(vcpu, 3, "%s", "machine check");
4086 		sie_page = container_of(vcpu->arch.sie_block,
4087 					struct sie_page, sie_block);
4088 		mcck_info = &sie_page->mcck_info;
4089 		kvm_s390_reinject_machine_check(vcpu, mcck_info);
4090 		return 0;
4091 	}
4092 
4093 	if (vcpu->arch.sie_block->icptcode > 0) {
4094 		int rc = kvm_handle_sie_intercept(vcpu);
4095 
4096 		if (rc != -EOPNOTSUPP)
4097 			return rc;
4098 		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
4099 		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
4100 		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
4101 		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
4102 		return -EREMOTE;
4103 	} else if (exit_reason != -EFAULT) {
4104 		vcpu->stat.exit_null++;
4105 		return 0;
4106 	} else if (kvm_is_ucontrol(vcpu->kvm)) {
4107 		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
4108 		vcpu->run->s390_ucontrol.trans_exc_code =
4109 						current->thread.gmap_addr;
4110 		vcpu->run->s390_ucontrol.pgm_code = 0x10;
4111 		return -EREMOTE;
4112 	} else if (current->thread.gmap_pfault) {
4113 		trace_kvm_s390_major_guest_pfault(vcpu);
4114 		current->thread.gmap_pfault = 0;
4115 		if (kvm_arch_setup_async_pf(vcpu))
4116 			return 0;
4117 		vcpu->stat.pfault_sync++;
4118 		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
4119 	}
4120 	return vcpu_post_run_fault_in_sie(vcpu);
4121 }
4122 
4123 #define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
4124 static int __vcpu_run(struct kvm_vcpu *vcpu)
4125 {
4126 	int rc, exit_reason;
4127 	struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
4128 
4129 	/*
4130 	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
4131 	 * ning the guest), so that memslots (and other stuff) are protected
4132 	 */
4133 	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4134 
4135 	do {
4136 		rc = vcpu_pre_run(vcpu);
4137 		if (rc)
4138 			break;
4139 
4140 		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4141 		/*
4142 		 * As PF_VCPU will be used in fault handler, between
4143 		 * guest_enter and guest_exit should be no uaccess.
4144 		 */
4145 		local_irq_disable();
4146 		guest_enter_irqoff();
4147 		__disable_cpu_timer_accounting(vcpu);
4148 		local_irq_enable();
4149 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4150 			memcpy(sie_page->pv_grregs,
4151 			       vcpu->run->s.regs.gprs,
4152 			       sizeof(sie_page->pv_grregs));
4153 		}
4154 		if (test_cpu_flag(CIF_FPU))
4155 			load_fpu_regs();
4156 		exit_reason = sie64a(vcpu->arch.sie_block,
4157 				     vcpu->run->s.regs.gprs);
4158 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4159 			memcpy(vcpu->run->s.regs.gprs,
4160 			       sie_page->pv_grregs,
4161 			       sizeof(sie_page->pv_grregs));
4162 			/*
4163 			 * We're not allowed to inject interrupts on intercepts
4164 			 * that leave the guest state in an "in-between" state
4165 			 * where the next SIE entry will do a continuation.
4166 			 * Fence interrupts in our "internal" PSW.
4167 			 */
4168 			if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
4169 			    vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
4170 				vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4171 			}
4172 		}
4173 		local_irq_disable();
4174 		__enable_cpu_timer_accounting(vcpu);
4175 		guest_exit_irqoff();
4176 		local_irq_enable();
4177 		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4178 
4179 		rc = vcpu_post_run(vcpu, exit_reason);
4180 	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
4181 
4182 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4183 	return rc;
4184 }
4185 
4186 static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
4187 {
4188 	struct kvm_run *kvm_run = vcpu->run;
4189 	struct runtime_instr_cb *riccb;
4190 	struct gs_cb *gscb;
4191 
4192 	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
4193 	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
4194 	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
4195 	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
4196 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4197 		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
4198 		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
4199 		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
4200 	}
4201 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
4202 		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
4203 		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
4204 		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
4205 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4206 			kvm_clear_async_pf_completion_queue(vcpu);
4207 	}
4208 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
4209 		vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
4210 		vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
4211 	}
4212 	/*
4213 	 * If userspace sets the riccb (e.g. after migration) to a valid state,
4214 	 * we should enable RI here instead of doing the lazy enablement.
4215 	 */
4216 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
4217 	    test_kvm_facility(vcpu->kvm, 64) &&
4218 	    riccb->v &&
4219 	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
4220 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
4221 		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
4222 	}
4223 	/*
4224 	 * If userspace sets the gscb (e.g. after migration) to non-zero,
4225 	 * we should enable GS here instead of doing the lazy enablement.
4226 	 */
4227 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
4228 	    test_kvm_facility(vcpu->kvm, 133) &&
4229 	    gscb->gssm &&
4230 	    !vcpu->arch.gs_enabled) {
4231 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
4232 		vcpu->arch.sie_block->ecb |= ECB_GS;
4233 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
4234 		vcpu->arch.gs_enabled = 1;
4235 	}
4236 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
4237 	    test_kvm_facility(vcpu->kvm, 82)) {
4238 		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
4239 		vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
4240 	}
4241 	if (MACHINE_HAS_GS) {
4242 		preempt_disable();
4243 		__ctl_set_bit(2, 4);
4244 		if (current->thread.gs_cb) {
4245 			vcpu->arch.host_gscb = current->thread.gs_cb;
4246 			save_gs_cb(vcpu->arch.host_gscb);
4247 		}
4248 		if (vcpu->arch.gs_enabled) {
4249 			current->thread.gs_cb = (struct gs_cb *)
4250 						&vcpu->run->s.regs.gscb;
4251 			restore_gs_cb(current->thread.gs_cb);
4252 		}
4253 		preempt_enable();
4254 	}
4255 	/* SIE will load etoken directly from SDNX and therefore kvm_run */
4256 }
4257 
4258 static void sync_regs(struct kvm_vcpu *vcpu)
4259 {
4260 	struct kvm_run *kvm_run = vcpu->run;
4261 
4262 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
4263 		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
4264 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
4265 		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
4266 		/* some control register changes require a tlb flush */
4267 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4268 	}
4269 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4270 		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
4271 		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
4272 	}
4273 	save_access_regs(vcpu->arch.host_acrs);
4274 	restore_access_regs(vcpu->run->s.regs.acrs);
4275 	/* save host (userspace) fprs/vrs */
4276 	save_fpu_regs();
4277 	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
4278 	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
4279 	if (MACHINE_HAS_VX)
4280 		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
4281 	else
4282 		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
4283 	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
4284 	if (test_fp_ctl(current->thread.fpu.fpc))
4285 		/* User space provided an invalid FPC, let's clear it */
4286 		current->thread.fpu.fpc = 0;
4287 
4288 	/* Sync fmt2 only data */
4289 	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
4290 		sync_regs_fmt2(vcpu);
4291 	} else {
4292 		/*
4293 		 * In several places we have to modify our internal view to
4294 		 * not do things that are disallowed by the ultravisor. For
4295 		 * example we must not inject interrupts after specific exits
4296 		 * (e.g. 112 prefix page not secure). We do this by turning
4297 		 * off the machine check, external and I/O interrupt bits
4298 		 * of our PSW copy. To avoid getting validity intercepts, we
4299 		 * do only accept the condition code from userspace.
4300 		 */
4301 		vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
4302 		vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
4303 						   PSW_MASK_CC;
4304 	}
4305 
4306 	kvm_run->kvm_dirty_regs = 0;
4307 }
4308 
4309 static void store_regs_fmt2(struct kvm_vcpu *vcpu)
4310 {
4311 	struct kvm_run *kvm_run = vcpu->run;
4312 
4313 	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
4314 	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
4315 	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
4316 	kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
4317 	kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
4318 	if (MACHINE_HAS_GS) {
4319 		preempt_disable();
4320 		__ctl_set_bit(2, 4);
4321 		if (vcpu->arch.gs_enabled)
4322 			save_gs_cb(current->thread.gs_cb);
4323 		current->thread.gs_cb = vcpu->arch.host_gscb;
4324 		restore_gs_cb(vcpu->arch.host_gscb);
4325 		if (!vcpu->arch.host_gscb)
4326 			__ctl_clear_bit(2, 4);
4327 		vcpu->arch.host_gscb = NULL;
4328 		preempt_enable();
4329 	}
4330 	/* SIE will save etoken directly into SDNX and therefore kvm_run */
4331 }
4332 
4333 static void store_regs(struct kvm_vcpu *vcpu)
4334 {
4335 	struct kvm_run *kvm_run = vcpu->run;
4336 
4337 	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
4338 	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
4339 	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
4340 	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
4341 	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
4342 	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
4343 	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
4344 	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
4345 	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
4346 	save_access_regs(vcpu->run->s.regs.acrs);
4347 	restore_access_regs(vcpu->arch.host_acrs);
4348 	/* Save guest register state */
4349 	save_fpu_regs();
4350 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4351 	/* Restore will be done lazily at return */
4352 	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
4353 	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
4354 	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
4355 		store_regs_fmt2(vcpu);
4356 }
4357 
4358 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
4359 {
4360 	struct kvm_run *kvm_run = vcpu->run;
4361 	int rc;
4362 
4363 	if (kvm_run->immediate_exit)
4364 		return -EINTR;
4365 
4366 	if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
4367 	    kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
4368 		return -EINVAL;
4369 
4370 	vcpu_load(vcpu);
4371 
4372 	if (guestdbg_exit_pending(vcpu)) {
4373 		kvm_s390_prepare_debug_exit(vcpu);
4374 		rc = 0;
4375 		goto out;
4376 	}
4377 
4378 	kvm_sigset_activate(vcpu);
4379 
4380 	/*
4381 	 * no need to check the return value of vcpu_start as it can only have
4382 	 * an error for protvirt, but protvirt means user cpu state
4383 	 */
4384 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
4385 		kvm_s390_vcpu_start(vcpu);
4386 	} else if (is_vcpu_stopped(vcpu)) {
4387 		pr_err_ratelimited("can't run stopped vcpu %d\n",
4388 				   vcpu->vcpu_id);
4389 		rc = -EINVAL;
4390 		goto out;
4391 	}
4392 
4393 	sync_regs(vcpu);
4394 	enable_cpu_timer_accounting(vcpu);
4395 
4396 	might_fault();
4397 	rc = __vcpu_run(vcpu);
4398 
4399 	if (signal_pending(current) && !rc) {
4400 		kvm_run->exit_reason = KVM_EXIT_INTR;
4401 		rc = -EINTR;
4402 	}
4403 
4404 	if (guestdbg_exit_pending(vcpu) && !rc)  {
4405 		kvm_s390_prepare_debug_exit(vcpu);
4406 		rc = 0;
4407 	}
4408 
4409 	if (rc == -EREMOTE) {
4410 		/* userspace support is needed, kvm_run has been prepared */
4411 		rc = 0;
4412 	}
4413 
4414 	disable_cpu_timer_accounting(vcpu);
4415 	store_regs(vcpu);
4416 
4417 	kvm_sigset_deactivate(vcpu);
4418 
4419 	vcpu->stat.exit_userspace++;
4420 out:
4421 	vcpu_put(vcpu);
4422 	return rc;
4423 }
4424 
4425 /*
4426  * store status at address
4427  * we use have two special cases:
4428  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
4429  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
4430  */
4431 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4432 {
4433 	unsigned char archmode = 1;
4434 	freg_t fprs[NUM_FPRS];
4435 	unsigned int px;
4436 	u64 clkcomp, cputm;
4437 	int rc;
4438 
4439 	px = kvm_s390_get_prefix(vcpu);
4440 	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
4441 		if (write_guest_abs(vcpu, 163, &archmode, 1))
4442 			return -EFAULT;
4443 		gpa = 0;
4444 	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
4445 		if (write_guest_real(vcpu, 163, &archmode, 1))
4446 			return -EFAULT;
4447 		gpa = px;
4448 	} else
4449 		gpa -= __LC_FPREGS_SAVE_AREA;
4450 
4451 	/* manually convert vector registers if necessary */
4452 	if (MACHINE_HAS_VX) {
4453 		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
4454 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4455 				     fprs, 128);
4456 	} else {
4457 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4458 				     vcpu->run->s.regs.fprs, 128);
4459 	}
4460 	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
4461 			      vcpu->run->s.regs.gprs, 128);
4462 	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
4463 			      &vcpu->arch.sie_block->gpsw, 16);
4464 	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
4465 			      &px, 4);
4466 	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
4467 			      &vcpu->run->s.regs.fpc, 4);
4468 	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
4469 			      &vcpu->arch.sie_block->todpr, 4);
4470 	cputm = kvm_s390_get_cpu_timer(vcpu);
4471 	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
4472 			      &cputm, 8);
4473 	clkcomp = vcpu->arch.sie_block->ckc >> 8;
4474 	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
4475 			      &clkcomp, 8);
4476 	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
4477 			      &vcpu->run->s.regs.acrs, 64);
4478 	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
4479 			      &vcpu->arch.sie_block->gcr, 128);
4480 	return rc ? -EFAULT : 0;
4481 }
4482 
4483 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
4484 {
4485 	/*
4486 	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
4487 	 * switch in the run ioctl. Let's update our copies before we save
4488 	 * it into the save area
4489 	 */
4490 	save_fpu_regs();
4491 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4492 	save_access_regs(vcpu->run->s.regs.acrs);
4493 
4494 	return kvm_s390_store_status_unloaded(vcpu, addr);
4495 }
4496 
4497 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4498 {
4499 	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
4500 	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
4501 }
4502 
4503 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
4504 {
4505 	unsigned int i;
4506 	struct kvm_vcpu *vcpu;
4507 
4508 	kvm_for_each_vcpu(i, vcpu, kvm) {
4509 		__disable_ibs_on_vcpu(vcpu);
4510 	}
4511 }
4512 
4513 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4514 {
4515 	if (!sclp.has_ibs)
4516 		return;
4517 	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
4518 	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
4519 }
4520 
4521 int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
4522 {
4523 	int i, online_vcpus, r = 0, started_vcpus = 0;
4524 
4525 	if (!is_vcpu_stopped(vcpu))
4526 		return 0;
4527 
4528 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
4529 	/* Only one cpu at a time may enter/leave the STOPPED state. */
4530 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
4531 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4532 
4533 	/* Let's tell the UV that we want to change into the operating state */
4534 	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4535 		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
4536 		if (r) {
4537 			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4538 			return r;
4539 		}
4540 	}
4541 
4542 	for (i = 0; i < online_vcpus; i++) {
4543 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
4544 			started_vcpus++;
4545 	}
4546 
4547 	if (started_vcpus == 0) {
4548 		/* we're the only active VCPU -> speed it up */
4549 		__enable_ibs_on_vcpu(vcpu);
4550 	} else if (started_vcpus == 1) {
4551 		/*
4552 		 * As we are starting a second VCPU, we have to disable
4553 		 * the IBS facility on all VCPUs to remove potentially
4554 		 * outstanding ENABLE requests.
4555 		 */
4556 		__disable_ibs_on_all_vcpus(vcpu->kvm);
4557 	}
4558 
4559 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
4560 	/*
4561 	 * The real PSW might have changed due to a RESTART interpreted by the
4562 	 * ultravisor. We block all interrupts and let the next sie exit
4563 	 * refresh our view.
4564 	 */
4565 	if (kvm_s390_pv_cpu_is_protected(vcpu))
4566 		vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4567 	/*
4568 	 * Another VCPU might have used IBS while we were offline.
4569 	 * Let's play safe and flush the VCPU at startup.
4570 	 */
4571 	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4572 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4573 	return 0;
4574 }
4575 
4576 int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
4577 {
4578 	int i, online_vcpus, r = 0, started_vcpus = 0;
4579 	struct kvm_vcpu *started_vcpu = NULL;
4580 
4581 	if (is_vcpu_stopped(vcpu))
4582 		return 0;
4583 
4584 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
4585 	/* Only one cpu at a time may enter/leave the STOPPED state. */
4586 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
4587 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4588 
4589 	/* Let's tell the UV that we want to change into the stopped state */
4590 	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4591 		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
4592 		if (r) {
4593 			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4594 			return r;
4595 		}
4596 	}
4597 
4598 	/* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
4599 	kvm_s390_clear_stop_irq(vcpu);
4600 
4601 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
4602 	__disable_ibs_on_vcpu(vcpu);
4603 
4604 	for (i = 0; i < online_vcpus; i++) {
4605 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
4606 			started_vcpus++;
4607 			started_vcpu = vcpu->kvm->vcpus[i];
4608 		}
4609 	}
4610 
4611 	if (started_vcpus == 1) {
4612 		/*
4613 		 * As we only have one VCPU left, we want to enable the
4614 		 * IBS facility for that VCPU to speed it up.
4615 		 */
4616 		__enable_ibs_on_vcpu(started_vcpu);
4617 	}
4618 
4619 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4620 	return 0;
4621 }
4622 
4623 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4624 				     struct kvm_enable_cap *cap)
4625 {
4626 	int r;
4627 
4628 	if (cap->flags)
4629 		return -EINVAL;
4630 
4631 	switch (cap->cap) {
4632 	case KVM_CAP_S390_CSS_SUPPORT:
4633 		if (!vcpu->kvm->arch.css_support) {
4634 			vcpu->kvm->arch.css_support = 1;
4635 			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
4636 			trace_kvm_s390_enable_css(vcpu->kvm);
4637 		}
4638 		r = 0;
4639 		break;
4640 	default:
4641 		r = -EINVAL;
4642 		break;
4643 	}
4644 	return r;
4645 }
4646 
4647 static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu,
4648 				   struct kvm_s390_mem_op *mop)
4649 {
4650 	void __user *uaddr = (void __user *)mop->buf;
4651 	int r = 0;
4652 
4653 	if (mop->flags || !mop->size)
4654 		return -EINVAL;
4655 	if (mop->size + mop->sida_offset < mop->size)
4656 		return -EINVAL;
4657 	if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
4658 		return -E2BIG;
4659 
4660 	switch (mop->op) {
4661 	case KVM_S390_MEMOP_SIDA_READ:
4662 		if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
4663 				 mop->sida_offset), mop->size))
4664 			r = -EFAULT;
4665 
4666 		break;
4667 	case KVM_S390_MEMOP_SIDA_WRITE:
4668 		if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
4669 				   mop->sida_offset), uaddr, mop->size))
4670 			r = -EFAULT;
4671 		break;
4672 	}
4673 	return r;
4674 }
4675 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
4676 				  struct kvm_s390_mem_op *mop)
4677 {
4678 	void __user *uaddr = (void __user *)mop->buf;
4679 	void *tmpbuf = NULL;
4680 	int r = 0;
4681 	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
4682 				    | KVM_S390_MEMOP_F_CHECK_ONLY;
4683 
4684 	if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
4685 		return -EINVAL;
4686 
4687 	if (mop->size > MEM_OP_MAX_SIZE)
4688 		return -E2BIG;
4689 
4690 	if (kvm_s390_pv_cpu_is_protected(vcpu))
4691 		return -EINVAL;
4692 
4693 	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
4694 		tmpbuf = vmalloc(mop->size);
4695 		if (!tmpbuf)
4696 			return -ENOMEM;
4697 	}
4698 
4699 	switch (mop->op) {
4700 	case KVM_S390_MEMOP_LOGICAL_READ:
4701 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4702 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4703 					    mop->size, GACC_FETCH);
4704 			break;
4705 		}
4706 		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4707 		if (r == 0) {
4708 			if (copy_to_user(uaddr, tmpbuf, mop->size))
4709 				r = -EFAULT;
4710 		}
4711 		break;
4712 	case KVM_S390_MEMOP_LOGICAL_WRITE:
4713 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4714 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4715 					    mop->size, GACC_STORE);
4716 			break;
4717 		}
4718 		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4719 			r = -EFAULT;
4720 			break;
4721 		}
4722 		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4723 		break;
4724 	}
4725 
4726 	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4727 		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
4728 
4729 	vfree(tmpbuf);
4730 	return r;
4731 }
4732 
4733 static long kvm_s390_guest_memsida_op(struct kvm_vcpu *vcpu,
4734 				      struct kvm_s390_mem_op *mop)
4735 {
4736 	int r, srcu_idx;
4737 
4738 	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4739 
4740 	switch (mop->op) {
4741 	case KVM_S390_MEMOP_LOGICAL_READ:
4742 	case KVM_S390_MEMOP_LOGICAL_WRITE:
4743 		r = kvm_s390_guest_mem_op(vcpu, mop);
4744 		break;
4745 	case KVM_S390_MEMOP_SIDA_READ:
4746 	case KVM_S390_MEMOP_SIDA_WRITE:
4747 		/* we are locked against sida going away by the vcpu->mutex */
4748 		r = kvm_s390_guest_sida_op(vcpu, mop);
4749 		break;
4750 	default:
4751 		r = -EINVAL;
4752 	}
4753 
4754 	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
4755 	return r;
4756 }
4757 
4758 long kvm_arch_vcpu_async_ioctl(struct file *filp,
4759 			       unsigned int ioctl, unsigned long arg)
4760 {
4761 	struct kvm_vcpu *vcpu = filp->private_data;
4762 	void __user *argp = (void __user *)arg;
4763 
4764 	switch (ioctl) {
4765 	case KVM_S390_IRQ: {
4766 		struct kvm_s390_irq s390irq;
4767 
4768 		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4769 			return -EFAULT;
4770 		return kvm_s390_inject_vcpu(vcpu, &s390irq);
4771 	}
4772 	case KVM_S390_INTERRUPT: {
4773 		struct kvm_s390_interrupt s390int;
4774 		struct kvm_s390_irq s390irq = {};
4775 
4776 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
4777 			return -EFAULT;
4778 		if (s390int_to_s390irq(&s390int, &s390irq))
4779 			return -EINVAL;
4780 		return kvm_s390_inject_vcpu(vcpu, &s390irq);
4781 	}
4782 	}
4783 	return -ENOIOCTLCMD;
4784 }
4785 
4786 long kvm_arch_vcpu_ioctl(struct file *filp,
4787 			 unsigned int ioctl, unsigned long arg)
4788 {
4789 	struct kvm_vcpu *vcpu = filp->private_data;
4790 	void __user *argp = (void __user *)arg;
4791 	int idx;
4792 	long r;
4793 	u16 rc, rrc;
4794 
4795 	vcpu_load(vcpu);
4796 
4797 	switch (ioctl) {
4798 	case KVM_S390_STORE_STATUS:
4799 		idx = srcu_read_lock(&vcpu->kvm->srcu);
4800 		r = kvm_s390_store_status_unloaded(vcpu, arg);
4801 		srcu_read_unlock(&vcpu->kvm->srcu, idx);
4802 		break;
4803 	case KVM_S390_SET_INITIAL_PSW: {
4804 		psw_t psw;
4805 
4806 		r = -EFAULT;
4807 		if (copy_from_user(&psw, argp, sizeof(psw)))
4808 			break;
4809 		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4810 		break;
4811 	}
4812 	case KVM_S390_CLEAR_RESET:
4813 		r = 0;
4814 		kvm_arch_vcpu_ioctl_clear_reset(vcpu);
4815 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4816 			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4817 					  UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
4818 			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
4819 				   rc, rrc);
4820 		}
4821 		break;
4822 	case KVM_S390_INITIAL_RESET:
4823 		r = 0;
4824 		kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4825 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4826 			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4827 					  UVC_CMD_CPU_RESET_INITIAL,
4828 					  &rc, &rrc);
4829 			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
4830 				   rc, rrc);
4831 		}
4832 		break;
4833 	case KVM_S390_NORMAL_RESET:
4834 		r = 0;
4835 		kvm_arch_vcpu_ioctl_normal_reset(vcpu);
4836 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4837 			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4838 					  UVC_CMD_CPU_RESET, &rc, &rrc);
4839 			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
4840 				   rc, rrc);
4841 		}
4842 		break;
4843 	case KVM_SET_ONE_REG:
4844 	case KVM_GET_ONE_REG: {
4845 		struct kvm_one_reg reg;
4846 		r = -EINVAL;
4847 		if (kvm_s390_pv_cpu_is_protected(vcpu))
4848 			break;
4849 		r = -EFAULT;
4850 		if (copy_from_user(&reg, argp, sizeof(reg)))
4851 			break;
4852 		if (ioctl == KVM_SET_ONE_REG)
4853 			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4854 		else
4855 			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4856 		break;
4857 	}
4858 #ifdef CONFIG_KVM_S390_UCONTROL
4859 	case KVM_S390_UCAS_MAP: {
4860 		struct kvm_s390_ucas_mapping ucasmap;
4861 
4862 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4863 			r = -EFAULT;
4864 			break;
4865 		}
4866 
4867 		if (!kvm_is_ucontrol(vcpu->kvm)) {
4868 			r = -EINVAL;
4869 			break;
4870 		}
4871 
4872 		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4873 				     ucasmap.vcpu_addr, ucasmap.length);
4874 		break;
4875 	}
4876 	case KVM_S390_UCAS_UNMAP: {
4877 		struct kvm_s390_ucas_mapping ucasmap;
4878 
4879 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4880 			r = -EFAULT;
4881 			break;
4882 		}
4883 
4884 		if (!kvm_is_ucontrol(vcpu->kvm)) {
4885 			r = -EINVAL;
4886 			break;
4887 		}
4888 
4889 		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
4890 			ucasmap.length);
4891 		break;
4892 	}
4893 #endif
4894 	case KVM_S390_VCPU_FAULT: {
4895 		r = gmap_fault(vcpu->arch.gmap, arg, 0);
4896 		break;
4897 	}
4898 	case KVM_ENABLE_CAP:
4899 	{
4900 		struct kvm_enable_cap cap;
4901 		r = -EFAULT;
4902 		if (copy_from_user(&cap, argp, sizeof(cap)))
4903 			break;
4904 		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4905 		break;
4906 	}
4907 	case KVM_S390_MEM_OP: {
4908 		struct kvm_s390_mem_op mem_op;
4909 
4910 		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4911 			r = kvm_s390_guest_memsida_op(vcpu, &mem_op);
4912 		else
4913 			r = -EFAULT;
4914 		break;
4915 	}
4916 	case KVM_S390_SET_IRQ_STATE: {
4917 		struct kvm_s390_irq_state irq_state;
4918 
4919 		r = -EFAULT;
4920 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4921 			break;
4922 		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4923 		    irq_state.len == 0 ||
4924 		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4925 			r = -EINVAL;
4926 			break;
4927 		}
4928 		/* do not use irq_state.flags, it will break old QEMUs */
4929 		r = kvm_s390_set_irq_state(vcpu,
4930 					   (void __user *) irq_state.buf,
4931 					   irq_state.len);
4932 		break;
4933 	}
4934 	case KVM_S390_GET_IRQ_STATE: {
4935 		struct kvm_s390_irq_state irq_state;
4936 
4937 		r = -EFAULT;
4938 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4939 			break;
4940 		if (irq_state.len == 0) {
4941 			r = -EINVAL;
4942 			break;
4943 		}
4944 		/* do not use irq_state.flags, it will break old QEMUs */
4945 		r = kvm_s390_get_irq_state(vcpu,
4946 					   (__u8 __user *)  irq_state.buf,
4947 					   irq_state.len);
4948 		break;
4949 	}
4950 	default:
4951 		r = -ENOTTY;
4952 	}
4953 
4954 	vcpu_put(vcpu);
4955 	return r;
4956 }
4957 
4958 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
4959 {
4960 #ifdef CONFIG_KVM_S390_UCONTROL
4961 	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
4962 		 && (kvm_is_ucontrol(vcpu->kvm))) {
4963 		vmf->page = virt_to_page(vcpu->arch.sie_block);
4964 		get_page(vmf->page);
4965 		return 0;
4966 	}
4967 #endif
4968 	return VM_FAULT_SIGBUS;
4969 }
4970 
4971 /* Section: memory related */
4972 int kvm_arch_prepare_memory_region(struct kvm *kvm,
4973 				   struct kvm_memory_slot *memslot,
4974 				   const struct kvm_userspace_memory_region *mem,
4975 				   enum kvm_mr_change change)
4976 {
4977 	/* A few sanity checks. We can have memory slots which have to be
4978 	   located/ended at a segment boundary (1MB). The memory in userland is
4979 	   ok to be fragmented into various different vmas. It is okay to mmap()
4980 	   and munmap() stuff in this slot after doing this call at any time */
4981 
4982 	if (mem->userspace_addr & 0xffffful)
4983 		return -EINVAL;
4984 
4985 	if (mem->memory_size & 0xffffful)
4986 		return -EINVAL;
4987 
4988 	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
4989 		return -EINVAL;
4990 
4991 	/* When we are protected, we should not change the memory slots */
4992 	if (kvm_s390_pv_get_handle(kvm))
4993 		return -EINVAL;
4994 	return 0;
4995 }
4996 
4997 void kvm_arch_commit_memory_region(struct kvm *kvm,
4998 				const struct kvm_userspace_memory_region *mem,
4999 				struct kvm_memory_slot *old,
5000 				const struct kvm_memory_slot *new,
5001 				enum kvm_mr_change change)
5002 {
5003 	int rc = 0;
5004 
5005 	switch (change) {
5006 	case KVM_MR_DELETE:
5007 		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5008 					old->npages * PAGE_SIZE);
5009 		break;
5010 	case KVM_MR_MOVE:
5011 		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5012 					old->npages * PAGE_SIZE);
5013 		if (rc)
5014 			break;
5015 		fallthrough;
5016 	case KVM_MR_CREATE:
5017 		rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
5018 				      mem->guest_phys_addr, mem->memory_size);
5019 		break;
5020 	case KVM_MR_FLAGS_ONLY:
5021 		break;
5022 	default:
5023 		WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
5024 	}
5025 	if (rc)
5026 		pr_warn("failed to commit memory region\n");
5027 	return;
5028 }
5029 
5030 static inline unsigned long nonhyp_mask(int i)
5031 {
5032 	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
5033 
5034 	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
5035 }
5036 
5037 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
5038 {
5039 	vcpu->valid_wakeup = false;
5040 }
5041 
5042 static int __init kvm_s390_init(void)
5043 {
5044 	int i;
5045 
5046 	if (!sclp.has_sief2) {
5047 		pr_info("SIE is not available\n");
5048 		return -ENODEV;
5049 	}
5050 
5051 	if (nested && hpage) {
5052 		pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
5053 		return -EINVAL;
5054 	}
5055 
5056 	for (i = 0; i < 16; i++)
5057 		kvm_s390_fac_base[i] |=
5058 			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
5059 
5060 	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
5061 }
5062 
5063 static void __exit kvm_s390_exit(void)
5064 {
5065 	kvm_exit();
5066 }
5067 
/* Register kvm_s390_init()/kvm_s390_exit() as the module's init and exit functions. */
module_init(kvm_s390_init);
module_exit(kvm_s390_exit);

/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");
5078 MODULE_ALIAS("devname:kvm");
5079