// SPDX-License-Identifier: GPL-2.0
/*
 * hosting IBM Z kernel virtual machines (s390x)
 *
 * Copyright IBM Corp. 2008, 2020
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 *               Jason J. Herne <jjherne@us.ibm.com>
 */

#define KMSG_COMPONENT "kvm-s390"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/string.h>
#include <linux/pgtable.h>

#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/stp.h>
#include <asm/gmap.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
#include <asm/ap.h>
#include <asm/uv.h>
#include <asm/fpu/api.h>
#include "kvm-s390.h"
#include "gaccess.h"

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"

#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))

struct kvm_stats_debugfs_item debugfs_entries[] = {
	VCPU_STAT("userspace_handled", exit_userspace),
	VCPU_STAT("exit_null", exit_null),
	VCPU_STAT("pfault_sync", pfault_sync),
	VCPU_STAT("exit_validity", exit_validity),
	VCPU_STAT("exit_stop_request", exit_stop_request),
	VCPU_STAT("exit_external_request", exit_external_request),
	VCPU_STAT("exit_io_request", exit_io_request),
	VCPU_STAT("exit_external_interrupt", exit_external_interrupt),
	VCPU_STAT("exit_instruction", exit_instruction),
	VCPU_STAT("exit_pei", exit_pei),
	VCPU_STAT("exit_program_interruption", exit_program_interruption),
	VCPU_STAT("exit_instr_and_program_int", exit_instr_and_program),
	VCPU_STAT("exit_operation_exception", exit_operation_exception),
	VCPU_STAT("halt_successful_poll", halt_successful_poll),
	VCPU_STAT("halt_attempted_poll", halt_attempted_poll),
	VCPU_STAT("halt_poll_invalid", halt_poll_invalid),
	VCPU_STAT("halt_no_poll_steal", halt_no_poll_steal),
	VCPU_STAT("halt_wakeup", halt_wakeup),
	VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns),
	VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns),
	VCPU_STAT("instruction_lctlg", instruction_lctlg),
	VCPU_STAT("instruction_lctl", instruction_lctl),
	VCPU_STAT("instruction_stctl", instruction_stctl),
	VCPU_STAT("instruction_stctg", instruction_stctg),
	VCPU_STAT("deliver_ckc", deliver_ckc),
	VCPU_STAT("deliver_cputm", deliver_cputm),
	VCPU_STAT("deliver_emergency_signal", deliver_emergency_signal),
	VCPU_STAT("deliver_external_call", deliver_external_call),
	VCPU_STAT("deliver_service_signal", deliver_service_signal),
	VCPU_STAT("deliver_virtio", deliver_virtio),
	VCPU_STAT("deliver_stop_signal", deliver_stop_signal),
	VCPU_STAT("deliver_prefix_signal", deliver_prefix_signal),
	VCPU_STAT("deliver_restart_signal", deliver_restart_signal),
	VCPU_STAT("deliver_program", deliver_program),
	VCPU_STAT("deliver_io", deliver_io),
	VCPU_STAT("deliver_machine_check", deliver_machine_check),
	VCPU_STAT("exit_wait_state", exit_wait_state),
	VCPU_STAT("inject_ckc", inject_ckc),
	VCPU_STAT("inject_cputm", inject_cputm),
	VCPU_STAT("inject_external_call", inject_external_call),
	VM_STAT("inject_float_mchk", inject_float_mchk),
	VCPU_STAT("inject_emergency_signal", inject_emergency_signal),
	VM_STAT("inject_io", inject_io),
	VCPU_STAT("inject_mchk", inject_mchk),
	VM_STAT("inject_pfault_done", inject_pfault_done),
	VCPU_STAT("inject_program", inject_program),
	VCPU_STAT("inject_restart", inject_restart),
	VM_STAT("inject_service_signal", inject_service_signal),
	VCPU_STAT("inject_set_prefix", inject_set_prefix),
	VCPU_STAT("inject_stop_signal", inject_stop_signal),
	VCPU_STAT("inject_pfault_init", inject_pfault_init),
	VM_STAT("inject_virtio", inject_virtio),
	VCPU_STAT("instruction_epsw", instruction_epsw),
	VCPU_STAT("instruction_gs", instruction_gs),
	VCPU_STAT("instruction_io_other", instruction_io_other),
	VCPU_STAT("instruction_lpsw", instruction_lpsw),
	VCPU_STAT("instruction_lpswe", instruction_lpswe),
	VCPU_STAT("instruction_pfmf", instruction_pfmf),
	VCPU_STAT("instruction_ptff", instruction_ptff),
	VCPU_STAT("instruction_stidp", instruction_stidp),
	VCPU_STAT("instruction_sck", instruction_sck),
	VCPU_STAT("instruction_sckpf", instruction_sckpf),
	VCPU_STAT("instruction_spx", instruction_spx),
	VCPU_STAT("instruction_stpx", instruction_stpx),
	VCPU_STAT("instruction_stap", instruction_stap),
	VCPU_STAT("instruction_iske", instruction_iske),
	VCPU_STAT("instruction_ri", instruction_ri),
	VCPU_STAT("instruction_rrbe", instruction_rrbe),
	VCPU_STAT("instruction_sske", instruction_sske),
	VCPU_STAT("instruction_ipte_interlock", instruction_ipte_interlock),
	VCPU_STAT("instruction_essa", instruction_essa),
	VCPU_STAT("instruction_stsi", instruction_stsi),
	VCPU_STAT("instruction_stfl", instruction_stfl),
	VCPU_STAT("instruction_tb", instruction_tb),
	VCPU_STAT("instruction_tpi", instruction_tpi),
	VCPU_STAT("instruction_tprot", instruction_tprot),
	VCPU_STAT("instruction_tsch", instruction_tsch),
	VCPU_STAT("instruction_sthyi", instruction_sthyi),
	VCPU_STAT("instruction_sie", instruction_sie),
	VCPU_STAT("instruction_sigp_sense", instruction_sigp_sense),
	VCPU_STAT("instruction_sigp_sense_running", instruction_sigp_sense_running),
	VCPU_STAT("instruction_sigp_external_call", instruction_sigp_external_call),
	VCPU_STAT("instruction_sigp_emergency", instruction_sigp_emergency),
	VCPU_STAT("instruction_sigp_cond_emergency", instruction_sigp_cond_emergency),
	VCPU_STAT("instruction_sigp_start", instruction_sigp_start),
	VCPU_STAT("instruction_sigp_stop", instruction_sigp_stop),
	VCPU_STAT("instruction_sigp_stop_store_status", instruction_sigp_stop_store_status),
	VCPU_STAT("instruction_sigp_store_status", instruction_sigp_store_status),
	VCPU_STAT("instruction_sigp_store_adtl_status", instruction_sigp_store_adtl_status),
	VCPU_STAT("instruction_sigp_set_arch", instruction_sigp_arch),
	VCPU_STAT("instruction_sigp_set_prefix", instruction_sigp_prefix),
	VCPU_STAT("instruction_sigp_restart", instruction_sigp_restart),
	VCPU_STAT("instruction_sigp_cpu_reset", instruction_sigp_cpu_reset),
	VCPU_STAT("instruction_sigp_init_cpu_reset", instruction_sigp_init_cpu_reset),
	VCPU_STAT("instruction_sigp_unknown", instruction_sigp_unknown),
	VCPU_STAT("instruction_diag_10", diagnose_10),
	VCPU_STAT("instruction_diag_44", diagnose_44),
	VCPU_STAT("instruction_diag_9c", diagnose_9c),
	VCPU_STAT("diag_9c_ignored", diagnose_9c_ignored),
	VCPU_STAT("instruction_diag_258", diagnose_258),
	VCPU_STAT("instruction_diag_308", diagnose_308),
	VCPU_STAT("instruction_diag_500", diagnose_500),
	VCPU_STAT("instruction_diag_other", diagnose_other),
	{ NULL }
};

/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");

/* allow 1m huge page guest backing, if !nested */
static int hpage;
module_param(hpage, int, 0444);
MODULE_PARM_DESC(hpage, "1m huge page backing support");

/* maximum percentage of steal time for polling.  >100 is treated like 100 */
static u8 halt_poll_max_steal = 10;
module_param(halt_poll_max_steal, byte, 0644);
MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");

/* if set to true, the GISA will be initialized and used if available */
static bool use_gisa = true;
module_param(use_gisa, bool, 0644);
MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");
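
/*
 * Illustrative usage (not part of the original file): the parameters above
 * are set at module load time, e.g. "modprobe kvm nested=1", or on the
 * kernel command line as "kvm.nested=1". Note that, per the comments above,
 * 1m huge page backing (hpage) is only intended for the !nested case.
 */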

/*
 * For now we handle at most 16 double words as this is what the s390 base
 * kernel handles and stores in the prefix page. If we ever need to go beyond
 * this, it requires code changes, but the external uapi can stay.
 */
#define SIZE_INTERNAL 16

/*
 * Base feature mask that defines default mask for facilities. Consists of the
 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
 */
static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
/*
 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
 * and defines the facilities that can be enabled via a cpu model.
 */
static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };

static unsigned long kvm_s390_fac_size(void)
{
	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
		sizeof(S390_lowcore.stfle_fac_list));

	return SIZE_INTERNAL;
}

/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;
debug_info_t *kvm_s390_dbf_uv;

/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}

int kvm_arch_check_processor_compat(void *opaque)
{
	return 0;
}

/* forward declarations */
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end);
static int sca_switch_to_extended(struct kvm *kvm);

static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
{
	u8 delta_idx = 0;

	/*
	 * The TOD jumps by delta; we have to compensate for this by adding
	 * -delta to the epoch.
	 */
	delta = -delta;

	/* sign-extension - we're adding to signed values below */
	if ((s64)delta < 0)
		delta_idx = -1;

	scb->epoch += delta;
	if (scb->ecd & ECD_MEF) {
		scb->epdx += delta_idx;
		if (scb->epoch < delta)
			scb->epdx += 1;
	}
}
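
/*
 * Worked example (illustrative, not from the original source): assume the
 * host TOD jumps forward by 2, so delta = -2 = 0xfffffffffffffffe after
 * negation, and the guest has epoch = 0xffffffffffffffff, epdx = 0.
 * (s64)delta < 0 gives delta_idx = -1; the add wraps epoch to
 * 0xfffffffffffffffd and epdx becomes -1, but epoch < delta detects the
 * unsigned carry and adds 1 back, so epdx:epoch ends up as the original
 * 128-bit epoch minus 2, as required.
 */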

/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
			  void *v)
{
	struct kvm *kvm;
	struct kvm_vcpu *vcpu;
	int i;
	unsigned long long *delta = v;

	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm_for_each_vcpu(i, vcpu, kvm) {
			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
			if (i == 0) {
				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
			}
			if (vcpu->arch.cputm_enabled)
				vcpu->arch.cputm_start += *delta;
			if (vcpu->arch.vsie_block)
				kvm_clock_sync_scb(vcpu->arch.vsie_block,
						   *delta);
		}
	}
	return NOTIFY_OK;
}

static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};

int kvm_arch_hardware_setup(void *opaque)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_pte_notifier(&gmap_notifier);
	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
	gmap_register_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				       &kvm_clock_notifier);
	return 0;
}

void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_pte_notifier(&gmap_notifier);
	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);
}

static void allow_cpu_feat(unsigned long nr)
{
	set_bit_inv(nr, kvm_s390_available_cpu_feat);
}

static inline int plo_test_bit(unsigned char nr)
{
	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
	int cc;

	asm volatile(
		/* Parameter registers are ignored for "test bit" */
		"	plo	0,0,0,0(0)\n"
		"	ipm	%0\n"
		"	srl	%0,28\n"
		: "=d" (cc)
		: "d" (r0)
		: "cc");
	return cc == 0;
}
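
/*
 * A sketch of the convention used here (illustrative, assuming the MSB-first
 * bit numbering used by the query loop in kvm_s390_cpu_feat_init() below):
 * plo_test_bit(nr) returns 1 iff PERFORM LOCKED OPERATION completed with
 * cc == 0 for test bit nr, and the caller records the result via
 *
 *	plo[nr >> 3] |= 0x80 >> (nr & 7);	/- set bit nr of the mask -/
 */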

static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
{
	register unsigned long r0 asm("0") = 0;	/* query function */
	register unsigned long r1 asm("1") = (unsigned long) query;

	asm volatile(
		/* Parameter regs are ignored */
		"	.insn	rrf,%[opc] << 16,2,4,6,0\n"
		:
		: "d" (r0), "a" (r1), [opc] "i" (opcode)
		: "cc", "memory");
}

#define INSN_SORTL 0xb938
#define INSN_DFLTCC 0xb939

static void kvm_s390_cpu_feat_init(void)
{
	int i;

	for (i = 0; i < 256; ++i) {
		if (plo_test_bit(i))
			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
	}

	if (test_facility(28)) /* TOD-clock steering */
		ptff(kvm_s390_available_subfunc.ptff,
		     sizeof(kvm_s390_available_subfunc.ptff),
		     PTFF_QAF);

	if (test_facility(17)) { /* MSA */
		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmac);
		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmc);
		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.km);
		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kimd);
		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.klmd);
	}
	if (test_facility(76)) /* MSA3 */
		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pckmo);
	if (test_facility(77)) { /* MSA4 */
		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmctr);
		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmf);
		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmo);
		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pcc);
	}
	if (test_facility(57)) /* MSA5 */
		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.ppno);

	if (test_facility(146)) /* MSA8 */
		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kma);

	if (test_facility(155)) /* MSA9 */
		__cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kdsa);

	if (test_facility(150)) /* SORTL */
		__insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);

	if (test_facility(151)) /* DFLTCC */
		__insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);

	if (MACHINE_HAS_ESOP)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
	/*
	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
	 */
	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
	    !test_facility(3) || !nested)
		return;
	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
	if (sclp.has_64bscao)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
	if (sclp.has_siif)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
	if (sclp.has_gpere)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
	if (sclp.has_gsls)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
	if (sclp.has_ib)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
	if (sclp.has_cei)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
	if (sclp.has_ibs)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
	if (sclp.has_kss)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
	/*
	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
	 * all skey handling functions read/set the skey from the PGSTE
	 * instead of the real storage key.
	 *
	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will cause
	 * pages to be detected as preserved although they are resident.
	 *
	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
	 *
	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
	 *
	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
	 * cannot easily shadow the SCA because of the ipte lock.
	 */
}

int kvm_arch_init(void *opaque)
{
	int rc = -ENOMEM;

	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf)
		return -ENOMEM;

	kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf_uv)
		goto out;

	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
	    debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
		goto out;

	kvm_s390_cpu_feat_init();

	/* Register floating interrupt controller interface. */
	rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
	if (rc) {
		pr_err("A FLIC registration call failed with rc=%d\n", rc);
		goto out;
	}

	rc = kvm_s390_gib_init(GAL_ISC);
	if (rc)
		goto out;

	return 0;

out:
	kvm_arch_exit();
	return rc;
}

void kvm_arch_exit(void)
{
	kvm_s390_gib_destroy();
	debug_unregister(kvm_s390_dbf);
	debug_unregister(kvm_s390_dbf_uv);
}

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	return -EINVAL;
}

int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_S390_IRQCHIP:
	case KVM_CAP_VM_ATTRIBUTES:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_IMMEDIATE_EXIT:
	case KVM_CAP_S390_INJECT_IRQ:
	case KVM_CAP_S390_USER_SIGP:
	case KVM_CAP_S390_USER_STSI:
	case KVM_CAP_S390_SKEYS:
	case KVM_CAP_S390_IRQ_STATE:
	case KVM_CAP_S390_USER_INSTR0:
	case KVM_CAP_S390_CMMA_MIGRATION:
	case KVM_CAP_S390_AIS:
	case KVM_CAP_S390_AIS_MIGRATION:
	case KVM_CAP_S390_VCPU_RESETS:
	case KVM_CAP_SET_GUEST_DEBUG:
	case KVM_CAP_S390_DIAG318:
		r = 1;
		break;
	case KVM_CAP_S390_HPAGE_1M:
		r = 0;
		if (hpage && !kvm_is_ucontrol(kvm))
			r = 1;
		break;
	case KVM_CAP_S390_MEM_OP:
		r = MEM_OP_MAX_SIZE;
		break;
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
	case KVM_CAP_MAX_VCPU_ID:
		r = KVM_S390_BSCA_CPU_SLOTS;
		if (!kvm_s390_use_sca_entries())
			r = KVM_MAX_VCPUS;
		else if (sclp.has_esca && sclp.has_64bscao)
			r = KVM_S390_ESCA_CPU_SLOTS;
		break;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		r = MACHINE_HAS_VX;
		break;
	case KVM_CAP_S390_RI:
		r = test_facility(64);
		break;
	case KVM_CAP_S390_GS:
		r = test_facility(133);
		break;
	case KVM_CAP_S390_BPB:
		r = test_facility(82);
		break;
	case KVM_CAP_S390_PROTECTED:
		r = is_prot_virt_host();
		break;
	default:
		r = 0;
	}
	return r;
}
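
/*
 * Illustrative userspace sketch (hypothetical file descriptor names, not
 * part of the original file): capabilities are probed with
 * KVM_CHECK_EXTENSION; for KVM_CAP_S390_MEM_OP the return value is the
 * maximum transfer size rather than a boolean:
 *
 *	int max = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_MEM_OP);
 *	(max == MEM_OP_MAX_SIZE, i.e. 65536, when the memory op is supported)
 */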

void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
{
	int i;
	gfn_t cur_gfn, last_gfn;
	unsigned long gaddr, vmaddr;
	struct gmap *gmap = kvm->arch.gmap;
	DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);

	/* Loop over all guest segments */
	cur_gfn = memslot->base_gfn;
	last_gfn = memslot->base_gfn + memslot->npages;
	for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
		gaddr = gfn_to_gpa(cur_gfn);
		vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
		if (kvm_is_error_hva(vmaddr))
			continue;

		bitmap_zero(bitmap, _PAGE_ENTRIES);
		gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
		for (i = 0; i < _PAGE_ENTRIES; i++) {
			if (test_bit(i, bitmap))
				mark_page_dirty(kvm, cur_gfn + i);
		}

		if (fatal_signal_pending(current))
			return;
		cond_resched();
	}
}

/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	int r;
	unsigned long n;
	struct kvm_memory_slot *memslot;
	int is_dirty;

	if (kvm_is_ucontrol(kvm))
		return -EINVAL;

	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
	if (r)
		goto out;

	/* Clear the dirty log */
	if (is_dirty) {
		n = kvm_dirty_bitmap_bytes(memslot);
		memset(memslot->dirty_bitmap, 0, n);
	}
	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}
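
/*
 * Illustrative userspace sketch (hypothetical variable names, not part of
 * the original file): fetching and clearing the dirty log for slot 0:
 *
 *	struct kvm_dirty_log log = { .slot = 0, .dirty_bitmap = bitmap };
 *
 *	ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log);
 */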

static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
	}
}

int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_IRQCHIP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
		kvm->arch.use_irqchip = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_SIGP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
		kvm->arch.user_sigp = 1;
		r = 0;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (MACHINE_HAS_VX) {
			set_kvm_facility(kvm->arch.model.fac_mask, 129);
			set_kvm_facility(kvm->arch.model.fac_list, 129);
			if (test_facility(134)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 134);
				set_kvm_facility(kvm->arch.model.fac_list, 134);
			}
			if (test_facility(135)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 135);
				set_kvm_facility(kvm->arch.model.fac_list, 135);
			}
			if (test_facility(148)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 148);
				set_kvm_facility(kvm->arch.model.fac_list, 148);
			}
			if (test_facility(152)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 152);
				set_kvm_facility(kvm->arch.model.fac_list, 152);
			}
			r = 0;
		} else
			r = -EINVAL;
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_RI:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(64)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 64);
			set_kvm_facility(kvm->arch.model.fac_list, 64);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_AIS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else {
			set_kvm_facility(kvm->arch.model.fac_mask, 72);
			set_kvm_facility(kvm->arch.model.fac_list, 72);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_GS:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(133)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 133);
			set_kvm_facility(kvm->arch.model.fac_list, 133);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_HPAGE_1M:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus)
			r = -EBUSY;
		else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
			r = -EINVAL;
		else {
			r = 0;
			mmap_write_lock(kvm->mm);
			kvm->mm->context.allow_gmap_hpage_1m = 1;
			mmap_write_unlock(kvm->mm);
			/*
			 * We might have to create fake 4k page
			 * tables. To prevent the hardware from working on
			 * stale PGSTEs, we emulate these instructions.
			 */
			kvm->arch.use_skf = 0;
			kvm->arch.use_pfmfi = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_USER_STSI:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
		kvm->arch.user_stsi = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_INSTR0:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
		kvm->arch.user_instr0 = 1;
		icpt_operexc_on_all_vcpus(kvm);
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
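
/*
 * Illustrative userspace sketch (not part of the original file): VM
 * capabilities are switched on with KVM_ENABLE_CAP on the VM file
 * descriptor before any VCPU is created, since most cases above return
 * -EBUSY once kvm->created_vcpus is set:
 *
 *	struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_USER_SIGP };
 *
 *	ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
 */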

static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_LIMIT_SIZE:
		ret = 0;
		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
			 kvm->arch.mem_limit);
		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
			ret = -EFAULT;
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;
	unsigned int idx;
	switch (attr->attr) {
	case KVM_S390_VM_MEM_ENABLE_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus)
			ret = -EBUSY;
		else if (kvm->mm->context.allow_gmap_hpage_1m)
			ret = -EINVAL;
		else {
			kvm->arch.use_cmma = 1;
			/* Not compatible with cmma. */
			kvm->arch.use_pfmfi = 0;
			ret = 0;
		}
		mutex_unlock(&kvm->lock);
		break;
	case KVM_S390_VM_MEM_CLR_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;
		ret = -EINVAL;
		if (!kvm->arch.use_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
		mutex_lock(&kvm->lock);
		idx = srcu_read_lock(&kvm->srcu);
		s390_reset_cmma(kvm->arch.gmap->mm);
		srcu_read_unlock(&kvm->srcu, idx);
		mutex_unlock(&kvm->lock);
		ret = 0;
		break;
	case KVM_S390_VM_MEM_LIMIT_SIZE: {
		unsigned long new_limit;

		if (kvm_is_ucontrol(kvm))
			return -EINVAL;

		if (get_user(new_limit, (u64 __user *)attr->addr))
			return -EFAULT;

		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
		    new_limit > kvm->arch.mem_limit)
			return -E2BIG;

		if (!new_limit)
			return -EINVAL;

		/* gmap_create takes last usable address */
		if (new_limit != KVM_S390_NO_MEM_LIMIT)
			new_limit -= 1;

		ret = -EBUSY;
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			/* gmap_create will round the limit up */
			struct gmap *new = gmap_create(current->mm, new_limit);

			if (!new) {
				ret = -ENOMEM;
			} else {
				gmap_remove(kvm->arch.gmap);
				new->private = kvm;
				kvm->arch.gmap = new;
				ret = 0;
			}
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
			 (void *) kvm->arch.gmap->asce);
		break;
	}
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
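
/*
 * Illustrative userspace sketch (hypothetical values, not part of the
 * original file): the memory limit is set through the KVM_S390_VM_MEM_CTRL
 * attribute group before any VCPU exists:
 *
 *	__u64 limit = 1ULL << 31;	(2 GiB)
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_MEM_CTRL,
 *		.attr  = KVM_S390_VM_MEM_LIMIT_SIZE,
 *		.addr  = (__u64)&limit,
 *	};
 *
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */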

static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
{
	struct kvm_vcpu *vcpu;
	int i;

	kvm_s390_vcpu_block_all(kvm);

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_vcpu_crypto_setup(vcpu);
		/* recreate the shadow crycb by leaving the VSIE handler */
		kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
	}

	kvm_s390_vcpu_unblock_all(kvm);
}

static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		get_random_bytes(
			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		kvm->arch.crypto.aes_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		get_random_bytes(
			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		kvm->arch.crypto.dea_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		kvm->arch.crypto.aes_kw = 0;
		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		kvm->arch.crypto.dea_kw = 0;
		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_APIE:
		if (!ap_instructions_available()) {
			mutex_unlock(&kvm->lock);
			return -EOPNOTSUPP;
		}
		kvm->arch.crypto.apie = 1;
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_APIE:
		if (!ap_instructions_available()) {
			mutex_unlock(&kvm->lock);
			return -EOPNOTSUPP;
		}
		kvm->arch.crypto.apie = 0;
		break;
	default:
		mutex_unlock(&kvm->lock);
		return -ENXIO;
	}

	kvm_s390_vcpu_crypto_reset_all(kvm);
	mutex_unlock(&kvm->lock);
	return 0;
}
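
/*
 * Illustrative userspace sketch (not part of the original file): enabling
 * AES key wrapping generates a fresh wrapping key mask and kicks all VCPUs
 * so the shadow CRYCB is rebuilt:
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_CRYPTO,
 *		.attr  = KVM_S390_VM_CRYPTO_ENABLE_AES_KW,
 *	};
 *
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */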

static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
{
	int cx;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(cx, vcpu, kvm)
		kvm_s390_sync_request(req, vcpu);
}

/*
 * Must be called with kvm->srcu held to avoid races on memslots, and with
 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 */
static int kvm_s390_vm_start_migration(struct kvm *kvm)
{
	struct kvm_memory_slot *ms;
	struct kvm_memslots *slots;
	unsigned long ram_pages = 0;
	int slotnr;

	/* migration mode already enabled */
	if (kvm->arch.migration_mode)
		return 0;
	slots = kvm_memslots(kvm);
	if (!slots || !slots->used_slots)
		return -EINVAL;

	if (!kvm->arch.use_cmma) {
		kvm->arch.migration_mode = 1;
		return 0;
	}
	/* mark all the pages in active slots as dirty */
	for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
		ms = slots->memslots + slotnr;
		if (!ms->dirty_bitmap)
			return -EINVAL;
		/*
		 * The second half of the bitmap is only used on x86,
		 * and would be wasted otherwise, so we put it to good
		 * use here to keep track of the state of the storage
		 * attributes.
		 */
		memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
		ram_pages += ms->npages;
	}
	atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
	kvm->arch.migration_mode = 1;
	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
	return 0;
}

/*
 * Must be called with kvm->slots_lock to avoid races with ourselves and
 * kvm_s390_vm_start_migration.
 */
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
{
	/* migration mode already disabled */
	if (!kvm->arch.migration_mode)
		return 0;
	kvm->arch.migration_mode = 0;
	if (kvm->arch.use_cmma)
		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
	return 0;
}

static int kvm_s390_vm_set_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	int res = -ENXIO;

	mutex_lock(&kvm->slots_lock);
	switch (attr->attr) {
	case KVM_S390_VM_MIGRATION_START:
		res = kvm_s390_vm_start_migration(kvm);
		break;
	case KVM_S390_VM_MIGRATION_STOP:
		res = kvm_s390_vm_stop_migration(kvm);
		break;
	default:
		break;
	}
	mutex_unlock(&kvm->slots_lock);

	return res;
}

static int kvm_s390_vm_get_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	u64 mig = kvm->arch.migration_mode;

	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
		return -ENXIO;

	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
		return -EFAULT;
	return 0;
}
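
/*
 * Illustrative userspace sketch (hypothetical variable names, not part of
 * the original file): migration mode is queried through the
 * KVM_S390_VM_MIGRATION attribute group:
 *
 *	__u64 status;
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_MIGRATION,
 *		.attr  = KVM_S390_VM_MIGRATION_STATUS,
 *		.addr  = (__u64)&status,
 *	};
 *
 *	ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr);
 *	(status == 1 while migration mode is enabled)
 */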

static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
		return -EINVAL;
	kvm_s390_set_tod_clock(kvm, &gtod);

	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
		gtod.epoch_idx, gtod.tod);

	return 0;
}

static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high;

	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
					   sizeof(gtod_high)))
		return -EFAULT;

	if (gtod_high != 0)
		return -EINVAL;
	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod = { 0 };

	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
			   sizeof(gtod.tod)))
		return -EFAULT;

	kvm_s390_set_tod_clock(kvm, &gtod);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
	return 0;
}

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_set_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static void kvm_s390_get_tod_clock(struct kvm *kvm,
				   struct kvm_s390_vm_tod_clock *gtod)
{
	union tod_clock clk;

	preempt_disable();

	store_tod_clock_ext(&clk);

	gtod->tod = clk.tod + kvm->arch.epoch;
	gtod->epoch_idx = 0;
	if (test_kvm_facility(kvm, 139)) {
		gtod->epoch_idx = clk.ei + kvm->arch.epdx;
		if (gtod->tod < clk.tod)
			gtod->epoch_idx += 1;
	}

	preempt_enable();
}
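
/*
 * Worked example (illustrative, not from the original source): with
 * clk.tod = 0xfffffffffffffff0 and a guest epoch of 0x20, gtod->tod wraps
 * to 0x10. gtod->tod < clk.tod then signals the unsigned carry, so on
 * machines with the multiple-epoch facility (139) the epoch index is
 * incremented to keep the 128-bit guest TOD value consistent.
 */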

static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	memset(&gtod, 0, sizeof(gtod));
	kvm_s390_get_tod_clock(kvm, &gtod);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
		gtod.epoch_idx, gtod.tod);
	return 0;
}

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high = 0;

	if (copy_to_user((void __user *)attr->addr, &gtod_high,
					 sizeof(gtod_high)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	gtod = kvm_s390_get_tod_clock_fast(kvm);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

	return 0;
}

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_get_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	u16 lowest_ibc, unblocked_ibc;
	int ret = 0;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		ret = -EBUSY;
		goto out;
	}
	proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	if (!copy_from_user(proc, (void __user *)attr->addr,
			    sizeof(*proc))) {
		kvm->arch.model.cpuid = proc->cpuid;
		lowest_ibc = sclp.ibc >> 16 & 0xfff;
		unblocked_ibc = sclp.ibc & 0xfff;
		if (lowest_ibc && proc->ibc) {
			if (proc->ibc > unblocked_ibc)
				kvm->arch.model.ibc = unblocked_ibc;
			else if (proc->ibc < lowest_ibc)
				kvm->arch.model.ibc = lowest_ibc;
			else
				kvm->arch.model.ibc = proc->ibc;
		}
		memcpy(kvm->arch.model.fac_list, proc->fac_list,
		       S390_ARCH_FAC_LIST_SIZE_BYTE);
		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
			 kvm->arch.model.ibc,
			 kvm->arch.model.cpuid);
		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
			 kvm->arch.model.fac_list[0],
			 kvm->arch.model.fac_list[1],
			 kvm->arch.model.fac_list[2]);
	} else
		ret = -EFAULT;
	kfree(proc);
out:
	mutex_unlock(&kvm->lock);
	return ret;
}

static int kvm_s390_set_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
		return -EFAULT;
	if (!bitmap_subset((unsigned long *) data.feat,
			   kvm_s390_available_cpu_feat,
			   KVM_S390_VM_CPU_FEAT_NR_BITS))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		mutex_unlock(&kvm->lock);
		return -EBUSY;
	}
	bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	mutex_unlock(&kvm->lock);
	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
			 data.feat[0],
			 data.feat[1],
			 data.feat[2]);
	return 0;
}

static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		mutex_unlock(&kvm->lock);
		return -EBUSY;
	}

	if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
			   sizeof(struct kvm_s390_vm_cpu_subfunc))) {
		mutex_unlock(&kvm->lock);
		return -EFAULT;
	}
	mutex_unlock(&kvm->lock);

	VM_EVENT(kvm, 3, "SET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
	VM_EVENT(kvm, 3, "SET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
	VM_EVENT(kvm, 3, "SET: guest KM     subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
	VM_EVENT(kvm, 3, "SET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
	VM_EVENT(kvm, 3, "SET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
	VM_EVENT(kvm, 3, "SET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
	VM_EVENT(kvm, 3, "SET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
	VM_EVENT(kvm, 3, "SET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
	VM_EVENT(kvm, 3, "SET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
	VM_EVENT(kvm, 3, "SET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
	VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);

	return 0;
}

static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_set_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_set_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_set_processor_subfunc(kvm, attr);
		break;
	}
	return ret;
}

static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	int ret = 0;

	proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	proc->cpuid = kvm->arch.model.cpuid;
	proc->ibc = kvm->arch.model.ibc;
	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 kvm->arch.model.fac_list[0],
		 kvm->arch.model.fac_list[1],
		 kvm->arch.model.fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
		ret = -EFAULT;
	kfree(proc);
out:
	return ret;
}

static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_machine *mach;
	int ret = 0;

	mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT);
	if (!mach) {
		ret = -ENOMEM;
		goto out;
	}
	get_cpu_id((struct cpuid *) &mach->cpuid);
	mach->ibc = sclp.ibc;
	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
	       sizeof(S390_lowcore.stfle_fac_list));
	VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_mask[0],
		 mach->fac_mask[1],
		 mach->fac_mask[2]);
	VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_list[0],
		 mach->fac_list[1],
		 mach->fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
		ret = -EFAULT;
	kfree(mach);
out:
	return ret;
}

static int kvm_s390_get_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
			 data.feat[0],
			 data.feat[1],
			 data.feat[2]);
	return 0;
}

static int kvm_s390_get_machine_feat(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat,
		    kvm_s390_available_cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
			 data.feat[0],
			 data.feat[1],
			 data.feat[2]);
	return 0;
}

static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "GET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
	VM_EVENT(kvm, 3, "GET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
	VM_EVENT(kvm, 3, "GET: guest KM     subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
	VM_EVENT(kvm, 3, "GET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
	VM_EVENT(kvm, 3, "GET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
	VM_EVENT(kvm, 3, "GET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
	VM_EVENT(kvm, 3, "GET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
	VM_EVENT(kvm, 3, "GET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
	VM_EVENT(kvm, 3, "GET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
	VM_EVENT(kvm, 3, "GET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
	VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);

	return 0;
}

static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
					struct kvm_device_attr *attr)
{
	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "GET: host  PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
	VM_EVENT(kvm, 3, "GET: host  PTFF   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
	VM_EVENT(kvm, 3, "GET: host  KMAC   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
	VM_EVENT(kvm, 3, "GET: host  KMC    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
	VM_EVENT(kvm, 3, "GET: host  KM     subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
	VM_EVENT(kvm, 3, "GET: host  KIMD   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
	VM_EVENT(kvm, 3, "GET: host  KLMD   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
	VM_EVENT(kvm, 3, "GET: host  PCKMO  subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
	VM_EVENT(kvm, 3, "GET: host  KMCTR  subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
	VM_EVENT(kvm, 3, "GET: host  KMF    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
	VM_EVENT(kvm, 3, "GET: host  KMO    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
	VM_EVENT(kvm, 3, "GET: host  PCC    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
	VM_EVENT(kvm, 3, "GET: host  PPNO   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
	VM_EVENT(kvm, 3, "GET: host  KMA    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
	VM_EVENT(kvm, 3, "GET: host  KDSA   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
	VM_EVENT(kvm, 3, "GET: host  SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
	VM_EVENT(kvm, 3, "GET: host  DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);

	return 0;
}

static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_get_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE:
		ret = kvm_s390_get_machine(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_get_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_FEAT:
		ret = kvm_s390_get_machine_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_get_processor_subfunc(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
		ret = kvm_s390_get_machine_subfunc(kvm, attr);
		break;
	}
	return ret;
}
1667 
1668 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1669 {
1670 	int ret;
1671 
1672 	switch (attr->group) {
1673 	case KVM_S390_VM_MEM_CTRL:
1674 		ret = kvm_s390_set_mem_control(kvm, attr);
1675 		break;
1676 	case KVM_S390_VM_TOD:
1677 		ret = kvm_s390_set_tod(kvm, attr);
1678 		break;
1679 	case KVM_S390_VM_CPU_MODEL:
1680 		ret = kvm_s390_set_cpu_model(kvm, attr);
1681 		break;
1682 	case KVM_S390_VM_CRYPTO:
1683 		ret = kvm_s390_vm_set_crypto(kvm, attr);
1684 		break;
1685 	case KVM_S390_VM_MIGRATION:
1686 		ret = kvm_s390_vm_set_migration(kvm, attr);
1687 		break;
1688 	default:
1689 		ret = -ENXIO;
1690 		break;
1691 	}
1692 
1693 	return ret;
1694 }
1695 
1696 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1697 {
1698 	int ret;
1699 
1700 	switch (attr->group) {
1701 	case KVM_S390_VM_MEM_CTRL:
1702 		ret = kvm_s390_get_mem_control(kvm, attr);
1703 		break;
1704 	case KVM_S390_VM_TOD:
1705 		ret = kvm_s390_get_tod(kvm, attr);
1706 		break;
1707 	case KVM_S390_VM_CPU_MODEL:
1708 		ret = kvm_s390_get_cpu_model(kvm, attr);
1709 		break;
1710 	case KVM_S390_VM_MIGRATION:
1711 		ret = kvm_s390_vm_get_migration(kvm, attr);
1712 		break;
1713 	default:
1714 		ret = -ENXIO;
1715 		break;
1716 	}
1717 
1718 	return ret;
1719 }
1720 
1721 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1722 {
1723 	int ret;
1724 
1725 	switch (attr->group) {
1726 	case KVM_S390_VM_MEM_CTRL:
1727 		switch (attr->attr) {
1728 		case KVM_S390_VM_MEM_ENABLE_CMMA:
1729 		case KVM_S390_VM_MEM_CLR_CMMA:
1730 			ret = sclp.has_cmma ? 0 : -ENXIO;
1731 			break;
1732 		case KVM_S390_VM_MEM_LIMIT_SIZE:
1733 			ret = 0;
1734 			break;
1735 		default:
1736 			ret = -ENXIO;
1737 			break;
1738 		}
1739 		break;
1740 	case KVM_S390_VM_TOD:
1741 		switch (attr->attr) {
1742 		case KVM_S390_VM_TOD_LOW:
1743 		case KVM_S390_VM_TOD_HIGH:
1744 			ret = 0;
1745 			break;
1746 		default:
1747 			ret = -ENXIO;
1748 			break;
1749 		}
1750 		break;
1751 	case KVM_S390_VM_CPU_MODEL:
1752 		switch (attr->attr) {
1753 		case KVM_S390_VM_CPU_PROCESSOR:
1754 		case KVM_S390_VM_CPU_MACHINE:
1755 		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1756 		case KVM_S390_VM_CPU_MACHINE_FEAT:
1757 		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1758 		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1759 			ret = 0;
1760 			break;
1761 		default:
1762 			ret = -ENXIO;
1763 			break;
1764 		}
1765 		break;
1766 	case KVM_S390_VM_CRYPTO:
1767 		switch (attr->attr) {
1768 		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1769 		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1770 		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1771 		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1772 			ret = 0;
1773 			break;
1774 		case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1775 		case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1776 			ret = ap_instructions_available() ? 0 : -ENXIO;
1777 			break;
1778 		default:
1779 			ret = -ENXIO;
1780 			break;
1781 		}
1782 		break;
1783 	case KVM_S390_VM_MIGRATION:
1784 		ret = 0;
1785 		break;
1786 	default:
1787 		ret = -ENXIO;
1788 		break;
1789 	}
1790 
1791 	return ret;
1792 }
1793 
1794 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1795 {
1796 	uint8_t *keys;
1797 	uint64_t hva;
1798 	int srcu_idx, i, r = 0;
1799 
1800 	if (args->flags != 0)
1801 		return -EINVAL;
1802 
1803 	/* Is this guest using storage keys? */
1804 	if (!mm_uses_skeys(current->mm))
1805 		return KVM_S390_GET_SKEYS_NONE;
1806 
1807 	/* Enforce sane limit on memory allocation */
1808 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1809 		return -EINVAL;
1810 
1811 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
1812 	if (!keys)
1813 		return -ENOMEM;
1814 
1815 	mmap_read_lock(current->mm);
1816 	srcu_idx = srcu_read_lock(&kvm->srcu);
1817 	for (i = 0; i < args->count; i++) {
1818 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1819 		if (kvm_is_error_hva(hva)) {
1820 			r = -EFAULT;
1821 			break;
1822 		}
1823 
1824 		r = get_guest_storage_key(current->mm, hva, &keys[i]);
1825 		if (r)
1826 			break;
1827 	}
1828 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1829 	mmap_read_unlock(current->mm);
1830 
1831 	if (!r) {
1832 		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1833 				 sizeof(uint8_t) * args->count);
1834 		if (r)
1835 			r = -EFAULT;
1836 	}
1837 
1838 	kvfree(keys);
1839 	return r;
1840 }
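/*
 * Editorial sketch (not part of this file): reading storage keys from
 * userspace via the KVM_S390_GET_SKEYS vm ioctl that ends up here. The
 * buffer size and the vm_fd name are illustrative assumptions.
 *
 *	uint8_t skeys[128];
 *	struct kvm_s390_skeys args = {
 *		.start_gfn = 0,
 *		.count = sizeof(skeys),
 *		.skeydata_addr = (__u64)(unsigned long)skeys,
 *	};
 *	int r = ioctl(vm_fd, KVM_S390_GET_SKEYS, &args);
 *
 * A return value of KVM_S390_GET_SKEYS_NONE (rather than 0) indicates
 * that the guest is not using storage keys at all.
 */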
1841 
1842 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1843 {
1844 	uint8_t *keys;
1845 	uint64_t hva;
1846 	int srcu_idx, i, r = 0;
1847 	bool unlocked;
1848 
1849 	if (args->flags != 0)
1850 		return -EINVAL;
1851 
1852 	/* Enforce sane limit on memory allocation */
1853 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1854 		return -EINVAL;
1855 
1856 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
1857 	if (!keys)
1858 		return -ENOMEM;
1859 
1860 	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1861 			   sizeof(uint8_t) * args->count);
1862 	if (r) {
1863 		r = -EFAULT;
1864 		goto out;
1865 	}
1866 
1867 	/* Enable storage key handling for the guest */
1868 	r = s390_enable_skey();
1869 	if (r)
1870 		goto out;
1871 
1872 	i = 0;
1873 	mmap_read_lock(current->mm);
1874 	srcu_idx = srcu_read_lock(&kvm->srcu);
1875 	while (i < args->count) {
1876 		unlocked = false;
1877 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1878 		if (kvm_is_error_hva(hva)) {
1879 			r = -EFAULT;
1880 			break;
1881 		}
1882 
1883 		/* Lowest order bit is reserved */
1884 		if (keys[i] & 0x01) {
1885 			r = -EINVAL;
1886 			break;
1887 		}
1888 
1889 		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1890 		if (r) {
1891 			r = fixup_user_fault(current->mm, hva,
1892 					     FAULT_FLAG_WRITE, &unlocked);
1893 			if (r)
1894 				break;
1895 		}
1896 		if (!r)
1897 			i++;
1898 	}
1899 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1900 	mmap_read_unlock(current->mm);
1901 out:
1902 	kvfree(keys);
1903 	return r;
1904 }
1905 
1906 /*
1907  * Base address and length must be sent at the start of each block, therefore
1908  * it's cheaper to send some clean data, as long as it's less than the size of
1909  * two longs.
1910  */
1911 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1912 /* for consistency */
1913 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
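/*
 * Editorial note: with 8-byte longs the distance above works out to 16,
 * i.e. up to 16 clean one-byte values may be sent inside a block before
 * starting a new block (which costs a two-long, 16-byte header) becomes
 * the cheaper option.
 */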
1914 
1915 /*
1916  * Similar to gfn_to_memslot, but returns the index of a memslot also when the
1917  * address falls in a hole. In that case the index of one of the memslots
1918  * bordering the hole is returned.
 * Note: this relies on the memslots array being sorted by base_gfn in
 * descending order, which is why the binary search below moves 'end'
 * down when gfn >= base_gfn.
1919  */
1920 static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1921 {
1922 	int start = 0, end = slots->used_slots;
1923 	int slot = atomic_read(&slots->lru_slot);
1924 	struct kvm_memory_slot *memslots = slots->memslots;
1925 
1926 	if (gfn >= memslots[slot].base_gfn &&
1927 	    gfn < memslots[slot].base_gfn + memslots[slot].npages)
1928 		return slot;
1929 
1930 	while (start < end) {
1931 		slot = start + (end - start) / 2;
1932 
1933 		if (gfn >= memslots[slot].base_gfn)
1934 			end = slot;
1935 		else
1936 			start = slot + 1;
1937 	}
1938 
1939 	if (start >= slots->used_slots)
1940 		return slots->used_slots - 1;
1941 
1942 	if (gfn >= memslots[start].base_gfn &&
1943 	    gfn < memslots[start].base_gfn + memslots[start].npages) {
1944 		atomic_set(&slots->lru_slot, start);
1945 	}
1946 
1947 	return start;
1948 }
1949 
1950 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1951 			      u8 *res, unsigned long bufsize)
1952 {
1953 	unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1954 
1955 	args->count = 0;
1956 	while (args->count < bufsize) {
1957 		hva = gfn_to_hva(kvm, cur_gfn);
1958 		/*
1959 		 * We return an error if the first value was invalid, but we
1960 		 * return successfully if at least one value was copied.
1961 		 */
1962 		if (kvm_is_error_hva(hva))
1963 			return args->count ? 0 : -EFAULT;
1964 		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1965 			pgstev = 0;
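		/*
		 * Keep the page usage state (bits 0x3) and the NODAT bit
		 * (0x40), shifted down from their PGSTE positions.
		 */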
1966 		res[args->count++] = (pgstev >> 24) & 0x43;
1967 		cur_gfn++;
1968 	}
1969 
1970 	return 0;
1971 }
1972 
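/*
 * Return the guest frame number of the next page, at or after cur_gfn,
 * whose bit is set in the CMMA dirty bitmap. The memslots are walked in
 * ascending gfn order, wrapping around at the top of guest memory.
 */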
1973 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
1974 					      unsigned long cur_gfn)
1975 {
1976 	int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
1977 	struct kvm_memory_slot *ms = slots->memslots + slotidx;
1978 	unsigned long ofs = cur_gfn - ms->base_gfn;
1979 
1980 	if (ms->base_gfn + ms->npages <= cur_gfn) {
1981 		slotidx--;
1982 		/* If we are above the highest slot, wrap around */
1983 		if (slotidx < 0)
1984 			slotidx = slots->used_slots - 1;
1985 
1986 		ms = slots->memslots + slotidx;
1987 		ofs = 0;
1988 	}
1989 	ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
1990 	while ((slotidx > 0) && (ofs >= ms->npages)) {
1991 		slotidx--;
1992 		ms = slots->memslots + slotidx;
1993 		ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
1994 	}
1995 	return ms->base_gfn + ofs;
1996 }
1997 
1998 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1999 			     u8 *res, unsigned long bufsize)
2000 {
2001 	unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
2002 	struct kvm_memslots *slots = kvm_memslots(kvm);
2003 	struct kvm_memory_slot *ms;
2004 
2005 	if (unlikely(!slots->used_slots))
2006 		return 0;
2007 
2008 	cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
2009 	ms = gfn_to_memslot(kvm, cur_gfn);
2010 	args->count = 0;
2011 	args->start_gfn = cur_gfn;
2012 	if (!ms)
2013 		return 0;
2014 	next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2015 	mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
2016 
2017 	while (args->count < bufsize) {
2018 		hva = gfn_to_hva(kvm, cur_gfn);
2019 		if (kvm_is_error_hva(hva))
2020 			return 0;
2021 		/* Decrement only if we actually flipped the bit to 0 */
2022 		if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2023 			atomic64_dec(&kvm->arch.cmma_dirty_pages);
2024 		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2025 			pgstev = 0;
2026 		/* Save the value */
2027 		res[args->count++] = (pgstev >> 24) & 0x43;
2028 		/* If the next bit is too far away, stop. */
2029 		if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2030 			return 0;
2031 		/* If we reached the previous "next", find the next one */
2032 		if (cur_gfn == next_gfn)
2033 			next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2034 		/* Reached the end of memory or of the buffer, stop */
2035 		if ((next_gfn >= mem_end) ||
2036 		    (next_gfn - args->start_gfn >= bufsize))
2037 			return 0;
2038 		cur_gfn++;
2039 		/* Reached the end of the current memslot, take the next one. */
2040 		if (cur_gfn - ms->base_gfn >= ms->npages) {
2041 			ms = gfn_to_memslot(kvm, cur_gfn);
2042 			if (!ms)
2043 				return 0;
2044 		}
2045 	}
2046 	return 0;
2047 }
2048 
2049 /*
2050  * This function searches for the next page with dirty CMMA attributes, and
2051  * saves the attributes in the buffer up to either the end of the buffer or
2052  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2053  * no trailing clean bytes are saved.
2054  * In case no dirty bits were found, or if CMMA was not enabled or used, the
2055  * output buffer will indicate 0 as length.
2056  */
2057 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2058 				  struct kvm_s390_cmma_log *args)
2059 {
2060 	unsigned long bufsize;
2061 	int srcu_idx, peek, ret;
2062 	u8 *values;
2063 
2064 	if (!kvm->arch.use_cmma)
2065 		return -ENXIO;
2066 	/* Invalid/unsupported flags were specified */
2067 	if (args->flags & ~KVM_S390_CMMA_PEEK)
2068 		return -EINVAL;
2069 	/* Migration mode query, and we are not doing a migration */
2070 	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2071 	if (!peek && !kvm->arch.migration_mode)
2072 		return -EINVAL;
2073 	/* CMMA is disabled or was not used, or the buffer has length zero */
2074 	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2075 	if (!bufsize || !kvm->mm->context.uses_cmm) {
2076 		memset(args, 0, sizeof(*args));
2077 		return 0;
2078 	}
2079 	/* We are not peeking, and there are no dirty pages */
2080 	if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2081 		memset(args, 0, sizeof(*args));
2082 		return 0;
2083 	}
2084 
2085 	values = vmalloc(bufsize);
2086 	if (!values)
2087 		return -ENOMEM;
2088 
2089 	mmap_read_lock(kvm->mm);
2090 	srcu_idx = srcu_read_lock(&kvm->srcu);
2091 	if (peek)
2092 		ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2093 	else
2094 		ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2095 	srcu_read_unlock(&kvm->srcu, srcu_idx);
2096 	mmap_read_unlock(kvm->mm);
2097 
2098 	if (kvm->arch.migration_mode)
2099 		args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2100 	else
2101 		args->remaining = 0;
2102 
2103 	if (copy_to_user((void __user *)args->values, values, args->count))
2104 		ret = -EFAULT;
2105 
2106 	vfree(values);
2107 	return ret;
2108 }
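/*
 * Editorial sketch (not part of this file): peeking at CMMA values from
 * userspace through the KVM_S390_GET_CMMA_BITS vm ioctl. The buffer
 * size and the vm_fd name are illustrative assumptions.
 *
 *	uint8_t buf[4096];
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = 0,
 *		.count = sizeof(buf),
 *		.flags = KVM_S390_CMMA_PEEK,
 *		.values = (__u64)(unsigned long)buf,
 *	};
 *	int r = ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log);
 *
 * On success, log.count values have been stored for the pages starting
 * at log.start_gfn.
 */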
2109 
2110 /*
2111  * This function sets the CMMA attributes for the given pages. If the input
2112  * buffer has zero length, no action is taken, otherwise the attributes are
2113  * set and the mm->context.uses_cmm flag is set.
2114  */
2115 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2116 				  const struct kvm_s390_cmma_log *args)
2117 {
2118 	unsigned long hva, mask, pgstev, i;
2119 	uint8_t *bits;
2120 	int srcu_idx, r = 0;
2121 
2122 	mask = args->mask;
2123 
2124 	if (!kvm->arch.use_cmma)
2125 		return -ENXIO;
2126 	/* invalid/unsupported flags */
2127 	if (args->flags != 0)
2128 		return -EINVAL;
2129 	/* Enforce sane limit on memory allocation */
2130 	if (args->count > KVM_S390_CMMA_SIZE_MAX)
2131 		return -EINVAL;
2132 	/* Nothing to do */
2133 	if (args->count == 0)
2134 		return 0;
2135 
2136 	bits = vmalloc(array_size(sizeof(*bits), args->count));
2137 	if (!bits)
2138 		return -ENOMEM;
2139 
2140 	r = copy_from_user(bits, (void __user *)args->values, args->count);
2141 	if (r) {
2142 		r = -EFAULT;
2143 		goto out;
2144 	}
2145 
2146 	mmap_read_lock(kvm->mm);
2147 	srcu_idx = srcu_read_lock(&kvm->srcu);
2148 	for (i = 0; i < args->count; i++) {
2149 		hva = gfn_to_hva(kvm, args->start_gfn + i);
2150 		if (kvm_is_error_hva(hva)) {
2151 			r = -EFAULT;
2152 			break;
2153 		}
2154 
2155 		pgstev = bits[i];
2156 		pgstev = pgstev << 24;
2157 		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2158 		set_pgste_bits(kvm->mm, hva, mask, pgstev);
2159 	}
2160 	srcu_read_unlock(&kvm->srcu, srcu_idx);
2161 	mmap_read_unlock(kvm->mm);
2162 
2163 	if (!kvm->mm->context.uses_cmm) {
2164 		mmap_write_lock(kvm->mm);
2165 		kvm->mm->context.uses_cmm = 1;
2166 		mmap_write_unlock(kvm->mm);
2167 	}
2168 out:
2169 	vfree(bits);
2170 	return r;
2171 }
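/*
 * Editorial sketch (not part of this file): the inverse operation,
 * restoring previously saved values with KVM_S390_SET_CMMA_BITS. Note
 * that log.mask selects the pgste bits to change and that only the
 * usage state and NODAT bits are honoured above.
 *
 *	log.mask = ~0ULL;
 *	log.values = (__u64)(unsigned long)buf;
 *	int r = ioctl(vm_fd, KVM_S390_SET_CMMA_BITS, &log);
 */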
2172 
2173 static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
2174 {
2175 	struct kvm_vcpu *vcpu;
2176 	u16 rc, rrc;
2177 	int ret = 0;
2178 	int i;
2179 
2180 	/*
2181 	 * We ignore failures and try to destroy as many CPUs as possible.
2182 	 * At the same time we must not free the assigned resources when
2183 	 * this fails, as the ultravisor still has access to that memory.
2184 	 * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
2185 	 * behind.
2186 	 * We want to return the first failure rc and rrc, though.
2187 	 */
2188 	kvm_for_each_vcpu(i, vcpu, kvm) {
2189 		mutex_lock(&vcpu->mutex);
2190 		if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) {
2191 			*rcp = rc;
2192 			*rrcp = rrc;
2193 			ret = -EIO;
2194 		}
2195 		mutex_unlock(&vcpu->mutex);
2196 	}
2197 	return ret;
2198 }
2199 
2200 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2201 {
2202 	int i, r = 0;
2203 	u16 dummy;
2204 
2205 	struct kvm_vcpu *vcpu;
2206 
2207 	kvm_for_each_vcpu(i, vcpu, kvm) {
2208 		mutex_lock(&vcpu->mutex);
2209 		r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
2210 		mutex_unlock(&vcpu->mutex);
2211 		if (r)
2212 			break;
2213 	}
2214 	if (r)
2215 		kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
2216 	return r;
2217 }
2218 
2219 static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
2220 {
2221 	int r = 0;
2222 	u16 dummy;
2223 	void __user *argp = (void __user *)cmd->data;
2224 
2225 	switch (cmd->cmd) {
2226 	case KVM_PV_ENABLE: {
2227 		r = -EINVAL;
2228 		if (kvm_s390_pv_is_protected(kvm))
2229 			break;
2230 
2231 		/*
2232 		 *  FMT 4 SIE needs esca. As we never switch back to bsca from
2233 		 *  esca, we need no cleanup in the error cases below
2234 		 */
2235 		r = sca_switch_to_extended(kvm);
2236 		if (r)
2237 			break;
2238 
2239 		mmap_write_lock(current->mm);
2240 		r = gmap_mark_unmergeable();
2241 		mmap_write_unlock(current->mm);
2242 		if (r)
2243 			break;
2244 
2245 		r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
2246 		if (r)
2247 			break;
2248 
2249 		r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
2250 		if (r)
2251 			kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
2252 
2253 		/* we need to block service interrupts from now on */
2254 		set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2255 		break;
2256 	}
2257 	case KVM_PV_DISABLE: {
2258 		r = -EINVAL;
2259 		if (!kvm_s390_pv_is_protected(kvm))
2260 			break;
2261 
2262 		r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2263 		/*
2264 		 * If a CPU could not be destroyed, destroying the VM will also fail.
2265 		 * There is no point in trying to destroy it. Instead return
2266 		 * the rc and rrc from the first CPU that failed destroying.
2267 		 */
2268 		if (r)
2269 			break;
2270 		r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);
2271 
2272 		/* no need to block service interrupts any more */
2273 		clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2274 		break;
2275 	}
2276 	case KVM_PV_SET_SEC_PARMS: {
2277 		struct kvm_s390_pv_sec_parm parms = {};
2278 		void *hdr;
2279 
2280 		r = -EINVAL;
2281 		if (!kvm_s390_pv_is_protected(kvm))
2282 			break;
2283 
2284 		r = -EFAULT;
2285 		if (copy_from_user(&parms, argp, sizeof(parms)))
2286 			break;
2287 
2288 		/* Currently restricted to 8KB */
2289 		r = -EINVAL;
2290 		if (parms.length > PAGE_SIZE * 2)
2291 			break;
2292 
2293 		r = -ENOMEM;
2294 		hdr = vmalloc(parms.length);
2295 		if (!hdr)
2296 			break;
2297 
2298 		r = -EFAULT;
2299 		if (!copy_from_user(hdr, (void __user *)parms.origin,
2300 				    parms.length))
2301 			r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
2302 						      &cmd->rc, &cmd->rrc);
2303 
2304 		vfree(hdr);
2305 		break;
2306 	}
2307 	case KVM_PV_UNPACK: {
2308 		struct kvm_s390_pv_unp unp = {};
2309 
2310 		r = -EINVAL;
2311 		if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm))
2312 			break;
2313 
2314 		r = -EFAULT;
2315 		if (copy_from_user(&unp, argp, sizeof(unp)))
2316 			break;
2317 
2318 		r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
2319 				       &cmd->rc, &cmd->rrc);
2320 		break;
2321 	}
2322 	case KVM_PV_VERIFY: {
2323 		r = -EINVAL;
2324 		if (!kvm_s390_pv_is_protected(kvm))
2325 			break;
2326 
2327 		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2328 				  UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
2329 		KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
2330 			     cmd->rrc);
2331 		break;
2332 	}
2333 	case KVM_PV_PREP_RESET: {
2334 		r = -EINVAL;
2335 		if (!kvm_s390_pv_is_protected(kvm))
2336 			break;
2337 
2338 		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2339 				  UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
2340 		KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
2341 			     cmd->rc, cmd->rrc);
2342 		break;
2343 	}
2344 	case KVM_PV_UNSHARE_ALL: {
2345 		r = -EINVAL;
2346 		if (!kvm_s390_pv_is_protected(kvm))
2347 			break;
2348 
2349 		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2350 				  UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
2351 		KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
2352 			     cmd->rc, cmd->rrc);
2353 		break;
2354 	}
2355 	default:
2356 		r = -ENOTTY;
2357 	}
2358 	return r;
2359 }
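/*
 * Editorial sketch (not part of this file): switching a VM to protected
 * mode from userspace with the ioctl dispatched above. The vm_fd name
 * is an illustrative assumption.
 *
 *	struct kvm_pv_cmd cmd = { .cmd = KVM_PV_ENABLE };
 *	if (ioctl(vm_fd, KVM_S390_PV_COMMAND, &cmd))
 *		then cmd.rc and cmd.rrc hold the ultravisor return codes
 */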
2360 
2361 long kvm_arch_vm_ioctl(struct file *filp,
2362 		       unsigned int ioctl, unsigned long arg)
2363 {
2364 	struct kvm *kvm = filp->private_data;
2365 	void __user *argp = (void __user *)arg;
2366 	struct kvm_device_attr attr;
2367 	int r;
2368 
2369 	switch (ioctl) {
2370 	case KVM_S390_INTERRUPT: {
2371 		struct kvm_s390_interrupt s390int;
2372 
2373 		r = -EFAULT;
2374 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
2375 			break;
2376 		r = kvm_s390_inject_vm(kvm, &s390int);
2377 		break;
2378 	}
2379 	case KVM_CREATE_IRQCHIP: {
2380 		struct kvm_irq_routing_entry routing;
2381 
2382 		r = -EINVAL;
2383 		if (kvm->arch.use_irqchip) {
2384 			/* Set up dummy routing. */
2385 			memset(&routing, 0, sizeof(routing));
2386 			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2387 		}
2388 		break;
2389 	}
2390 	case KVM_SET_DEVICE_ATTR: {
2391 		r = -EFAULT;
2392 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2393 			break;
2394 		r = kvm_s390_vm_set_attr(kvm, &attr);
2395 		break;
2396 	}
2397 	case KVM_GET_DEVICE_ATTR: {
2398 		r = -EFAULT;
2399 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2400 			break;
2401 		r = kvm_s390_vm_get_attr(kvm, &attr);
2402 		break;
2403 	}
2404 	case KVM_HAS_DEVICE_ATTR: {
2405 		r = -EFAULT;
2406 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2407 			break;
2408 		r = kvm_s390_vm_has_attr(kvm, &attr);
2409 		break;
2410 	}
2411 	case KVM_S390_GET_SKEYS: {
2412 		struct kvm_s390_skeys args;
2413 
2414 		r = -EFAULT;
2415 		if (copy_from_user(&args, argp,
2416 				   sizeof(struct kvm_s390_skeys)))
2417 			break;
2418 		r = kvm_s390_get_skeys(kvm, &args);
2419 		break;
2420 	}
2421 	case KVM_S390_SET_SKEYS: {
2422 		struct kvm_s390_skeys args;
2423 
2424 		r = -EFAULT;
2425 		if (copy_from_user(&args, argp,
2426 				   sizeof(struct kvm_s390_skeys)))
2427 			break;
2428 		r = kvm_s390_set_skeys(kvm, &args);
2429 		break;
2430 	}
2431 	case KVM_S390_GET_CMMA_BITS: {
2432 		struct kvm_s390_cmma_log args;
2433 
2434 		r = -EFAULT;
2435 		if (copy_from_user(&args, argp, sizeof(args)))
2436 			break;
2437 		mutex_lock(&kvm->slots_lock);
2438 		r = kvm_s390_get_cmma_bits(kvm, &args);
2439 		mutex_unlock(&kvm->slots_lock);
2440 		if (!r) {
2441 			r = copy_to_user(argp, &args, sizeof(args));
2442 			if (r)
2443 				r = -EFAULT;
2444 		}
2445 		break;
2446 	}
2447 	case KVM_S390_SET_CMMA_BITS: {
2448 		struct kvm_s390_cmma_log args;
2449 
2450 		r = -EFAULT;
2451 		if (copy_from_user(&args, argp, sizeof(args)))
2452 			break;
2453 		mutex_lock(&kvm->slots_lock);
2454 		r = kvm_s390_set_cmma_bits(kvm, &args);
2455 		mutex_unlock(&kvm->slots_lock);
2456 		break;
2457 	}
2458 	case KVM_S390_PV_COMMAND: {
2459 		struct kvm_pv_cmd args;
2460 
2461 		/* protvirt means user sigp */
2462 		kvm->arch.user_cpu_state_ctrl = 1;
2463 		r = 0;
2464 		if (!is_prot_virt_host()) {
2465 			r = -EINVAL;
2466 			break;
2467 		}
2468 		if (copy_from_user(&args, argp, sizeof(args))) {
2469 			r = -EFAULT;
2470 			break;
2471 		}
2472 		if (args.flags) {
2473 			r = -EINVAL;
2474 			break;
2475 		}
2476 		mutex_lock(&kvm->lock);
2477 		r = kvm_s390_handle_pv(kvm, &args);
2478 		mutex_unlock(&kvm->lock);
2479 		if (copy_to_user(argp, &args, sizeof(args))) {
2480 			r = -EFAULT;
2481 			break;
2482 		}
2483 		break;
2484 	}
2485 	default:
2486 		r = -ENOTTY;
2487 	}
2488 
2489 	return r;
2490 }
2491 
2492 static int kvm_s390_apxa_installed(void)
2493 {
2494 	struct ap_config_info info;
2495 
2496 	if (ap_instructions_available()) {
2497 		if (ap_qci(&info) == 0)
2498 			return info.apxa;
2499 	}
2500 
2501 	return 0;
2502 }
2503 
2504 /*
2505  * The format of the crypto control block (CRYCB) is specified in the 3 low
2506  * order bits of the CRYCB designation (CRYCBD) field as follows:
2507  * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2508  *	     AP extended addressing (APXA) facility are installed.
2509  * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2510  * Format 2: Both the APXA and MSAX3 facilities are installed.
2511  */
2512 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2513 {
2514 	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2515 
2516 	/* Clear the CRYCB format bits - i.e., set format 0 by default */
2517 	kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2518 
2519 	/* Check whether MSAX3 is installed */
2520 	if (!test_kvm_facility(kvm, 76))
2521 		return;
2522 
2523 	if (kvm_s390_apxa_installed())
2524 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2525 	else
2526 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2527 }
2528 
2529 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2530 			       unsigned long *aqm, unsigned long *adm)
2531 {
2532 	struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2533 
2534 	mutex_lock(&kvm->lock);
2535 	kvm_s390_vcpu_block_all(kvm);
2536 
2537 	switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2538 	case CRYCB_FORMAT2: /* APCB1 uses 256 bits */
2539 		memcpy(crycb->apcb1.apm, apm, 32);
2540 		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2541 			 apm[0], apm[1], apm[2], apm[3]);
2542 		memcpy(crycb->apcb1.aqm, aqm, 32);
2543 		VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2544 			 aqm[0], aqm[1], aqm[2], aqm[3]);
2545 		memcpy(crycb->apcb1.adm, adm, 32);
2546 		VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2547 			 adm[0], adm[1], adm[2], adm[3]);
2548 		break;
2549 	case CRYCB_FORMAT1:
2550 	case CRYCB_FORMAT0: /* Fall through - both use APCB0 */
2551 		memcpy(crycb->apcb0.apm, apm, 8);
2552 		memcpy(crycb->apcb0.aqm, aqm, 2);
2553 		memcpy(crycb->apcb0.adm, adm, 2);
2554 		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2555 			 apm[0], *((unsigned short *)aqm),
2556 			 *((unsigned short *)adm));
2557 		break;
2558 	default:	/* Cannot happen */
2559 		break;
2560 	}
2561 
2562 	/* recreate the shadow crycb for each vcpu */
2563 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2564 	kvm_s390_vcpu_unblock_all(kvm);
2565 	mutex_unlock(&kvm->lock);
2566 }
2567 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
2568 
2569 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2570 {
2571 	mutex_lock(&kvm->lock);
2572 	kvm_s390_vcpu_block_all(kvm);
2573 
2574 	memset(&kvm->arch.crypto.crycb->apcb0, 0,
2575 	       sizeof(kvm->arch.crypto.crycb->apcb0));
2576 	memset(&kvm->arch.crypto.crycb->apcb1, 0,
2577 	       sizeof(kvm->arch.crypto.crycb->apcb1));
2578 
2579 	VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2580 	/* recreate the shadow crycb for each vcpu */
2581 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2582 	kvm_s390_vcpu_unblock_all(kvm);
2583 	mutex_unlock(&kvm->lock);
2584 }
2585 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2586 
2587 static u64 kvm_s390_get_initial_cpuid(void)
2588 {
2589 	struct cpuid cpuid;
2590 
2591 	get_cpu_id(&cpuid);
2592 	cpuid.version = 0xff;
2593 	return *((u64 *) &cpuid);
2594 }
2595 
2596 static void kvm_s390_crypto_init(struct kvm *kvm)
2597 {
2598 	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2599 	kvm_s390_set_crycb_format(kvm);
2600 
2601 	if (!test_kvm_facility(kvm, 76))
2602 		return;
2603 
2604 	/* Enable AES/DEA protected key functions by default */
2605 	kvm->arch.crypto.aes_kw = 1;
2606 	kvm->arch.crypto.dea_kw = 1;
2607 	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2608 			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2609 	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2610 			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2611 }
2612 
2613 static void sca_dispose(struct kvm *kvm)
2614 {
2615 	if (kvm->arch.use_esca)
2616 		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2617 	else
2618 		free_page((unsigned long)(kvm->arch.sca));
2619 	kvm->arch.sca = NULL;
2620 }
2621 
2622 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2623 {
2624 	gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
2625 	int i, rc;
2626 	char debug_name[16];
2627 	static unsigned long sca_offset;
2628 
2629 	rc = -EINVAL;
2630 #ifdef CONFIG_KVM_S390_UCONTROL
2631 	if (type & ~KVM_VM_S390_UCONTROL)
2632 		goto out_err;
2633 	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2634 		goto out_err;
2635 #else
2636 	if (type)
2637 		goto out_err;
2638 #endif
2639 
2640 	rc = s390_enable_sie();
2641 	if (rc)
2642 		goto out_err;
2643 
2644 	rc = -ENOMEM;
2645 
2646 	if (!sclp.has_64bscao)
2647 		alloc_flags |= GFP_DMA;
2648 	rwlock_init(&kvm->arch.sca_lock);
2649 	/* start with basic SCA */
2650 	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2651 	if (!kvm->arch.sca)
2652 		goto out_err;
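	/*
	 * Stagger each basic SCA inside its page; a bsca_block does not
	 * fill a whole page, so consecutive VMs end up using different
	 * offsets.
	 */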
2653 	mutex_lock(&kvm_lock);
2654 	sca_offset += 16;
2655 	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2656 		sca_offset = 0;
2657 	kvm->arch.sca = (struct bsca_block *)
2658 			((char *) kvm->arch.sca + sca_offset);
2659 	mutex_unlock(&kvm_lock);
2660 
2661 	sprintf(debug_name, "kvm-%u", current->pid);
2662 
2663 	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2664 	if (!kvm->arch.dbf)
2665 		goto out_err;
2666 
2667 	BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2668 	kvm->arch.sie_page2 =
2669 	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA);
2670 	if (!kvm->arch.sie_page2)
2671 		goto out_err;
2672 
2673 	kvm->arch.sie_page2->kvm = kvm;
2674 	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2675 
2676 	for (i = 0; i < kvm_s390_fac_size(); i++) {
2677 		kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
2678 					      (kvm_s390_fac_base[i] |
2679 					       kvm_s390_fac_ext[i]);
2680 		kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
2681 					      kvm_s390_fac_base[i];
2682 	}
2683 	kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2684 
2685 	/* we are always in czam mode - even on pre z14 machines */
2686 	set_kvm_facility(kvm->arch.model.fac_mask, 138);
2687 	set_kvm_facility(kvm->arch.model.fac_list, 138);
2688 	/* we emulate STHYI in kvm */
2689 	set_kvm_facility(kvm->arch.model.fac_mask, 74);
2690 	set_kvm_facility(kvm->arch.model.fac_list, 74);
2691 	if (MACHINE_HAS_TLB_GUEST) {
2692 		set_kvm_facility(kvm->arch.model.fac_mask, 147);
2693 		set_kvm_facility(kvm->arch.model.fac_list, 147);
2694 	}
2695 
2696 	if (css_general_characteristics.aiv && test_facility(65))
2697 		set_kvm_facility(kvm->arch.model.fac_mask, 65);
2698 
2699 	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2700 	kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2701 
2702 	kvm_s390_crypto_init(kvm);
2703 
2704 	mutex_init(&kvm->arch.float_int.ais_lock);
2705 	spin_lock_init(&kvm->arch.float_int.lock);
2706 	for (i = 0; i < FIRQ_LIST_COUNT; i++)
2707 		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2708 	init_waitqueue_head(&kvm->arch.ipte_wq);
2709 	mutex_init(&kvm->arch.ipte_mutex);
2710 
2711 	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2712 	VM_EVENT(kvm, 3, "vm created with type %lu", type);
2713 
2714 	if (type & KVM_VM_S390_UCONTROL) {
2715 		kvm->arch.gmap = NULL;
2716 		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2717 	} else {
2718 		if (sclp.hamax == U64_MAX)
2719 			kvm->arch.mem_limit = TASK_SIZE_MAX;
2720 		else
2721 			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2722 						    sclp.hamax + 1);
2723 		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2724 		if (!kvm->arch.gmap)
2725 			goto out_err;
2726 		kvm->arch.gmap->private = kvm;
2727 		kvm->arch.gmap->pfault_enabled = 0;
2728 	}
2729 
2730 	kvm->arch.use_pfmfi = sclp.has_pfmfi;
2731 	kvm->arch.use_skf = sclp.has_skey;
2732 	spin_lock_init(&kvm->arch.start_stop_lock);
2733 	kvm_s390_vsie_init(kvm);
2734 	if (use_gisa)
2735 		kvm_s390_gisa_init(kvm);
2736 	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2737 
2738 	return 0;
2739 out_err:
2740 	free_page((unsigned long)kvm->arch.sie_page2);
2741 	debug_unregister(kvm->arch.dbf);
2742 	sca_dispose(kvm);
2743 	KVM_EVENT(3, "creation of vm failed: %d", rc);
2744 	return rc;
2745 }
2746 
2747 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2748 {
2749 	u16 rc, rrc;
2750 
2751 	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2752 	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2753 	kvm_s390_clear_local_irqs(vcpu);
2754 	kvm_clear_async_pf_completion_queue(vcpu);
2755 	if (!kvm_is_ucontrol(vcpu->kvm))
2756 		sca_del_vcpu(vcpu);
2757 
2758 	if (kvm_is_ucontrol(vcpu->kvm))
2759 		gmap_remove(vcpu->arch.gmap);
2760 
2761 	if (vcpu->kvm->arch.use_cmma)
2762 		kvm_s390_vcpu_unsetup_cmma(vcpu);
2763 	/* We cannot hold the vcpu mutex here; we are already dying */
2764 	if (kvm_s390_pv_cpu_get_handle(vcpu))
2765 		kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
2766 	free_page((unsigned long)(vcpu->arch.sie_block));
2767 }
2768 
2769 static void kvm_free_vcpus(struct kvm *kvm)
2770 {
2771 	unsigned int i;
2772 	struct kvm_vcpu *vcpu;
2773 
2774 	kvm_for_each_vcpu(i, vcpu, kvm)
2775 		kvm_vcpu_destroy(vcpu);
2776 
2777 	mutex_lock(&kvm->lock);
2778 	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2779 		kvm->vcpus[i] = NULL;
2780 
2781 	atomic_set(&kvm->online_vcpus, 0);
2782 	mutex_unlock(&kvm->lock);
2783 }
2784 
2785 void kvm_arch_destroy_vm(struct kvm *kvm)
2786 {
2787 	u16 rc, rrc;
2788 
2789 	kvm_free_vcpus(kvm);
2790 	sca_dispose(kvm);
2791 	kvm_s390_gisa_destroy(kvm);
2792 	/*
2793 	 * We are already at the end of life and kvm->lock is not taken.
2794 	 * This is ok as the file descriptor is closed by now and nobody
2795 	 * can mess with the pv state. To avoid lockdep_assert_held from
2796 	 * complaining we do not use kvm_s390_pv_is_protected.
2797 	 */
2798 	if (kvm_s390_pv_get_handle(kvm))
2799 		kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
2800 	debug_unregister(kvm->arch.dbf);
2801 	free_page((unsigned long)kvm->arch.sie_page2);
2802 	if (!kvm_is_ucontrol(kvm))
2803 		gmap_remove(kvm->arch.gmap);
2804 	kvm_s390_destroy_adapters(kvm);
2805 	kvm_s390_clear_float_irqs(kvm);
2806 	kvm_s390_vsie_destroy(kvm);
2807 	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2808 }
2809 
2810 /* Section: vcpu related */
2811 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2812 {
2813 	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2814 	if (!vcpu->arch.gmap)
2815 		return -ENOMEM;
2816 	vcpu->arch.gmap->private = vcpu->kvm;
2817 
2818 	return 0;
2819 }
2820 
2821 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2822 {
2823 	if (!kvm_s390_use_sca_entries())
2824 		return;
2825 	read_lock(&vcpu->kvm->arch.sca_lock);
2826 	if (vcpu->kvm->arch.use_esca) {
2827 		struct esca_block *sca = vcpu->kvm->arch.sca;
2828 
2829 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2830 		sca->cpu[vcpu->vcpu_id].sda = 0;
2831 	} else {
2832 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2833 
2834 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2835 		sca->cpu[vcpu->vcpu_id].sda = 0;
2836 	}
2837 	read_unlock(&vcpu->kvm->arch.sca_lock);
2838 }
2839 
2840 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2841 {
2842 	if (!kvm_s390_use_sca_entries()) {
2843 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2844 
2845 		/* we still need the basic sca for the ipte control */
2846 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2847 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2848 		return;
2849 	}
2850 	read_lock(&vcpu->kvm->arch.sca_lock);
2851 	if (vcpu->kvm->arch.use_esca) {
2852 		struct esca_block *sca = vcpu->kvm->arch.sca;
2853 
2854 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2855 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2856 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2857 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2858 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2859 	} else {
2860 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2861 
2862 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2863 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2864 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2865 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2866 	}
2867 	read_unlock(&vcpu->kvm->arch.sca_lock);
2868 }
2869 
2870 /* Basic SCA to Extended SCA data copy routines */
2871 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2872 {
2873 	d->sda = s->sda;
2874 	d->sigp_ctrl.c = s->sigp_ctrl.c;
2875 	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2876 }
2877 
2878 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2879 {
2880 	int i;
2881 
2882 	d->ipte_control = s->ipte_control;
2883 	d->mcn[0] = s->mcn;
2884 	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2885 		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2886 }
2887 
2888 static int sca_switch_to_extended(struct kvm *kvm)
2889 {
2890 	struct bsca_block *old_sca = kvm->arch.sca;
2891 	struct esca_block *new_sca;
2892 	struct kvm_vcpu *vcpu;
2893 	unsigned int vcpu_idx;
2894 	u32 scaol, scaoh;
2895 
2896 	if (kvm->arch.use_esca)
2897 		return 0;
2898 
2899 	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO);
2900 	if (!new_sca)
2901 		return -ENOMEM;
2902 
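	/*
	 * The SCA origin is split into two 32-bit halves for the SIE
	 * blocks below; the low 6 bits of the low half are masked off
	 * (an esca_block is at least 64-byte aligned).
	 */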
2903 	scaoh = (u32)((u64)(new_sca) >> 32);
2904 	scaol = (u32)(u64)(new_sca) & ~0x3fU;
2905 
2906 	kvm_s390_vcpu_block_all(kvm);
2907 	write_lock(&kvm->arch.sca_lock);
2908 
2909 	sca_copy_b_to_e(new_sca, old_sca);
2910 
2911 	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2912 		vcpu->arch.sie_block->scaoh = scaoh;
2913 		vcpu->arch.sie_block->scaol = scaol;
2914 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2915 	}
2916 	kvm->arch.sca = new_sca;
2917 	kvm->arch.use_esca = 1;
2918 
2919 	write_unlock(&kvm->arch.sca_lock);
2920 	kvm_s390_vcpu_unblock_all(kvm);
2921 
2922 	free_page((unsigned long)old_sca);
2923 
2924 	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2925 		 old_sca, kvm->arch.sca);
2926 	return 0;
2927 }
2928 
2929 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2930 {
2931 	int rc;
2932 
2933 	if (!kvm_s390_use_sca_entries()) {
2934 		if (id < KVM_MAX_VCPUS)
2935 			return true;
2936 		return false;
2937 	}
2938 	if (id < KVM_S390_BSCA_CPU_SLOTS)
2939 		return true;
2940 	if (!sclp.has_esca || !sclp.has_64bscao)
2941 		return false;
2942 
2943 	mutex_lock(&kvm->lock);
2944 	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2945 	mutex_unlock(&kvm->lock);
2946 
2947 	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2948 }
2949 
2950 /* needs preemption disabled to protect from TOD sync and vcpu_load/put */
2951 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2952 {
2953 	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2954 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2955 	vcpu->arch.cputm_start = get_tod_clock_fast();
2956 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2957 }
2958 
2959 /* needs preemption disabled to protect from TOD sync and vcpu_load/put */
2960 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2961 {
2962 	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2963 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2964 	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2965 	vcpu->arch.cputm_start = 0;
2966 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2967 }
2968 
2969 /* needs preemption disabled to protect from TOD sync and vcpu_load/put */
2970 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2971 {
2972 	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2973 	vcpu->arch.cputm_enabled = true;
2974 	__start_cpu_timer_accounting(vcpu);
2975 }
2976 
2977 /* needs preemption disabled to protect from TOD sync and vcpu_load/put */
2978 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2979 {
2980 	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2981 	__stop_cpu_timer_accounting(vcpu);
2982 	vcpu->arch.cputm_enabled = false;
2983 }
2984 
2985 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2986 {
2987 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2988 	__enable_cpu_timer_accounting(vcpu);
2989 	preempt_enable();
2990 }
2991 
2992 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2993 {
2994 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2995 	__disable_cpu_timer_accounting(vcpu);
2996 	preempt_enable();
2997 }
2998 
2999 /* set the cpu timer - may only be called from the VCPU thread itself */
3000 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
3001 {
3002 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3003 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3004 	if (vcpu->arch.cputm_enabled)
3005 		vcpu->arch.cputm_start = get_tod_clock_fast();
3006 	vcpu->arch.sie_block->cputm = cputm;
3007 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3008 	preempt_enable();
3009 }
3010 
3011 /* update and get the cpu timer - can also be called from other VCPU threads */
3012 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
3013 {
3014 	unsigned int seq;
3015 	__u64 value;
3016 
3017 	if (unlikely(!vcpu->arch.cputm_enabled))
3018 		return vcpu->arch.sie_block->cputm;
3019 
3020 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3021 	do {
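		/*
		 * An odd seqcount value means an update is in progress;
		 * passing (seq & ~1) to the retry check below forces
		 * another loop iteration until the writer is done.
		 */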
3022 		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
3023 		/*
3024 		 * If the writer would ever execute a read in the critical
3025 		 * section, e.g. in irq context, we have a deadlock.
3026 		 */
3027 		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
3028 		value = vcpu->arch.sie_block->cputm;
3029 		/* if cputm_start is 0, accounting is being started/stopped */
3030 		if (likely(vcpu->arch.cputm_start))
3031 			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3032 	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
3033 	preempt_enable();
3034 	return value;
3035 }
3036 
3037 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3038 {
3039 
3040 	gmap_enable(vcpu->arch.enabled_gmap);
3041 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
3042 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3043 		__start_cpu_timer_accounting(vcpu);
3044 	vcpu->cpu = cpu;
3045 }
3046 
3047 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
3048 {
3049 	vcpu->cpu = -1;
3050 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3051 		__stop_cpu_timer_accounting(vcpu);
3052 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
3053 	vcpu->arch.enabled_gmap = gmap_get_enabled();
3054 	gmap_disable(vcpu->arch.enabled_gmap);
3055 
3056 }
3057 
3058 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
3059 {
3060 	mutex_lock(&vcpu->kvm->lock);
3061 	preempt_disable();
3062 	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
3063 	vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
3064 	preempt_enable();
3065 	mutex_unlock(&vcpu->kvm->lock);
3066 	if (!kvm_is_ucontrol(vcpu->kvm)) {
3067 		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
3068 		sca_add_vcpu(vcpu);
3069 	}
3070 	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
3071 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3072 	/* make vcpu_load load the right gmap on the first trigger */
3073 	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
3074 }
3075 
3076 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
3077 {
3078 	if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
3079 	    test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
3080 		return true;
3081 	return false;
3082 }
3083 
3084 static bool kvm_has_pckmo_ecc(struct kvm *kvm)
3085 {
3086 	/* At least one ECC subfunction must be present */
3087 	return kvm_has_pckmo_subfunc(kvm, 32) ||
3088 	       kvm_has_pckmo_subfunc(kvm, 33) ||
3089 	       kvm_has_pckmo_subfunc(kvm, 34) ||
3090 	       kvm_has_pckmo_subfunc(kvm, 40) ||
3091 	       kvm_has_pckmo_subfunc(kvm, 41);
3092 
3093 }
3094 
3095 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
3096 {
3097 	/*
3098 	 * If the AP instructions are not being interpreted and the MSAX3
3099 	 * facility is not configured for the guest, there is nothing to set up.
3100 	 */
3101 	if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
3102 		return;
3103 
3104 	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
3105 	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
3106 	vcpu->arch.sie_block->eca &= ~ECA_APIE;
3107 	vcpu->arch.sie_block->ecd &= ~ECD_ECC;
3108 
3109 	if (vcpu->kvm->arch.crypto.apie)
3110 		vcpu->arch.sie_block->eca |= ECA_APIE;
3111 
3112 	/* Set up protected key support */
3113 	if (vcpu->kvm->arch.crypto.aes_kw) {
3114 		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
3115 		/* ecc is also wrapped with AES key */
3116 		if (kvm_has_pckmo_ecc(vcpu->kvm))
3117 			vcpu->arch.sie_block->ecd |= ECD_ECC;
3118 	}
3119 
3120 	if (vcpu->kvm->arch.crypto.dea_kw)
3121 		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
3122 }
3123 
3124 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
3125 {
3126 	free_page(vcpu->arch.sie_block->cbrlo);
3127 	vcpu->arch.sie_block->cbrlo = 0;
3128 }
3129 
3130 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
3131 {
3132 	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL_ACCOUNT);
3133 	if (!vcpu->arch.sie_block->cbrlo)
3134 		return -ENOMEM;
3135 	return 0;
3136 }
3137 
3138 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
3139 {
3140 	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
3141 
3142 	vcpu->arch.sie_block->ibc = model->ibc;
3143 	if (test_kvm_facility(vcpu->kvm, 7))
3144 		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
3145 }
3146 
3147 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
3148 {
3149 	int rc = 0;
3150 	u16 uvrc, uvrrc;
3151 
3152 	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
3153 						    CPUSTAT_SM |
3154 						    CPUSTAT_STOPPED);
3155 
3156 	if (test_kvm_facility(vcpu->kvm, 78))
3157 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
3158 	else if (test_kvm_facility(vcpu->kvm, 8))
3159 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
3160 
3161 	kvm_s390_vcpu_setup_model(vcpu);
3162 
3163 	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
3164 	if (MACHINE_HAS_ESOP)
3165 		vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
3166 	if (test_kvm_facility(vcpu->kvm, 9))
3167 		vcpu->arch.sie_block->ecb |= ECB_SRSI;
3168 	if (test_kvm_facility(vcpu->kvm, 73))
3169 		vcpu->arch.sie_block->ecb |= ECB_TE;
3170 
3171 	if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
3172 		vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
3173 	if (test_kvm_facility(vcpu->kvm, 130))
3174 		vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
3175 	vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
3176 	if (sclp.has_cei)
3177 		vcpu->arch.sie_block->eca |= ECA_CEI;
3178 	if (sclp.has_ib)
3179 		vcpu->arch.sie_block->eca |= ECA_IB;
3180 	if (sclp.has_siif)
3181 		vcpu->arch.sie_block->eca |= ECA_SII;
3182 	if (sclp.has_sigpif)
3183 		vcpu->arch.sie_block->eca |= ECA_SIGPI;
3184 	if (test_kvm_facility(vcpu->kvm, 129)) {
3185 		vcpu->arch.sie_block->eca |= ECA_VX;
3186 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3187 	}
3188 	if (test_kvm_facility(vcpu->kvm, 139))
3189 		vcpu->arch.sie_block->ecd |= ECD_MEF;
3190 	if (test_kvm_facility(vcpu->kvm, 156))
3191 		vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3192 	if (vcpu->arch.sie_block->gd) {
3193 		vcpu->arch.sie_block->eca |= ECA_AIV;
3194 		VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3195 			   vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3196 	}
3197 	vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
3198 					| SDNXC;
3199 	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
3200 
3201 	if (sclp.has_kss)
3202 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3203 	else
3204 		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3205 
3206 	if (vcpu->kvm->arch.use_cmma) {
3207 		rc = kvm_s390_vcpu_setup_cmma(vcpu);
3208 		if (rc)
3209 			return rc;
3210 	}
3211 	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3212 	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3213 
3214 	vcpu->arch.sie_block->hpid = HPID_KVM;
3215 
3216 	kvm_s390_vcpu_crypto_setup(vcpu);
3217 
3218 	mutex_lock(&vcpu->kvm->lock);
3219 	if (kvm_s390_pv_is_protected(vcpu->kvm)) {
3220 		rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
3221 		if (rc)
3222 			kvm_s390_vcpu_unsetup_cmma(vcpu);
3223 	}
3224 	mutex_unlock(&vcpu->kvm->lock);
3225 
3226 	return rc;
3227 }
3228 
3229 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3230 {
3231 	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3232 		return -EINVAL;
3233 	return 0;
3234 }
3235 
3236 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
3237 {
3238 	struct sie_page *sie_page;
3239 	int rc;
3240 
3241 	BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3242 	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT);
3243 	if (!sie_page)
3244 		return -ENOMEM;
3245 
3246 	vcpu->arch.sie_block = &sie_page->sie_block;
3247 	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
3248 
3249 	/* the real guest size will always be smaller than msl */
3250 	vcpu->arch.sie_block->mso = 0;
3251 	vcpu->arch.sie_block->msl = sclp.hamax;
3252 
3253 	vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
3254 	spin_lock_init(&vcpu->arch.local_int.lock);
3255 	vcpu->arch.sie_block->gd = (u32)(u64)vcpu->kvm->arch.gisa_int.origin;
3256 	if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
3257 		vcpu->arch.sie_block->gd |= GISA_FORMAT1;
3258 	seqcount_init(&vcpu->arch.cputm_seqcount);
3259 
3260 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3261 	kvm_clear_async_pf_completion_queue(vcpu);
3262 	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
3263 				    KVM_SYNC_GPRS |
3264 				    KVM_SYNC_ACRS |
3265 				    KVM_SYNC_CRS |
3266 				    KVM_SYNC_ARCH0 |
3267 				    KVM_SYNC_PFAULT |
3268 				    KVM_SYNC_DIAG318;
3269 	kvm_s390_set_prefix(vcpu, 0);
3270 	if (test_kvm_facility(vcpu->kvm, 64))
3271 		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
3272 	if (test_kvm_facility(vcpu->kvm, 82))
3273 		vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
3274 	if (test_kvm_facility(vcpu->kvm, 133))
3275 		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
3276 	if (test_kvm_facility(vcpu->kvm, 156))
3277 		vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
3278 	/* fprs can be synchronized via vrs, even if the guest has no vx. With
3279 	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
3280 	 */
3281 	if (MACHINE_HAS_VX)
3282 		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
3283 	else
3284 		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
3285 
3286 	if (kvm_is_ucontrol(vcpu->kvm)) {
3287 		rc = __kvm_ucontrol_vcpu_init(vcpu);
3288 		if (rc)
3289 			goto out_free_sie_block;
3290 	}
3291 
3292 	VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
3293 		 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3294 	trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3295 
3296 	rc = kvm_s390_vcpu_setup(vcpu);
3297 	if (rc)
3298 		goto out_ucontrol_uninit;
3299 	return 0;
3300 
3301 out_ucontrol_uninit:
3302 	if (kvm_is_ucontrol(vcpu->kvm))
3303 		gmap_remove(vcpu->arch.gmap);
3304 out_free_sie_block:
3305 	free_page((unsigned long)(vcpu->arch.sie_block));
3306 	return rc;
3307 }
3308 
3309 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3310 {
3311 	return kvm_s390_vcpu_has_irq(vcpu, 0);
3312 }
3313 
3314 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3315 {
3316 	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3317 }
3318 
3319 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3320 {
3321 	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3322 	exit_sie(vcpu);
3323 }
3324 
3325 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3326 {
3327 	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3328 }
3329 
3330 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3331 {
3332 	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3333 	exit_sie(vcpu);
3334 }
3335 
3336 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3337 {
3338 	return atomic_read(&vcpu->arch.sie_block->prog20) &
3339 	       (PROG_BLOCK_SIE | PROG_REQUEST);
3340 }
3341 
3342 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3343 {
3344 	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3345 }
3346 
3347 /*
3348  * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3349  * If the CPU is not running (e.g. waiting as idle) the function will
3350  * return immediately.
 */
3351 void exit_sie(struct kvm_vcpu *vcpu)
3352 {
3353 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3354 	kvm_s390_vsie_kick(vcpu);
3355 	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3356 		cpu_relax();
3357 }
3358 
3359 /* Kick a guest cpu out of SIE to process a request synchronously */
3360 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
3361 {
3362 	kvm_make_request(req, vcpu);
3363 	kvm_s390_vcpu_request(vcpu);
3364 }
3365 
3366 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3367 			      unsigned long end)
3368 {
3369 	struct kvm *kvm = gmap->private;
3370 	struct kvm_vcpu *vcpu;
3371 	unsigned long prefix;
3372 	int i;
3373 
3374 	if (gmap_is_shadow(gmap))
3375 		return;
3376 	if (start >= 1UL << 31)
3377 		/* We are only interested in prefix pages */
3378 		return;
3379 	kvm_for_each_vcpu(i, vcpu, kvm) {
3380 		/* match against both prefix pages */
3381 		prefix = kvm_s390_get_prefix(vcpu);
3382 		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3383 			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3384 				   start, end);
3385 			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
3386 		}
3387 	}
3388 }
3389 
3390 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3391 {
3392 	/* do not poll with more than halt_poll_max_steal percent of steal time */
3393 	if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3394 	    halt_poll_max_steal) {
3395 		vcpu->stat.halt_no_poll_steal++;
3396 		return true;
3397 	}
3398 	return false;
3399 }
3400 
3401 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
3402 {
3403 	/* kvm common code refers to this, but never calls it */
3404 	BUG();
3405 	return 0;
3406 }
3407 
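/* Export selected SIE control block and pfault fields via KVM_GET_ONE_REG */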
3408 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3409 					   struct kvm_one_reg *reg)
3410 {
3411 	int r = -EINVAL;
3412 
3413 	switch (reg->id) {
3414 	case KVM_REG_S390_TODPR:
3415 		r = put_user(vcpu->arch.sie_block->todpr,
3416 			     (u32 __user *)reg->addr);
3417 		break;
3418 	case KVM_REG_S390_EPOCHDIFF:
3419 		r = put_user(vcpu->arch.sie_block->epoch,
3420 			     (u64 __user *)reg->addr);
3421 		break;
3422 	case KVM_REG_S390_CPU_TIMER:
3423 		r = put_user(kvm_s390_get_cpu_timer(vcpu),
3424 			     (u64 __user *)reg->addr);
3425 		break;
3426 	case KVM_REG_S390_CLOCK_COMP:
3427 		r = put_user(vcpu->arch.sie_block->ckc,
3428 			     (u64 __user *)reg->addr);
3429 		break;
3430 	case KVM_REG_S390_PFTOKEN:
3431 		r = put_user(vcpu->arch.pfault_token,
3432 			     (u64 __user *)reg->addr);
3433 		break;
3434 	case KVM_REG_S390_PFCOMPARE:
3435 		r = put_user(vcpu->arch.pfault_compare,
3436 			     (u64 __user *)reg->addr);
3437 		break;
3438 	case KVM_REG_S390_PFSELECT:
3439 		r = put_user(vcpu->arch.pfault_select,
3440 			     (u64 __user *)reg->addr);
3441 		break;
3442 	case KVM_REG_S390_PP:
3443 		r = put_user(vcpu->arch.sie_block->pp,
3444 			     (u64 __user *)reg->addr);
3445 		break;
3446 	case KVM_REG_S390_GBEA:
3447 		r = put_user(vcpu->arch.sie_block->gbea,
3448 			     (u64 __user *)reg->addr);
3449 		break;
3450 	default:
3451 		break;
3452 	}
3453 
3454 	return r;
3455 }
3456 
3457 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3458 					   struct kvm_one_reg *reg)
3459 {
3460 	int r = -EINVAL;
3461 	__u64 val;
3462 
3463 	switch (reg->id) {
3464 	case KVM_REG_S390_TODPR:
3465 		r = get_user(vcpu->arch.sie_block->todpr,
3466 			     (u32 __user *)reg->addr);
3467 		break;
3468 	case KVM_REG_S390_EPOCHDIFF:
3469 		r = get_user(vcpu->arch.sie_block->epoch,
3470 			     (u64 __user *)reg->addr);
3471 		break;
3472 	case KVM_REG_S390_CPU_TIMER:
3473 		r = get_user(val, (u64 __user *)reg->addr);
3474 		if (!r)
3475 			kvm_s390_set_cpu_timer(vcpu, val);
3476 		break;
3477 	case KVM_REG_S390_CLOCK_COMP:
3478 		r = get_user(vcpu->arch.sie_block->ckc,
3479 			     (u64 __user *)reg->addr);
3480 		break;
3481 	case KVM_REG_S390_PFTOKEN:
3482 		r = get_user(vcpu->arch.pfault_token,
3483 			     (u64 __user *)reg->addr);
3484 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3485 			kvm_clear_async_pf_completion_queue(vcpu);
3486 		break;
3487 	case KVM_REG_S390_PFCOMPARE:
3488 		r = get_user(vcpu->arch.pfault_compare,
3489 			     (u64 __user *)reg->addr);
3490 		break;
3491 	case KVM_REG_S390_PFSELECT:
3492 		r = get_user(vcpu->arch.pfault_select,
3493 			     (u64 __user *)reg->addr);
3494 		break;
3495 	case KVM_REG_S390_PP:
3496 		r = get_user(vcpu->arch.sie_block->pp,
3497 			     (u64 __user *)reg->addr);
3498 		break;
3499 	case KVM_REG_S390_GBEA:
3500 		r = get_user(vcpu->arch.sie_block->gbea,
3501 			     (u64 __user *)reg->addr);
3502 		break;
3503 	default:
3504 		break;
3505 	}
3506 
3507 	return r;
3508 }
3509 
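/*
 * The normal reset clears runtime instrumentation, pfault handling state
 * and local interrupts; register contents are left untouched.
 */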
3510 static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
3511 {
3512 	vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
3513 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3514 	memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
3515 
3516 	kvm_clear_async_pf_completion_queue(vcpu);
3517 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
3518 		kvm_s390_vcpu_stop(vcpu);
3519 	kvm_s390_clear_local_irqs(vcpu);
3520 }
3521 
3522 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3523 {
3524 	/* Initial reset is a superset of the normal reset */
3525 	kvm_arch_vcpu_ioctl_normal_reset(vcpu);
3526 
3527 	/*
3528 	 * This equals the initial CPU reset in the PoP, but we don't switch
3529 	 * to ESA. We not only reset the internal data, but also ...
3530 	 */
3531 	vcpu->arch.sie_block->gpsw.mask = 0;
3532 	vcpu->arch.sie_block->gpsw.addr = 0;
3533 	kvm_s390_set_prefix(vcpu, 0);
3534 	kvm_s390_set_cpu_timer(vcpu, 0);
3535 	vcpu->arch.sie_block->ckc = 0;
3536 	memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
3537 	vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
3538 	vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
3539 
3540 	/* ... the data in sync regs */
3541 	memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
3542 	vcpu->run->s.regs.ckc = 0;
3543 	vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
3544 	vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
3545 	vcpu->run->psw_addr = 0;
3546 	vcpu->run->psw_mask = 0;
3547 	vcpu->run->s.regs.todpr = 0;
3548 	vcpu->run->s.regs.cputm = 0;
3549 	vcpu->run->s.regs.ckc = 0;
3550 	vcpu->run->s.regs.pp = 0;
3551 	vcpu->run->s.regs.gbea = 1;
3552 	vcpu->run->s.regs.fpc = 0;
3553 	/*
3554 	 * Do not reset these registers in the protected case, as some of
3555 	 * them are overlaid and they are not accessible in this case
3556 	 * anyway.
3557 	 */
3558 	if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3559 		vcpu->arch.sie_block->gbea = 1;
3560 		vcpu->arch.sie_block->pp = 0;
3561 		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3562 		vcpu->arch.sie_block->todpr = 0;
3563 	}
3564 }
3565 
3566 static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
3567 {
3568 	struct kvm_sync_regs *regs = &vcpu->run->s.regs;
3569 
3570 	/* Clear reset is a superset of the initial reset */
3571 	kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3572 
3573 	memset(&regs->gprs, 0, sizeof(regs->gprs));
3574 	memset(&regs->vrs, 0, sizeof(regs->vrs));
3575 	memset(&regs->acrs, 0, sizeof(regs->acrs));
3576 	memset(&regs->gscb, 0, sizeof(regs->gscb));
3577 
3578 	regs->etoken = 0;
3579 	regs->etoken_extension = 0;
3580 }
3581 
3582 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3583 {
3584 	vcpu_load(vcpu);
3585 	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3586 	vcpu_put(vcpu);
3587 	return 0;
3588 }
3589 
3590 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3591 {
3592 	vcpu_load(vcpu);
3593 	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3594 	vcpu_put(vcpu);
3595 	return 0;
3596 }
3597 
3598 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3599 				  struct kvm_sregs *sregs)
3600 {
3601 	vcpu_load(vcpu);
3602 
3603 	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3604 	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3605 
3606 	vcpu_put(vcpu);
3607 	return 0;
3608 }
3609 
3610 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3611 				  struct kvm_sregs *sregs)
3612 {
3613 	vcpu_load(vcpu);
3614 
3615 	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3616 	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3617 
3618 	vcpu_put(vcpu);
3619 	return 0;
3620 }
3621 
3622 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3623 {
3624 	int ret = 0;
3625 
3626 	vcpu_load(vcpu);
3627 
3628 	if (test_fp_ctl(fpu->fpc)) {
3629 		ret = -EINVAL;
3630 		goto out;
3631 	}
3632 	vcpu->run->s.regs.fpc = fpu->fpc;
3633 	if (MACHINE_HAS_VX)
3634 		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3635 				 (freg_t *) fpu->fprs);
3636 	else
3637 		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3638 
3639 out:
3640 	vcpu_put(vcpu);
3641 	return ret;
3642 }
3643 
3644 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3645 {
3646 	vcpu_load(vcpu);
3647 
3648 	/* make sure we have the latest values */
3649 	save_fpu_regs();
3650 	if (MACHINE_HAS_VX)
3651 		convert_vx_to_fp((freg_t *) fpu->fprs,
3652 				 (__vector128 *) vcpu->run->s.regs.vrs);
3653 	else
3654 		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3655 	fpu->fpc = vcpu->run->s.regs.fpc;
3656 
3657 	vcpu_put(vcpu);
3658 	return 0;
3659 }
3660 
3661 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3662 {
3663 	int rc = 0;
3664 
3665 	if (!is_vcpu_stopped(vcpu)) {
3666 		rc = -EBUSY;
3667 	} else {
3668 		vcpu->run->psw_mask = psw.mask;
3669 		vcpu->run->psw_addr = psw.addr;
3670 	}
3671 	return rc;
3672 }
3673 
3674 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3675 				  struct kvm_translation *tr)
3676 {
3677 	return -EINVAL; /* not implemented yet */
3678 }
3679 
3680 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3681 			      KVM_GUESTDBG_USE_HW_BP | \
3682 			      KVM_GUESTDBG_ENABLE)
3683 
3684 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3685 					struct kvm_guest_debug *dbg)
3686 {
3687 	int rc = 0;
3688 
3689 	vcpu_load(vcpu);
3690 
3691 	vcpu->guest_debug = 0;
3692 	kvm_s390_clear_bp_data(vcpu);
3693 
3694 	if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3695 		rc = -EINVAL;
3696 		goto out;
3697 	}
3698 	if (!sclp.has_gpere) {
3699 		rc = -EINVAL;
3700 		goto out;
3701 	}
3702 
3703 	if (dbg->control & KVM_GUESTDBG_ENABLE) {
3704 		vcpu->guest_debug = dbg->control;
3705 		/* enforce guest PER */
3706 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3707 
3708 		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3709 			rc = kvm_s390_import_bp_data(vcpu, dbg);
3710 	} else {
3711 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3712 		vcpu->arch.guestdbg.last_bp = 0;
3713 	}
3714 
3715 	if (rc) {
3716 		vcpu->guest_debug = 0;
3717 		kvm_s390_clear_bp_data(vcpu);
3718 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3719 	}
3720 
3721 out:
3722 	vcpu_put(vcpu);
3723 	return rc;
3724 }
3725 
3726 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3727 				    struct kvm_mp_state *mp_state)
3728 {
3729 	int ret;
3730 
3731 	vcpu_load(vcpu);
3732 
3733 	/* CHECK_STOP and LOAD are not supported yet */
3734 	ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3735 				      KVM_MP_STATE_OPERATING;
3736 
3737 	vcpu_put(vcpu);
3738 	return ret;
3739 }
3740 
3741 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3742 				    struct kvm_mp_state *mp_state)
3743 {
3744 	int rc = 0;
3745 
3746 	vcpu_load(vcpu);
3747 
3748 	/* user space knows about this interface - let it control the state */
3749 	vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3750 
3751 	switch (mp_state->mp_state) {
3752 	case KVM_MP_STATE_STOPPED:
3753 		rc = kvm_s390_vcpu_stop(vcpu);
3754 		break;
3755 	case KVM_MP_STATE_OPERATING:
3756 		rc = kvm_s390_vcpu_start(vcpu);
3757 		break;
3758 	case KVM_MP_STATE_LOAD:
3759 		if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3760 			rc = -ENXIO;
3761 			break;
3762 		}
3763 		rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
3764 		break;
3765 	case KVM_MP_STATE_CHECK_STOP:
3766 		fallthrough;	/* CHECK_STOP and LOAD are not supported yet */
3767 	default:
3768 		rc = -ENXIO;
3769 	}
3770 
3771 	vcpu_put(vcpu);
3772 	return rc;
3773 }
3774 
3775 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3776 {
3777 	return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3778 }
3779 
3780 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3781 {
3782 retry:
3783 	kvm_s390_vcpu_request_handled(vcpu);
3784 	if (!kvm_request_pending(vcpu))
3785 		return 0;
3786 	/*
3787 	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
3788 	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3789 	 * This ensures that the ipte instruction for this request has
3790 	 * already finished. We might race against a second unmapper that
3791 	 * wants to set the blocking bit. Let's just retry the request loop.
3792 	 */
3793 	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3794 		int rc;

3795 		rc = gmap_mprotect_notify(vcpu->arch.gmap,
3796 					  kvm_s390_get_prefix(vcpu),
3797 					  PAGE_SIZE * 2, PROT_WRITE);
3798 		if (rc) {
3799 			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3800 			return rc;
3801 		}
3802 		goto retry;
3803 	}
3804 
3805 	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3806 		vcpu->arch.sie_block->ihcpu = 0xffff;
3807 		goto retry;
3808 	}
3809 
3810 	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3811 		if (!ibs_enabled(vcpu)) {
3812 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3813 			kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3814 		}
3815 		goto retry;
3816 	}
3817 
3818 	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3819 		if (ibs_enabled(vcpu)) {
3820 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3821 			kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3822 		}
3823 		goto retry;
3824 	}
3825 
3826 	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3827 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3828 		goto retry;
3829 	}
3830 
3831 	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3832 		/*
3833 		 * Disable CMM virtualization; we will emulate the ESSA
3834 		 * instruction manually, in order to provide additional
3835 		 * functionalities needed for live migration.
3836 		 */
3837 		vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3838 		goto retry;
3839 	}
3840 
3841 	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3842 		/*
3843 		 * Re-enable CMM virtualization if CMMA is available and
3844 		 * CMM has been used.
3845 		 */
3846 		if ((vcpu->kvm->arch.use_cmma) &&
3847 		    (vcpu->kvm->mm->context.uses_cmm))
3848 			vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3849 		goto retry;
3850 	}
3851 
3852 	/* nothing to do, just clear the request */
3853 	kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3854 	/* we left the vsie handler, nothing to do, just clear the request */
3855 	kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
3856 
3857 	return 0;
3858 }
3859 
3860 void kvm_s390_set_tod_clock(struct kvm *kvm,
3861 			    const struct kvm_s390_vm_tod_clock *gtod)
3862 {
3863 	struct kvm_vcpu *vcpu;
3864 	union tod_clock clk;
3865 	int i;
3866 
3867 	mutex_lock(&kvm->lock);
3868 	preempt_disable();
3869 
3870 	store_tod_clock_ext(&clk);
3871 
3872 	kvm->arch.epoch = gtod->tod - clk.tod;
3873 	kvm->arch.epdx = 0;
3874 	if (test_kvm_facility(kvm, 139)) {
3875 		kvm->arch.epdx = gtod->epoch_idx - clk.ei;
3876 		if (kvm->arch.epoch > gtod->tod)
3877 			kvm->arch.epdx -= 1;
3878 	}
3879 
3880 	kvm_s390_vcpu_block_all(kvm);
3881 	kvm_for_each_vcpu(i, vcpu, kvm) {
3882 		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3883 		vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
3884 	}
3885 
3886 	kvm_s390_vcpu_unblock_all(kvm);
3887 	preempt_enable();
3888 	mutex_unlock(&kvm->lock);
3889 }
3890 
3891 /**
3892  * kvm_arch_fault_in_page - fault-in guest page if necessary
3893  * @vcpu: The corresponding virtual cpu
3894  * @gpa: Guest physical address
3895  * @writable: Whether the page should be writable or not
3896  *
3897  * Make sure that a guest page has been faulted-in on the host.
3898  *
3899  * Return: Zero on success, negative error code otherwise.
3900  */
3901 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3902 {
3903 	return gmap_fault(vcpu->arch.gmap, gpa,
3904 			  writable ? FAULT_FLAG_WRITE : 0);
3905 }
3906 
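/*
 * The pfault INIT notification is a local interrupt for the faulting
 * VCPU, while the DONE notification is floating, as the completion may
 * be delivered to (and processed by) any CPU of the guest.
 */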
3907 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3908 				      unsigned long token)
3909 {
3910 	struct kvm_s390_interrupt inti;
3911 	struct kvm_s390_irq irq;
3912 
3913 	if (start_token) {
3914 		irq.u.ext.ext_params2 = token;
3915 		irq.type = KVM_S390_INT_PFAULT_INIT;
3916 		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3917 	} else {
3918 		inti.type = KVM_S390_INT_PFAULT_DONE;
3919 		inti.parm64 = token;
3920 		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3921 	}
3922 }
3923 
3924 bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3925 				     struct kvm_async_pf *work)
3926 {
3927 	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3928 	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3929 
3930 	return true;
3931 }
3932 
3933 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3934 				 struct kvm_async_pf *work)
3935 {
3936 	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3937 	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3938 }
3939 
3940 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3941 			       struct kvm_async_pf *work)
3942 {
3943 	/* s390 will always inject the page directly */
3944 }
3945 
3946 bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
3947 {
3948 	/*
3949 	 * s390 will always inject the page directly,
3950 	 * but we still want check_async_completion to clean up
3951 	 */
3952 	return true;
3953 }
3954 
3955 static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3956 {
3957 	hva_t hva;
3958 	struct kvm_arch_async_pf arch;
3959 
3960 	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3961 		return false;
3962 	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3963 	    vcpu->arch.pfault_compare)
3964 		return false;
3965 	if (psw_extint_disabled(vcpu))
3966 		return false;
3967 	if (kvm_s390_vcpu_has_irq(vcpu, 0))
3968 		return false;
3969 	if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
3970 		return false;
3971 	if (!vcpu->arch.gmap->pfault_enabled)
3972 		return false;
3973 
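	/*
	 * Resolve the faulting guest address (saved by the fault handler
	 * in current->thread.gmap_addr) to a host virtual address and
	 * fetch the pfault token now, so that delivering the notification
	 * later does not need to access guest memory.
	 */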
3974 	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3975 	hva += current->thread.gmap_addr & ~PAGE_MASK;
3976 	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3977 		return false;
3978 
3979 	return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3980 }
3981 
3982 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3983 {
3984 	int rc, cpuflags;
3985 
3986 	/*
3987 	 * On s390 notifications for arriving pages will be delivered directly
3988 	 * to the guest, but the housekeeping for completed pfaults is
3989 	 * handled outside the worker.
3990 	 */
3991 	kvm_check_async_pf_completion(vcpu);
3992 
3993 	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3994 	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3995 
3996 	if (need_resched())
3997 		schedule();
3998 
3999 	if (!kvm_is_ucontrol(vcpu->kvm)) {
4000 		rc = kvm_s390_deliver_pending_interrupts(vcpu);
4001 		if (rc)
4002 			return rc;
4003 	}
4004 
4005 	rc = kvm_s390_handle_requests(vcpu);
4006 	if (rc)
4007 		return rc;
4008 
4009 	if (guestdbg_enabled(vcpu)) {
4010 		kvm_s390_backup_guest_per_regs(vcpu);
4011 		kvm_s390_patch_guest_per_regs(vcpu);
4012 	}
4013 
4014 	clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.gisa_int.kicked_mask);
4015 
4016 	vcpu->arch.sie_block->icptcode = 0;
4017 	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
4018 	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
4019 	trace_kvm_s390_sie_enter(vcpu, cpuflags);
4020 
4021 	return 0;
4022 }
4023 
4024 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
4025 {
4026 	struct kvm_s390_pgm_info pgm_info = {
4027 		.code = PGM_ADDRESSING,
4028 	};
4029 	u8 opcode, ilen;
4030 	int rc;
4031 
4032 	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
4033 	trace_kvm_s390_sie_fault(vcpu);
4034 
4035 	/*
4036 	 * We want to inject an addressing exception, which is defined as a
4037 	 * suppressing or terminating exception. However, since we came here
4038 	 * by a DAT access exception, the PSW still points to the faulting
4039 	 * instruction since DAT exceptions are nullifying. So we've got
4040 	 * to look up the current opcode to get the length of the instruction
4041 	 * to be able to forward the PSW.
4042 	 */
4043 	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
4044 	ilen = insn_length(opcode);
4045 	if (rc < 0) {
4046 		return rc;
4047 	} else if (rc) {
4048 		/* Instruction-Fetching Exceptions - we can't detect the ilen.
4049 		 * Forward by arbitrary ilc, injection will take care of
4050 		 * nullification if necessary.
4051 		 */
4052 		pgm_info = vcpu->arch.pgm;
4053 		ilen = 4;
4054 	}
4055 	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
4056 	kvm_s390_forward_psw(vcpu, ilen);
4057 	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
4058 }
4059 
4060 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
4061 {
4062 	struct mcck_volatile_info *mcck_info;
4063 	struct sie_page *sie_page;
4064 
4065 	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
4066 		   vcpu->arch.sie_block->icptcode);
4067 	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
4068 
4069 	if (guestdbg_enabled(vcpu))
4070 		kvm_s390_restore_guest_per_regs(vcpu);
4071 
4072 	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
4073 	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
4074 
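	/*
	 * sie64a() returns -EINTR when a machine check hit while the guest
	 * was running; the low-level machine check handler saved the
	 * volatile mcck information in the sie page.
	 */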
4075 	if (exit_reason == -EINTR) {
4076 		VCPU_EVENT(vcpu, 3, "%s", "machine check");
4077 		sie_page = container_of(vcpu->arch.sie_block,
4078 					struct sie_page, sie_block);
4079 		mcck_info = &sie_page->mcck_info;
4080 		kvm_s390_reinject_machine_check(vcpu, mcck_info);
4081 		return 0;
4082 	}
4083 
4084 	if (vcpu->arch.sie_block->icptcode > 0) {
4085 		int rc = kvm_handle_sie_intercept(vcpu);
4086 
4087 		if (rc != -EOPNOTSUPP)
4088 			return rc;
4089 		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
4090 		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
4091 		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
4092 		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
4093 		return -EREMOTE;
4094 	} else if (exit_reason != -EFAULT) {
4095 		vcpu->stat.exit_null++;
4096 		return 0;
4097 	} else if (kvm_is_ucontrol(vcpu->kvm)) {
4098 		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
4099 		vcpu->run->s390_ucontrol.trans_exc_code =
4100 						current->thread.gmap_addr;
4101 		vcpu->run->s390_ucontrol.pgm_code = 0x10;
4102 		return -EREMOTE;
4103 	} else if (current->thread.gmap_pfault) {
4104 		trace_kvm_s390_major_guest_pfault(vcpu);
4105 		current->thread.gmap_pfault = 0;
4106 		if (kvm_arch_setup_async_pf(vcpu))
4107 			return 0;
4108 		vcpu->stat.pfault_sync++;
4109 		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
4110 	}
4111 	return vcpu_post_run_fault_in_sie(vcpu);
4112 }
4113 
4114 #define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
4115 static int __vcpu_run(struct kvm_vcpu *vcpu)
4116 {
4117 	int rc, exit_reason;
4118 	struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
4119 
4120 	/*
4121 	 * We try to hold kvm->srcu during most of vcpu_run (except when
4122 	 * running the guest), so that memslots (and other stuff) are protected.
4123 	 */
4124 	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4125 
4126 	do {
4127 		rc = vcpu_pre_run(vcpu);
4128 		if (rc)
4129 			break;
4130 
4131 		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4132 		/*
4133 		 * As PF_VCPU will be used in the fault handler, there must be
4134 		 * no uaccess between guest_enter and guest_exit.
4135 		 */
4136 		local_irq_disable();
4137 		guest_enter_irqoff();
4138 		__disable_cpu_timer_accounting(vcpu);
4139 		local_irq_enable();
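		/*
		 * For protected guests, the GPRs are passed in and out via
		 * the pv_grregs area of the sie page instead of the regular
		 * register save area.
		 */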
4140 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4141 			memcpy(sie_page->pv_grregs,
4142 			       vcpu->run->s.regs.gprs,
4143 			       sizeof(sie_page->pv_grregs));
4144 		}
4145 		if (test_cpu_flag(CIF_FPU))
4146 			load_fpu_regs();
4147 		exit_reason = sie64a(vcpu->arch.sie_block,
4148 				     vcpu->run->s.regs.gprs);
4149 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4150 			memcpy(vcpu->run->s.regs.gprs,
4151 			       sie_page->pv_grregs,
4152 			       sizeof(sie_page->pv_grregs));
4153 			/*
4154 			 * We're not allowed to inject interrupts on intercepts
4155 			 * that leave the guest state in an "in-between" state
4156 			 * where the next SIE entry will do a continuation.
4157 			 * Fence interrupts in our "internal" PSW.
4158 			 */
4159 			if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
4160 			    vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
4161 				vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4162 			}
4163 		}
4164 		local_irq_disable();
4165 		__enable_cpu_timer_accounting(vcpu);
4166 		guest_exit_irqoff();
4167 		local_irq_enable();
4168 		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4169 
4170 		rc = vcpu_post_run(vcpu, exit_reason);
4171 	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
4172 
4173 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4174 	return rc;
4175 }
4176 
4177 static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
4178 {
4179 	struct kvm_run *kvm_run = vcpu->run;
4180 	struct runtime_instr_cb *riccb;
4181 	struct gs_cb *gscb;
4182 
4183 	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
4184 	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
4185 	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
4186 	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
4187 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4188 		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
4189 		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
4190 		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
4191 	}
4192 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
4193 		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
4194 		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
4195 		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
4196 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4197 			kvm_clear_async_pf_completion_queue(vcpu);
4198 	}
4199 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
4200 		vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
4201 		vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
4202 	}
4203 	/*
4204 	 * If userspace sets the riccb (e.g. after migration) to a valid state,
4205 	 * we should enable RI here instead of doing the lazy enablement.
4206 	 */
4207 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
4208 	    test_kvm_facility(vcpu->kvm, 64) &&
4209 	    riccb->v &&
4210 	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
4211 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
4212 		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
4213 	}
4214 	/*
4215 	 * If userspace sets the gscb (e.g. after migration) to non-zero,
4216 	 * we should enable GS here instead of doing the lazy enablement.
4217 	 */
4218 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
4219 	    test_kvm_facility(vcpu->kvm, 133) &&
4220 	    gscb->gssm &&
4221 	    !vcpu->arch.gs_enabled) {
4222 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
4223 		vcpu->arch.sie_block->ecb |= ECB_GS;
4224 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
4225 		vcpu->arch.gs_enabled = 1;
4226 	}
4227 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
4228 	    test_kvm_facility(vcpu->kvm, 82)) {
4229 		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
4230 		vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
4231 	}
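	/*
	 * CR2 bit 59 (set via __ctl_set_bit(2, 4)) is the guarded-storage
	 * enablement control; it has to be on before a guarded-storage
	 * control block can be saved or restored.
	 */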
4232 	if (MACHINE_HAS_GS) {
4233 		preempt_disable();
4234 		__ctl_set_bit(2, 4);
4235 		if (current->thread.gs_cb) {
4236 			vcpu->arch.host_gscb = current->thread.gs_cb;
4237 			save_gs_cb(vcpu->arch.host_gscb);
4238 		}
4239 		if (vcpu->arch.gs_enabled) {
4240 			current->thread.gs_cb = (struct gs_cb *)
4241 						&vcpu->run->s.regs.gscb;
4242 			restore_gs_cb(current->thread.gs_cb);
4243 		}
4244 		preempt_enable();
4245 	}
4246 	/* SIE will load etoken directly from the SDNX and therefore from kvm_run */
4247 }
4248 
4249 static void sync_regs(struct kvm_vcpu *vcpu)
4250 {
4251 	struct kvm_run *kvm_run = vcpu->run;
4252 
4253 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
4254 		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
4255 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
4256 		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
4257 		/* some control register changes require a tlb flush */
4258 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4259 	}
4260 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4261 		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
4262 		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
4263 	}
4264 	save_access_regs(vcpu->arch.host_acrs);
4265 	restore_access_regs(vcpu->run->s.regs.acrs);
4266 	/* save host (userspace) fprs/vrs */
4267 	save_fpu_regs();
4268 	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
4269 	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
4270 	if (MACHINE_HAS_VX)
4271 		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
4272 	else
4273 		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
4274 	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
4275 	if (test_fp_ctl(current->thread.fpu.fpc))
4276 		/* User space provided an invalid FPC, let's clear it */
4277 		current->thread.fpu.fpc = 0;
4278 
4279 	/* Sync fmt2 only data */
4280 	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
4281 		sync_regs_fmt2(vcpu);
4282 	} else {
4283 		/*
4284 		 * In several places we have to modify our internal view to
4285 		 * not do things that are disallowed by the ultravisor. For
4286 		 * example we must not inject interrupts after specific exits
4287 		 * (e.g. 112 prefix page not secure). We do this by turning
4288 		 * off the machine check, external and I/O interrupt bits
4289 		 * of our PSW copy. To avoid getting validity intercepts, we
4290 		 * do only accept the condition code from userspace.
4291 		 */
4292 		vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
4293 		vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
4294 						   PSW_MASK_CC;
4295 	}
4296 
4297 	kvm_run->kvm_dirty_regs = 0;
4298 }
4299 
4300 static void store_regs_fmt2(struct kvm_vcpu *vcpu)
4301 {
4302 	struct kvm_run *kvm_run = vcpu->run;
4303 
4304 	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
4305 	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
4306 	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
4307 	kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
4308 	kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
4309 	if (MACHINE_HAS_GS) {
4310 		__ctl_set_bit(2, 4);
4311 		if (vcpu->arch.gs_enabled)
4312 			save_gs_cb(current->thread.gs_cb);
4313 		preempt_disable();
4314 		current->thread.gs_cb = vcpu->arch.host_gscb;
4315 		restore_gs_cb(vcpu->arch.host_gscb);
4316 		preempt_enable();
4317 		if (!vcpu->arch.host_gscb)
4318 			__ctl_clear_bit(2, 4);
4319 		vcpu->arch.host_gscb = NULL;
4320 	}
4321 	/* SIE will save etoken directly into the SDNX and therefore into kvm_run */
4322 }
4323 
4324 static void store_regs(struct kvm_vcpu *vcpu)
4325 {
4326 	struct kvm_run *kvm_run = vcpu->run;
4327 
4328 	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
4329 	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
4330 	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
4331 	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
4332 	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
4333 	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
4334 	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
4335 	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
4336 	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
4337 	save_access_regs(vcpu->run->s.regs.acrs);
4338 	restore_access_regs(vcpu->arch.host_acrs);
4339 	/* Save guest register state */
4340 	save_fpu_regs();
4341 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4342 	/* Restore will be done lazily at return */
4343 	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
4344 	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
4345 	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
4346 		store_regs_fmt2(vcpu);
4347 }
4348 
4349 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
4350 {
4351 	struct kvm_run *kvm_run = vcpu->run;
4352 	int rc;
4353 
4354 	if (kvm_run->immediate_exit)
4355 		return -EINTR;
4356 
4357 	if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
4358 	    kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
4359 		return -EINVAL;
4360 
4361 	vcpu_load(vcpu);
4362 
4363 	if (guestdbg_exit_pending(vcpu)) {
4364 		kvm_s390_prepare_debug_exit(vcpu);
4365 		rc = 0;
4366 		goto out;
4367 	}
4368 
4369 	kvm_sigset_activate(vcpu);
4370 
4371 	/*
4372 	 * no need to check the return value of vcpu_start as it can only
4373 	 * fail for protvirt, and protvirt implies user-controlled cpu state
4374 	 */
4375 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
4376 		kvm_s390_vcpu_start(vcpu);
4377 	} else if (is_vcpu_stopped(vcpu)) {
4378 		pr_err_ratelimited("can't run stopped vcpu %d\n",
4379 				   vcpu->vcpu_id);
4380 		rc = -EINVAL;
4381 		goto out;
4382 	}
4383 
4384 	sync_regs(vcpu);
4385 	enable_cpu_timer_accounting(vcpu);
4386 
4387 	might_fault();
4388 	rc = __vcpu_run(vcpu);
4389 
4390 	if (signal_pending(current) && !rc) {
4391 		kvm_run->exit_reason = KVM_EXIT_INTR;
4392 		rc = -EINTR;
4393 	}
4394 
4395 	if (guestdbg_exit_pending(vcpu) && !rc)  {
4396 		kvm_s390_prepare_debug_exit(vcpu);
4397 		rc = 0;
4398 	}
4399 
4400 	if (rc == -EREMOTE) {
4401 		/* userspace support is needed, kvm_run has been prepared */
4402 		rc = 0;
4403 	}
4404 
4405 	disable_cpu_timer_accounting(vcpu);
4406 	store_regs(vcpu);
4407 
4408 	kvm_sigset_deactivate(vcpu);
4409 
4410 	vcpu->stat.exit_userspace++;
4411 out:
4412 	vcpu_put(vcpu);
4413 	return rc;
4414 }
4415 
4416 /*
4417  * store status at address
4418  * we have two special cases:
4419  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
4420  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
4421  */
4422 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4423 {
4424 	unsigned char archmode = 1;
4425 	freg_t fprs[NUM_FPRS];
4426 	unsigned int px;
4427 	u64 clkcomp, cputm;
4428 	int rc;
4429 
4430 	px = kvm_s390_get_prefix(vcpu);
4431 	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
4432 		if (write_guest_abs(vcpu, 163, &archmode, 1))
4433 			return -EFAULT;
4434 		gpa = 0;
4435 	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
4436 		if (write_guest_real(vcpu, 163, &archmode, 1))
4437 			return -EFAULT;
4438 		gpa = px;
4439 	} else {
4440 		gpa -= __LC_FPREGS_SAVE_AREA;
	}
4441 
4442 	/* manually convert vector registers if necessary */
4443 	if (MACHINE_HAS_VX) {
4444 		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
4445 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4446 				     fprs, 128);
4447 	} else {
4448 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4449 				     vcpu->run->s.regs.fprs, 128);
4450 	}
4451 	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
4452 			      vcpu->run->s.regs.gprs, 128);
4453 	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
4454 			      &vcpu->arch.sie_block->gpsw, 16);
4455 	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
4456 			      &px, 4);
4457 	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
4458 			      &vcpu->run->s.regs.fpc, 4);
4459 	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
4460 			      &vcpu->arch.sie_block->todpr, 4);
4461 	cputm = kvm_s390_get_cpu_timer(vcpu);
4462 	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
4463 			      &cputm, 8);
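	/* the save area holds bits 0-55 of the clock comparator, right-aligned */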
4464 	clkcomp = vcpu->arch.sie_block->ckc >> 8;
4465 	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
4466 			      &clkcomp, 8);
4467 	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
4468 			      &vcpu->run->s.regs.acrs, 64);
4469 	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
4470 			      &vcpu->arch.sie_block->gcr, 128);
4471 	return rc ? -EFAULT : 0;
4472 }
4473 
4474 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
4475 {
4476 	/*
4477 	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
4478 	 * switch in the run ioctl. Let's update our copies before we save
4479 	 * them into the save area.
4480 	 */
4481 	save_fpu_regs();
4482 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4483 	save_access_regs(vcpu->run->s.regs.acrs);
4484 
4485 	return kvm_s390_store_status_unloaded(vcpu, addr);
4486 }
4487 
4488 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4489 {
4490 	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
4491 	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
4492 }
4493 
4494 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
4495 {
4496 	unsigned int i;
4497 	struct kvm_vcpu *vcpu;
4498 
4499 	kvm_for_each_vcpu(i, vcpu, kvm) {
4500 		__disable_ibs_on_vcpu(vcpu);
4501 	}
4502 }
4503 
4504 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4505 {
4506 	if (!sclp.has_ibs)
4507 		return;
4508 	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
4509 	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
4510 }
4511 
4512 int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
4513 {
4514 	int i, online_vcpus, r = 0, started_vcpus = 0;
4515 
4516 	if (!is_vcpu_stopped(vcpu))
4517 		return 0;
4518 
4519 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
4520 	/* Only one cpu at a time may enter/leave the STOPPED state. */
4521 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
4522 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4523 
4524 	/* Let's tell the UV that we want to change into the operating state */
4525 	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4526 		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
4527 		if (r) {
4528 			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4529 			return r;
4530 		}
4531 	}
4532 
4533 	for (i = 0; i < online_vcpus; i++) {
4534 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
4535 			started_vcpus++;
4536 	}
4537 
4538 	if (started_vcpus == 0) {
4539 		/* we're the only active VCPU -> speed it up */
4540 		__enable_ibs_on_vcpu(vcpu);
4541 	} else if (started_vcpus == 1) {
4542 		/*
4543 		 * As we are starting a second VCPU, we have to disable
4544 		 * the IBS facility on all VCPUs to remove potentially
4545 		 * outstanding ENABLE requests.
4546 		 */
4547 		__disable_ibs_on_all_vcpus(vcpu->kvm);
4548 	}
4549 
4550 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
4551 	/*
4552 	 * The real PSW might have changed due to a RESTART interpreted by the
4553 	 * ultravisor. We block all interrupts and let the next sie exit
4554 	 * refresh our view.
4555 	 */
4556 	if (kvm_s390_pv_cpu_is_protected(vcpu))
4557 		vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4558 	/*
4559 	 * Another VCPU might have used IBS while we were offline.
4560 	 * Let's play safe and flush the VCPU at startup.
4561 	 */
4562 	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4563 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4564 	return 0;
4565 }
4566 
4567 int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
4568 {
4569 	int i, online_vcpus, r = 0, started_vcpus = 0;
4570 	struct kvm_vcpu *started_vcpu = NULL;
4571 
4572 	if (is_vcpu_stopped(vcpu))
4573 		return 0;
4574 
4575 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
4576 	/* Only one cpu at a time may enter/leave the STOPPED state. */
4577 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
4578 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4579 
4580 	/* Let's tell the UV that we want to change into the stopped state */
4581 	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4582 		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
4583 		if (r) {
4584 			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4585 			return r;
4586 		}
4587 	}
4588 
4589 	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
4590 	kvm_s390_clear_stop_irq(vcpu);
4591 
4592 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
4593 	__disable_ibs_on_vcpu(vcpu);
4594 
4595 	for (i = 0; i < online_vcpus; i++) {
4596 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
4597 			started_vcpus++;
4598 			started_vcpu = vcpu->kvm->vcpus[i];
4599 		}
4600 	}
4601 
4602 	if (started_vcpus == 1) {
4603 		/*
4604 		 * As we only have one VCPU left, we want to enable the
4605 		 * IBS facility for that VCPU to speed it up.
4606 		 */
4607 		__enable_ibs_on_vcpu(started_vcpu);
4608 	}
4609 
4610 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4611 	return 0;
4612 }
4613 
4614 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4615 				     struct kvm_enable_cap *cap)
4616 {
4617 	int r;
4618 
4619 	if (cap->flags)
4620 		return -EINVAL;
4621 
4622 	switch (cap->cap) {
4623 	case KVM_CAP_S390_CSS_SUPPORT:
4624 		if (!vcpu->kvm->arch.css_support) {
4625 			vcpu->kvm->arch.css_support = 1;
4626 			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
4627 			trace_kvm_s390_enable_css(vcpu->kvm);
4628 		}
4629 		r = 0;
4630 		break;
4631 	default:
4632 		r = -EINVAL;
4633 		break;
4634 	}
4635 	return r;
4636 }
4637 
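/* copy data between user space and the secure instruction data area (SIDA) */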
4638 static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu,
4639 				   struct kvm_s390_mem_op *mop)
4640 {
4641 	void __user *uaddr = (void __user *)mop->buf;
4642 	int r = 0;
4643 
4644 	if (mop->flags || !mop->size)
4645 		return -EINVAL;
4646 	if (mop->size + mop->sida_offset < mop->size)
4647 		return -EINVAL;
4648 	if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
4649 		return -E2BIG;
4650 
4651 	switch (mop->op) {
4652 	case KVM_S390_MEMOP_SIDA_READ:
4653 		if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
4654 				 mop->sida_offset), mop->size))
4655 			r = -EFAULT;
4656 
4657 		break;
4658 	case KVM_S390_MEMOP_SIDA_WRITE:
4659 		if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
4660 				   mop->sida_offset), uaddr, mop->size))
4661 			r = -EFAULT;
4662 		break;
4663 	}
4664 	return r;
4665 }

4666 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
4667 				  struct kvm_s390_mem_op *mop)
4668 {
4669 	void __user *uaddr = (void __user *)mop->buf;
4670 	void *tmpbuf = NULL;
4671 	int r = 0;
4672 	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
4673 				    | KVM_S390_MEMOP_F_CHECK_ONLY;
4674 
4675 	if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
4676 		return -EINVAL;
4677 
4678 	if (mop->size > MEM_OP_MAX_SIZE)
4679 		return -E2BIG;
4680 
4681 	if (kvm_s390_pv_cpu_is_protected(vcpu))
4682 		return -EINVAL;
4683 
4684 	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
4685 		tmpbuf = vmalloc(mop->size);
4686 		if (!tmpbuf)
4687 			return -ENOMEM;
4688 	}
4689 
4690 	switch (mop->op) {
4691 	case KVM_S390_MEMOP_LOGICAL_READ:
4692 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4693 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4694 					    mop->size, GACC_FETCH);
4695 			break;
4696 		}
4697 		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4698 		if (r == 0) {
4699 			if (copy_to_user(uaddr, tmpbuf, mop->size))
4700 				r = -EFAULT;
4701 		}
4702 		break;
4703 	case KVM_S390_MEMOP_LOGICAL_WRITE:
4704 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4705 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4706 					    mop->size, GACC_STORE);
4707 			break;
4708 		}
4709 		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4710 			r = -EFAULT;
4711 			break;
4712 		}
4713 		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4714 		break;
4715 	}
4716 
4717 	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4718 		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
4719 
4720 	vfree(tmpbuf);
4721 	return r;
4722 }
4723 
4724 static long kvm_s390_guest_memsida_op(struct kvm_vcpu *vcpu,
4725 				      struct kvm_s390_mem_op *mop)
4726 {
4727 	int r, srcu_idx;
4728 
4729 	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4730 
4731 	switch (mop->op) {
4732 	case KVM_S390_MEMOP_LOGICAL_READ:
4733 	case KVM_S390_MEMOP_LOGICAL_WRITE:
4734 		r = kvm_s390_guest_mem_op(vcpu, mop);
4735 		break;
4736 	case KVM_S390_MEMOP_SIDA_READ:
4737 	case KVM_S390_MEMOP_SIDA_WRITE:
4738 		/* the vcpu->mutex protects us against the sida going away */
4739 		r = kvm_s390_guest_sida_op(vcpu, mop);
4740 		break;
4741 	default:
4742 		r = -EINVAL;
4743 	}
4744 
4745 	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
4746 	return r;
4747 }
4748 
4749 long kvm_arch_vcpu_async_ioctl(struct file *filp,
4750 			       unsigned int ioctl, unsigned long arg)
4751 {
4752 	struct kvm_vcpu *vcpu = filp->private_data;
4753 	void __user *argp = (void __user *)arg;
4754 
4755 	switch (ioctl) {
4756 	case KVM_S390_IRQ: {
4757 		struct kvm_s390_irq s390irq;
4758 
4759 		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4760 			return -EFAULT;
4761 		return kvm_s390_inject_vcpu(vcpu, &s390irq);
4762 	}
4763 	case KVM_S390_INTERRUPT: {
4764 		struct kvm_s390_interrupt s390int;
4765 		struct kvm_s390_irq s390irq = {};
4766 
4767 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
4768 			return -EFAULT;
4769 		if (s390int_to_s390irq(&s390int, &s390irq))
4770 			return -EINVAL;
4771 		return kvm_s390_inject_vcpu(vcpu, &s390irq);
4772 	}
4773 	}
4774 	return -ENOIOCTLCMD;
4775 }
4776 
4777 long kvm_arch_vcpu_ioctl(struct file *filp,
4778 			 unsigned int ioctl, unsigned long arg)
4779 {
4780 	struct kvm_vcpu *vcpu = filp->private_data;
4781 	void __user *argp = (void __user *)arg;
4782 	int idx;
4783 	long r;
4784 	u16 rc, rrc;
4785 
4786 	vcpu_load(vcpu);
4787 
4788 	switch (ioctl) {
4789 	case KVM_S390_STORE_STATUS:
4790 		idx = srcu_read_lock(&vcpu->kvm->srcu);
4791 		r = kvm_s390_store_status_unloaded(vcpu, arg);
4792 		srcu_read_unlock(&vcpu->kvm->srcu, idx);
4793 		break;
4794 	case KVM_S390_SET_INITIAL_PSW: {
4795 		psw_t psw;
4796 
4797 		r = -EFAULT;
4798 		if (copy_from_user(&psw, argp, sizeof(psw)))
4799 			break;
4800 		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4801 		break;
4802 	}
4803 	case KVM_S390_CLEAR_RESET:
4804 		r = 0;
4805 		kvm_arch_vcpu_ioctl_clear_reset(vcpu);
4806 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4807 			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4808 					  UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
4809 			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
4810 				   rc, rrc);
4811 		}
4812 		break;
4813 	case KVM_S390_INITIAL_RESET:
4814 		r = 0;
4815 		kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4816 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4817 			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4818 					  UVC_CMD_CPU_RESET_INITIAL,
4819 					  &rc, &rrc);
4820 			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
4821 				   rc, rrc);
4822 		}
4823 		break;
4824 	case KVM_S390_NORMAL_RESET:
4825 		r = 0;
4826 		kvm_arch_vcpu_ioctl_normal_reset(vcpu);
4827 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4828 			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4829 					  UVC_CMD_CPU_RESET, &rc, &rrc);
4830 			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
4831 				   rc, rrc);
4832 		}
4833 		break;
4834 	case KVM_SET_ONE_REG:
4835 	case KVM_GET_ONE_REG: {
4836 		struct kvm_one_reg reg;

4837 		r = -EINVAL;
4838 		if (kvm_s390_pv_cpu_is_protected(vcpu))
4839 			break;
4840 		r = -EFAULT;
4841 		if (copy_from_user(&reg, argp, sizeof(reg)))
4842 			break;
4843 		if (ioctl == KVM_SET_ONE_REG)
4844 			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4845 		else
4846 			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4847 		break;
4848 	}
4849 #ifdef CONFIG_KVM_S390_UCONTROL
4850 	case KVM_S390_UCAS_MAP: {
4851 		struct kvm_s390_ucas_mapping ucasmap;
4852 
4853 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4854 			r = -EFAULT;
4855 			break;
4856 		}
4857 
4858 		if (!kvm_is_ucontrol(vcpu->kvm)) {
4859 			r = -EINVAL;
4860 			break;
4861 		}
4862 
4863 		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4864 				     ucasmap.vcpu_addr, ucasmap.length);
4865 		break;
4866 	}
4867 	case KVM_S390_UCAS_UNMAP: {
4868 		struct kvm_s390_ucas_mapping ucasmap;
4869 
4870 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4871 			r = -EFAULT;
4872 			break;
4873 		}
4874 
4875 		if (!kvm_is_ucontrol(vcpu->kvm)) {
4876 			r = -EINVAL;
4877 			break;
4878 		}
4879 
4880 		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
4881 			ucasmap.length);
4882 		break;
4883 	}
4884 #endif
4885 	case KVM_S390_VCPU_FAULT: {
4886 		r = gmap_fault(vcpu->arch.gmap, arg, 0);
4887 		break;
4888 	}
4889 	case KVM_ENABLE_CAP:
4890 	{
4891 		struct kvm_enable_cap cap;

4892 		r = -EFAULT;
4893 		if (copy_from_user(&cap, argp, sizeof(cap)))
4894 			break;
4895 		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4896 		break;
4897 	}
4898 	case KVM_S390_MEM_OP: {
4899 		struct kvm_s390_mem_op mem_op;
4900 
4901 		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4902 			r = kvm_s390_guest_memsida_op(vcpu, &mem_op);
4903 		else
4904 			r = -EFAULT;
4905 		break;
4906 	}
4907 	case KVM_S390_SET_IRQ_STATE: {
4908 		struct kvm_s390_irq_state irq_state;
4909 
4910 		r = -EFAULT;
4911 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4912 			break;
4913 		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4914 		    irq_state.len == 0 ||
4915 		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4916 			r = -EINVAL;
4917 			break;
4918 		}
4919 		/* do not use irq_state.flags, it will break old QEMUs */
4920 		r = kvm_s390_set_irq_state(vcpu,
4921 					   (void __user *) irq_state.buf,
4922 					   irq_state.len);
4923 		break;
4924 	}
4925 	case KVM_S390_GET_IRQ_STATE: {
4926 		struct kvm_s390_irq_state irq_state;
4927 
4928 		r = -EFAULT;
4929 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4930 			break;
4931 		if (irq_state.len == 0) {
4932 			r = -EINVAL;
4933 			break;
4934 		}
4935 		/* do not use irq_state.flags, it will break old QEMUs */
4936 		r = kvm_s390_get_irq_state(vcpu,
4937 					   (__u8 __user *) irq_state.buf,
4938 					   irq_state.len);
4939 		break;
4940 	}
4941 	default:
4942 		r = -ENOTTY;
4943 	}
4944 
4945 	vcpu_put(vcpu);
4946 	return r;
4947 }
4948 
4949 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
4950 {
4951 #ifdef CONFIG_KVM_S390_UCONTROL
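	/* let user-controlled (ucontrol) VMs mmap the SIE control block */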
4952 	if (vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET &&
4953 	    kvm_is_ucontrol(vcpu->kvm)) {
4954 		vmf->page = virt_to_page(vcpu->arch.sie_block);
4955 		get_page(vmf->page);
4956 		return 0;
4957 	}
4958 #endif
4959 	return VM_FAULT_SIGBUS;
4960 }
4961 
4962 /* Section: memory related */
4963 int kvm_arch_prepare_memory_region(struct kvm *kvm,
4964 				   struct kvm_memory_slot *memslot,
4965 				   const struct kvm_userspace_memory_region *mem,
4966 				   enum kvm_mr_change change)
4967 {
4968 	/* A few sanity checks. Memory slots have to start and end at a
4969 	   segment boundary (1MB). The memory in userland may be fragmented
4970 	   into various different vmas. It is okay to mmap() and munmap()
4971 	   stuff in this slot at any time after doing this call */
4972 
4973 	if (mem->userspace_addr & 0xffffful)
4974 		return -EINVAL;
4975 
4976 	if (mem->memory_size & 0xffffful)
4977 		return -EINVAL;
4978 
4979 	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
4980 		return -EINVAL;
4981 
4982 	/* When we are protected, we should not change the memory slots */
4983 	if (kvm_s390_pv_get_handle(kvm))
4984 		return -EINVAL;
4985 	return 0;
4986 }
4987 
4988 void kvm_arch_commit_memory_region(struct kvm *kvm,
4989 				const struct kvm_userspace_memory_region *mem,
4990 				struct kvm_memory_slot *old,
4991 				const struct kvm_memory_slot *new,
4992 				enum kvm_mr_change change)
4993 {
4994 	int rc = 0;
4995 
4996 	switch (change) {
4997 	case KVM_MR_DELETE:
4998 		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
4999 					old->npages * PAGE_SIZE);
5000 		break;
5001 	case KVM_MR_MOVE:
5002 		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5003 					old->npages * PAGE_SIZE);
5004 		if (rc)
5005 			break;
5006 		fallthrough;
5007 	case KVM_MR_CREATE:
5008 		rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
5009 				      mem->guest_phys_addr, mem->memory_size);
5010 		break;
5011 	case KVM_MR_FLAGS_ONLY:
5012 		break;
5013 	default:
5014 		WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
5015 	}
5016 	if (rc)
5017 		pr_warn("failed to commit memory region\n");
5019 }
5020 
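/*
 * Pick the 2-bit apportionment indicator for facility doubleword i out of
 * sclp.hmfai and convert it into a mask of those facility bits that may
 * also be used by a non-hypervisor level, i.e. by KVM guests.
 */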
5021 static inline unsigned long nonhyp_mask(int i)
5022 {
5023 	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
5024 
5025 	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
5026 }
5027 
5028 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
5029 {
5030 	vcpu->valid_wakeup = false;
5031 }
5032 
5033 static int __init kvm_s390_init(void)
5034 {
5035 	int i;
5036 
5037 	if (!sclp.has_sief2) {
5038 		pr_info("SIE is not available\n");
5039 		return -ENODEV;
5040 	}
5041 
5042 	if (nested && hpage) {
5043 		pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
5044 		return -EINVAL;
5045 	}
5046 
5047 	for (i = 0; i < 16; i++)
5048 		kvm_s390_fac_base[i] |=
5049 			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
5050 
5051 	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
5052 }
5053 
5054 static void __exit kvm_s390_exit(void)
5055 {
5056 	kvm_exit();
5057 }
5058 
5059 module_init(kvm_s390_init);
5060 module_exit(kvm_s390_exit);
5061 
5062 /*
5063  * Enable autoloading of the kvm module.
5064  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
5065  * since x86 takes a different approach.
5066  */
5067 #include <linux/miscdevice.h>
5068 MODULE_ALIAS_MISCDEV(KVM_MINOR);
5069 MODULE_ALIAS("devname:kvm");
5070