xref: /openbmc/linux/arch/s390/kvm/kvm-s390.c (revision bef7a78d)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * hosting IBM Z kernel virtual machines (s390x)
4  *
5  * Copyright IBM Corp. 2008, 2020
6  *
7  *    Author(s): Carsten Otte <cotte@de.ibm.com>
8  *               Christian Borntraeger <borntraeger@de.ibm.com>
9  *               Heiko Carstens <heiko.carstens@de.ibm.com>
10  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
11  *               Jason J. Herne <jjherne@us.ibm.com>
12  */
13 
/*
 * Message prefix for this file: pr_fmt() prepends "kvm-s390: " to every
 * format string passed through it (e.g. by the pr_err() call in
 * kvm_arch_init()).
 */
#define KMSG_COMPONENT "kvm-s390"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
16 
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/moduleparam.h>
27 #include <linux/random.h>
28 #include <linux/slab.h>
29 #include <linux/timer.h>
30 #include <linux/vmalloc.h>
31 #include <linux/bitmap.h>
32 #include <linux/sched/signal.h>
33 #include <linux/string.h>
34 #include <linux/pgtable.h>
35 
36 #include <asm/asm-offsets.h>
37 #include <asm/lowcore.h>
38 #include <asm/stp.h>
39 #include <asm/gmap.h>
40 #include <asm/nmi.h>
41 #include <asm/switch_to.h>
42 #include <asm/isc.h>
43 #include <asm/sclp.h>
44 #include <asm/cpacf.h>
45 #include <asm/timex.h>
46 #include <asm/ap.h>
47 #include <asm/uv.h>
48 #include "kvm-s390.h"
49 #include "gaccess.h"
50 
51 #define CREATE_TRACE_POINTS
52 #include "trace.h"
53 #include "trace-s390.h"
54 
#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
/* NOTE(review): presumably the number of vcpu-local interrupt slots - confirm */
#define LOCAL_IRQS 32
/* Bytes needed for (KVM_MAX_VCPUS + LOCAL_IRQS) struct kvm_s390_irq entries */
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))
59 
/*
 * Statistics table: each entry pairs a user-visible name with a counter
 * field, declared through VCPU_STAT() for per-vcpu counters or VM_STAT()
 * for per-VM counters. The list is terminated by a { NULL } sentinel.
 * NOTE(review): presumably consumed by common KVM code to populate debugfs,
 * as the element type name suggests - confirm.
 */
struct kvm_stats_debugfs_item debugfs_entries[] = {
	VCPU_STAT("userspace_handled", exit_userspace),
	VCPU_STAT("exit_null", exit_null),
	VCPU_STAT("pfault_sync", pfault_sync),
	VCPU_STAT("exit_validity", exit_validity),
	VCPU_STAT("exit_stop_request", exit_stop_request),
	VCPU_STAT("exit_external_request", exit_external_request),
	VCPU_STAT("exit_io_request", exit_io_request),
	VCPU_STAT("exit_external_interrupt", exit_external_interrupt),
	VCPU_STAT("exit_instruction", exit_instruction),
	VCPU_STAT("exit_pei", exit_pei),
	VCPU_STAT("exit_program_interruption", exit_program_interruption),
	VCPU_STAT("exit_instr_and_program_int", exit_instr_and_program),
	VCPU_STAT("exit_operation_exception", exit_operation_exception),
	VCPU_STAT("halt_successful_poll", halt_successful_poll),
	VCPU_STAT("halt_attempted_poll", halt_attempted_poll),
	VCPU_STAT("halt_poll_invalid", halt_poll_invalid),
	VCPU_STAT("halt_no_poll_steal", halt_no_poll_steal),
	VCPU_STAT("halt_wakeup", halt_wakeup),
	VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns),
	VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns),
	VCPU_STAT("instruction_lctlg", instruction_lctlg),
	VCPU_STAT("instruction_lctl", instruction_lctl),
	VCPU_STAT("instruction_stctl", instruction_stctl),
	VCPU_STAT("instruction_stctg", instruction_stctg),
	VCPU_STAT("deliver_ckc", deliver_ckc),
	VCPU_STAT("deliver_cputm", deliver_cputm),
	VCPU_STAT("deliver_emergency_signal", deliver_emergency_signal),
	VCPU_STAT("deliver_external_call", deliver_external_call),
	VCPU_STAT("deliver_service_signal", deliver_service_signal),
	VCPU_STAT("deliver_virtio", deliver_virtio),
	VCPU_STAT("deliver_stop_signal", deliver_stop_signal),
	VCPU_STAT("deliver_prefix_signal", deliver_prefix_signal),
	VCPU_STAT("deliver_restart_signal", deliver_restart_signal),
	VCPU_STAT("deliver_program", deliver_program),
	VCPU_STAT("deliver_io", deliver_io),
	VCPU_STAT("deliver_machine_check", deliver_machine_check),
	VCPU_STAT("exit_wait_state", exit_wait_state),
	VCPU_STAT("inject_ckc", inject_ckc),
	VCPU_STAT("inject_cputm", inject_cputm),
	VCPU_STAT("inject_external_call", inject_external_call),
	VM_STAT("inject_float_mchk", inject_float_mchk),
	VCPU_STAT("inject_emergency_signal", inject_emergency_signal),
	VM_STAT("inject_io", inject_io),
	VCPU_STAT("inject_mchk", inject_mchk),
	VM_STAT("inject_pfault_done", inject_pfault_done),
	VCPU_STAT("inject_program", inject_program),
	VCPU_STAT("inject_restart", inject_restart),
	VM_STAT("inject_service_signal", inject_service_signal),
	VCPU_STAT("inject_set_prefix", inject_set_prefix),
	VCPU_STAT("inject_stop_signal", inject_stop_signal),
	VCPU_STAT("inject_pfault_init", inject_pfault_init),
	VM_STAT("inject_virtio", inject_virtio),
	VCPU_STAT("instruction_epsw", instruction_epsw),
	VCPU_STAT("instruction_gs", instruction_gs),
	VCPU_STAT("instruction_io_other", instruction_io_other),
	VCPU_STAT("instruction_lpsw", instruction_lpsw),
	VCPU_STAT("instruction_lpswe", instruction_lpswe),
	VCPU_STAT("instruction_pfmf", instruction_pfmf),
	VCPU_STAT("instruction_ptff", instruction_ptff),
	VCPU_STAT("instruction_stidp", instruction_stidp),
	VCPU_STAT("instruction_sck", instruction_sck),
	VCPU_STAT("instruction_sckpf", instruction_sckpf),
	VCPU_STAT("instruction_spx", instruction_spx),
	VCPU_STAT("instruction_stpx", instruction_stpx),
	VCPU_STAT("instruction_stap", instruction_stap),
	VCPU_STAT("instruction_iske", instruction_iske),
	VCPU_STAT("instruction_ri", instruction_ri),
	VCPU_STAT("instruction_rrbe", instruction_rrbe),
	VCPU_STAT("instruction_sske", instruction_sske),
	VCPU_STAT("instruction_ipte_interlock", instruction_ipte_interlock),
	VCPU_STAT("instruction_essa", instruction_essa),
	VCPU_STAT("instruction_stsi", instruction_stsi),
	VCPU_STAT("instruction_stfl", instruction_stfl),
	VCPU_STAT("instruction_tb", instruction_tb),
	VCPU_STAT("instruction_tpi", instruction_tpi),
	VCPU_STAT("instruction_tprot", instruction_tprot),
	VCPU_STAT("instruction_tsch", instruction_tsch),
	VCPU_STAT("instruction_sthyi", instruction_sthyi),
	VCPU_STAT("instruction_sie", instruction_sie),
	VCPU_STAT("instruction_sigp_sense", instruction_sigp_sense),
	VCPU_STAT("instruction_sigp_sense_running", instruction_sigp_sense_running),
	VCPU_STAT("instruction_sigp_external_call", instruction_sigp_external_call),
	VCPU_STAT("instruction_sigp_emergency", instruction_sigp_emergency),
	VCPU_STAT("instruction_sigp_cond_emergency", instruction_sigp_cond_emergency),
	VCPU_STAT("instruction_sigp_start", instruction_sigp_start),
	VCPU_STAT("instruction_sigp_stop", instruction_sigp_stop),
	VCPU_STAT("instruction_sigp_stop_store_status", instruction_sigp_stop_store_status),
	VCPU_STAT("instruction_sigp_store_status", instruction_sigp_store_status),
	VCPU_STAT("instruction_sigp_store_adtl_status", instruction_sigp_store_adtl_status),
	VCPU_STAT("instruction_sigp_set_arch", instruction_sigp_arch),
	VCPU_STAT("instruction_sigp_set_prefix", instruction_sigp_prefix),
	VCPU_STAT("instruction_sigp_restart", instruction_sigp_restart),
	VCPU_STAT("instruction_sigp_cpu_reset", instruction_sigp_cpu_reset),
	VCPU_STAT("instruction_sigp_init_cpu_reset", instruction_sigp_init_cpu_reset),
	VCPU_STAT("instruction_sigp_unknown", instruction_sigp_unknown),
	VCPU_STAT("instruction_diag_10", diagnose_10),
	VCPU_STAT("instruction_diag_44", diagnose_44),
	VCPU_STAT("instruction_diag_9c", diagnose_9c),
	VCPU_STAT("diag_9c_ignored", diagnose_9c_ignored),
	VCPU_STAT("instruction_diag_258", diagnose_258),
	VCPU_STAT("instruction_diag_308", diagnose_308),
	VCPU_STAT("instruction_diag_500", diagnose_500),
	VCPU_STAT("instruction_diag_other", diagnose_other),
	{ NULL }	/* sentinel: end of table */
};
166 
/*
 * Packed 16-byte record: a one-byte epoch index, the 64-bit TOD value and
 * seven reserved bytes. __packed keeps 'tod' directly behind 'epoch_idx'
 * with no padding.
 * NOTE(review): presumably mirrors the extended store-clock layout - confirm.
 */
struct kvm_s390_tod_clock_ext {
	__u8 epoch_idx;
	__u64 tod;
	__u8 reserved[7];
} __packed;
172 
173 /* allow nested virtualization in KVM (if enabled by user space) */
174 static int nested;
175 module_param(nested, int, S_IRUGO);
176 MODULE_PARM_DESC(nested, "Nested virtualization support");
177 
178 /* allow 1m huge page guest backing, if !nested */
179 static int hpage;
180 module_param(hpage, int, 0444);
181 MODULE_PARM_DESC(hpage, "1m huge page backing support");
182 
183 /* maximum percentage of steal time for polling.  >100 is treated like 100 */
184 static u8 halt_poll_max_steal = 10;
185 module_param(halt_poll_max_steal, byte, 0644);
186 MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
187 
188 /* if set to true, the GISA will be initialized and used if available */
189 static bool use_gisa  = true;
190 module_param(use_gisa, bool, 0644);
191 MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");
192 
/*
 * For now we handle at most 16 double words as this is what the s390 base
 * kernel handles and stores in the prefix page. If we ever need to go beyond
 * this, this requires changes to code, but the external uapi can stay.
 *
 * SIZE_INTERNAL is the element count of both facility arrays below and the
 * value returned by kvm_s390_fac_size().
 */
#define SIZE_INTERNAL 16

/*
 * Base feature mask that defines default mask for facilities. Consists of the
 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
 */
static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
/*
 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
 * and defines the facilities that can be enabled via a cpu model.
 */
static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
210 
211 static unsigned long kvm_s390_fac_size(void)
212 {
213 	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
214 	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
215 	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
216 		sizeof(S390_lowcore.stfle_fac_list));
217 
218 	return SIZE_INTERNAL;
219 }
220 
/* available cpu features supported by kvm; bits are set by allow_cpu_feat() */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

/* pte notifiers registered in kvm_arch_hardware_setup() */
static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
/* s390 debug feature areas "kvm-trace" and "kvm-uv", set up in kvm_arch_init() */
debug_info_t *kvm_s390_dbf;
debug_info_t *kvm_s390_dbf_uv;
230 
231 /* Section: not file related */
/*
 * Arch hook for enabling virtualization support.
 *
 * Nothing has to be switched on here, so this unconditionally reports
 * success (0).
 */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}
237 
/*
 * Arch hook for the processor compatibility check.
 *
 * @opaque is ignored; the check always passes and 0 is returned.
 */
int kvm_arch_check_processor_compat(void *opaque)
{
	(void)opaque;

	return 0;
}
242 
/*
 * forward declarations
 *
 * kvm_gmap_notifier() is installed as a notifier callback in
 * kvm_arch_hardware_setup() before its definition appears.
 */
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end);
static int sca_switch_to_extended(struct kvm *kvm);
247 
248 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
249 {
250 	u8 delta_idx = 0;
251 
252 	/*
253 	 * The TOD jumps by delta, we have to compensate this by adding
254 	 * -delta to the epoch.
255 	 */
256 	delta = -delta;
257 
258 	/* sign-extension - we're adding to signed values below */
259 	if ((s64)delta < 0)
260 		delta_idx = -1;
261 
262 	scb->epoch += delta;
263 	if (scb->ecd & ECD_MEF) {
264 		scb->epdx += delta_idx;
265 		if (scb->epoch < delta)
266 			scb->epdx += 1;
267 	}
268 }
269 
270 /*
271  * This callback is executed during stop_machine(). All CPUs are therefore
272  * temporarily stopped. In order not to change guest behavior, we have to
273  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
274  * so a CPU won't be stopped while calculating with the epoch.
275  */
276 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
277 			  void *v)
278 {
279 	struct kvm *kvm;
280 	struct kvm_vcpu *vcpu;
281 	int i;
282 	unsigned long long *delta = v;
283 
284 	list_for_each_entry(kvm, &vm_list, vm_list) {
285 		kvm_for_each_vcpu(i, vcpu, kvm) {
286 			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
287 			if (i == 0) {
288 				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
289 				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
290 			}
291 			if (vcpu->arch.cputm_enabled)
292 				vcpu->arch.cputm_start += *delta;
293 			if (vcpu->arch.vsie_block)
294 				kvm_clock_sync_scb(vcpu->arch.vsie_block,
295 						   *delta);
296 		}
297 	}
298 	return NOTIFY_OK;
299 }
300 
/*
 * Notifier block that routes epoch delta events to kvm_clock_sync(). It is
 * added to s390_epoch_delta_notifier in kvm_arch_hardware_setup() and removed
 * again in kvm_arch_hardware_unsetup().
 */
static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};
304 
305 int kvm_arch_hardware_setup(void *opaque)
306 {
307 	gmap_notifier.notifier_call = kvm_gmap_notifier;
308 	gmap_register_pte_notifier(&gmap_notifier);
309 	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
310 	gmap_register_pte_notifier(&vsie_gmap_notifier);
311 	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
312 				       &kvm_clock_notifier);
313 	return 0;
314 }
315 
316 void kvm_arch_hardware_unsetup(void)
317 {
318 	gmap_unregister_pte_notifier(&gmap_notifier);
319 	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
320 	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
321 					 &kvm_clock_notifier);
322 }
323 
324 static void allow_cpu_feat(unsigned long nr)
325 {
326 	set_bit_inv(nr, kvm_s390_available_cpu_feat);
327 }
328 
/*
 * Ask the PLO instruction whether function code @nr is available.
 *
 * @nr is placed in register 0 with 0x100 or'ed in, which selects the
 * "test bit" form of the instruction. The resulting condition code is
 * extracted with ipm/srl.
 *
 * Returns 1 if the condition code is 0, otherwise 0.
 */
static inline int plo_test_bit(unsigned char nr)
{
	/* the value has to live in general register 0 */
	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
	int cc;

	asm volatile(
		/* Parameter registers are ignored for "test bit" */
		"	plo	0,0,0,0(0)\n"
		"	ipm	%0\n"
		"	srl	%0,28\n"
		: "=d" (cc)
		: "d" (r0)
		: "cc");
	return cc == 0;
}
344 
/*
 * Run the query function (function code 0 in register 0) of the 32-bit
 * RRF-format instruction @opcode, with register 1 holding the address of
 * @query.
 *
 * @opcode: instruction opcode; passed with an "i" constraint, so it must be
 *          a compile-time constant
 * @query:  buffer the instruction stores its result to; the "memory" clobber
 *          tells the compiler the buffer may be written
 *          NOTE(review): the required buffer size is not visible here -
 *          confirm against the callers' subfunc arrays.
 */
static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
{
	register unsigned long r0 asm("0") = 0;	/* query function */
	register unsigned long r1 asm("1") = (unsigned long) query;

	asm volatile(
		/* Parameter regs are ignored */
		"	.insn	rrf,%[opc] << 16,2,4,6,0\n"
		:
		: "d" (r0), "a" (r1), [opc] "i" (opcode)
		: "cc", "memory");
}
357 
/* Opcodes handed to __insn32_query() by kvm_s390_cpu_feat_init() */
#define INSN_SORTL 0xb938
#define INSN_DFLTCC 0xb939
360 
361 static void kvm_s390_cpu_feat_init(void)
362 {
363 	int i;
364 
365 	for (i = 0; i < 256; ++i) {
366 		if (plo_test_bit(i))
367 			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
368 	}
369 
370 	if (test_facility(28)) /* TOD-clock steering */
371 		ptff(kvm_s390_available_subfunc.ptff,
372 		     sizeof(kvm_s390_available_subfunc.ptff),
373 		     PTFF_QAF);
374 
375 	if (test_facility(17)) { /* MSA */
376 		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
377 			      kvm_s390_available_subfunc.kmac);
378 		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
379 			      kvm_s390_available_subfunc.kmc);
380 		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
381 			      kvm_s390_available_subfunc.km);
382 		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
383 			      kvm_s390_available_subfunc.kimd);
384 		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
385 			      kvm_s390_available_subfunc.klmd);
386 	}
387 	if (test_facility(76)) /* MSA3 */
388 		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
389 			      kvm_s390_available_subfunc.pckmo);
390 	if (test_facility(77)) { /* MSA4 */
391 		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
392 			      kvm_s390_available_subfunc.kmctr);
393 		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
394 			      kvm_s390_available_subfunc.kmf);
395 		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
396 			      kvm_s390_available_subfunc.kmo);
397 		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
398 			      kvm_s390_available_subfunc.pcc);
399 	}
400 	if (test_facility(57)) /* MSA5 */
401 		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
402 			      kvm_s390_available_subfunc.ppno);
403 
404 	if (test_facility(146)) /* MSA8 */
405 		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
406 			      kvm_s390_available_subfunc.kma);
407 
408 	if (test_facility(155)) /* MSA9 */
409 		__cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
410 			      kvm_s390_available_subfunc.kdsa);
411 
412 	if (test_facility(150)) /* SORTL */
413 		__insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
414 
415 	if (test_facility(151)) /* DFLTCC */
416 		__insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
417 
418 	if (MACHINE_HAS_ESOP)
419 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
420 	/*
421 	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
422 	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
423 	 */
424 	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
425 	    !test_facility(3) || !nested)
426 		return;
427 	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
428 	if (sclp.has_64bscao)
429 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
430 	if (sclp.has_siif)
431 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
432 	if (sclp.has_gpere)
433 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
434 	if (sclp.has_gsls)
435 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
436 	if (sclp.has_ib)
437 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
438 	if (sclp.has_cei)
439 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
440 	if (sclp.has_ibs)
441 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
442 	if (sclp.has_kss)
443 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
444 	/*
445 	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
446 	 * all skey handling functions read/set the skey from the PGSTE
447 	 * instead of the real storage key.
448 	 *
449 	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
450 	 * pages being detected as preserved although they are resident.
451 	 *
452 	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
453 	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
454 	 *
455 	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
456 	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
457 	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
458 	 *
459 	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
460 	 * cannot easily shadow the SCA because of the ipte lock.
461 	 */
462 }
463 
464 int kvm_arch_init(void *opaque)
465 {
466 	int rc = -ENOMEM;
467 
468 	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
469 	if (!kvm_s390_dbf)
470 		return -ENOMEM;
471 
472 	kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
473 	if (!kvm_s390_dbf_uv)
474 		goto out;
475 
476 	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
477 	    debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
478 		goto out;
479 
480 	kvm_s390_cpu_feat_init();
481 
482 	/* Register floating interrupt controller interface. */
483 	rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
484 	if (rc) {
485 		pr_err("A FLIC registration call failed with rc=%d\n", rc);
486 		goto out;
487 	}
488 
489 	rc = kvm_s390_gib_init(GAL_ISC);
490 	if (rc)
491 		goto out;
492 
493 	return 0;
494 
495 out:
496 	kvm_arch_exit();
497 	return rc;
498 }
499 
500 void kvm_arch_exit(void)
501 {
502 	kvm_s390_gib_destroy();
503 	debug_unregister(kvm_s390_dbf);
504 	debug_unregister(kvm_s390_dbf_uv);
505 }
506 
507 /* Section: device related */
508 long kvm_arch_dev_ioctl(struct file *filp,
509 			unsigned int ioctl, unsigned long arg)
510 {
511 	if (ioctl == KVM_S390_ENABLE_SIE)
512 		return s390_enable_sie();
513 	return -EINVAL;
514 }
515 
516 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
517 {
518 	int r;
519 
520 	switch (ext) {
521 	case KVM_CAP_S390_PSW:
522 	case KVM_CAP_S390_GMAP:
523 	case KVM_CAP_SYNC_MMU:
524 #ifdef CONFIG_KVM_S390_UCONTROL
525 	case KVM_CAP_S390_UCONTROL:
526 #endif
527 	case KVM_CAP_ASYNC_PF:
528 	case KVM_CAP_SYNC_REGS:
529 	case KVM_CAP_ONE_REG:
530 	case KVM_CAP_ENABLE_CAP:
531 	case KVM_CAP_S390_CSS_SUPPORT:
532 	case KVM_CAP_IOEVENTFD:
533 	case KVM_CAP_DEVICE_CTRL:
534 	case KVM_CAP_S390_IRQCHIP:
535 	case KVM_CAP_VM_ATTRIBUTES:
536 	case KVM_CAP_MP_STATE:
537 	case KVM_CAP_IMMEDIATE_EXIT:
538 	case KVM_CAP_S390_INJECT_IRQ:
539 	case KVM_CAP_S390_USER_SIGP:
540 	case KVM_CAP_S390_USER_STSI:
541 	case KVM_CAP_S390_SKEYS:
542 	case KVM_CAP_S390_IRQ_STATE:
543 	case KVM_CAP_S390_USER_INSTR0:
544 	case KVM_CAP_S390_CMMA_MIGRATION:
545 	case KVM_CAP_S390_AIS:
546 	case KVM_CAP_S390_AIS_MIGRATION:
547 	case KVM_CAP_S390_VCPU_RESETS:
548 	case KVM_CAP_SET_GUEST_DEBUG:
549 	case KVM_CAP_S390_DIAG318:
550 		r = 1;
551 		break;
552 	case KVM_CAP_S390_HPAGE_1M:
553 		r = 0;
554 		if (hpage && !kvm_is_ucontrol(kvm))
555 			r = 1;
556 		break;
557 	case KVM_CAP_S390_MEM_OP:
558 		r = MEM_OP_MAX_SIZE;
559 		break;
560 	case KVM_CAP_NR_VCPUS:
561 	case KVM_CAP_MAX_VCPUS:
562 	case KVM_CAP_MAX_VCPU_ID:
563 		r = KVM_S390_BSCA_CPU_SLOTS;
564 		if (!kvm_s390_use_sca_entries())
565 			r = KVM_MAX_VCPUS;
566 		else if (sclp.has_esca && sclp.has_64bscao)
567 			r = KVM_S390_ESCA_CPU_SLOTS;
568 		break;
569 	case KVM_CAP_S390_COW:
570 		r = MACHINE_HAS_ESOP;
571 		break;
572 	case KVM_CAP_S390_VECTOR_REGISTERS:
573 		r = MACHINE_HAS_VX;
574 		break;
575 	case KVM_CAP_S390_RI:
576 		r = test_facility(64);
577 		break;
578 	case KVM_CAP_S390_GS:
579 		r = test_facility(133);
580 		break;
581 	case KVM_CAP_S390_BPB:
582 		r = test_facility(82);
583 		break;
584 	case KVM_CAP_S390_PROTECTED:
585 		r = is_prot_virt_host();
586 		break;
587 	default:
588 		r = 0;
589 	}
590 	return r;
591 }
592 
593 void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
594 {
595 	int i;
596 	gfn_t cur_gfn, last_gfn;
597 	unsigned long gaddr, vmaddr;
598 	struct gmap *gmap = kvm->arch.gmap;
599 	DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
600 
601 	/* Loop over all guest segments */
602 	cur_gfn = memslot->base_gfn;
603 	last_gfn = memslot->base_gfn + memslot->npages;
604 	for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
605 		gaddr = gfn_to_gpa(cur_gfn);
606 		vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
607 		if (kvm_is_error_hva(vmaddr))
608 			continue;
609 
610 		bitmap_zero(bitmap, _PAGE_ENTRIES);
611 		gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
612 		for (i = 0; i < _PAGE_ENTRIES; i++) {
613 			if (test_bit(i, bitmap))
614 				mark_page_dirty(kvm, cur_gfn + i);
615 		}
616 
617 		if (fatal_signal_pending(current))
618 			return;
619 		cond_resched();
620 	}
621 }
622 
/* Section: vm related */
/* forward declaration; the definition is not in this part of the file */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);
625 
626 /*
627  * Get (and clear) the dirty memory log for a memory slot.
628  */
629 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
630 			       struct kvm_dirty_log *log)
631 {
632 	int r;
633 	unsigned long n;
634 	struct kvm_memory_slot *memslot;
635 	int is_dirty;
636 
637 	if (kvm_is_ucontrol(kvm))
638 		return -EINVAL;
639 
640 	mutex_lock(&kvm->slots_lock);
641 
642 	r = -EINVAL;
643 	if (log->slot >= KVM_USER_MEM_SLOTS)
644 		goto out;
645 
646 	r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
647 	if (r)
648 		goto out;
649 
650 	/* Clear the dirty log */
651 	if (is_dirty) {
652 		n = kvm_dirty_bitmap_bytes(memslot);
653 		memset(memslot->dirty_bitmap, 0, n);
654 	}
655 	r = 0;
656 out:
657 	mutex_unlock(&kvm->slots_lock);
658 	return r;
659 }
660 
661 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
662 {
663 	unsigned int i;
664 	struct kvm_vcpu *vcpu;
665 
666 	kvm_for_each_vcpu(i, vcpu, kvm) {
667 		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
668 	}
669 }
670 
671 int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
672 {
673 	int r;
674 
675 	if (cap->flags)
676 		return -EINVAL;
677 
678 	switch (cap->cap) {
679 	case KVM_CAP_S390_IRQCHIP:
680 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
681 		kvm->arch.use_irqchip = 1;
682 		r = 0;
683 		break;
684 	case KVM_CAP_S390_USER_SIGP:
685 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
686 		kvm->arch.user_sigp = 1;
687 		r = 0;
688 		break;
689 	case KVM_CAP_S390_VECTOR_REGISTERS:
690 		mutex_lock(&kvm->lock);
691 		if (kvm->created_vcpus) {
692 			r = -EBUSY;
693 		} else if (MACHINE_HAS_VX) {
694 			set_kvm_facility(kvm->arch.model.fac_mask, 129);
695 			set_kvm_facility(kvm->arch.model.fac_list, 129);
696 			if (test_facility(134)) {
697 				set_kvm_facility(kvm->arch.model.fac_mask, 134);
698 				set_kvm_facility(kvm->arch.model.fac_list, 134);
699 			}
700 			if (test_facility(135)) {
701 				set_kvm_facility(kvm->arch.model.fac_mask, 135);
702 				set_kvm_facility(kvm->arch.model.fac_list, 135);
703 			}
704 			if (test_facility(148)) {
705 				set_kvm_facility(kvm->arch.model.fac_mask, 148);
706 				set_kvm_facility(kvm->arch.model.fac_list, 148);
707 			}
708 			if (test_facility(152)) {
709 				set_kvm_facility(kvm->arch.model.fac_mask, 152);
710 				set_kvm_facility(kvm->arch.model.fac_list, 152);
711 			}
712 			r = 0;
713 		} else
714 			r = -EINVAL;
715 		mutex_unlock(&kvm->lock);
716 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
717 			 r ? "(not available)" : "(success)");
718 		break;
719 	case KVM_CAP_S390_RI:
720 		r = -EINVAL;
721 		mutex_lock(&kvm->lock);
722 		if (kvm->created_vcpus) {
723 			r = -EBUSY;
724 		} else if (test_facility(64)) {
725 			set_kvm_facility(kvm->arch.model.fac_mask, 64);
726 			set_kvm_facility(kvm->arch.model.fac_list, 64);
727 			r = 0;
728 		}
729 		mutex_unlock(&kvm->lock);
730 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
731 			 r ? "(not available)" : "(success)");
732 		break;
733 	case KVM_CAP_S390_AIS:
734 		mutex_lock(&kvm->lock);
735 		if (kvm->created_vcpus) {
736 			r = -EBUSY;
737 		} else {
738 			set_kvm_facility(kvm->arch.model.fac_mask, 72);
739 			set_kvm_facility(kvm->arch.model.fac_list, 72);
740 			r = 0;
741 		}
742 		mutex_unlock(&kvm->lock);
743 		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
744 			 r ? "(not available)" : "(success)");
745 		break;
746 	case KVM_CAP_S390_GS:
747 		r = -EINVAL;
748 		mutex_lock(&kvm->lock);
749 		if (kvm->created_vcpus) {
750 			r = -EBUSY;
751 		} else if (test_facility(133)) {
752 			set_kvm_facility(kvm->arch.model.fac_mask, 133);
753 			set_kvm_facility(kvm->arch.model.fac_list, 133);
754 			r = 0;
755 		}
756 		mutex_unlock(&kvm->lock);
757 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
758 			 r ? "(not available)" : "(success)");
759 		break;
760 	case KVM_CAP_S390_HPAGE_1M:
761 		mutex_lock(&kvm->lock);
762 		if (kvm->created_vcpus)
763 			r = -EBUSY;
764 		else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
765 			r = -EINVAL;
766 		else {
767 			r = 0;
768 			mmap_write_lock(kvm->mm);
769 			kvm->mm->context.allow_gmap_hpage_1m = 1;
770 			mmap_write_unlock(kvm->mm);
771 			/*
772 			 * We might have to create fake 4k page
773 			 * tables. To avoid that the hardware works on
774 			 * stale PGSTEs, we emulate these instructions.
775 			 */
776 			kvm->arch.use_skf = 0;
777 			kvm->arch.use_pfmfi = 0;
778 		}
779 		mutex_unlock(&kvm->lock);
780 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
781 			 r ? "(not available)" : "(success)");
782 		break;
783 	case KVM_CAP_S390_USER_STSI:
784 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
785 		kvm->arch.user_stsi = 1;
786 		r = 0;
787 		break;
788 	case KVM_CAP_S390_USER_INSTR0:
789 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
790 		kvm->arch.user_instr0 = 1;
791 		icpt_operexc_on_all_vcpus(kvm);
792 		r = 0;
793 		break;
794 	default:
795 		r = -EINVAL;
796 		break;
797 	}
798 	return r;
799 }
800 
801 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
802 {
803 	int ret;
804 
805 	switch (attr->attr) {
806 	case KVM_S390_VM_MEM_LIMIT_SIZE:
807 		ret = 0;
808 		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
809 			 kvm->arch.mem_limit);
810 		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
811 			ret = -EFAULT;
812 		break;
813 	default:
814 		ret = -ENXIO;
815 		break;
816 	}
817 	return ret;
818 }
819 
820 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
821 {
822 	int ret;
823 	unsigned int idx;
824 	switch (attr->attr) {
825 	case KVM_S390_VM_MEM_ENABLE_CMMA:
826 		ret = -ENXIO;
827 		if (!sclp.has_cmma)
828 			break;
829 
830 		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
831 		mutex_lock(&kvm->lock);
832 		if (kvm->created_vcpus)
833 			ret = -EBUSY;
834 		else if (kvm->mm->context.allow_gmap_hpage_1m)
835 			ret = -EINVAL;
836 		else {
837 			kvm->arch.use_cmma = 1;
838 			/* Not compatible with cmma. */
839 			kvm->arch.use_pfmfi = 0;
840 			ret = 0;
841 		}
842 		mutex_unlock(&kvm->lock);
843 		break;
844 	case KVM_S390_VM_MEM_CLR_CMMA:
845 		ret = -ENXIO;
846 		if (!sclp.has_cmma)
847 			break;
848 		ret = -EINVAL;
849 		if (!kvm->arch.use_cmma)
850 			break;
851 
852 		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
853 		mutex_lock(&kvm->lock);
854 		idx = srcu_read_lock(&kvm->srcu);
855 		s390_reset_cmma(kvm->arch.gmap->mm);
856 		srcu_read_unlock(&kvm->srcu, idx);
857 		mutex_unlock(&kvm->lock);
858 		ret = 0;
859 		break;
860 	case KVM_S390_VM_MEM_LIMIT_SIZE: {
861 		unsigned long new_limit;
862 
863 		if (kvm_is_ucontrol(kvm))
864 			return -EINVAL;
865 
866 		if (get_user(new_limit, (u64 __user *)attr->addr))
867 			return -EFAULT;
868 
869 		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
870 		    new_limit > kvm->arch.mem_limit)
871 			return -E2BIG;
872 
873 		if (!new_limit)
874 			return -EINVAL;
875 
876 		/* gmap_create takes last usable address */
877 		if (new_limit != KVM_S390_NO_MEM_LIMIT)
878 			new_limit -= 1;
879 
880 		ret = -EBUSY;
881 		mutex_lock(&kvm->lock);
882 		if (!kvm->created_vcpus) {
883 			/* gmap_create will round the limit up */
884 			struct gmap *new = gmap_create(current->mm, new_limit);
885 
886 			if (!new) {
887 				ret = -ENOMEM;
888 			} else {
889 				gmap_remove(kvm->arch.gmap);
890 				new->private = kvm;
891 				kvm->arch.gmap = new;
892 				ret = 0;
893 			}
894 		}
895 		mutex_unlock(&kvm->lock);
896 		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
897 		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
898 			 (void *) kvm->arch.gmap->asce);
899 		break;
900 	}
901 	default:
902 		ret = -ENXIO;
903 		break;
904 	}
905 	return ret;
906 }
907 
/* forward declaration: called by kvm_s390_vcpu_crypto_reset_all() */
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
909 
910 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
911 {
912 	struct kvm_vcpu *vcpu;
913 	int i;
914 
915 	kvm_s390_vcpu_block_all(kvm);
916 
917 	kvm_for_each_vcpu(i, vcpu, kvm) {
918 		kvm_s390_vcpu_crypto_setup(vcpu);
919 		/* recreate the shadow crycb by leaving the VSIE handler */
920 		kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
921 	}
922 
923 	kvm_s390_vcpu_unblock_all(kvm);
924 }
925 
926 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
927 {
928 	mutex_lock(&kvm->lock);
929 	switch (attr->attr) {
930 	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
931 		if (!test_kvm_facility(kvm, 76)) {
932 			mutex_unlock(&kvm->lock);
933 			return -EINVAL;
934 		}
935 		get_random_bytes(
936 			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
937 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
938 		kvm->arch.crypto.aes_kw = 1;
939 		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
940 		break;
941 	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
942 		if (!test_kvm_facility(kvm, 76)) {
943 			mutex_unlock(&kvm->lock);
944 			return -EINVAL;
945 		}
946 		get_random_bytes(
947 			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
948 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
949 		kvm->arch.crypto.dea_kw = 1;
950 		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
951 		break;
952 	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
953 		if (!test_kvm_facility(kvm, 76)) {
954 			mutex_unlock(&kvm->lock);
955 			return -EINVAL;
956 		}
957 		kvm->arch.crypto.aes_kw = 0;
958 		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
959 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
960 		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
961 		break;
962 	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
963 		if (!test_kvm_facility(kvm, 76)) {
964 			mutex_unlock(&kvm->lock);
965 			return -EINVAL;
966 		}
967 		kvm->arch.crypto.dea_kw = 0;
968 		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
969 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
970 		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
971 		break;
972 	case KVM_S390_VM_CRYPTO_ENABLE_APIE:
973 		if (!ap_instructions_available()) {
974 			mutex_unlock(&kvm->lock);
975 			return -EOPNOTSUPP;
976 		}
977 		kvm->arch.crypto.apie = 1;
978 		break;
979 	case KVM_S390_VM_CRYPTO_DISABLE_APIE:
980 		if (!ap_instructions_available()) {
981 			mutex_unlock(&kvm->lock);
982 			return -EOPNOTSUPP;
983 		}
984 		kvm->arch.crypto.apie = 0;
985 		break;
986 	default:
987 		mutex_unlock(&kvm->lock);
988 		return -ENXIO;
989 	}
990 
991 	kvm_s390_vcpu_crypto_reset_all(kvm);
992 	mutex_unlock(&kvm->lock);
993 	return 0;
994 }
995 
996 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
997 {
998 	int cx;
999 	struct kvm_vcpu *vcpu;
1000 
1001 	kvm_for_each_vcpu(cx, vcpu, kvm)
1002 		kvm_s390_sync_request(req, vcpu);
1003 }
1004 
1005 /*
1006  * Must be called with kvm->srcu held to avoid races on memslots, and with
1007  * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
1008  */
1009 static int kvm_s390_vm_start_migration(struct kvm *kvm)
1010 {
1011 	struct kvm_memory_slot *ms;
1012 	struct kvm_memslots *slots;
1013 	unsigned long ram_pages = 0;
1014 	int slotnr;
1015 
1016 	/* migration mode already enabled */
1017 	if (kvm->arch.migration_mode)
1018 		return 0;
1019 	slots = kvm_memslots(kvm);
1020 	if (!slots || !slots->used_slots)
1021 		return -EINVAL;
1022 
1023 	if (!kvm->arch.use_cmma) {
1024 		kvm->arch.migration_mode = 1;
1025 		return 0;
1026 	}
1027 	/* mark all the pages in active slots as dirty */
1028 	for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
1029 		ms = slots->memslots + slotnr;
1030 		if (!ms->dirty_bitmap)
1031 			return -EINVAL;
1032 		/*
1033 		 * The second half of the bitmap is only used on x86,
1034 		 * and would be wasted otherwise, so we put it to good
1035 		 * use here to keep track of the state of the storage
1036 		 * attributes.
1037 		 */
1038 		memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
1039 		ram_pages += ms->npages;
1040 	}
1041 	atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
1042 	kvm->arch.migration_mode = 1;
1043 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
1044 	return 0;
1045 }
1046 
1047 /*
1048  * Must be called with kvm->slots_lock to avoid races with ourselves and
1049  * kvm_s390_vm_start_migration.
1050  */
1051 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
1052 {
1053 	/* migration mode already disabled */
1054 	if (!kvm->arch.migration_mode)
1055 		return 0;
1056 	kvm->arch.migration_mode = 0;
1057 	if (kvm->arch.use_cmma)
1058 		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
1059 	return 0;
1060 }
1061 
1062 static int kvm_s390_vm_set_migration(struct kvm *kvm,
1063 				     struct kvm_device_attr *attr)
1064 {
1065 	int res = -ENXIO;
1066 
1067 	mutex_lock(&kvm->slots_lock);
1068 	switch (attr->attr) {
1069 	case KVM_S390_VM_MIGRATION_START:
1070 		res = kvm_s390_vm_start_migration(kvm);
1071 		break;
1072 	case KVM_S390_VM_MIGRATION_STOP:
1073 		res = kvm_s390_vm_stop_migration(kvm);
1074 		break;
1075 	default:
1076 		break;
1077 	}
1078 	mutex_unlock(&kvm->slots_lock);
1079 
1080 	return res;
1081 }
1082 
1083 static int kvm_s390_vm_get_migration(struct kvm *kvm,
1084 				     struct kvm_device_attr *attr)
1085 {
1086 	u64 mig = kvm->arch.migration_mode;
1087 
1088 	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1089 		return -ENXIO;
1090 
1091 	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
1092 		return -EFAULT;
1093 	return 0;
1094 }
1095 
1096 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1097 {
1098 	struct kvm_s390_vm_tod_clock gtod;
1099 
1100 	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1101 		return -EFAULT;
1102 
1103 	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1104 		return -EINVAL;
1105 	kvm_s390_set_tod_clock(kvm, &gtod);
1106 
1107 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1108 		gtod.epoch_idx, gtod.tod);
1109 
1110 	return 0;
1111 }
1112 
1113 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1114 {
1115 	u8 gtod_high;
1116 
1117 	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1118 					   sizeof(gtod_high)))
1119 		return -EFAULT;
1120 
1121 	if (gtod_high != 0)
1122 		return -EINVAL;
1123 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1124 
1125 	return 0;
1126 }
1127 
1128 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1129 {
1130 	struct kvm_s390_vm_tod_clock gtod = { 0 };
1131 
1132 	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1133 			   sizeof(gtod.tod)))
1134 		return -EFAULT;
1135 
1136 	kvm_s390_set_tod_clock(kvm, &gtod);
1137 	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1138 	return 0;
1139 }
1140 
1141 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1142 {
1143 	int ret;
1144 
1145 	if (attr->flags)
1146 		return -EINVAL;
1147 
1148 	switch (attr->attr) {
1149 	case KVM_S390_VM_TOD_EXT:
1150 		ret = kvm_s390_set_tod_ext(kvm, attr);
1151 		break;
1152 	case KVM_S390_VM_TOD_HIGH:
1153 		ret = kvm_s390_set_tod_high(kvm, attr);
1154 		break;
1155 	case KVM_S390_VM_TOD_LOW:
1156 		ret = kvm_s390_set_tod_low(kvm, attr);
1157 		break;
1158 	default:
1159 		ret = -ENXIO;
1160 		break;
1161 	}
1162 	return ret;
1163 }
1164 
1165 static void kvm_s390_get_tod_clock(struct kvm *kvm,
1166 				   struct kvm_s390_vm_tod_clock *gtod)
1167 {
1168 	struct kvm_s390_tod_clock_ext htod;
1169 
1170 	preempt_disable();
1171 
1172 	get_tod_clock_ext((char *)&htod);
1173 
1174 	gtod->tod = htod.tod + kvm->arch.epoch;
1175 	gtod->epoch_idx = 0;
1176 	if (test_kvm_facility(kvm, 139)) {
1177 		gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
1178 		if (gtod->tod < htod.tod)
1179 			gtod->epoch_idx += 1;
1180 	}
1181 
1182 	preempt_enable();
1183 }
1184 
1185 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1186 {
1187 	struct kvm_s390_vm_tod_clock gtod;
1188 
1189 	memset(&gtod, 0, sizeof(gtod));
1190 	kvm_s390_get_tod_clock(kvm, &gtod);
1191 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1192 		return -EFAULT;
1193 
1194 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1195 		gtod.epoch_idx, gtod.tod);
1196 	return 0;
1197 }
1198 
1199 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1200 {
1201 	u8 gtod_high = 0;
1202 
1203 	if (copy_to_user((void __user *)attr->addr, &gtod_high,
1204 					 sizeof(gtod_high)))
1205 		return -EFAULT;
1206 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1207 
1208 	return 0;
1209 }
1210 
1211 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1212 {
1213 	u64 gtod;
1214 
1215 	gtod = kvm_s390_get_tod_clock_fast(kvm);
1216 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1217 		return -EFAULT;
1218 	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1219 
1220 	return 0;
1221 }
1222 
1223 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1224 {
1225 	int ret;
1226 
1227 	if (attr->flags)
1228 		return -EINVAL;
1229 
1230 	switch (attr->attr) {
1231 	case KVM_S390_VM_TOD_EXT:
1232 		ret = kvm_s390_get_tod_ext(kvm, attr);
1233 		break;
1234 	case KVM_S390_VM_TOD_HIGH:
1235 		ret = kvm_s390_get_tod_high(kvm, attr);
1236 		break;
1237 	case KVM_S390_VM_TOD_LOW:
1238 		ret = kvm_s390_get_tod_low(kvm, attr);
1239 		break;
1240 	default:
1241 		ret = -ENXIO;
1242 		break;
1243 	}
1244 	return ret;
1245 }
1246 
1247 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1248 {
1249 	struct kvm_s390_vm_cpu_processor *proc;
1250 	u16 lowest_ibc, unblocked_ibc;
1251 	int ret = 0;
1252 
1253 	mutex_lock(&kvm->lock);
1254 	if (kvm->created_vcpus) {
1255 		ret = -EBUSY;
1256 		goto out;
1257 	}
1258 	proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1259 	if (!proc) {
1260 		ret = -ENOMEM;
1261 		goto out;
1262 	}
1263 	if (!copy_from_user(proc, (void __user *)attr->addr,
1264 			    sizeof(*proc))) {
1265 		kvm->arch.model.cpuid = proc->cpuid;
1266 		lowest_ibc = sclp.ibc >> 16 & 0xfff;
1267 		unblocked_ibc = sclp.ibc & 0xfff;
1268 		if (lowest_ibc && proc->ibc) {
1269 			if (proc->ibc > unblocked_ibc)
1270 				kvm->arch.model.ibc = unblocked_ibc;
1271 			else if (proc->ibc < lowest_ibc)
1272 				kvm->arch.model.ibc = lowest_ibc;
1273 			else
1274 				kvm->arch.model.ibc = proc->ibc;
1275 		}
1276 		memcpy(kvm->arch.model.fac_list, proc->fac_list,
1277 		       S390_ARCH_FAC_LIST_SIZE_BYTE);
1278 		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1279 			 kvm->arch.model.ibc,
1280 			 kvm->arch.model.cpuid);
1281 		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1282 			 kvm->arch.model.fac_list[0],
1283 			 kvm->arch.model.fac_list[1],
1284 			 kvm->arch.model.fac_list[2]);
1285 	} else
1286 		ret = -EFAULT;
1287 	kfree(proc);
1288 out:
1289 	mutex_unlock(&kvm->lock);
1290 	return ret;
1291 }
1292 
1293 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1294 				       struct kvm_device_attr *attr)
1295 {
1296 	struct kvm_s390_vm_cpu_feat data;
1297 
1298 	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1299 		return -EFAULT;
1300 	if (!bitmap_subset((unsigned long *) data.feat,
1301 			   kvm_s390_available_cpu_feat,
1302 			   KVM_S390_VM_CPU_FEAT_NR_BITS))
1303 		return -EINVAL;
1304 
1305 	mutex_lock(&kvm->lock);
1306 	if (kvm->created_vcpus) {
1307 		mutex_unlock(&kvm->lock);
1308 		return -EBUSY;
1309 	}
1310 	bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1311 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1312 	mutex_unlock(&kvm->lock);
1313 	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1314 			 data.feat[0],
1315 			 data.feat[1],
1316 			 data.feat[2]);
1317 	return 0;
1318 }
1319 
1320 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1321 					  struct kvm_device_attr *attr)
1322 {
1323 	mutex_lock(&kvm->lock);
1324 	if (kvm->created_vcpus) {
1325 		mutex_unlock(&kvm->lock);
1326 		return -EBUSY;
1327 	}
1328 
1329 	if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1330 			   sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1331 		mutex_unlock(&kvm->lock);
1332 		return -EFAULT;
1333 	}
1334 	mutex_unlock(&kvm->lock);
1335 
1336 	VM_EVENT(kvm, 3, "SET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1337 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1338 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1339 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1340 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1341 	VM_EVENT(kvm, 3, "SET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1342 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1343 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1344 	VM_EVENT(kvm, 3, "SET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1345 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1346 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1347 	VM_EVENT(kvm, 3, "SET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1348 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1349 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1350 	VM_EVENT(kvm, 3, "SET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1351 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1352 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1353 	VM_EVENT(kvm, 3, "SET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1354 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1355 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1356 	VM_EVENT(kvm, 3, "SET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1357 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1358 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1359 	VM_EVENT(kvm, 3, "SET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1360 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1361 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1362 	VM_EVENT(kvm, 3, "SET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1363 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1364 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1365 	VM_EVENT(kvm, 3, "SET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1366 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1367 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1368 	VM_EVENT(kvm, 3, "SET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1369 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1370 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1371 	VM_EVENT(kvm, 3, "SET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1372 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1373 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1374 	VM_EVENT(kvm, 3, "SET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1375 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1376 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1377 	VM_EVENT(kvm, 3, "SET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1378 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1379 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1380 	VM_EVENT(kvm, 3, "SET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1381 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1382 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1383 	VM_EVENT(kvm, 3, "SET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1384 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1385 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1386 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1387 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1388 	VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1389 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1390 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1391 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1392 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1393 
1394 	return 0;
1395 }
1396 
1397 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1398 {
1399 	int ret = -ENXIO;
1400 
1401 	switch (attr->attr) {
1402 	case KVM_S390_VM_CPU_PROCESSOR:
1403 		ret = kvm_s390_set_processor(kvm, attr);
1404 		break;
1405 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1406 		ret = kvm_s390_set_processor_feat(kvm, attr);
1407 		break;
1408 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1409 		ret = kvm_s390_set_processor_subfunc(kvm, attr);
1410 		break;
1411 	}
1412 	return ret;
1413 }
1414 
1415 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1416 {
1417 	struct kvm_s390_vm_cpu_processor *proc;
1418 	int ret = 0;
1419 
1420 	proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1421 	if (!proc) {
1422 		ret = -ENOMEM;
1423 		goto out;
1424 	}
1425 	proc->cpuid = kvm->arch.model.cpuid;
1426 	proc->ibc = kvm->arch.model.ibc;
1427 	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1428 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1429 	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1430 		 kvm->arch.model.ibc,
1431 		 kvm->arch.model.cpuid);
1432 	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1433 		 kvm->arch.model.fac_list[0],
1434 		 kvm->arch.model.fac_list[1],
1435 		 kvm->arch.model.fac_list[2]);
1436 	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1437 		ret = -EFAULT;
1438 	kfree(proc);
1439 out:
1440 	return ret;
1441 }
1442 
1443 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1444 {
1445 	struct kvm_s390_vm_cpu_machine *mach;
1446 	int ret = 0;
1447 
1448 	mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT);
1449 	if (!mach) {
1450 		ret = -ENOMEM;
1451 		goto out;
1452 	}
1453 	get_cpu_id((struct cpuid *) &mach->cpuid);
1454 	mach->ibc = sclp.ibc;
1455 	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1456 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1457 	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1458 	       sizeof(S390_lowcore.stfle_fac_list));
1459 	VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1460 		 kvm->arch.model.ibc,
1461 		 kvm->arch.model.cpuid);
1462 	VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1463 		 mach->fac_mask[0],
1464 		 mach->fac_mask[1],
1465 		 mach->fac_mask[2]);
1466 	VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1467 		 mach->fac_list[0],
1468 		 mach->fac_list[1],
1469 		 mach->fac_list[2]);
1470 	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1471 		ret = -EFAULT;
1472 	kfree(mach);
1473 out:
1474 	return ret;
1475 }
1476 
1477 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1478 				       struct kvm_device_attr *attr)
1479 {
1480 	struct kvm_s390_vm_cpu_feat data;
1481 
1482 	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1483 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1484 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1485 		return -EFAULT;
1486 	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1487 			 data.feat[0],
1488 			 data.feat[1],
1489 			 data.feat[2]);
1490 	return 0;
1491 }
1492 
1493 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1494 				     struct kvm_device_attr *attr)
1495 {
1496 	struct kvm_s390_vm_cpu_feat data;
1497 
1498 	bitmap_copy((unsigned long *) data.feat,
1499 		    kvm_s390_available_cpu_feat,
1500 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1501 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1502 		return -EFAULT;
1503 	VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1504 			 data.feat[0],
1505 			 data.feat[1],
1506 			 data.feat[2]);
1507 	return 0;
1508 }
1509 
1510 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1511 					  struct kvm_device_attr *attr)
1512 {
1513 	if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1514 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1515 		return -EFAULT;
1516 
1517 	VM_EVENT(kvm, 3, "GET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1518 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1519 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1520 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1521 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1522 	VM_EVENT(kvm, 3, "GET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1523 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1524 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1525 	VM_EVENT(kvm, 3, "GET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1526 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1527 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1528 	VM_EVENT(kvm, 3, "GET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1529 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1530 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1531 	VM_EVENT(kvm, 3, "GET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1532 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1533 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1534 	VM_EVENT(kvm, 3, "GET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1535 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1536 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1537 	VM_EVENT(kvm, 3, "GET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1538 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1539 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1540 	VM_EVENT(kvm, 3, "GET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1541 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1542 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1543 	VM_EVENT(kvm, 3, "GET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1544 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1545 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1546 	VM_EVENT(kvm, 3, "GET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1547 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1548 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1549 	VM_EVENT(kvm, 3, "GET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1550 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1551 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1552 	VM_EVENT(kvm, 3, "GET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1553 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1554 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1555 	VM_EVENT(kvm, 3, "GET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1556 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1557 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1558 	VM_EVENT(kvm, 3, "GET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1559 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1560 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1561 	VM_EVENT(kvm, 3, "GET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1562 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1563 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1564 	VM_EVENT(kvm, 3, "GET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1565 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1566 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1567 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1568 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1569 	VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1570 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1571 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1572 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1573 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1574 
1575 	return 0;
1576 }
1577 
1578 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1579 					struct kvm_device_attr *attr)
1580 {
1581 	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1582 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1583 		return -EFAULT;
1584 
1585 	VM_EVENT(kvm, 3, "GET: host  PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1586 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1587 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1588 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1589 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1590 	VM_EVENT(kvm, 3, "GET: host  PTFF   subfunc 0x%16.16lx.%16.16lx",
1591 		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1592 		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1593 	VM_EVENT(kvm, 3, "GET: host  KMAC   subfunc 0x%16.16lx.%16.16lx",
1594 		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1595 		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1596 	VM_EVENT(kvm, 3, "GET: host  KMC    subfunc 0x%16.16lx.%16.16lx",
1597 		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1598 		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1599 	VM_EVENT(kvm, 3, "GET: host  KM     subfunc 0x%16.16lx.%16.16lx",
1600 		 ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1601 		 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1602 	VM_EVENT(kvm, 3, "GET: host  KIMD   subfunc 0x%16.16lx.%16.16lx",
1603 		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1604 		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1605 	VM_EVENT(kvm, 3, "GET: host  KLMD   subfunc 0x%16.16lx.%16.16lx",
1606 		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1607 		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1608 	VM_EVENT(kvm, 3, "GET: host  PCKMO  subfunc 0x%16.16lx.%16.16lx",
1609 		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1610 		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1611 	VM_EVENT(kvm, 3, "GET: host  KMCTR  subfunc 0x%16.16lx.%16.16lx",
1612 		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1613 		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1614 	VM_EVENT(kvm, 3, "GET: host  KMF    subfunc 0x%16.16lx.%16.16lx",
1615 		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1616 		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1617 	VM_EVENT(kvm, 3, "GET: host  KMO    subfunc 0x%16.16lx.%16.16lx",
1618 		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1619 		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1620 	VM_EVENT(kvm, 3, "GET: host  PCC    subfunc 0x%16.16lx.%16.16lx",
1621 		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1622 		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1623 	VM_EVENT(kvm, 3, "GET: host  PPNO   subfunc 0x%16.16lx.%16.16lx",
1624 		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1625 		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1626 	VM_EVENT(kvm, 3, "GET: host  KMA    subfunc 0x%16.16lx.%16.16lx",
1627 		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1628 		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1629 	VM_EVENT(kvm, 3, "GET: host  KDSA   subfunc 0x%16.16lx.%16.16lx",
1630 		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1631 		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1632 	VM_EVENT(kvm, 3, "GET: host  SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1633 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1634 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1635 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1636 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1637 	VM_EVENT(kvm, 3, "GET: host  DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1638 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1639 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1640 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1641 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1642 
1643 	return 0;
1644 }
1645 
1646 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1647 {
1648 	int ret = -ENXIO;
1649 
1650 	switch (attr->attr) {
1651 	case KVM_S390_VM_CPU_PROCESSOR:
1652 		ret = kvm_s390_get_processor(kvm, attr);
1653 		break;
1654 	case KVM_S390_VM_CPU_MACHINE:
1655 		ret = kvm_s390_get_machine(kvm, attr);
1656 		break;
1657 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1658 		ret = kvm_s390_get_processor_feat(kvm, attr);
1659 		break;
1660 	case KVM_S390_VM_CPU_MACHINE_FEAT:
1661 		ret = kvm_s390_get_machine_feat(kvm, attr);
1662 		break;
1663 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1664 		ret = kvm_s390_get_processor_subfunc(kvm, attr);
1665 		break;
1666 	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1667 		ret = kvm_s390_get_machine_subfunc(kvm, attr);
1668 		break;
1669 	}
1670 	return ret;
1671 }
1672 
1673 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1674 {
1675 	int ret;
1676 
1677 	switch (attr->group) {
1678 	case KVM_S390_VM_MEM_CTRL:
1679 		ret = kvm_s390_set_mem_control(kvm, attr);
1680 		break;
1681 	case KVM_S390_VM_TOD:
1682 		ret = kvm_s390_set_tod(kvm, attr);
1683 		break;
1684 	case KVM_S390_VM_CPU_MODEL:
1685 		ret = kvm_s390_set_cpu_model(kvm, attr);
1686 		break;
1687 	case KVM_S390_VM_CRYPTO:
1688 		ret = kvm_s390_vm_set_crypto(kvm, attr);
1689 		break;
1690 	case KVM_S390_VM_MIGRATION:
1691 		ret = kvm_s390_vm_set_migration(kvm, attr);
1692 		break;
1693 	default:
1694 		ret = -ENXIO;
1695 		break;
1696 	}
1697 
1698 	return ret;
1699 }
1700 
1701 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1702 {
1703 	int ret;
1704 
1705 	switch (attr->group) {
1706 	case KVM_S390_VM_MEM_CTRL:
1707 		ret = kvm_s390_get_mem_control(kvm, attr);
1708 		break;
1709 	case KVM_S390_VM_TOD:
1710 		ret = kvm_s390_get_tod(kvm, attr);
1711 		break;
1712 	case KVM_S390_VM_CPU_MODEL:
1713 		ret = kvm_s390_get_cpu_model(kvm, attr);
1714 		break;
1715 	case KVM_S390_VM_MIGRATION:
1716 		ret = kvm_s390_vm_get_migration(kvm, attr);
1717 		break;
1718 	default:
1719 		ret = -ENXIO;
1720 		break;
1721 	}
1722 
1723 	return ret;
1724 }
1725 
1726 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1727 {
1728 	int ret;
1729 
1730 	switch (attr->group) {
1731 	case KVM_S390_VM_MEM_CTRL:
1732 		switch (attr->attr) {
1733 		case KVM_S390_VM_MEM_ENABLE_CMMA:
1734 		case KVM_S390_VM_MEM_CLR_CMMA:
1735 			ret = sclp.has_cmma ? 0 : -ENXIO;
1736 			break;
1737 		case KVM_S390_VM_MEM_LIMIT_SIZE:
1738 			ret = 0;
1739 			break;
1740 		default:
1741 			ret = -ENXIO;
1742 			break;
1743 		}
1744 		break;
1745 	case KVM_S390_VM_TOD:
1746 		switch (attr->attr) {
1747 		case KVM_S390_VM_TOD_LOW:
1748 		case KVM_S390_VM_TOD_HIGH:
1749 			ret = 0;
1750 			break;
1751 		default:
1752 			ret = -ENXIO;
1753 			break;
1754 		}
1755 		break;
1756 	case KVM_S390_VM_CPU_MODEL:
1757 		switch (attr->attr) {
1758 		case KVM_S390_VM_CPU_PROCESSOR:
1759 		case KVM_S390_VM_CPU_MACHINE:
1760 		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1761 		case KVM_S390_VM_CPU_MACHINE_FEAT:
1762 		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1763 		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1764 			ret = 0;
1765 			break;
1766 		default:
1767 			ret = -ENXIO;
1768 			break;
1769 		}
1770 		break;
1771 	case KVM_S390_VM_CRYPTO:
1772 		switch (attr->attr) {
1773 		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1774 		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1775 		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1776 		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1777 			ret = 0;
1778 			break;
1779 		case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1780 		case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1781 			ret = ap_instructions_available() ? 0 : -ENXIO;
1782 			break;
1783 		default:
1784 			ret = -ENXIO;
1785 			break;
1786 		}
1787 		break;
1788 	case KVM_S390_VM_MIGRATION:
1789 		ret = 0;
1790 		break;
1791 	default:
1792 		ret = -ENXIO;
1793 		break;
1794 	}
1795 
1796 	return ret;
1797 }
1798 
1799 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1800 {
1801 	uint8_t *keys;
1802 	uint64_t hva;
1803 	int srcu_idx, i, r = 0;
1804 
1805 	if (args->flags != 0)
1806 		return -EINVAL;
1807 
1808 	/* Is this guest using storage keys? */
1809 	if (!mm_uses_skeys(current->mm))
1810 		return KVM_S390_GET_SKEYS_NONE;
1811 
1812 	/* Enforce sane limit on memory allocation */
1813 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1814 		return -EINVAL;
1815 
1816 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
1817 	if (!keys)
1818 		return -ENOMEM;
1819 
1820 	mmap_read_lock(current->mm);
1821 	srcu_idx = srcu_read_lock(&kvm->srcu);
1822 	for (i = 0; i < args->count; i++) {
1823 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1824 		if (kvm_is_error_hva(hva)) {
1825 			r = -EFAULT;
1826 			break;
1827 		}
1828 
1829 		r = get_guest_storage_key(current->mm, hva, &keys[i]);
1830 		if (r)
1831 			break;
1832 	}
1833 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1834 	mmap_read_unlock(current->mm);
1835 
1836 	if (!r) {
1837 		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1838 				 sizeof(uint8_t) * args->count);
1839 		if (r)
1840 			r = -EFAULT;
1841 	}
1842 
1843 	kvfree(keys);
1844 	return r;
1845 }
1846 
1847 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1848 {
1849 	uint8_t *keys;
1850 	uint64_t hva;
1851 	int srcu_idx, i, r = 0;
1852 	bool unlocked;
1853 
1854 	if (args->flags != 0)
1855 		return -EINVAL;
1856 
1857 	/* Enforce sane limit on memory allocation */
1858 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1859 		return -EINVAL;
1860 
1861 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
1862 	if (!keys)
1863 		return -ENOMEM;
1864 
1865 	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1866 			   sizeof(uint8_t) * args->count);
1867 	if (r) {
1868 		r = -EFAULT;
1869 		goto out;
1870 	}
1871 
1872 	/* Enable storage key handling for the guest */
1873 	r = s390_enable_skey();
1874 	if (r)
1875 		goto out;
1876 
1877 	i = 0;
1878 	mmap_read_lock(current->mm);
1879 	srcu_idx = srcu_read_lock(&kvm->srcu);
1880         while (i < args->count) {
1881 		unlocked = false;
1882 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1883 		if (kvm_is_error_hva(hva)) {
1884 			r = -EFAULT;
1885 			break;
1886 		}
1887 
1888 		/* Lowest order bit is reserved */
1889 		if (keys[i] & 0x01) {
1890 			r = -EINVAL;
1891 			break;
1892 		}
1893 
1894 		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1895 		if (r) {
1896 			r = fixup_user_fault(current->mm, hva,
1897 					     FAULT_FLAG_WRITE, &unlocked);
1898 			if (r)
1899 				break;
1900 		}
1901 		if (!r)
1902 			i++;
1903 	}
1904 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1905 	mmap_read_unlock(current->mm);
1906 out:
1907 	kvfree(keys);
1908 	return r;
1909 }
1910 
1911 /*
1912  * Base address and length must be sent at the start of each block, therefore
1913  * it's cheaper to send some clean data, as long as it's less than the size of
1914  * two longs.
1915  */
1916 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1917 /* for consistency */
1918 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1919 
1920 /*
1921  * Similar to gfn_to_memslot, but returns the index of a memslot also when the
1922  * address falls in a hole. In that case the index of one of the memslots
1923  * bordering the hole is returned.
1924  */
1925 static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1926 {
1927 	int start = 0, end = slots->used_slots;
1928 	int slot = atomic_read(&slots->lru_slot);
1929 	struct kvm_memory_slot *memslots = slots->memslots;
1930 
1931 	if (gfn >= memslots[slot].base_gfn &&
1932 	    gfn < memslots[slot].base_gfn + memslots[slot].npages)
1933 		return slot;
1934 
1935 	while (start < end) {
1936 		slot = start + (end - start) / 2;
1937 
1938 		if (gfn >= memslots[slot].base_gfn)
1939 			end = slot;
1940 		else
1941 			start = slot + 1;
1942 	}
1943 
1944 	if (start >= slots->used_slots)
1945 		return slots->used_slots - 1;
1946 
1947 	if (gfn >= memslots[start].base_gfn &&
1948 	    gfn < memslots[start].base_gfn + memslots[start].npages) {
1949 		atomic_set(&slots->lru_slot, start);
1950 	}
1951 
1952 	return start;
1953 }
1954 
1955 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1956 			      u8 *res, unsigned long bufsize)
1957 {
1958 	unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1959 
1960 	args->count = 0;
1961 	while (args->count < bufsize) {
1962 		hva = gfn_to_hva(kvm, cur_gfn);
1963 		/*
1964 		 * We return an error if the first value was invalid, but we
1965 		 * return successfully if at least one value was copied.
1966 		 */
1967 		if (kvm_is_error_hva(hva))
1968 			return args->count ? 0 : -EFAULT;
1969 		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1970 			pgstev = 0;
1971 		res[args->count++] = (pgstev >> 24) & 0x43;
1972 		cur_gfn++;
1973 	}
1974 
1975 	return 0;
1976 }
1977 
1978 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
1979 					      unsigned long cur_gfn)
1980 {
1981 	int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
1982 	struct kvm_memory_slot *ms = slots->memslots + slotidx;
1983 	unsigned long ofs = cur_gfn - ms->base_gfn;
1984 
1985 	if (ms->base_gfn + ms->npages <= cur_gfn) {
1986 		slotidx--;
1987 		/* If we are above the highest slot, wrap around */
1988 		if (slotidx < 0)
1989 			slotidx = slots->used_slots - 1;
1990 
1991 		ms = slots->memslots + slotidx;
1992 		ofs = 0;
1993 	}
1994 	ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
1995 	while ((slotidx > 0) && (ofs >= ms->npages)) {
1996 		slotidx--;
1997 		ms = slots->memslots + slotidx;
1998 		ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
1999 	}
2000 	return ms->base_gfn + ofs;
2001 }
2002 
2003 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
2004 			     u8 *res, unsigned long bufsize)
2005 {
2006 	unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
2007 	struct kvm_memslots *slots = kvm_memslots(kvm);
2008 	struct kvm_memory_slot *ms;
2009 
2010 	if (unlikely(!slots->used_slots))
2011 		return 0;
2012 
2013 	cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
2014 	ms = gfn_to_memslot(kvm, cur_gfn);
2015 	args->count = 0;
2016 	args->start_gfn = cur_gfn;
2017 	if (!ms)
2018 		return 0;
2019 	next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2020 	mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
2021 
2022 	while (args->count < bufsize) {
2023 		hva = gfn_to_hva(kvm, cur_gfn);
2024 		if (kvm_is_error_hva(hva))
2025 			return 0;
2026 		/* Decrement only if we actually flipped the bit to 0 */
2027 		if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2028 			atomic64_dec(&kvm->arch.cmma_dirty_pages);
2029 		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2030 			pgstev = 0;
2031 		/* Save the value */
2032 		res[args->count++] = (pgstev >> 24) & 0x43;
2033 		/* If the next bit is too far away, stop. */
2034 		if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2035 			return 0;
2036 		/* If we reached the previous "next", find the next one */
2037 		if (cur_gfn == next_gfn)
2038 			next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2039 		/* Reached the end of memory or of the buffer, stop */
2040 		if ((next_gfn >= mem_end) ||
2041 		    (next_gfn - args->start_gfn >= bufsize))
2042 			return 0;
2043 		cur_gfn++;
2044 		/* Reached the end of the current memslot, take the next one. */
2045 		if (cur_gfn - ms->base_gfn >= ms->npages) {
2046 			ms = gfn_to_memslot(kvm, cur_gfn);
2047 			if (!ms)
2048 				return 0;
2049 		}
2050 	}
2051 	return 0;
2052 }
2053 
2054 /*
2055  * This function searches for the next page with dirty CMMA attributes, and
2056  * saves the attributes in the buffer up to either the end of the buffer or
2057  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2058  * no trailing clean bytes are saved.
2059  * In case no dirty bits were found, or if CMMA was not enabled or used, the
2060  * output buffer will indicate 0 as length.
2061  */
2062 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2063 				  struct kvm_s390_cmma_log *args)
2064 {
2065 	unsigned long bufsize;
2066 	int srcu_idx, peek, ret;
2067 	u8 *values;
2068 
2069 	if (!kvm->arch.use_cmma)
2070 		return -ENXIO;
2071 	/* Invalid/unsupported flags were specified */
2072 	if (args->flags & ~KVM_S390_CMMA_PEEK)
2073 		return -EINVAL;
2074 	/* Migration mode query, and we are not doing a migration */
2075 	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2076 	if (!peek && !kvm->arch.migration_mode)
2077 		return -EINVAL;
2078 	/* CMMA is disabled or was not used, or the buffer has length zero */
2079 	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2080 	if (!bufsize || !kvm->mm->context.uses_cmm) {
2081 		memset(args, 0, sizeof(*args));
2082 		return 0;
2083 	}
2084 	/* We are not peeking, and there are no dirty pages */
2085 	if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2086 		memset(args, 0, sizeof(*args));
2087 		return 0;
2088 	}
2089 
2090 	values = vmalloc(bufsize);
2091 	if (!values)
2092 		return -ENOMEM;
2093 
2094 	mmap_read_lock(kvm->mm);
2095 	srcu_idx = srcu_read_lock(&kvm->srcu);
2096 	if (peek)
2097 		ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2098 	else
2099 		ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2100 	srcu_read_unlock(&kvm->srcu, srcu_idx);
2101 	mmap_read_unlock(kvm->mm);
2102 
2103 	if (kvm->arch.migration_mode)
2104 		args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2105 	else
2106 		args->remaining = 0;
2107 
2108 	if (copy_to_user((void __user *)args->values, values, args->count))
2109 		ret = -EFAULT;
2110 
2111 	vfree(values);
2112 	return ret;
2113 }
2114 
2115 /*
2116  * This function sets the CMMA attributes for the given pages. If the input
2117  * buffer has zero length, no action is taken, otherwise the attributes are
2118  * set and the mm->context.uses_cmm flag is set.
2119  */
2120 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2121 				  const struct kvm_s390_cmma_log *args)
2122 {
2123 	unsigned long hva, mask, pgstev, i;
2124 	uint8_t *bits;
2125 	int srcu_idx, r = 0;
2126 
2127 	mask = args->mask;
2128 
2129 	if (!kvm->arch.use_cmma)
2130 		return -ENXIO;
2131 	/* invalid/unsupported flags */
2132 	if (args->flags != 0)
2133 		return -EINVAL;
2134 	/* Enforce sane limit on memory allocation */
2135 	if (args->count > KVM_S390_CMMA_SIZE_MAX)
2136 		return -EINVAL;
2137 	/* Nothing to do */
2138 	if (args->count == 0)
2139 		return 0;
2140 
2141 	bits = vmalloc(array_size(sizeof(*bits), args->count));
2142 	if (!bits)
2143 		return -ENOMEM;
2144 
2145 	r = copy_from_user(bits, (void __user *)args->values, args->count);
2146 	if (r) {
2147 		r = -EFAULT;
2148 		goto out;
2149 	}
2150 
2151 	mmap_read_lock(kvm->mm);
2152 	srcu_idx = srcu_read_lock(&kvm->srcu);
2153 	for (i = 0; i < args->count; i++) {
2154 		hva = gfn_to_hva(kvm, args->start_gfn + i);
2155 		if (kvm_is_error_hva(hva)) {
2156 			r = -EFAULT;
2157 			break;
2158 		}
2159 
2160 		pgstev = bits[i];
2161 		pgstev = pgstev << 24;
2162 		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2163 		set_pgste_bits(kvm->mm, hva, mask, pgstev);
2164 	}
2165 	srcu_read_unlock(&kvm->srcu, srcu_idx);
2166 	mmap_read_unlock(kvm->mm);
2167 
2168 	if (!kvm->mm->context.uses_cmm) {
2169 		mmap_write_lock(kvm->mm);
2170 		kvm->mm->context.uses_cmm = 1;
2171 		mmap_write_unlock(kvm->mm);
2172 	}
2173 out:
2174 	vfree(bits);
2175 	return r;
2176 }
2177 
2178 static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
2179 {
2180 	struct kvm_vcpu *vcpu;
2181 	u16 rc, rrc;
2182 	int ret = 0;
2183 	int i;
2184 
2185 	/*
2186 	 * We ignore failures and try to destroy as many CPUs as possible.
2187 	 * At the same time we must not free the assigned resources when
2188 	 * this fails, as the ultravisor has still access to that memory.
2189 	 * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
2190 	 * behind.
2191 	 * We want to return the first failure rc and rrc, though.
2192 	 */
2193 	kvm_for_each_vcpu(i, vcpu, kvm) {
2194 		mutex_lock(&vcpu->mutex);
2195 		if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) {
2196 			*rcp = rc;
2197 			*rrcp = rrc;
2198 			ret = -EIO;
2199 		}
2200 		mutex_unlock(&vcpu->mutex);
2201 	}
2202 	return ret;
2203 }
2204 
2205 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2206 {
2207 	int i, r = 0;
2208 	u16 dummy;
2209 
2210 	struct kvm_vcpu *vcpu;
2211 
2212 	kvm_for_each_vcpu(i, vcpu, kvm) {
2213 		mutex_lock(&vcpu->mutex);
2214 		r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
2215 		mutex_unlock(&vcpu->mutex);
2216 		if (r)
2217 			break;
2218 	}
2219 	if (r)
2220 		kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
2221 	return r;
2222 }
2223 
2224 static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
2225 {
2226 	int r = 0;
2227 	u16 dummy;
2228 	void __user *argp = (void __user *)cmd->data;
2229 
2230 	switch (cmd->cmd) {
2231 	case KVM_PV_ENABLE: {
2232 		r = -EINVAL;
2233 		if (kvm_s390_pv_is_protected(kvm))
2234 			break;
2235 
2236 		/*
2237 		 *  FMT 4 SIE needs esca. As we never switch back to bsca from
2238 		 *  esca, we need no cleanup in the error cases below
2239 		 */
2240 		r = sca_switch_to_extended(kvm);
2241 		if (r)
2242 			break;
2243 
2244 		mmap_write_lock(current->mm);
2245 		r = gmap_mark_unmergeable();
2246 		mmap_write_unlock(current->mm);
2247 		if (r)
2248 			break;
2249 
2250 		r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
2251 		if (r)
2252 			break;
2253 
2254 		r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
2255 		if (r)
2256 			kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
2257 
2258 		/* we need to block service interrupts from now on */
2259 		set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2260 		break;
2261 	}
2262 	case KVM_PV_DISABLE: {
2263 		r = -EINVAL;
2264 		if (!kvm_s390_pv_is_protected(kvm))
2265 			break;
2266 
2267 		r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2268 		/*
2269 		 * If a CPU could not be destroyed, destroy VM will also fail.
2270 		 * There is no point in trying to destroy it. Instead return
2271 		 * the rc and rrc from the first CPU that failed destroying.
2272 		 */
2273 		if (r)
2274 			break;
2275 		r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);
2276 
2277 		/* no need to block service interrupts any more */
2278 		clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2279 		break;
2280 	}
2281 	case KVM_PV_SET_SEC_PARMS: {
2282 		struct kvm_s390_pv_sec_parm parms = {};
2283 		void *hdr;
2284 
2285 		r = -EINVAL;
2286 		if (!kvm_s390_pv_is_protected(kvm))
2287 			break;
2288 
2289 		r = -EFAULT;
2290 		if (copy_from_user(&parms, argp, sizeof(parms)))
2291 			break;
2292 
2293 		/* Currently restricted to 8KB */
2294 		r = -EINVAL;
2295 		if (parms.length > PAGE_SIZE * 2)
2296 			break;
2297 
2298 		r = -ENOMEM;
2299 		hdr = vmalloc(parms.length);
2300 		if (!hdr)
2301 			break;
2302 
2303 		r = -EFAULT;
2304 		if (!copy_from_user(hdr, (void __user *)parms.origin,
2305 				    parms.length))
2306 			r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
2307 						      &cmd->rc, &cmd->rrc);
2308 
2309 		vfree(hdr);
2310 		break;
2311 	}
2312 	case KVM_PV_UNPACK: {
2313 		struct kvm_s390_pv_unp unp = {};
2314 
2315 		r = -EINVAL;
2316 		if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm))
2317 			break;
2318 
2319 		r = -EFAULT;
2320 		if (copy_from_user(&unp, argp, sizeof(unp)))
2321 			break;
2322 
2323 		r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
2324 				       &cmd->rc, &cmd->rrc);
2325 		break;
2326 	}
2327 	case KVM_PV_VERIFY: {
2328 		r = -EINVAL;
2329 		if (!kvm_s390_pv_is_protected(kvm))
2330 			break;
2331 
2332 		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2333 				  UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
2334 		KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
2335 			     cmd->rrc);
2336 		break;
2337 	}
2338 	case KVM_PV_PREP_RESET: {
2339 		r = -EINVAL;
2340 		if (!kvm_s390_pv_is_protected(kvm))
2341 			break;
2342 
2343 		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2344 				  UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
2345 		KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
2346 			     cmd->rc, cmd->rrc);
2347 		break;
2348 	}
2349 	case KVM_PV_UNSHARE_ALL: {
2350 		r = -EINVAL;
2351 		if (!kvm_s390_pv_is_protected(kvm))
2352 			break;
2353 
2354 		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2355 				  UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
2356 		KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
2357 			     cmd->rc, cmd->rrc);
2358 		break;
2359 	}
2360 	default:
2361 		r = -ENOTTY;
2362 	}
2363 	return r;
2364 }
2365 
2366 long kvm_arch_vm_ioctl(struct file *filp,
2367 		       unsigned int ioctl, unsigned long arg)
2368 {
2369 	struct kvm *kvm = filp->private_data;
2370 	void __user *argp = (void __user *)arg;
2371 	struct kvm_device_attr attr;
2372 	int r;
2373 
2374 	switch (ioctl) {
2375 	case KVM_S390_INTERRUPT: {
2376 		struct kvm_s390_interrupt s390int;
2377 
2378 		r = -EFAULT;
2379 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
2380 			break;
2381 		r = kvm_s390_inject_vm(kvm, &s390int);
2382 		break;
2383 	}
2384 	case KVM_CREATE_IRQCHIP: {
2385 		struct kvm_irq_routing_entry routing;
2386 
2387 		r = -EINVAL;
2388 		if (kvm->arch.use_irqchip) {
2389 			/* Set up dummy routing. */
2390 			memset(&routing, 0, sizeof(routing));
2391 			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2392 		}
2393 		break;
2394 	}
2395 	case KVM_SET_DEVICE_ATTR: {
2396 		r = -EFAULT;
2397 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2398 			break;
2399 		r = kvm_s390_vm_set_attr(kvm, &attr);
2400 		break;
2401 	}
2402 	case KVM_GET_DEVICE_ATTR: {
2403 		r = -EFAULT;
2404 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2405 			break;
2406 		r = kvm_s390_vm_get_attr(kvm, &attr);
2407 		break;
2408 	}
2409 	case KVM_HAS_DEVICE_ATTR: {
2410 		r = -EFAULT;
2411 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2412 			break;
2413 		r = kvm_s390_vm_has_attr(kvm, &attr);
2414 		break;
2415 	}
2416 	case KVM_S390_GET_SKEYS: {
2417 		struct kvm_s390_skeys args;
2418 
2419 		r = -EFAULT;
2420 		if (copy_from_user(&args, argp,
2421 				   sizeof(struct kvm_s390_skeys)))
2422 			break;
2423 		r = kvm_s390_get_skeys(kvm, &args);
2424 		break;
2425 	}
2426 	case KVM_S390_SET_SKEYS: {
2427 		struct kvm_s390_skeys args;
2428 
2429 		r = -EFAULT;
2430 		if (copy_from_user(&args, argp,
2431 				   sizeof(struct kvm_s390_skeys)))
2432 			break;
2433 		r = kvm_s390_set_skeys(kvm, &args);
2434 		break;
2435 	}
2436 	case KVM_S390_GET_CMMA_BITS: {
2437 		struct kvm_s390_cmma_log args;
2438 
2439 		r = -EFAULT;
2440 		if (copy_from_user(&args, argp, sizeof(args)))
2441 			break;
2442 		mutex_lock(&kvm->slots_lock);
2443 		r = kvm_s390_get_cmma_bits(kvm, &args);
2444 		mutex_unlock(&kvm->slots_lock);
2445 		if (!r) {
2446 			r = copy_to_user(argp, &args, sizeof(args));
2447 			if (r)
2448 				r = -EFAULT;
2449 		}
2450 		break;
2451 	}
2452 	case KVM_S390_SET_CMMA_BITS: {
2453 		struct kvm_s390_cmma_log args;
2454 
2455 		r = -EFAULT;
2456 		if (copy_from_user(&args, argp, sizeof(args)))
2457 			break;
2458 		mutex_lock(&kvm->slots_lock);
2459 		r = kvm_s390_set_cmma_bits(kvm, &args);
2460 		mutex_unlock(&kvm->slots_lock);
2461 		break;
2462 	}
2463 	case KVM_S390_PV_COMMAND: {
2464 		struct kvm_pv_cmd args;
2465 
2466 		/* protvirt means user sigp */
2467 		kvm->arch.user_cpu_state_ctrl = 1;
2468 		r = 0;
2469 		if (!is_prot_virt_host()) {
2470 			r = -EINVAL;
2471 			break;
2472 		}
2473 		if (copy_from_user(&args, argp, sizeof(args))) {
2474 			r = -EFAULT;
2475 			break;
2476 		}
2477 		if (args.flags) {
2478 			r = -EINVAL;
2479 			break;
2480 		}
2481 		mutex_lock(&kvm->lock);
2482 		r = kvm_s390_handle_pv(kvm, &args);
2483 		mutex_unlock(&kvm->lock);
2484 		if (copy_to_user(argp, &args, sizeof(args))) {
2485 			r = -EFAULT;
2486 			break;
2487 		}
2488 		break;
2489 	}
2490 	default:
2491 		r = -ENOTTY;
2492 	}
2493 
2494 	return r;
2495 }
2496 
2497 static int kvm_s390_apxa_installed(void)
2498 {
2499 	struct ap_config_info info;
2500 
2501 	if (ap_instructions_available()) {
2502 		if (ap_qci(&info) == 0)
2503 			return info.apxa;
2504 	}
2505 
2506 	return 0;
2507 }
2508 
2509 /*
2510  * The format of the crypto control block (CRYCB) is specified in the 3 low
2511  * order bits of the CRYCB designation (CRYCBD) field as follows:
2512  * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2513  *	     AP extended addressing (APXA) facility are installed.
2514  * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2515  * Format 2: Both the APXA and MSAX3 facilities are installed
2516  */
2517 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2518 {
2519 	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2520 
2521 	/* Clear the CRYCB format bits - i.e., set format 0 by default */
2522 	kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2523 
2524 	/* Check whether MSAX3 is installed */
2525 	if (!test_kvm_facility(kvm, 76))
2526 		return;
2527 
2528 	if (kvm_s390_apxa_installed())
2529 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2530 	else
2531 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2532 }
2533 
2534 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2535 			       unsigned long *aqm, unsigned long *adm)
2536 {
2537 	struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2538 
2539 	mutex_lock(&kvm->lock);
2540 	kvm_s390_vcpu_block_all(kvm);
2541 
2542 	switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2543 	case CRYCB_FORMAT2: /* APCB1 use 256 bits */
2544 		memcpy(crycb->apcb1.apm, apm, 32);
2545 		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2546 			 apm[0], apm[1], apm[2], apm[3]);
2547 		memcpy(crycb->apcb1.aqm, aqm, 32);
2548 		VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2549 			 aqm[0], aqm[1], aqm[2], aqm[3]);
2550 		memcpy(crycb->apcb1.adm, adm, 32);
2551 		VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2552 			 adm[0], adm[1], adm[2], adm[3]);
2553 		break;
2554 	case CRYCB_FORMAT1:
2555 	case CRYCB_FORMAT0: /* Fall through both use APCB0 */
2556 		memcpy(crycb->apcb0.apm, apm, 8);
2557 		memcpy(crycb->apcb0.aqm, aqm, 2);
2558 		memcpy(crycb->apcb0.adm, adm, 2);
2559 		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2560 			 apm[0], *((unsigned short *)aqm),
2561 			 *((unsigned short *)adm));
2562 		break;
2563 	default:	/* Can not happen */
2564 		break;
2565 	}
2566 
2567 	/* recreate the shadow crycb for each vcpu */
2568 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2569 	kvm_s390_vcpu_unblock_all(kvm);
2570 	mutex_unlock(&kvm->lock);
2571 }
2572 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
2573 
2574 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2575 {
2576 	mutex_lock(&kvm->lock);
2577 	kvm_s390_vcpu_block_all(kvm);
2578 
2579 	memset(&kvm->arch.crypto.crycb->apcb0, 0,
2580 	       sizeof(kvm->arch.crypto.crycb->apcb0));
2581 	memset(&kvm->arch.crypto.crycb->apcb1, 0,
2582 	       sizeof(kvm->arch.crypto.crycb->apcb1));
2583 
2584 	VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2585 	/* recreate the shadow crycb for each vcpu */
2586 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2587 	kvm_s390_vcpu_unblock_all(kvm);
2588 	mutex_unlock(&kvm->lock);
2589 }
2590 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2591 
2592 static u64 kvm_s390_get_initial_cpuid(void)
2593 {
2594 	struct cpuid cpuid;
2595 
2596 	get_cpu_id(&cpuid);
2597 	cpuid.version = 0xff;
2598 	return *((u64 *) &cpuid);
2599 }
2600 
2601 static void kvm_s390_crypto_init(struct kvm *kvm)
2602 {
2603 	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2604 	kvm_s390_set_crycb_format(kvm);
2605 
2606 	if (!test_kvm_facility(kvm, 76))
2607 		return;
2608 
2609 	/* Enable AES/DEA protected key functions by default */
2610 	kvm->arch.crypto.aes_kw = 1;
2611 	kvm->arch.crypto.dea_kw = 1;
2612 	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2613 			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2614 	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2615 			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2616 }
2617 
2618 static void sca_dispose(struct kvm *kvm)
2619 {
2620 	if (kvm->arch.use_esca)
2621 		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2622 	else
2623 		free_page((unsigned long)(kvm->arch.sca));
2624 	kvm->arch.sca = NULL;
2625 }
2626 
2627 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2628 {
2629 	gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
2630 	int i, rc;
2631 	char debug_name[16];
2632 	static unsigned long sca_offset;
2633 
2634 	rc = -EINVAL;
2635 #ifdef CONFIG_KVM_S390_UCONTROL
2636 	if (type & ~KVM_VM_S390_UCONTROL)
2637 		goto out_err;
2638 	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2639 		goto out_err;
2640 #else
2641 	if (type)
2642 		goto out_err;
2643 #endif
2644 
2645 	rc = s390_enable_sie();
2646 	if (rc)
2647 		goto out_err;
2648 
2649 	rc = -ENOMEM;
2650 
2651 	if (!sclp.has_64bscao)
2652 		alloc_flags |= GFP_DMA;
2653 	rwlock_init(&kvm->arch.sca_lock);
2654 	/* start with basic SCA */
2655 	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2656 	if (!kvm->arch.sca)
2657 		goto out_err;
2658 	mutex_lock(&kvm_lock);
2659 	sca_offset += 16;
2660 	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2661 		sca_offset = 0;
2662 	kvm->arch.sca = (struct bsca_block *)
2663 			((char *) kvm->arch.sca + sca_offset);
2664 	mutex_unlock(&kvm_lock);
2665 
2666 	sprintf(debug_name, "kvm-%u", current->pid);
2667 
2668 	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2669 	if (!kvm->arch.dbf)
2670 		goto out_err;
2671 
2672 	BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2673 	kvm->arch.sie_page2 =
2674 	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA);
2675 	if (!kvm->arch.sie_page2)
2676 		goto out_err;
2677 
2678 	kvm->arch.sie_page2->kvm = kvm;
2679 	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2680 
2681 	for (i = 0; i < kvm_s390_fac_size(); i++) {
2682 		kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
2683 					      (kvm_s390_fac_base[i] |
2684 					       kvm_s390_fac_ext[i]);
2685 		kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
2686 					      kvm_s390_fac_base[i];
2687 	}
2688 	kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2689 
2690 	/* we are always in czam mode - even on pre z14 machines */
2691 	set_kvm_facility(kvm->arch.model.fac_mask, 138);
2692 	set_kvm_facility(kvm->arch.model.fac_list, 138);
2693 	/* we emulate STHYI in kvm */
2694 	set_kvm_facility(kvm->arch.model.fac_mask, 74);
2695 	set_kvm_facility(kvm->arch.model.fac_list, 74);
2696 	if (MACHINE_HAS_TLB_GUEST) {
2697 		set_kvm_facility(kvm->arch.model.fac_mask, 147);
2698 		set_kvm_facility(kvm->arch.model.fac_list, 147);
2699 	}
2700 
2701 	if (css_general_characteristics.aiv && test_facility(65))
2702 		set_kvm_facility(kvm->arch.model.fac_mask, 65);
2703 
2704 	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2705 	kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2706 
2707 	kvm_s390_crypto_init(kvm);
2708 
2709 	mutex_init(&kvm->arch.float_int.ais_lock);
2710 	spin_lock_init(&kvm->arch.float_int.lock);
2711 	for (i = 0; i < FIRQ_LIST_COUNT; i++)
2712 		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2713 	init_waitqueue_head(&kvm->arch.ipte_wq);
2714 	mutex_init(&kvm->arch.ipte_mutex);
2715 
2716 	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2717 	VM_EVENT(kvm, 3, "vm created with type %lu", type);
2718 
2719 	if (type & KVM_VM_S390_UCONTROL) {
2720 		kvm->arch.gmap = NULL;
2721 		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2722 	} else {
2723 		if (sclp.hamax == U64_MAX)
2724 			kvm->arch.mem_limit = TASK_SIZE_MAX;
2725 		else
2726 			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2727 						    sclp.hamax + 1);
2728 		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2729 		if (!kvm->arch.gmap)
2730 			goto out_err;
2731 		kvm->arch.gmap->private = kvm;
2732 		kvm->arch.gmap->pfault_enabled = 0;
2733 	}
2734 
2735 	kvm->arch.use_pfmfi = sclp.has_pfmfi;
2736 	kvm->arch.use_skf = sclp.has_skey;
2737 	spin_lock_init(&kvm->arch.start_stop_lock);
2738 	kvm_s390_vsie_init(kvm);
2739 	if (use_gisa)
2740 		kvm_s390_gisa_init(kvm);
2741 	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2742 
2743 	return 0;
2744 out_err:
2745 	free_page((unsigned long)kvm->arch.sie_page2);
2746 	debug_unregister(kvm->arch.dbf);
2747 	sca_dispose(kvm);
2748 	KVM_EVENT(3, "creation of vm failed: %d", rc);
2749 	return rc;
2750 }
2751 
2752 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2753 {
2754 	u16 rc, rrc;
2755 
2756 	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2757 	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2758 	kvm_s390_clear_local_irqs(vcpu);
2759 	kvm_clear_async_pf_completion_queue(vcpu);
2760 	if (!kvm_is_ucontrol(vcpu->kvm))
2761 		sca_del_vcpu(vcpu);
2762 
2763 	if (kvm_is_ucontrol(vcpu->kvm))
2764 		gmap_remove(vcpu->arch.gmap);
2765 
2766 	if (vcpu->kvm->arch.use_cmma)
2767 		kvm_s390_vcpu_unsetup_cmma(vcpu);
2768 	/* We can not hold the vcpu mutex here, we are already dying */
2769 	if (kvm_s390_pv_cpu_get_handle(vcpu))
2770 		kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
2771 	free_page((unsigned long)(vcpu->arch.sie_block));
2772 }
2773 
2774 static void kvm_free_vcpus(struct kvm *kvm)
2775 {
2776 	unsigned int i;
2777 	struct kvm_vcpu *vcpu;
2778 
2779 	kvm_for_each_vcpu(i, vcpu, kvm)
2780 		kvm_vcpu_destroy(vcpu);
2781 
2782 	mutex_lock(&kvm->lock);
2783 	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2784 		kvm->vcpus[i] = NULL;
2785 
2786 	atomic_set(&kvm->online_vcpus, 0);
2787 	mutex_unlock(&kvm->lock);
2788 }
2789 
2790 void kvm_arch_destroy_vm(struct kvm *kvm)
2791 {
2792 	u16 rc, rrc;
2793 
2794 	kvm_free_vcpus(kvm);
2795 	sca_dispose(kvm);
2796 	kvm_s390_gisa_destroy(kvm);
2797 	/*
2798 	 * We are already at the end of life and kvm->lock is not taken.
2799 	 * This is ok as the file descriptor is closed by now and nobody
2800 	 * can mess with the pv state. To avoid lockdep_assert_held from
2801 	 * complaining we do not use kvm_s390_pv_is_protected.
2802 	 */
2803 	if (kvm_s390_pv_get_handle(kvm))
2804 		kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
2805 	debug_unregister(kvm->arch.dbf);
2806 	free_page((unsigned long)kvm->arch.sie_page2);
2807 	if (!kvm_is_ucontrol(kvm))
2808 		gmap_remove(kvm->arch.gmap);
2809 	kvm_s390_destroy_adapters(kvm);
2810 	kvm_s390_clear_float_irqs(kvm);
2811 	kvm_s390_vsie_destroy(kvm);
2812 	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2813 }
2814 
2815 /* Section: vcpu related */
2816 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2817 {
2818 	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2819 	if (!vcpu->arch.gmap)
2820 		return -ENOMEM;
2821 	vcpu->arch.gmap->private = vcpu->kvm;
2822 
2823 	return 0;
2824 }
2825 
2826 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2827 {
2828 	if (!kvm_s390_use_sca_entries())
2829 		return;
2830 	read_lock(&vcpu->kvm->arch.sca_lock);
2831 	if (vcpu->kvm->arch.use_esca) {
2832 		struct esca_block *sca = vcpu->kvm->arch.sca;
2833 
2834 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2835 		sca->cpu[vcpu->vcpu_id].sda = 0;
2836 	} else {
2837 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2838 
2839 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2840 		sca->cpu[vcpu->vcpu_id].sda = 0;
2841 	}
2842 	read_unlock(&vcpu->kvm->arch.sca_lock);
2843 }
2844 
2845 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2846 {
2847 	if (!kvm_s390_use_sca_entries()) {
2848 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2849 
2850 		/* we still need the basic sca for the ipte control */
2851 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2852 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2853 		return;
2854 	}
2855 	read_lock(&vcpu->kvm->arch.sca_lock);
2856 	if (vcpu->kvm->arch.use_esca) {
2857 		struct esca_block *sca = vcpu->kvm->arch.sca;
2858 
2859 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2860 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2861 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2862 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2863 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2864 	} else {
2865 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2866 
2867 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2868 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2869 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2870 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2871 	}
2872 	read_unlock(&vcpu->kvm->arch.sca_lock);
2873 }
2874 
2875 /* Basic SCA to Extended SCA data copy routines */
2876 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2877 {
2878 	d->sda = s->sda;
2879 	d->sigp_ctrl.c = s->sigp_ctrl.c;
2880 	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2881 }
2882 
2883 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2884 {
2885 	int i;
2886 
2887 	d->ipte_control = s->ipte_control;
2888 	d->mcn[0] = s->mcn;
2889 	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2890 		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2891 }
2892 
/*
 * Replace the VM's basic SCA with a newly allocated extended SCA.
 *
 * Returns 0 on success or if the VM already uses the extended SCA, and
 * -ENOMEM if the new block cannot be allocated. The caller visible in this
 * file, sca_can_add_vcpu(), invokes it with kvm->lock held.
 */
static int sca_switch_to_extended(struct kvm *kvm)
{
	struct bsca_block *old_sca = kvm->arch.sca;
	struct esca_block *new_sca;
	struct kvm_vcpu *vcpu;
	unsigned int vcpu_idx;
	u32 scaol, scaoh;

	if (kvm->arch.use_esca)
		return 0;

	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO);
	if (!new_sca)
		return -ENOMEM;

	/* split the new origin into its two halves, masked as in sca_add_vcpu() */
	scaoh = (u32)((u64)(new_sca) >> 32);
	scaol = (u32)(u64)(new_sca) & ~0x3fU;

	/* block all VCPUs and take the SCA lock for writing during the swap */
	kvm_s390_vcpu_block_all(kvm);
	write_lock(&kvm->arch.sca_lock);

	sca_copy_b_to_e(new_sca, old_sca);

	/* repoint every existing VCPU's SIE block at the new SCA */
	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
		vcpu->arch.sie_block->scaoh = scaoh;
		vcpu->arch.sie_block->scaol = scaol;
		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
	}
	kvm->arch.sca = new_sca;
	kvm->arch.use_esca = 1;

	write_unlock(&kvm->arch.sca_lock);
	kvm_s390_vcpu_unblock_all(kvm);

	/* the old basic SCA is released as a single page */
	free_page((unsigned long)old_sca);

	/* old_sca is only printed here, not dereferenced */
	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
		 old_sca, kvm->arch.sca);
	return 0;
}
2933 
2934 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2935 {
2936 	int rc;
2937 
2938 	if (!kvm_s390_use_sca_entries()) {
2939 		if (id < KVM_MAX_VCPUS)
2940 			return true;
2941 		return false;
2942 	}
2943 	if (id < KVM_S390_BSCA_CPU_SLOTS)
2944 		return true;
2945 	if (!sclp.has_esca || !sclp.has_64bscao)
2946 		return false;
2947 
2948 	mutex_lock(&kvm->lock);
2949 	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2950 	mutex_unlock(&kvm->lock);
2951 
2952 	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2953 }
2954 
/*
 * Begin a CPU timer accounting interval: record the current TOD clock in
 * cputm_start inside a cputm_seqcount write section.
 *
 * needs disabled preemption to protect from TOD sync and vcpu_load/put
 */
static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	/* a non-zero cputm_start means an interval is already running */
	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	vcpu->arch.cputm_start = get_tod_clock_fast();
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
}
2963 
/*
 * End the running CPU timer accounting interval: subtract the TOD time
 * elapsed since cputm_start from the SIE block's cputm and reset
 * cputm_start to 0, all inside a cputm_seqcount write section.
 *
 * needs disabled preemption to protect from TOD sync and vcpu_load/put
 */
static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	/* cputm_start == 0 means no interval is running */
	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
	vcpu->arch.cputm_start = 0;
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
}
2973 
/*
 * Mark CPU timer accounting as enabled for @vcpu and start an accounting
 * interval. Warns once if accounting was already enabled.
 *
 * needs disabled preemption to protect from TOD sync and vcpu_load/put
 */
static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
	vcpu->arch.cputm_enabled = true;
	__start_cpu_timer_accounting(vcpu);
}
2981 
/*
 * Stop the running accounting interval and mark CPU timer accounting as
 * disabled for @vcpu. Warns once if accounting was not enabled.
 *
 * needs disabled preemption to protect from TOD sync and vcpu_load/put
 */
static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
	__stop_cpu_timer_accounting(vcpu);
	vcpu->arch.cputm_enabled = false;
}
2989 
/*
 * Preemption-safe wrapper around __enable_cpu_timer_accounting().
 */
static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	__enable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
2996 
/*
 * Preemption-safe wrapper around __disable_cpu_timer_accounting().
 */
static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	__disable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
3003 
/*
 * set the cpu timer - may only be called from the VCPU thread itself
 *
 * Stores @cputm in the SIE block inside a cputm_seqcount write section.
 * If accounting is enabled, the accounting interval is restarted from the
 * current TOD clock so the new value is not reduced by time already
 * elapsed in the old interval.
 */
void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	if (vcpu->arch.cputm_enabled)
		vcpu->arch.cputm_start = get_tod_clock_fast();
	vcpu->arch.sie_block->cputm = cputm;
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
	preempt_enable();
}
3015 
/*
 * update and get the cpu timer - can also be called from other VCPU threads
 *
 * Returns the SIE block's cputm, reduced by the TOD time elapsed since
 * cputm_start when an accounting interval is running. With accounting
 * disabled the stored value is returned as is.
 */
__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
{
	unsigned int seq;
	__u64 value;

	if (unlikely(!vcpu->arch.cputm_enabled))
		return vcpu->arch.sie_block->cputm;

	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	do {
		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
		/*
		 * If the writer would ever execute a read in the critical
		 * section, e.g. in irq context, we have a deadlock.
		 */
		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
		value = vcpu->arch.sie_block->cputm;
		/* if cputm_start is 0, accounting is being started/stopped */
		if (likely(vcpu->arch.cputm_start))
			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
		/*
		 * The retry check is given seq with its low bit cleared, so an
		 * odd (write in progress) sample never compares equal and the
		 * loop runs again.
		 */
	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
	preempt_enable();
	return value;
}
3041 
/*
 * Arch hook for loading @vcpu onto host CPU @cpu: enable the gmap saved in
 * enabled_gmap, set CPUSTAT_RUNNING, resume CPU timer accounting if it is
 * enabled and the VCPU is not idle, and record @cpu in vcpu->cpu.
 */
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{

	gmap_enable(vcpu->arch.enabled_gmap);
	kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
		__start_cpu_timer_accounting(vcpu);
	vcpu->cpu = cpu;
}
3051 
/*
 * Arch hook for taking @vcpu off its host CPU; undoes kvm_arch_vcpu_load()
 * in reverse order. The currently enabled gmap is remembered in
 * enabled_gmap so that the next load re-enables the same one.
 */
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	vcpu->cpu = -1;
	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
		__stop_cpu_timer_accounting(vcpu);
	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
	vcpu->arch.enabled_gmap = gmap_get_enabled();
	gmap_disable(vcpu->arch.enabled_gmap);

}
3062 
3063 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
3064 {
3065 	mutex_lock(&vcpu->kvm->lock);
3066 	preempt_disable();
3067 	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
3068 	vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
3069 	preempt_enable();
3070 	mutex_unlock(&vcpu->kvm->lock);
3071 	if (!kvm_is_ucontrol(vcpu->kvm)) {
3072 		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
3073 		sca_add_vcpu(vcpu);
3074 	}
3075 	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
3076 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3077 	/* make vcpu_load load the right gmap on the first trigger */
3078 	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
3079 }
3080 
3081 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
3082 {
3083 	if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
3084 	    test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
3085 		return true;
3086 	return false;
3087 }
3088 
3089 static bool kvm_has_pckmo_ecc(struct kvm *kvm)
3090 {
3091 	/* At least one ECC subfunction must be present */
3092 	return kvm_has_pckmo_subfunc(kvm, 32) ||
3093 	       kvm_has_pckmo_subfunc(kvm, 33) ||
3094 	       kvm_has_pckmo_subfunc(kvm, 34) ||
3095 	       kvm_has_pckmo_subfunc(kvm, 40) ||
3096 	       kvm_has_pckmo_subfunc(kvm, 41);
3097 
3098 }
3099 
3100 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
3101 {
3102 	/*
3103 	 * If the AP instructions are not being interpreted and the MSAX3
3104 	 * facility is not configured for the guest, there is nothing to set up.
3105 	 */
3106 	if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
3107 		return;
3108 
3109 	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
3110 	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
3111 	vcpu->arch.sie_block->eca &= ~ECA_APIE;
3112 	vcpu->arch.sie_block->ecd &= ~ECD_ECC;
3113 
3114 	if (vcpu->kvm->arch.crypto.apie)
3115 		vcpu->arch.sie_block->eca |= ECA_APIE;
3116 
3117 	/* Set up protected key support */
3118 	if (vcpu->kvm->arch.crypto.aes_kw) {
3119 		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
3120 		/* ecc is also wrapped with AES key */
3121 		if (kvm_has_pckmo_ecc(vcpu->kvm))
3122 			vcpu->arch.sie_block->ecd |= ECD_ECC;
3123 	}
3124 
3125 	if (vcpu->kvm->arch.crypto.dea_kw)
3126 		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
3127 }
3128 
/*
 * Free the page referenced by the SIE block's cbrlo field and clear the
 * field. Counterpart of kvm_s390_vcpu_setup_cmma().
 */
void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
{
	free_page(vcpu->arch.sie_block->cbrlo);
	vcpu->arch.sie_block->cbrlo = 0;
}
3134 
3135 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
3136 {
3137 	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL_ACCOUNT);
3138 	if (!vcpu->arch.sie_block->cbrlo)
3139 		return -ENOMEM;
3140 	return 0;
3141 }
3142 
3143 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
3144 {
3145 	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
3146 
3147 	vcpu->arch.sie_block->ibc = model->ibc;
3148 	if (test_kvm_facility(vcpu->kvm, 7))
3149 		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
3150 }
3151 
/*
 * Initialize the SIE control block of a newly created VCPU.
 *
 * Sets the initial cpuflags, copies the CPU model, enables execution
 * controls (ecb/ecb2/eca/ecd) according to guest facilities and machine
 * capabilities, sets up CMMA if the VM uses it, initializes the ckc timer,
 * applies the crypto configuration and, for a protected VM, creates the
 * protected CPU.
 *
 * Returns 0 on success, or the error from kvm_s390_vcpu_setup_cmma() or
 * kvm_s390_pv_create_cpu().
 */
static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
{
	int rc = 0;
	u16 uvrc, uvrrc;

	/* every VCPU starts out in the stopped state */
	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
						    CPUSTAT_SM |
						    CPUSTAT_STOPPED);

	/* facility 78 takes precedence over facility 8 here */
	if (test_kvm_facility(vcpu->kvm, 78))
		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
	else if (test_kvm_facility(vcpu->kvm, 8))
		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);

	kvm_s390_vcpu_setup_model(vcpu);

	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
	if (MACHINE_HAS_ESOP)
		vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
	if (test_kvm_facility(vcpu->kvm, 9))
		vcpu->arch.sie_block->ecb |= ECB_SRSI;
	if (test_kvm_facility(vcpu->kvm, 73))
		vcpu->arch.sie_block->ecb |= ECB_TE;

	if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
		vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
	if (test_kvm_facility(vcpu->kvm, 130))
		vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
	/* eca is assigned, not or-ed: earlier contents are overwritten */
	vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
	if (sclp.has_cei)
		vcpu->arch.sie_block->eca |= ECA_CEI;
	if (sclp.has_ib)
		vcpu->arch.sie_block->eca |= ECA_IB;
	if (sclp.has_siif)
		vcpu->arch.sie_block->eca |= ECA_SII;
	if (sclp.has_sigpif)
		vcpu->arch.sie_block->eca |= ECA_SIGPI;
	if (test_kvm_facility(vcpu->kvm, 129)) {
		vcpu->arch.sie_block->eca |= ECA_VX;
		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
	}
	if (test_kvm_facility(vcpu->kvm, 139))
		vcpu->arch.sie_block->ecd |= ECD_MEF;
	if (test_kvm_facility(vcpu->kvm, 156))
		vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
	/* gd is filled in by kvm_arch_vcpu_create(); non-zero enables AIV */
	if (vcpu->arch.sie_block->gd) {
		vcpu->arch.sie_block->eca |= ECA_AIV;
		VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
			   vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
	}
	/* point the SIE block at the sdnx and riccb areas of the run struct */
	vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
					| SDNXC;
	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;

	/* without KSS, intercept the ISKE/SSKE/RRBE instructions instead */
	if (sclp.has_kss)
		kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
	else
		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;

	if (vcpu->kvm->arch.use_cmma) {
		rc = kvm_s390_vcpu_setup_cmma(vcpu);
		if (rc)
			return rc;
	}
	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;

	vcpu->arch.sie_block->hpid = HPID_KVM;

	kvm_s390_vcpu_crypto_setup(vcpu);

	mutex_lock(&vcpu->kvm->lock);
	if (kvm_s390_pv_is_protected(vcpu->kvm)) {
		rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
		/* on failure, release the CMMA page set up above again */
		if (rc)
			kvm_s390_vcpu_unsetup_cmma(vcpu);
	}
	mutex_unlock(&vcpu->kvm->lock);

	return rc;
}
3233 
3234 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3235 {
3236 	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3237 		return -EINVAL;
3238 	return 0;
3239 }
3240 
/*
 * Arch part of VCPU creation: allocate the SIE page, initialize the basic
 * SIE block fields and the set of valid sync registers, create the
 * per-VCPU gmap for ucontrol VMs and run kvm_s390_vcpu_setup().
 *
 * Returns 0 on success or a negative error code; on failure everything
 * allocated here is released again.
 */
int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
{
	struct sie_page *sie_page;
	int rc;

	/* the SIE page must fill exactly the one page allocated below */
	BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT);
	if (!sie_page)
		return -ENOMEM;

	vcpu->arch.sie_block = &sie_page->sie_block;
	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;

	/* the real guest size will always be smaller than msl */
	vcpu->arch.sie_block->mso = 0;
	vcpu->arch.sie_block->msl = sclp.hamax;

	vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
	spin_lock_init(&vcpu->arch.local_int.lock);
	/* gd holds the 32-bit truncated GISA origin, plus a format flag */
	vcpu->arch.sie_block->gd = (u32)(u64)vcpu->kvm->arch.gisa_int.origin;
	if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
		vcpu->arch.sie_block->gd |= GISA_FORMAT1;
	seqcount_init(&vcpu->arch.cputm_seqcount);

	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	/* registers that are always valid in the sync area */
	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
				    KVM_SYNC_GPRS |
				    KVM_SYNC_ACRS |
				    KVM_SYNC_CRS |
				    KVM_SYNC_ARCH0 |
				    KVM_SYNC_PFAULT |
				    KVM_SYNC_DIAG318;
	kvm_s390_set_prefix(vcpu, 0);
	/* facility-dependent sync registers */
	if (test_kvm_facility(vcpu->kvm, 64))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
	if (test_kvm_facility(vcpu->kvm, 82))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
	if (test_kvm_facility(vcpu->kvm, 133))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
	if (test_kvm_facility(vcpu->kvm, 156))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
	/* fprs can be synchronized via vrs, even if the guest has no vx. With
	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
	 */
	if (MACHINE_HAS_VX)
		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
	else
		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;

	if (kvm_is_ucontrol(vcpu->kvm)) {
		rc = __kvm_ucontrol_vcpu_init(vcpu);
		if (rc)
			goto out_free_sie_block;
	}

	VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
		 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
	trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);

	rc = kvm_s390_vcpu_setup(vcpu);
	if (rc)
		goto out_ucontrol_uninit;
	return 0;

out_ucontrol_uninit:
	/* only ucontrol VCPUs own a gmap created in this function */
	if (kvm_is_ucontrol(vcpu->kvm))
		gmap_remove(vcpu->arch.gmap);
out_free_sie_block:
	free_page((unsigned long)(vcpu->arch.sie_block));
	return rc;
}
3313 
/*
 * Report whether @vcpu is runnable, which here is the result of
 * kvm_s390_vcpu_has_irq() called with a second argument of 0.
 */
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	return kvm_s390_vcpu_has_irq(vcpu, 0);
}
3318 
3319 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3320 {
3321 	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3322 }
3323 
/*
 * Set PROG_BLOCK_SIE in prog20 and kick @vcpu out of SIE. The bit stays
 * set until kvm_s390_vcpu_unblock() clears it.
 */
void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
{
	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}
3329 
/*
 * Clear PROG_BLOCK_SIE in prog20 again; undoes kvm_s390_vcpu_block().
 */
void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
{
	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
}
3334 
/*
 * Set PROG_REQUEST in prog20 and kick @vcpu out of SIE. The bit is cleared
 * by kvm_s390_vcpu_request_handled().
 */
static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
{
	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}
3340 
3341 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3342 {
3343 	return atomic_read(&vcpu->arch.sie_block->prog20) &
3344 	       (PROG_BLOCK_SIE | PROG_REQUEST);
3345 }
3346 
/*
 * Clear PROG_REQUEST in prog20; counterpart of kvm_s390_vcpu_request().
 */
static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
{
	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
}
3351 
/*
 * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
 * If the CPU is not running (e.g. waiting as idle) the function will
 * return immediately.
 *
 * The wait is a busy loop on the PROG_IN_SIE bit of prog0c.
 */
void exit_sie(struct kvm_vcpu *vcpu)
{
	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
	kvm_s390_vsie_kick(vcpu);
	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
		cpu_relax();
}
3363 
/*
 * Kick a guest cpu out of SIE to process a request synchronously
 *
 * The request @req is made pending first, then PROG_REQUEST is set and the
 * VCPU is forced out of SIE via kvm_s390_vcpu_request().
 */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
	kvm_make_request(req, vcpu);
	kvm_s390_vcpu_request(vcpu);
}
3370 
3371 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3372 			      unsigned long end)
3373 {
3374 	struct kvm *kvm = gmap->private;
3375 	struct kvm_vcpu *vcpu;
3376 	unsigned long prefix;
3377 	int i;
3378 
3379 	if (gmap_is_shadow(gmap))
3380 		return;
3381 	if (start >= 1UL << 31)
3382 		/* We are only interested in prefix pages */
3383 		return;
3384 	kvm_for_each_vcpu(i, vcpu, kvm) {
3385 		/* match against both prefix pages */
3386 		prefix = kvm_s390_get_prefix(vcpu);
3387 		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3388 			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3389 				   start, end);
3390 			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
3391 		}
3392 	}
3393 }
3394 
3395 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3396 {
3397 	/* do not poll with more than halt_poll_max_steal percent of steal time */
3398 	if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3399 	    halt_poll_max_steal) {
3400 		vcpu->stat.halt_no_poll_steal++;
3401 		return true;
3402 	}
3403 	return false;
3404 }
3405 
/*
 * Not expected to be called on s390: reaching this function is treated as
 * a bug. The return statement only satisfies the function signature.
 */
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	/* kvm common code refers to this, but never calls it */
	BUG();
	return 0;
}
3412 
3413 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3414 					   struct kvm_one_reg *reg)
3415 {
3416 	int r = -EINVAL;
3417 
3418 	switch (reg->id) {
3419 	case KVM_REG_S390_TODPR:
3420 		r = put_user(vcpu->arch.sie_block->todpr,
3421 			     (u32 __user *)reg->addr);
3422 		break;
3423 	case KVM_REG_S390_EPOCHDIFF:
3424 		r = put_user(vcpu->arch.sie_block->epoch,
3425 			     (u64 __user *)reg->addr);
3426 		break;
3427 	case KVM_REG_S390_CPU_TIMER:
3428 		r = put_user(kvm_s390_get_cpu_timer(vcpu),
3429 			     (u64 __user *)reg->addr);
3430 		break;
3431 	case KVM_REG_S390_CLOCK_COMP:
3432 		r = put_user(vcpu->arch.sie_block->ckc,
3433 			     (u64 __user *)reg->addr);
3434 		break;
3435 	case KVM_REG_S390_PFTOKEN:
3436 		r = put_user(vcpu->arch.pfault_token,
3437 			     (u64 __user *)reg->addr);
3438 		break;
3439 	case KVM_REG_S390_PFCOMPARE:
3440 		r = put_user(vcpu->arch.pfault_compare,
3441 			     (u64 __user *)reg->addr);
3442 		break;
3443 	case KVM_REG_S390_PFSELECT:
3444 		r = put_user(vcpu->arch.pfault_select,
3445 			     (u64 __user *)reg->addr);
3446 		break;
3447 	case KVM_REG_S390_PP:
3448 		r = put_user(vcpu->arch.sie_block->pp,
3449 			     (u64 __user *)reg->addr);
3450 		break;
3451 	case KVM_REG_S390_GBEA:
3452 		r = put_user(vcpu->arch.sie_block->gbea,
3453 			     (u64 __user *)reg->addr);
3454 		break;
3455 	default:
3456 		break;
3457 	}
3458 
3459 	return r;
3460 }
3461 
3462 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3463 					   struct kvm_one_reg *reg)
3464 {
3465 	int r = -EINVAL;
3466 	__u64 val;
3467 
3468 	switch (reg->id) {
3469 	case KVM_REG_S390_TODPR:
3470 		r = get_user(vcpu->arch.sie_block->todpr,
3471 			     (u32 __user *)reg->addr);
3472 		break;
3473 	case KVM_REG_S390_EPOCHDIFF:
3474 		r = get_user(vcpu->arch.sie_block->epoch,
3475 			     (u64 __user *)reg->addr);
3476 		break;
3477 	case KVM_REG_S390_CPU_TIMER:
3478 		r = get_user(val, (u64 __user *)reg->addr);
3479 		if (!r)
3480 			kvm_s390_set_cpu_timer(vcpu, val);
3481 		break;
3482 	case KVM_REG_S390_CLOCK_COMP:
3483 		r = get_user(vcpu->arch.sie_block->ckc,
3484 			     (u64 __user *)reg->addr);
3485 		break;
3486 	case KVM_REG_S390_PFTOKEN:
3487 		r = get_user(vcpu->arch.pfault_token,
3488 			     (u64 __user *)reg->addr);
3489 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3490 			kvm_clear_async_pf_completion_queue(vcpu);
3491 		break;
3492 	case KVM_REG_S390_PFCOMPARE:
3493 		r = get_user(vcpu->arch.pfault_compare,
3494 			     (u64 __user *)reg->addr);
3495 		break;
3496 	case KVM_REG_S390_PFSELECT:
3497 		r = get_user(vcpu->arch.pfault_select,
3498 			     (u64 __user *)reg->addr);
3499 		break;
3500 	case KVM_REG_S390_PP:
3501 		r = get_user(vcpu->arch.sie_block->pp,
3502 			     (u64 __user *)reg->addr);
3503 		break;
3504 	case KVM_REG_S390_GBEA:
3505 		r = get_user(vcpu->arch.sie_block->gbea,
3506 			     (u64 __user *)reg->addr);
3507 		break;
3508 	default:
3509 		break;
3510 	}
3511 
3512 	return r;
3513 }
3514 
/*
 * Normal reset of @vcpu: clear PSW_MASK_RI in the guest PSW, invalidate
 * the pfault token and drop queued async pf completions, zero the riccb
 * sync area, stop the VCPU unless user space controls the CPU state, and
 * clear the local interrupts.
 */
static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
{
	vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));

	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
		kvm_s390_vcpu_stop(vcpu);
	kvm_s390_clear_local_irqs(vcpu);
}
3526 
3527 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3528 {
3529 	/* Initial reset is a superset of the normal reset */
3530 	kvm_arch_vcpu_ioctl_normal_reset(vcpu);
3531 
3532 	/*
3533 	 * This equals initial cpu reset in pop, but we don't switch to ESA.
3534 	 * We do not only reset the internal data, but also ...
3535 	 */
3536 	vcpu->arch.sie_block->gpsw.mask = 0;
3537 	vcpu->arch.sie_block->gpsw.addr = 0;
3538 	kvm_s390_set_prefix(vcpu, 0);
3539 	kvm_s390_set_cpu_timer(vcpu, 0);
3540 	vcpu->arch.sie_block->ckc = 0;
3541 	memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
3542 	vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
3543 	vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
3544 
3545 	/* ... the data in sync regs */
3546 	memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
3547 	vcpu->run->s.regs.ckc = 0;
3548 	vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
3549 	vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
3550 	vcpu->run->psw_addr = 0;
3551 	vcpu->run->psw_mask = 0;
3552 	vcpu->run->s.regs.todpr = 0;
3553 	vcpu->run->s.regs.cputm = 0;
3554 	vcpu->run->s.regs.ckc = 0;
3555 	vcpu->run->s.regs.pp = 0;
3556 	vcpu->run->s.regs.gbea = 1;
3557 	vcpu->run->s.regs.fpc = 0;
3558 	/*
3559 	 * Do not reset these registers in the protected case, as some of
3560 	 * them are overlayed and they are not accessible in this case
3561 	 * anyway.
3562 	 */
3563 	if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3564 		vcpu->arch.sie_block->gbea = 1;
3565 		vcpu->arch.sie_block->pp = 0;
3566 		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3567 		vcpu->arch.sie_block->todpr = 0;
3568 	}
3569 }
3570 
3571 static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
3572 {
3573 	struct kvm_sync_regs *regs = &vcpu->run->s.regs;
3574 
3575 	/* Clear reset is a superset of the initial reset */
3576 	kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3577 
3578 	memset(&regs->gprs, 0, sizeof(regs->gprs));
3579 	memset(&regs->vrs, 0, sizeof(regs->vrs));
3580 	memset(&regs->acrs, 0, sizeof(regs->acrs));
3581 	memset(&regs->gscb, 0, sizeof(regs->gscb));
3582 
3583 	regs->etoken = 0;
3584 	regs->etoken_extension = 0;
3585 }
3586 
3587 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3588 {
3589 	vcpu_load(vcpu);
3590 	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3591 	vcpu_put(vcpu);
3592 	return 0;
3593 }
3594 
3595 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3596 {
3597 	vcpu_load(vcpu);
3598 	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3599 	vcpu_put(vcpu);
3600 	return 0;
3601 }
3602 
3603 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3604 				  struct kvm_sregs *sregs)
3605 {
3606 	vcpu_load(vcpu);
3607 
3608 	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3609 	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3610 
3611 	vcpu_put(vcpu);
3612 	return 0;
3613 }
3614 
3615 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3616 				  struct kvm_sregs *sregs)
3617 {
3618 	vcpu_load(vcpu);
3619 
3620 	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3621 	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3622 
3623 	vcpu_put(vcpu);
3624 	return 0;
3625 }
3626 
3627 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3628 {
3629 	int ret = 0;
3630 
3631 	vcpu_load(vcpu);
3632 
3633 	if (test_fp_ctl(fpu->fpc)) {
3634 		ret = -EINVAL;
3635 		goto out;
3636 	}
3637 	vcpu->run->s.regs.fpc = fpu->fpc;
3638 	if (MACHINE_HAS_VX)
3639 		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3640 				 (freg_t *) fpu->fprs);
3641 	else
3642 		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3643 
3644 out:
3645 	vcpu_put(vcpu);
3646 	return ret;
3647 }
3648 
3649 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3650 {
3651 	vcpu_load(vcpu);
3652 
3653 	/* make sure we have the latest values */
3654 	save_fpu_regs();
3655 	if (MACHINE_HAS_VX)
3656 		convert_vx_to_fp((freg_t *) fpu->fprs,
3657 				 (__vector128 *) vcpu->run->s.regs.vrs);
3658 	else
3659 		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3660 	fpu->fpc = vcpu->run->s.regs.fpc;
3661 
3662 	vcpu_put(vcpu);
3663 	return 0;
3664 }
3665 
3666 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3667 {
3668 	int rc = 0;
3669 
3670 	if (!is_vcpu_stopped(vcpu))
3671 		rc = -EBUSY;
3672 	else {
3673 		vcpu->run->psw_mask = psw.mask;
3674 		vcpu->run->psw_addr = psw.addr;
3675 	}
3676 	return rc;
3677 }
3678 
/*
 * Address translation ioctl; unconditionally fails with -EINVAL and does
 * not touch @vcpu or @tr.
 */
int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return -EINVAL; /* not implemented yet */
}
3684 
3685 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3686 			      KVM_GUESTDBG_USE_HW_BP | \
3687 			      KVM_GUESTDBG_ENABLE)
3688 
3689 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3690 					struct kvm_guest_debug *dbg)
3691 {
3692 	int rc = 0;
3693 
3694 	vcpu_load(vcpu);
3695 
3696 	vcpu->guest_debug = 0;
3697 	kvm_s390_clear_bp_data(vcpu);
3698 
3699 	if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3700 		rc = -EINVAL;
3701 		goto out;
3702 	}
3703 	if (!sclp.has_gpere) {
3704 		rc = -EINVAL;
3705 		goto out;
3706 	}
3707 
3708 	if (dbg->control & KVM_GUESTDBG_ENABLE) {
3709 		vcpu->guest_debug = dbg->control;
3710 		/* enforce guest PER */
3711 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3712 
3713 		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3714 			rc = kvm_s390_import_bp_data(vcpu, dbg);
3715 	} else {
3716 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3717 		vcpu->arch.guestdbg.last_bp = 0;
3718 	}
3719 
3720 	if (rc) {
3721 		vcpu->guest_debug = 0;
3722 		kvm_s390_clear_bp_data(vcpu);
3723 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3724 	}
3725 
3726 out:
3727 	vcpu_put(vcpu);
3728 	return rc;
3729 }
3730 
3731 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3732 				    struct kvm_mp_state *mp_state)
3733 {
3734 	int ret;
3735 
3736 	vcpu_load(vcpu);
3737 
3738 	/* CHECK_STOP and LOAD are not supported yet */
3739 	ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3740 				      KVM_MP_STATE_OPERATING;
3741 
3742 	vcpu_put(vcpu);
3743 	return ret;
3744 }
3745 
3746 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3747 				    struct kvm_mp_state *mp_state)
3748 {
3749 	int rc = 0;
3750 
3751 	vcpu_load(vcpu);
3752 
3753 	/* user space knows about this interface - let it control the state */
3754 	vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3755 
3756 	switch (mp_state->mp_state) {
3757 	case KVM_MP_STATE_STOPPED:
3758 		rc = kvm_s390_vcpu_stop(vcpu);
3759 		break;
3760 	case KVM_MP_STATE_OPERATING:
3761 		rc = kvm_s390_vcpu_start(vcpu);
3762 		break;
3763 	case KVM_MP_STATE_LOAD:
3764 		if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3765 			rc = -ENXIO;
3766 			break;
3767 		}
3768 		rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
3769 		break;
3770 	case KVM_MP_STATE_CHECK_STOP:
3771 		fallthrough;	/* CHECK_STOP and LOAD are not supported yet */
3772 	default:
3773 		rc = -ENXIO;
3774 	}
3775 
3776 	vcpu_put(vcpu);
3777 	return rc;
3778 }
3779 
/* Return true if CPUSTAT_IBS is set in the cpuflags of @vcpu. */
static bool ibs_enabled(struct kvm_vcpu *vcpu)
{
	return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
}
3784 
/*
 * Process the requests pending for @vcpu.
 *
 * PROG_REQUEST is cleared at the start of every pass, and after each
 * handled request the function starts over, so requests raised in the
 * meantime are picked up as well.
 *
 * Returns 0 once nothing more is pending, or the error from
 * gmap_mprotect_notify(), in which case KVM_REQ_MMU_RELOAD is queued again.
 */
static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
retry:
	kvm_s390_vcpu_request_handled(vcpu);
	if (!kvm_request_pending(vcpu))
		return 0;
	/*
	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
	 * This ensures that the ipte instruction for this request has
	 * already finished. We might race against a second unmapper that
	 * wants to set the blocking bit. Lets just retry the request loop.
	 */
	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
		int rc;
		rc = gmap_mprotect_notify(vcpu->arch.gmap,
					  kvm_s390_get_prefix(vcpu),
					  PAGE_SIZE * 2, PROT_WRITE);
		if (rc) {
			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
			return rc;
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
		/* NOTE(review): presumably 0xffff marks ihcpu as invalid - confirm */
		vcpu->arch.sie_block->ihcpu = 0xffff;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
		/* trace and set the flag only on an actual state change */
		if (!ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
			kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
		if (ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
			kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
		/*
		 * Disable CMM virtualization; we will emulate the ESSA
		 * instruction manually, in order to provide additional
		 * functionalities needed for live migration.
		 */
		vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
		/*
		 * Re-enable CMM virtualization if CMMA is available and
		 * CMM has been used.
		 */
		if ((vcpu->kvm->arch.use_cmma) &&
		    (vcpu->kvm->mm->context.uses_cmm))
			vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
		goto retry;
	}

	/* nothing to do, just clear the request */
	kvm_clear_request(KVM_REQ_UNHALT, vcpu);
	/* we left the vsie handler, nothing to do, just clear the request */
	kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);

	return 0;
}
3864 
/*
 * Set the guest TOD clock of @kvm to the value described by @gtod.
 *
 * The VM-wide epoch is computed as the difference between the requested
 * guest TOD and the current host TOD. With facility 139 the epoch index
 * difference is computed as well. The result is then written into the SIE
 * block of every VCPU while all VCPUs are blocked. Takes kvm->lock and
 * runs the update with preemption disabled.
 */
void kvm_s390_set_tod_clock(struct kvm *kvm,
			    const struct kvm_s390_vm_tod_clock *gtod)
{
	struct kvm_vcpu *vcpu;
	struct kvm_s390_tod_clock_ext htod;
	int i;

	mutex_lock(&kvm->lock);
	preempt_disable();

	get_tod_clock_ext((char *)&htod);

	kvm->arch.epoch = gtod->tod - htod.tod;
	kvm->arch.epdx = 0;
	if (test_kvm_facility(kvm, 139)) {
		kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
		/*
		 * A difference larger than the minuend means the subtraction
		 * above wrapped (assuming unsigned fields - TODO confirm), so
		 * borrow one from the epoch index.
		 */
		if (kvm->arch.epoch > gtod->tod)
			kvm->arch.epdx -= 1;
	}

	kvm_s390_vcpu_block_all(kvm);
	kvm_for_each_vcpu(i, vcpu, kvm) {
		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
		vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
	}

	kvm_s390_vcpu_unblock_all(kvm);
	preempt_enable();
	mutex_unlock(&kvm->lock);
}
3895 
3896 /**
3897  * kvm_arch_fault_in_page - fault-in guest page if necessary
3898  * @vcpu: The corresponding virtual cpu
3899  * @gpa: Guest physical address
3900  * @writable: Whether the page should be writable or not
3901  *
3902  * Make sure that a guest page has been faulted-in on the host.
3903  *
3904  * Return: Zero on success, negative error code otherwise.
3905  */
3906 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3907 {
3908 	return gmap_fault(vcpu->arch.gmap, gpa,
3909 			  writable ? FAULT_FLAG_WRITE : 0);
3910 }
3911 
3912 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3913 				      unsigned long token)
3914 {
3915 	struct kvm_s390_interrupt inti;
3916 	struct kvm_s390_irq irq;
3917 
3918 	if (start_token) {
3919 		irq.u.ext.ext_params2 = token;
3920 		irq.type = KVM_S390_INT_PFAULT_INIT;
3921 		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3922 	} else {
3923 		inti.type = KVM_S390_INT_PFAULT_DONE;
3924 		inti.parm64 = token;
3925 		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3926 	}
3927 }
3928 
3929 bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3930 				     struct kvm_async_pf *work)
3931 {
3932 	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3933 	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3934 
3935 	return true;
3936 }
3937 
3938 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3939 				 struct kvm_async_pf *work)
3940 {
3941 	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3942 	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3943 }
3944 
/*
 * Async-pf hook, intentionally empty: s390 always injects the page
 * directly, so there is nothing left to do here.
 */
void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
}
3950 
3951 bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
3952 {
3953 	/*
3954 	 * s390 will always inject the page directly,
3955 	 * but we still want check_async_completion to cleanup
3956 	 */
3957 	return true;
3958 }
3959 
3960 static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3961 {
3962 	hva_t hva;
3963 	struct kvm_arch_async_pf arch;
3964 
3965 	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3966 		return false;
3967 	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3968 	    vcpu->arch.pfault_compare)
3969 		return false;
3970 	if (psw_extint_disabled(vcpu))
3971 		return false;
3972 	if (kvm_s390_vcpu_has_irq(vcpu, 0))
3973 		return false;
3974 	if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
3975 		return false;
3976 	if (!vcpu->arch.gmap->pfault_enabled)
3977 		return false;
3978 
3979 	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3980 	hva += current->thread.gmap_addr & ~PAGE_MASK;
3981 	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3982 		return false;
3983 
3984 	return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3985 }
3986 
3987 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3988 {
3989 	int rc, cpuflags;
3990 
3991 	/*
3992 	 * On s390 notifications for arriving pages will be delivered directly
3993 	 * to the guest but the house keeping for completed pfaults is
3994 	 * handled outside the worker.
3995 	 */
3996 	kvm_check_async_pf_completion(vcpu);
3997 
3998 	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3999 	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
4000 
4001 	if (need_resched())
4002 		schedule();
4003 
4004 	if (!kvm_is_ucontrol(vcpu->kvm)) {
4005 		rc = kvm_s390_deliver_pending_interrupts(vcpu);
4006 		if (rc)
4007 			return rc;
4008 	}
4009 
4010 	rc = kvm_s390_handle_requests(vcpu);
4011 	if (rc)
4012 		return rc;
4013 
4014 	if (guestdbg_enabled(vcpu)) {
4015 		kvm_s390_backup_guest_per_regs(vcpu);
4016 		kvm_s390_patch_guest_per_regs(vcpu);
4017 	}
4018 
4019 	clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.gisa_int.kicked_mask);
4020 
4021 	vcpu->arch.sie_block->icptcode = 0;
4022 	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
4023 	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
4024 	trace_kvm_s390_sie_enter(vcpu, cpuflags);
4025 
4026 	return 0;
4027 }
4028 
4029 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
4030 {
4031 	struct kvm_s390_pgm_info pgm_info = {
4032 		.code = PGM_ADDRESSING,
4033 	};
4034 	u8 opcode, ilen;
4035 	int rc;
4036 
4037 	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
4038 	trace_kvm_s390_sie_fault(vcpu);
4039 
4040 	/*
4041 	 * We want to inject an addressing exception, which is defined as a
4042 	 * suppressing or terminating exception. However, since we came here
4043 	 * by a DAT access exception, the PSW still points to the faulting
4044 	 * instruction since DAT exceptions are nullifying. So we've got
4045 	 * to look up the current opcode to get the length of the instruction
4046 	 * to be able to forward the PSW.
4047 	 */
4048 	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
4049 	ilen = insn_length(opcode);
4050 	if (rc < 0) {
4051 		return rc;
4052 	} else if (rc) {
4053 		/* Instruction-Fetching Exceptions - we can't detect the ilen.
4054 		 * Forward by arbitrary ilc, injection will take care of
4055 		 * nullification if necessary.
4056 		 */
4057 		pgm_info = vcpu->arch.pgm;
4058 		ilen = 4;
4059 	}
4060 	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
4061 	kvm_s390_forward_psw(vcpu, ilen);
4062 	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
4063 }
4064 
4065 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
4066 {
4067 	struct mcck_volatile_info *mcck_info;
4068 	struct sie_page *sie_page;
4069 
4070 	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
4071 		   vcpu->arch.sie_block->icptcode);
4072 	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
4073 
4074 	if (guestdbg_enabled(vcpu))
4075 		kvm_s390_restore_guest_per_regs(vcpu);
4076 
4077 	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
4078 	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
4079 
4080 	if (exit_reason == -EINTR) {
4081 		VCPU_EVENT(vcpu, 3, "%s", "machine check");
4082 		sie_page = container_of(vcpu->arch.sie_block,
4083 					struct sie_page, sie_block);
4084 		mcck_info = &sie_page->mcck_info;
4085 		kvm_s390_reinject_machine_check(vcpu, mcck_info);
4086 		return 0;
4087 	}
4088 
4089 	if (vcpu->arch.sie_block->icptcode > 0) {
4090 		int rc = kvm_handle_sie_intercept(vcpu);
4091 
4092 		if (rc != -EOPNOTSUPP)
4093 			return rc;
4094 		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
4095 		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
4096 		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
4097 		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
4098 		return -EREMOTE;
4099 	} else if (exit_reason != -EFAULT) {
4100 		vcpu->stat.exit_null++;
4101 		return 0;
4102 	} else if (kvm_is_ucontrol(vcpu->kvm)) {
4103 		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
4104 		vcpu->run->s390_ucontrol.trans_exc_code =
4105 						current->thread.gmap_addr;
4106 		vcpu->run->s390_ucontrol.pgm_code = 0x10;
4107 		return -EREMOTE;
4108 	} else if (current->thread.gmap_pfault) {
4109 		trace_kvm_s390_major_guest_pfault(vcpu);
4110 		current->thread.gmap_pfault = 0;
4111 		if (kvm_arch_setup_async_pf(vcpu))
4112 			return 0;
4113 		vcpu->stat.pfault_sync++;
4114 		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
4115 	}
4116 	return vcpu_post_run_fault_in_sie(vcpu);
4117 }
4118 
4119 #define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
4120 static int __vcpu_run(struct kvm_vcpu *vcpu)
4121 {
4122 	int rc, exit_reason;
4123 	struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
4124 
4125 	/*
4126 	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
4127 	 * ning the guest), so that memslots (and other stuff) are protected
4128 	 */
4129 	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4130 
4131 	do {
4132 		rc = vcpu_pre_run(vcpu);
4133 		if (rc)
4134 			break;
4135 
4136 		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4137 		/*
4138 		 * As PF_VCPU will be used in fault handler, between
4139 		 * guest_enter and guest_exit should be no uaccess.
4140 		 */
4141 		local_irq_disable();
4142 		guest_enter_irqoff();
4143 		__disable_cpu_timer_accounting(vcpu);
4144 		local_irq_enable();
4145 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4146 			memcpy(sie_page->pv_grregs,
4147 			       vcpu->run->s.regs.gprs,
4148 			       sizeof(sie_page->pv_grregs));
4149 		}
4150 		exit_reason = sie64a(vcpu->arch.sie_block,
4151 				     vcpu->run->s.regs.gprs);
4152 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4153 			memcpy(vcpu->run->s.regs.gprs,
4154 			       sie_page->pv_grregs,
4155 			       sizeof(sie_page->pv_grregs));
4156 			/*
4157 			 * We're not allowed to inject interrupts on intercepts
4158 			 * that leave the guest state in an "in-between" state
4159 			 * where the next SIE entry will do a continuation.
4160 			 * Fence interrupts in our "internal" PSW.
4161 			 */
4162 			if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
4163 			    vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
4164 				vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4165 			}
4166 		}
4167 		local_irq_disable();
4168 		__enable_cpu_timer_accounting(vcpu);
4169 		guest_exit_irqoff();
4170 		local_irq_enable();
4171 		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4172 
4173 		rc = vcpu_post_run(vcpu, exit_reason);
4174 	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
4175 
4176 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4177 	return rc;
4178 }
4179 
4180 static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
4181 {
4182 	struct kvm_run *kvm_run = vcpu->run;
4183 	struct runtime_instr_cb *riccb;
4184 	struct gs_cb *gscb;
4185 
4186 	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
4187 	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
4188 	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
4189 	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
4190 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4191 		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
4192 		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
4193 		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
4194 	}
4195 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
4196 		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
4197 		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
4198 		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
4199 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4200 			kvm_clear_async_pf_completion_queue(vcpu);
4201 	}
4202 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
4203 		vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
4204 		vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
4205 	}
4206 	/*
4207 	 * If userspace sets the riccb (e.g. after migration) to a valid state,
4208 	 * we should enable RI here instead of doing the lazy enablement.
4209 	 */
4210 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
4211 	    test_kvm_facility(vcpu->kvm, 64) &&
4212 	    riccb->v &&
4213 	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
4214 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
4215 		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
4216 	}
4217 	/*
4218 	 * If userspace sets the gscb (e.g. after migration) to non-zero,
4219 	 * we should enable GS here instead of doing the lazy enablement.
4220 	 */
4221 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
4222 	    test_kvm_facility(vcpu->kvm, 133) &&
4223 	    gscb->gssm &&
4224 	    !vcpu->arch.gs_enabled) {
4225 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
4226 		vcpu->arch.sie_block->ecb |= ECB_GS;
4227 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
4228 		vcpu->arch.gs_enabled = 1;
4229 	}
4230 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
4231 	    test_kvm_facility(vcpu->kvm, 82)) {
4232 		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
4233 		vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
4234 	}
4235 	if (MACHINE_HAS_GS) {
4236 		preempt_disable();
4237 		__ctl_set_bit(2, 4);
4238 		if (current->thread.gs_cb) {
4239 			vcpu->arch.host_gscb = current->thread.gs_cb;
4240 			save_gs_cb(vcpu->arch.host_gscb);
4241 		}
4242 		if (vcpu->arch.gs_enabled) {
4243 			current->thread.gs_cb = (struct gs_cb *)
4244 						&vcpu->run->s.regs.gscb;
4245 			restore_gs_cb(current->thread.gs_cb);
4246 		}
4247 		preempt_enable();
4248 	}
4249 	/* SIE will load etoken directly from SDNX and therefore kvm_run */
4250 }
4251 
4252 static void sync_regs(struct kvm_vcpu *vcpu)
4253 {
4254 	struct kvm_run *kvm_run = vcpu->run;
4255 
4256 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
4257 		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
4258 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
4259 		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
4260 		/* some control register changes require a tlb flush */
4261 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4262 	}
4263 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4264 		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
4265 		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
4266 	}
4267 	save_access_regs(vcpu->arch.host_acrs);
4268 	restore_access_regs(vcpu->run->s.regs.acrs);
4269 	/* save host (userspace) fprs/vrs */
4270 	save_fpu_regs();
4271 	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
4272 	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
4273 	if (MACHINE_HAS_VX)
4274 		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
4275 	else
4276 		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
4277 	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
4278 	if (test_fp_ctl(current->thread.fpu.fpc))
4279 		/* User space provided an invalid FPC, let's clear it */
4280 		current->thread.fpu.fpc = 0;
4281 
4282 	/* Sync fmt2 only data */
4283 	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
4284 		sync_regs_fmt2(vcpu);
4285 	} else {
4286 		/*
4287 		 * In several places we have to modify our internal view to
4288 		 * not do things that are disallowed by the ultravisor. For
4289 		 * example we must not inject interrupts after specific exits
4290 		 * (e.g. 112 prefix page not secure). We do this by turning
4291 		 * off the machine check, external and I/O interrupt bits
4292 		 * of our PSW copy. To avoid getting validity intercepts, we
4293 		 * do only accept the condition code from userspace.
4294 		 */
4295 		vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
4296 		vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
4297 						   PSW_MASK_CC;
4298 	}
4299 
4300 	kvm_run->kvm_dirty_regs = 0;
4301 }
4302 
4303 static void store_regs_fmt2(struct kvm_vcpu *vcpu)
4304 {
4305 	struct kvm_run *kvm_run = vcpu->run;
4306 
4307 	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
4308 	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
4309 	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
4310 	kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
4311 	kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
4312 	if (MACHINE_HAS_GS) {
4313 		__ctl_set_bit(2, 4);
4314 		if (vcpu->arch.gs_enabled)
4315 			save_gs_cb(current->thread.gs_cb);
4316 		preempt_disable();
4317 		current->thread.gs_cb = vcpu->arch.host_gscb;
4318 		restore_gs_cb(vcpu->arch.host_gscb);
4319 		preempt_enable();
4320 		if (!vcpu->arch.host_gscb)
4321 			__ctl_clear_bit(2, 4);
4322 		vcpu->arch.host_gscb = NULL;
4323 	}
4324 	/* SIE will save etoken directly into SDNX and therefore kvm_run */
4325 }
4326 
4327 static void store_regs(struct kvm_vcpu *vcpu)
4328 {
4329 	struct kvm_run *kvm_run = vcpu->run;
4330 
4331 	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
4332 	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
4333 	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
4334 	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
4335 	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
4336 	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
4337 	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
4338 	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
4339 	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
4340 	save_access_regs(vcpu->run->s.regs.acrs);
4341 	restore_access_regs(vcpu->arch.host_acrs);
4342 	/* Save guest register state */
4343 	save_fpu_regs();
4344 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4345 	/* Restore will be done lazily at return */
4346 	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
4347 	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
4348 	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
4349 		store_regs_fmt2(vcpu);
4350 }
4351 
4352 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
4353 {
4354 	struct kvm_run *kvm_run = vcpu->run;
4355 	int rc;
4356 
4357 	if (kvm_run->immediate_exit)
4358 		return -EINTR;
4359 
4360 	if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
4361 	    kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
4362 		return -EINVAL;
4363 
4364 	vcpu_load(vcpu);
4365 
4366 	if (guestdbg_exit_pending(vcpu)) {
4367 		kvm_s390_prepare_debug_exit(vcpu);
4368 		rc = 0;
4369 		goto out;
4370 	}
4371 
4372 	kvm_sigset_activate(vcpu);
4373 
4374 	/*
4375 	 * no need to check the return value of vcpu_start as it can only have
4376 	 * an error for protvirt, but protvirt means user cpu state
4377 	 */
4378 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
4379 		kvm_s390_vcpu_start(vcpu);
4380 	} else if (is_vcpu_stopped(vcpu)) {
4381 		pr_err_ratelimited("can't run stopped vcpu %d\n",
4382 				   vcpu->vcpu_id);
4383 		rc = -EINVAL;
4384 		goto out;
4385 	}
4386 
4387 	sync_regs(vcpu);
4388 	enable_cpu_timer_accounting(vcpu);
4389 
4390 	might_fault();
4391 	rc = __vcpu_run(vcpu);
4392 
4393 	if (signal_pending(current) && !rc) {
4394 		kvm_run->exit_reason = KVM_EXIT_INTR;
4395 		rc = -EINTR;
4396 	}
4397 
4398 	if (guestdbg_exit_pending(vcpu) && !rc)  {
4399 		kvm_s390_prepare_debug_exit(vcpu);
4400 		rc = 0;
4401 	}
4402 
4403 	if (rc == -EREMOTE) {
4404 		/* userspace support is needed, kvm_run has been prepared */
4405 		rc = 0;
4406 	}
4407 
4408 	disable_cpu_timer_accounting(vcpu);
4409 	store_regs(vcpu);
4410 
4411 	kvm_sigset_deactivate(vcpu);
4412 
4413 	vcpu->stat.exit_userspace++;
4414 out:
4415 	vcpu_put(vcpu);
4416 	return rc;
4417 }
4418 
4419 /*
4420  * store status at address
4421  * we use have two special cases:
4422  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
4423  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
4424  */
4425 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4426 {
4427 	unsigned char archmode = 1;
4428 	freg_t fprs[NUM_FPRS];
4429 	unsigned int px;
4430 	u64 clkcomp, cputm;
4431 	int rc;
4432 
4433 	px = kvm_s390_get_prefix(vcpu);
4434 	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
4435 		if (write_guest_abs(vcpu, 163, &archmode, 1))
4436 			return -EFAULT;
4437 		gpa = 0;
4438 	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
4439 		if (write_guest_real(vcpu, 163, &archmode, 1))
4440 			return -EFAULT;
4441 		gpa = px;
4442 	} else
4443 		gpa -= __LC_FPREGS_SAVE_AREA;
4444 
4445 	/* manually convert vector registers if necessary */
4446 	if (MACHINE_HAS_VX) {
4447 		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
4448 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4449 				     fprs, 128);
4450 	} else {
4451 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4452 				     vcpu->run->s.regs.fprs, 128);
4453 	}
4454 	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
4455 			      vcpu->run->s.regs.gprs, 128);
4456 	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
4457 			      &vcpu->arch.sie_block->gpsw, 16);
4458 	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
4459 			      &px, 4);
4460 	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
4461 			      &vcpu->run->s.regs.fpc, 4);
4462 	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
4463 			      &vcpu->arch.sie_block->todpr, 4);
4464 	cputm = kvm_s390_get_cpu_timer(vcpu);
4465 	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
4466 			      &cputm, 8);
4467 	clkcomp = vcpu->arch.sie_block->ckc >> 8;
4468 	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
4469 			      &clkcomp, 8);
4470 	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
4471 			      &vcpu->run->s.regs.acrs, 64);
4472 	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
4473 			      &vcpu->arch.sie_block->gcr, 128);
4474 	return rc ? -EFAULT : 0;
4475 }
4476 
4477 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
4478 {
4479 	/*
4480 	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
4481 	 * switch in the run ioctl. Let's update our copies before we save
4482 	 * it into the save area
4483 	 */
4484 	save_fpu_regs();
4485 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4486 	save_access_regs(vcpu->run->s.regs.acrs);
4487 
4488 	return kvm_s390_store_status_unloaded(vcpu, addr);
4489 }
4490 
4491 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4492 {
4493 	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
4494 	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
4495 }
4496 
4497 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
4498 {
4499 	unsigned int i;
4500 	struct kvm_vcpu *vcpu;
4501 
4502 	kvm_for_each_vcpu(i, vcpu, kvm) {
4503 		__disable_ibs_on_vcpu(vcpu);
4504 	}
4505 }
4506 
4507 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4508 {
4509 	if (!sclp.has_ibs)
4510 		return;
4511 	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
4512 	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
4513 }
4514 
4515 int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
4516 {
4517 	int i, online_vcpus, r = 0, started_vcpus = 0;
4518 
4519 	if (!is_vcpu_stopped(vcpu))
4520 		return 0;
4521 
4522 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
4523 	/* Only one cpu at a time may enter/leave the STOPPED state. */
4524 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
4525 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4526 
4527 	/* Let's tell the UV that we want to change into the operating state */
4528 	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4529 		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
4530 		if (r) {
4531 			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4532 			return r;
4533 		}
4534 	}
4535 
4536 	for (i = 0; i < online_vcpus; i++) {
4537 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
4538 			started_vcpus++;
4539 	}
4540 
4541 	if (started_vcpus == 0) {
4542 		/* we're the only active VCPU -> speed it up */
4543 		__enable_ibs_on_vcpu(vcpu);
4544 	} else if (started_vcpus == 1) {
4545 		/*
4546 		 * As we are starting a second VCPU, we have to disable
4547 		 * the IBS facility on all VCPUs to remove potentially
4548 		 * oustanding ENABLE requests.
4549 		 */
4550 		__disable_ibs_on_all_vcpus(vcpu->kvm);
4551 	}
4552 
4553 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
4554 	/*
4555 	 * The real PSW might have changed due to a RESTART interpreted by the
4556 	 * ultravisor. We block all interrupts and let the next sie exit
4557 	 * refresh our view.
4558 	 */
4559 	if (kvm_s390_pv_cpu_is_protected(vcpu))
4560 		vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4561 	/*
4562 	 * Another VCPU might have used IBS while we were offline.
4563 	 * Let's play safe and flush the VCPU at startup.
4564 	 */
4565 	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4566 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4567 	return 0;
4568 }
4569 
4570 int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
4571 {
4572 	int i, online_vcpus, r = 0, started_vcpus = 0;
4573 	struct kvm_vcpu *started_vcpu = NULL;
4574 
4575 	if (is_vcpu_stopped(vcpu))
4576 		return 0;
4577 
4578 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
4579 	/* Only one cpu at a time may enter/leave the STOPPED state. */
4580 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
4581 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4582 
4583 	/* Let's tell the UV that we want to change into the stopped state */
4584 	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4585 		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
4586 		if (r) {
4587 			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4588 			return r;
4589 		}
4590 	}
4591 
4592 	/* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
4593 	kvm_s390_clear_stop_irq(vcpu);
4594 
4595 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
4596 	__disable_ibs_on_vcpu(vcpu);
4597 
4598 	for (i = 0; i < online_vcpus; i++) {
4599 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
4600 			started_vcpus++;
4601 			started_vcpu = vcpu->kvm->vcpus[i];
4602 		}
4603 	}
4604 
4605 	if (started_vcpus == 1) {
4606 		/*
4607 		 * As we only have one VCPU left, we want to enable the
4608 		 * IBS facility for that VCPU to speed it up.
4609 		 */
4610 		__enable_ibs_on_vcpu(started_vcpu);
4611 	}
4612 
4613 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4614 	return 0;
4615 }
4616 
4617 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4618 				     struct kvm_enable_cap *cap)
4619 {
4620 	int r;
4621 
4622 	if (cap->flags)
4623 		return -EINVAL;
4624 
4625 	switch (cap->cap) {
4626 	case KVM_CAP_S390_CSS_SUPPORT:
4627 		if (!vcpu->kvm->arch.css_support) {
4628 			vcpu->kvm->arch.css_support = 1;
4629 			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
4630 			trace_kvm_s390_enable_css(vcpu->kvm);
4631 		}
4632 		r = 0;
4633 		break;
4634 	default:
4635 		r = -EINVAL;
4636 		break;
4637 	}
4638 	return r;
4639 }
4640 
4641 static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu,
4642 				   struct kvm_s390_mem_op *mop)
4643 {
4644 	void __user *uaddr = (void __user *)mop->buf;
4645 	int r = 0;
4646 
4647 	if (mop->flags || !mop->size)
4648 		return -EINVAL;
4649 	if (mop->size + mop->sida_offset < mop->size)
4650 		return -EINVAL;
4651 	if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
4652 		return -E2BIG;
4653 
4654 	switch (mop->op) {
4655 	case KVM_S390_MEMOP_SIDA_READ:
4656 		if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
4657 				 mop->sida_offset), mop->size))
4658 			r = -EFAULT;
4659 
4660 		break;
4661 	case KVM_S390_MEMOP_SIDA_WRITE:
4662 		if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
4663 				   mop->sida_offset), uaddr, mop->size))
4664 			r = -EFAULT;
4665 		break;
4666 	}
4667 	return r;
4668 }
4669 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
4670 				  struct kvm_s390_mem_op *mop)
4671 {
4672 	void __user *uaddr = (void __user *)mop->buf;
4673 	void *tmpbuf = NULL;
4674 	int r = 0;
4675 	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
4676 				    | KVM_S390_MEMOP_F_CHECK_ONLY;
4677 
4678 	if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
4679 		return -EINVAL;
4680 
4681 	if (mop->size > MEM_OP_MAX_SIZE)
4682 		return -E2BIG;
4683 
4684 	if (kvm_s390_pv_cpu_is_protected(vcpu))
4685 		return -EINVAL;
4686 
4687 	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
4688 		tmpbuf = vmalloc(mop->size);
4689 		if (!tmpbuf)
4690 			return -ENOMEM;
4691 	}
4692 
4693 	switch (mop->op) {
4694 	case KVM_S390_MEMOP_LOGICAL_READ:
4695 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4696 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4697 					    mop->size, GACC_FETCH);
4698 			break;
4699 		}
4700 		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4701 		if (r == 0) {
4702 			if (copy_to_user(uaddr, tmpbuf, mop->size))
4703 				r = -EFAULT;
4704 		}
4705 		break;
4706 	case KVM_S390_MEMOP_LOGICAL_WRITE:
4707 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4708 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4709 					    mop->size, GACC_STORE);
4710 			break;
4711 		}
4712 		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4713 			r = -EFAULT;
4714 			break;
4715 		}
4716 		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4717 		break;
4718 	}
4719 
4720 	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4721 		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
4722 
4723 	vfree(tmpbuf);
4724 	return r;
4725 }
4726 
4727 static long kvm_s390_guest_memsida_op(struct kvm_vcpu *vcpu,
4728 				      struct kvm_s390_mem_op *mop)
4729 {
4730 	int r, srcu_idx;
4731 
4732 	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4733 
4734 	switch (mop->op) {
4735 	case KVM_S390_MEMOP_LOGICAL_READ:
4736 	case KVM_S390_MEMOP_LOGICAL_WRITE:
4737 		r = kvm_s390_guest_mem_op(vcpu, mop);
4738 		break;
4739 	case KVM_S390_MEMOP_SIDA_READ:
4740 	case KVM_S390_MEMOP_SIDA_WRITE:
4741 		/* we are locked against sida going away by the vcpu->mutex */
4742 		r = kvm_s390_guest_sida_op(vcpu, mop);
4743 		break;
4744 	default:
4745 		r = -EINVAL;
4746 	}
4747 
4748 	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
4749 	return r;
4750 }
4751 
4752 long kvm_arch_vcpu_async_ioctl(struct file *filp,
4753 			       unsigned int ioctl, unsigned long arg)
4754 {
4755 	struct kvm_vcpu *vcpu = filp->private_data;
4756 	void __user *argp = (void __user *)arg;
4757 
4758 	switch (ioctl) {
4759 	case KVM_S390_IRQ: {
4760 		struct kvm_s390_irq s390irq;
4761 
4762 		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4763 			return -EFAULT;
4764 		return kvm_s390_inject_vcpu(vcpu, &s390irq);
4765 	}
4766 	case KVM_S390_INTERRUPT: {
4767 		struct kvm_s390_interrupt s390int;
4768 		struct kvm_s390_irq s390irq = {};
4769 
4770 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
4771 			return -EFAULT;
4772 		if (s390int_to_s390irq(&s390int, &s390irq))
4773 			return -EINVAL;
4774 		return kvm_s390_inject_vcpu(vcpu, &s390irq);
4775 	}
4776 	}
4777 	return -ENOIOCTLCMD;
4778 }
4779 
4780 long kvm_arch_vcpu_ioctl(struct file *filp,
4781 			 unsigned int ioctl, unsigned long arg)
4782 {
4783 	struct kvm_vcpu *vcpu = filp->private_data;
4784 	void __user *argp = (void __user *)arg;
4785 	int idx;
4786 	long r;
4787 	u16 rc, rrc;
4788 
4789 	vcpu_load(vcpu);
4790 
4791 	switch (ioctl) {
4792 	case KVM_S390_STORE_STATUS:
4793 		idx = srcu_read_lock(&vcpu->kvm->srcu);
4794 		r = kvm_s390_store_status_unloaded(vcpu, arg);
4795 		srcu_read_unlock(&vcpu->kvm->srcu, idx);
4796 		break;
4797 	case KVM_S390_SET_INITIAL_PSW: {
4798 		psw_t psw;
4799 
4800 		r = -EFAULT;
4801 		if (copy_from_user(&psw, argp, sizeof(psw)))
4802 			break;
4803 		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4804 		break;
4805 	}
4806 	case KVM_S390_CLEAR_RESET:
4807 		r = 0;
4808 		kvm_arch_vcpu_ioctl_clear_reset(vcpu);
4809 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4810 			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4811 					  UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
4812 			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
4813 				   rc, rrc);
4814 		}
4815 		break;
4816 	case KVM_S390_INITIAL_RESET:
4817 		r = 0;
4818 		kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4819 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4820 			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4821 					  UVC_CMD_CPU_RESET_INITIAL,
4822 					  &rc, &rrc);
4823 			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
4824 				   rc, rrc);
4825 		}
4826 		break;
4827 	case KVM_S390_NORMAL_RESET:
4828 		r = 0;
4829 		kvm_arch_vcpu_ioctl_normal_reset(vcpu);
4830 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4831 			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4832 					  UVC_CMD_CPU_RESET, &rc, &rrc);
4833 			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
4834 				   rc, rrc);
4835 		}
4836 		break;
4837 	case KVM_SET_ONE_REG:
4838 	case KVM_GET_ONE_REG: {
4839 		struct kvm_one_reg reg;
4840 		r = -EINVAL;
4841 		if (kvm_s390_pv_cpu_is_protected(vcpu))
4842 			break;
4843 		r = -EFAULT;
4844 		if (copy_from_user(&reg, argp, sizeof(reg)))
4845 			break;
4846 		if (ioctl == KVM_SET_ONE_REG)
4847 			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4848 		else
4849 			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4850 		break;
4851 	}
4852 #ifdef CONFIG_KVM_S390_UCONTROL
4853 	case KVM_S390_UCAS_MAP: {
4854 		struct kvm_s390_ucas_mapping ucasmap;
4855 
4856 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4857 			r = -EFAULT;
4858 			break;
4859 		}
4860 
4861 		if (!kvm_is_ucontrol(vcpu->kvm)) {
4862 			r = -EINVAL;
4863 			break;
4864 		}
4865 
4866 		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4867 				     ucasmap.vcpu_addr, ucasmap.length);
4868 		break;
4869 	}
4870 	case KVM_S390_UCAS_UNMAP: {
4871 		struct kvm_s390_ucas_mapping ucasmap;
4872 
4873 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4874 			r = -EFAULT;
4875 			break;
4876 		}
4877 
4878 		if (!kvm_is_ucontrol(vcpu->kvm)) {
4879 			r = -EINVAL;
4880 			break;
4881 		}
4882 
4883 		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
4884 			ucasmap.length);
4885 		break;
4886 	}
4887 #endif
4888 	case KVM_S390_VCPU_FAULT: {
4889 		r = gmap_fault(vcpu->arch.gmap, arg, 0);
4890 		break;
4891 	}
4892 	case KVM_ENABLE_CAP:
4893 	{
4894 		struct kvm_enable_cap cap;
4895 		r = -EFAULT;
4896 		if (copy_from_user(&cap, argp, sizeof(cap)))
4897 			break;
4898 		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4899 		break;
4900 	}
4901 	case KVM_S390_MEM_OP: {
4902 		struct kvm_s390_mem_op mem_op;
4903 
4904 		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4905 			r = kvm_s390_guest_memsida_op(vcpu, &mem_op);
4906 		else
4907 			r = -EFAULT;
4908 		break;
4909 	}
4910 	case KVM_S390_SET_IRQ_STATE: {
4911 		struct kvm_s390_irq_state irq_state;
4912 
4913 		r = -EFAULT;
4914 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4915 			break;
4916 		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4917 		    irq_state.len == 0 ||
4918 		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4919 			r = -EINVAL;
4920 			break;
4921 		}
4922 		/* do not use irq_state.flags, it will break old QEMUs */
4923 		r = kvm_s390_set_irq_state(vcpu,
4924 					   (void __user *) irq_state.buf,
4925 					   irq_state.len);
4926 		break;
4927 	}
4928 	case KVM_S390_GET_IRQ_STATE: {
4929 		struct kvm_s390_irq_state irq_state;
4930 
4931 		r = -EFAULT;
4932 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4933 			break;
4934 		if (irq_state.len == 0) {
4935 			r = -EINVAL;
4936 			break;
4937 		}
4938 		/* do not use irq_state.flags, it will break old QEMUs */
4939 		r = kvm_s390_get_irq_state(vcpu,
4940 					   (__u8 __user *)  irq_state.buf,
4941 					   irq_state.len);
4942 		break;
4943 	}
4944 	default:
4945 		r = -ENOTTY;
4946 	}
4947 
4948 	vcpu_put(vcpu);
4949 	return r;
4950 }
4951 
4952 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
4953 {
4954 #ifdef CONFIG_KVM_S390_UCONTROL
4955 	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
4956 		 && (kvm_is_ucontrol(vcpu->kvm))) {
4957 		vmf->page = virt_to_page(vcpu->arch.sie_block);
4958 		get_page(vmf->page);
4959 		return 0;
4960 	}
4961 #endif
4962 	return VM_FAULT_SIGBUS;
4963 }
4964 
4965 /* Section: memory related */
4966 int kvm_arch_prepare_memory_region(struct kvm *kvm,
4967 				   struct kvm_memory_slot *memslot,
4968 				   const struct kvm_userspace_memory_region *mem,
4969 				   enum kvm_mr_change change)
4970 {
4971 	/* A few sanity checks. We can have memory slots which have to be
4972 	   located/ended at a segment boundary (1MB). The memory in userland is
4973 	   ok to be fragmented into various different vmas. It is okay to mmap()
4974 	   and munmap() stuff in this slot after doing this call at any time */
4975 
4976 	if (mem->userspace_addr & 0xffffful)
4977 		return -EINVAL;
4978 
4979 	if (mem->memory_size & 0xffffful)
4980 		return -EINVAL;
4981 
4982 	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
4983 		return -EINVAL;
4984 
4985 	/* When we are protected, we should not change the memory slots */
4986 	if (kvm_s390_pv_get_handle(kvm))
4987 		return -EINVAL;
4988 	return 0;
4989 }
4990 
4991 void kvm_arch_commit_memory_region(struct kvm *kvm,
4992 				const struct kvm_userspace_memory_region *mem,
4993 				struct kvm_memory_slot *old,
4994 				const struct kvm_memory_slot *new,
4995 				enum kvm_mr_change change)
4996 {
4997 	int rc = 0;
4998 
4999 	switch (change) {
5000 	case KVM_MR_DELETE:
5001 		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5002 					old->npages * PAGE_SIZE);
5003 		break;
5004 	case KVM_MR_MOVE:
5005 		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5006 					old->npages * PAGE_SIZE);
5007 		if (rc)
5008 			break;
5009 		fallthrough;
5010 	case KVM_MR_CREATE:
5011 		rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
5012 				      mem->guest_phys_addr, mem->memory_size);
5013 		break;
5014 	case KVM_MR_FLAGS_ONLY:
5015 		break;
5016 	default:
5017 		WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
5018 	}
5019 	if (rc)
5020 		pr_warn("failed to commit memory region\n");
5021 	return;
5022 }
5023 
5024 static inline unsigned long nonhyp_mask(int i)
5025 {
5026 	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
5027 
5028 	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
5029 }
5030 
5031 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
5032 {
5033 	vcpu->valid_wakeup = false;
5034 }
5035 
5036 static int __init kvm_s390_init(void)
5037 {
5038 	int i;
5039 
5040 	if (!sclp.has_sief2) {
5041 		pr_info("SIE is not available\n");
5042 		return -ENODEV;
5043 	}
5044 
5045 	if (nested && hpage) {
5046 		pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
5047 		return -EINVAL;
5048 	}
5049 
5050 	for (i = 0; i < 16; i++)
5051 		kvm_s390_fac_base[i] |=
5052 			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
5053 
5054 	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
5055 }
5056 
5057 static void __exit kvm_s390_exit(void)
5058 {
5059 	kvm_exit();
5060 }
5061 
5062 module_init(kvm_s390_init);
5063 module_exit(kvm_s390_exit);
5064 
5065 /*
5066  * Enable autoloading of the kvm module.
5067  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
5068  * since x86 takes a different approach.
5069  */
5070 #include <linux/miscdevice.h>
5071 MODULE_ALIAS_MISCDEV(KVM_MINOR);
5072 MODULE_ALIAS("devname:kvm");
5073