xref: /openbmc/linux/arch/s390/kvm/kvm-s390.c (revision 715f23b6)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * hosting IBM Z kernel virtual machines (s390x)
4  *
5  * Copyright IBM Corp. 2008, 2018
6  *
7  *    Author(s): Carsten Otte <cotte@de.ibm.com>
8  *               Christian Borntraeger <borntraeger@de.ibm.com>
9  *               Heiko Carstens <heiko.carstens@de.ibm.com>
10  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
11  *               Jason J. Herne <jjherne@us.ibm.com>
12  */
13 
14 #define KMSG_COMPONENT "kvm-s390"
15 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
16 
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/moduleparam.h>
27 #include <linux/random.h>
28 #include <linux/slab.h>
29 #include <linux/timer.h>
30 #include <linux/vmalloc.h>
31 #include <linux/bitmap.h>
32 #include <linux/sched/signal.h>
33 #include <linux/string.h>
34 
35 #include <asm/asm-offsets.h>
36 #include <asm/lowcore.h>
37 #include <asm/stp.h>
38 #include <asm/pgtable.h>
39 #include <asm/gmap.h>
40 #include <asm/nmi.h>
41 #include <asm/switch_to.h>
42 #include <asm/isc.h>
43 #include <asm/sclp.h>
44 #include <asm/cpacf.h>
45 #include <asm/timex.h>
46 #include <asm/ap.h>
47 #include "kvm-s390.h"
48 #include "gaccess.h"
49 
50 #define CREATE_TRACE_POINTS
51 #include "trace.h"
52 #include "trace-s390.h"
53 
54 #define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
55 #define LOCAL_IRQS 32
56 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
57 			   (KVM_MAX_VCPUS + LOCAL_IRQS))
58 
59 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
60 #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
61 
62 struct kvm_stats_debugfs_item debugfs_entries[] = {
63 	{ "userspace_handled", VCPU_STAT(exit_userspace) },
64 	{ "exit_null", VCPU_STAT(exit_null) },
65 	{ "exit_validity", VCPU_STAT(exit_validity) },
66 	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
67 	{ "exit_external_request", VCPU_STAT(exit_external_request) },
68 	{ "exit_io_request", VCPU_STAT(exit_io_request) },
69 	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
70 	{ "exit_instruction", VCPU_STAT(exit_instruction) },
71 	{ "exit_pei", VCPU_STAT(exit_pei) },
72 	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
73 	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
74 	{ "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
75 	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
76 	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
77 	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
78 	{ "halt_no_poll_steal", VCPU_STAT(halt_no_poll_steal) },
79 	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
80 	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
81 	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
82 	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
83 	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
84 	{ "deliver_ckc", VCPU_STAT(deliver_ckc) },
85 	{ "deliver_cputm", VCPU_STAT(deliver_cputm) },
86 	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
87 	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
88 	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
89 	{ "deliver_virtio", VCPU_STAT(deliver_virtio) },
90 	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
91 	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
92 	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
93 	{ "deliver_program", VCPU_STAT(deliver_program) },
94 	{ "deliver_io", VCPU_STAT(deliver_io) },
95 	{ "deliver_machine_check", VCPU_STAT(deliver_machine_check) },
96 	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
97 	{ "inject_ckc", VCPU_STAT(inject_ckc) },
98 	{ "inject_cputm", VCPU_STAT(inject_cputm) },
99 	{ "inject_external_call", VCPU_STAT(inject_external_call) },
100 	{ "inject_float_mchk", VM_STAT(inject_float_mchk) },
101 	{ "inject_emergency_signal", VCPU_STAT(inject_emergency_signal) },
102 	{ "inject_io", VM_STAT(inject_io) },
103 	{ "inject_mchk", VCPU_STAT(inject_mchk) },
104 	{ "inject_pfault_done", VM_STAT(inject_pfault_done) },
105 	{ "inject_program", VCPU_STAT(inject_program) },
106 	{ "inject_restart", VCPU_STAT(inject_restart) },
107 	{ "inject_service_signal", VM_STAT(inject_service_signal) },
108 	{ "inject_set_prefix", VCPU_STAT(inject_set_prefix) },
109 	{ "inject_stop_signal", VCPU_STAT(inject_stop_signal) },
110 	{ "inject_pfault_init", VCPU_STAT(inject_pfault_init) },
111 	{ "inject_virtio", VM_STAT(inject_virtio) },
112 	{ "instruction_epsw", VCPU_STAT(instruction_epsw) },
113 	{ "instruction_gs", VCPU_STAT(instruction_gs) },
114 	{ "instruction_io_other", VCPU_STAT(instruction_io_other) },
115 	{ "instruction_lpsw", VCPU_STAT(instruction_lpsw) },
116 	{ "instruction_lpswe", VCPU_STAT(instruction_lpswe) },
117 	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
118 	{ "instruction_ptff", VCPU_STAT(instruction_ptff) },
119 	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
120 	{ "instruction_sck", VCPU_STAT(instruction_sck) },
121 	{ "instruction_sckpf", VCPU_STAT(instruction_sckpf) },
122 	{ "instruction_spx", VCPU_STAT(instruction_spx) },
123 	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
124 	{ "instruction_stap", VCPU_STAT(instruction_stap) },
125 	{ "instruction_iske", VCPU_STAT(instruction_iske) },
126 	{ "instruction_ri", VCPU_STAT(instruction_ri) },
127 	{ "instruction_rrbe", VCPU_STAT(instruction_rrbe) },
128 	{ "instruction_sske", VCPU_STAT(instruction_sske) },
129 	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
130 	{ "instruction_essa", VCPU_STAT(instruction_essa) },
131 	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
132 	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
133 	{ "instruction_tb", VCPU_STAT(instruction_tb) },
134 	{ "instruction_tpi", VCPU_STAT(instruction_tpi) },
135 	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
136 	{ "instruction_tsch", VCPU_STAT(instruction_tsch) },
137 	{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
138 	{ "instruction_sie", VCPU_STAT(instruction_sie) },
139 	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
140 	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
141 	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
142 	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
143 	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
144 	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
145 	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
146 	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
147 	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
148 	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
149 	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
150 	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
151 	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
152 	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
153 	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
154 	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
155 	{ "instruction_diag_10", VCPU_STAT(diagnose_10) },
156 	{ "instruction_diag_44", VCPU_STAT(diagnose_44) },
157 	{ "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
158 	{ "diag_9c_ignored", VCPU_STAT(diagnose_9c_ignored) },
159 	{ "instruction_diag_258", VCPU_STAT(diagnose_258) },
160 	{ "instruction_diag_308", VCPU_STAT(diagnose_308) },
161 	{ "instruction_diag_500", VCPU_STAT(diagnose_500) },
162 	{ "instruction_diag_other", VCPU_STAT(diagnose_other) },
163 	{ NULL }
164 };
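/*
 * Each entry above pairs a debugfs file name with the offset of a counter in
 * struct kvm_vcpu (VCPU_STAT) or struct kvm (VM_STAT).  As a sketch, assuming
 * the generic KVM debugfs layout, the first entry expands to
 *
 *	{ "userspace_handled",
 *	  offsetof(struct kvm_vcpu, stat.exit_userspace), KVM_STAT_VCPU },
 *
 * and the counter shows up under /sys/kernel/debug/kvm/ once debugfs is
 * mounted.
 */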
165 
166 struct kvm_s390_tod_clock_ext {
167 	__u8 epoch_idx;
168 	__u64 tod;
169 	__u8 reserved[7];
170 } __packed;
171 
172 /* allow nested virtualization in KVM (if enabled by user space) */
173 static int nested;
174 module_param(nested, int, S_IRUGO);
175 MODULE_PARM_DESC(nested, "Nested virtualization support");
176 
177 /* allow 1m huge page guest backing, if !nested */
178 static int hpage;
179 module_param(hpage, int, 0444);
180 MODULE_PARM_DESC(hpage, "1m huge page backing support");
181 
182 /* maximum percentage of steal time for polling.  >100 is treated like 100 */
183 static u8 halt_poll_max_steal = 10;
184 module_param(halt_poll_max_steal, byte, 0644);
185 MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
186 
187 /*
188  * For now we handle at most 16 double words as this is what the s390 base
189  * kernel handles and stores in the prefix page. If we ever need to go beyond
190  * this, it will require code changes, but the external uapi can stay.
191  */
192 #define SIZE_INTERNAL 16
193 
194 /*
195  * Base facility mask that provides the default set of facilities. It consists
196  * of the bits defined in FACILITIES_KVM plus the non-hypervisor-managed bits.
197  */
198 static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
199 /*
200  * Extended facility mask. It consists of the bits in FACILITIES_KVM_CPUMODEL
201  * and lists the facilities that can additionally be enabled via a cpu model.
202  */
203 static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
204 
205 static unsigned long kvm_s390_fac_size(void)
206 {
207 	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
208 	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
209 	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
210 		sizeof(S390_lowcore.stfle_fac_list));
211 
212 	return SIZE_INTERNAL;
213 }
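/*
 * For illustration: SIZE_INTERNAL doublewords of 64 bits each cover
 * 16 * 64 = 1024 facility bits.  The BUILD_BUG_ONs above guarantee that this
 * internal size never exceeds the architected mask/list sizes or the
 * facility-list copy kept in the lowcore.
 */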
214 
215 /* available cpu features supported by kvm */
216 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
217 /* available subfunctions indicated via query / "test bit" */
218 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
219 
220 static struct gmap_notifier gmap_notifier;
221 static struct gmap_notifier vsie_gmap_notifier;
222 debug_info_t *kvm_s390_dbf;
223 
224 /* Section: not file related */
225 int kvm_arch_hardware_enable(void)
226 {
227 	/* every s390 is virtualization enabled ;-) */
228 	return 0;
229 }
230 
231 int kvm_arch_check_processor_compat(void)
232 {
233 	return 0;
234 }
235 
236 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
237 			      unsigned long end);
238 
239 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
240 {
241 	u8 delta_idx = 0;
242 
243 	/*
244 	 * The TOD clock jumps by delta; we compensate for this by adding
245 	 * -delta to the epoch.
246 	 */
247 	delta = -delta;
248 
249 	/* sign-extension - we're adding to signed values below */
250 	if ((s64)delta < 0)
251 		delta_idx = -1;
252 
253 	scb->epoch += delta;
254 	if (scb->ecd & ECD_MEF) {
255 		scb->epdx += delta_idx;
256 		if (scb->epoch < delta)
257 			scb->epdx += 1;
258 	}
259 }
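/*
 * Worked example for the 128-bit update above (values chosen purely for
 * illustration): assume the TOD clock jumped forward by 0x10, so the
 * compensation value is delta = -0x10 and delta_idx = -1 (sign extension).
 *
 *   epoch = 0x0000000000000005:
 *	epoch += delta  -> 0xfffffffffffffff5  (sum >= delta, no carry)
 *	epdx  += -1 + 0 -> the borrow correctly propagates into epdx
 *
 *   epoch = 0x0000000000000100:
 *	epoch += delta  -> 0x00000000000000f0  (sum < delta, carry)
 *	epdx  += -1 + 1 -> epdx is unchanged, as expected
 */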
260 
261 /*
262  * This callback is executed during stop_machine(). All CPUs are therefore
263  * temporarily stopped. In order not to change guest behavior, we have to
264  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
265  * so that a CPU is never stopped in the middle of an epoch calculation.
266  */
267 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
268 			  void *v)
269 {
270 	struct kvm *kvm;
271 	struct kvm_vcpu *vcpu;
272 	int i;
273 	unsigned long long *delta = v;
274 
275 	list_for_each_entry(kvm, &vm_list, vm_list) {
276 		kvm_for_each_vcpu(i, vcpu, kvm) {
277 			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
278 			if (i == 0) {
279 				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
280 				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
281 			}
282 			if (vcpu->arch.cputm_enabled)
283 				vcpu->arch.cputm_start += *delta;
284 			if (vcpu->arch.vsie_block)
285 				kvm_clock_sync_scb(vcpu->arch.vsie_block,
286 						   *delta);
287 		}
288 	}
289 	return NOTIFY_OK;
290 }
291 
292 static struct notifier_block kvm_clock_notifier = {
293 	.notifier_call = kvm_clock_sync,
294 };
295 
296 int kvm_arch_hardware_setup(void)
297 {
298 	gmap_notifier.notifier_call = kvm_gmap_notifier;
299 	gmap_register_pte_notifier(&gmap_notifier);
300 	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
301 	gmap_register_pte_notifier(&vsie_gmap_notifier);
302 	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
303 				       &kvm_clock_notifier);
304 	return 0;
305 }
306 
307 void kvm_arch_hardware_unsetup(void)
308 {
309 	gmap_unregister_pte_notifier(&gmap_notifier);
310 	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
311 	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
312 					 &kvm_clock_notifier);
313 }
314 
315 static void allow_cpu_feat(unsigned long nr)
316 {
317 	set_bit_inv(nr, kvm_s390_available_cpu_feat);
318 }
319 
320 static inline int plo_test_bit(unsigned char nr)
321 {
322 	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
323 	int cc;
324 
325 	asm volatile(
326 		/* Parameter registers are ignored for "test bit" */
327 		"	plo	0,0,0,0(0)\n"
328 		"	ipm	%0\n"
329 		"	srl	%0,28\n"
330 		: "=d" (cc)
331 		: "d" (r0)
332 		: "cc");
333 	return cc == 0;
334 }
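/*
 * Illustration of the query above (based on the PERFORM LOCKED OPERATION
 * definition; treat the details as a sketch): setting bit 0x100 in r0 puts
 * PLO into "test bit" mode, so the instruction merely reports via the
 * condition code whether function code 'nr' is installed (cc == 0) instead
 * of performing a locked operation.  The loop in kvm_s390_cpu_feat_init()
 * below packs the answers MSB-first, e.g. for function code 3:
 *
 *	plo[3 >> 3] |= 0x80 >> (3 & 7);		i.e. plo[0] |= 0x10
 */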
335 
336 static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
337 {
338 	register unsigned long r0 asm("0") = 0;	/* query function */
339 	register unsigned long r1 asm("1") = (unsigned long) query;
340 
341 	asm volatile(
342 		/* Parameter regs are ignored */
343 		"	.insn	rrf,%[opc] << 16,2,4,6,0\n"
344 		:
345 		: "d" (r0), "a" (r1), [opc] "i" (opcode)
346 		: "cc", "memory");
347 }
348 
349 #define INSN_SORTL 0xb938
350 #define INSN_DFLTCC 0xb939
351 
352 static void kvm_s390_cpu_feat_init(void)
353 {
354 	int i;
355 
356 	for (i = 0; i < 256; ++i) {
357 		if (plo_test_bit(i))
358 			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
359 	}
360 
361 	if (test_facility(28)) /* TOD-clock steering */
362 		ptff(kvm_s390_available_subfunc.ptff,
363 		     sizeof(kvm_s390_available_subfunc.ptff),
364 		     PTFF_QAF);
365 
366 	if (test_facility(17)) { /* MSA */
367 		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
368 			      kvm_s390_available_subfunc.kmac);
369 		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
370 			      kvm_s390_available_subfunc.kmc);
371 		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
372 			      kvm_s390_available_subfunc.km);
373 		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
374 			      kvm_s390_available_subfunc.kimd);
375 		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
376 			      kvm_s390_available_subfunc.klmd);
377 	}
378 	if (test_facility(76)) /* MSA3 */
379 		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
380 			      kvm_s390_available_subfunc.pckmo);
381 	if (test_facility(77)) { /* MSA4 */
382 		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
383 			      kvm_s390_available_subfunc.kmctr);
384 		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
385 			      kvm_s390_available_subfunc.kmf);
386 		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
387 			      kvm_s390_available_subfunc.kmo);
388 		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
389 			      kvm_s390_available_subfunc.pcc);
390 	}
391 	if (test_facility(57)) /* MSA5 */
392 		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
393 			      kvm_s390_available_subfunc.ppno);
394 
395 	if (test_facility(146)) /* MSA8 */
396 		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
397 			      kvm_s390_available_subfunc.kma);
398 
399 	if (test_facility(155)) /* MSA9 */
400 		__cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
401 			      kvm_s390_available_subfunc.kdsa);
402 
403 	if (test_facility(150)) /* SORTL */
404 		__insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
405 
406 	if (test_facility(151)) /* DFLTCC */
407 		__insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
408 
409 	if (MACHINE_HAS_ESOP)
410 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
411 	/*
412 	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
413 	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
414 	 */
415 	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
416 	    !test_facility(3) || !nested)
417 		return;
418 	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
419 	if (sclp.has_64bscao)
420 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
421 	if (sclp.has_siif)
422 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
423 	if (sclp.has_gpere)
424 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
425 	if (sclp.has_gsls)
426 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
427 	if (sclp.has_ib)
428 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
429 	if (sclp.has_cei)
430 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
431 	if (sclp.has_ibs)
432 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
433 	if (sclp.has_kss)
434 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
435 	/*
436 	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
437 	 * all skey handling functions read/set the skey from the PGSTE
438 	 * instead of the real storage key.
439 	 *
440 	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
441 	 * pages be detected as preserved although they are resident.
442 	 *
443 	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
444 	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
445 	 *
446 	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
447 	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
448 	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
449 	 *
450 	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
451 	 * cannot easily shadow the SCA because of the ipte lock.
452 	 */
453 }
454 
455 int kvm_arch_init(void *opaque)
456 {
457 	int rc = -ENOMEM;
458 
459 	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
460 	if (!kvm_s390_dbf)
461 		return -ENOMEM;
462 
463 	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view))
464 		goto out;
465 
466 	kvm_s390_cpu_feat_init();
467 
468 	/* Register floating interrupt controller interface. */
469 	rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
470 	if (rc) {
471 		pr_err("A FLIC registration call failed with rc=%d\n", rc);
472 		goto out;
473 	}
474 
475 	rc = kvm_s390_gib_init(GAL_ISC);
476 	if (rc)
477 		goto out;
478 
479 	return 0;
480 
481 out:
482 	kvm_arch_exit();
483 	return rc;
484 }
485 
486 void kvm_arch_exit(void)
487 {
488 	kvm_s390_gib_destroy();
489 	debug_unregister(kvm_s390_dbf);
490 }
491 
492 /* Section: device related */
493 long kvm_arch_dev_ioctl(struct file *filp,
494 			unsigned int ioctl, unsigned long arg)
495 {
496 	if (ioctl == KVM_S390_ENABLE_SIE)
497 		return s390_enable_sie();
498 	return -EINVAL;
499 }
500 
501 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
502 {
503 	int r;
504 
505 	switch (ext) {
506 	case KVM_CAP_S390_PSW:
507 	case KVM_CAP_S390_GMAP:
508 	case KVM_CAP_SYNC_MMU:
509 #ifdef CONFIG_KVM_S390_UCONTROL
510 	case KVM_CAP_S390_UCONTROL:
511 #endif
512 	case KVM_CAP_ASYNC_PF:
513 	case KVM_CAP_SYNC_REGS:
514 	case KVM_CAP_ONE_REG:
515 	case KVM_CAP_ENABLE_CAP:
516 	case KVM_CAP_S390_CSS_SUPPORT:
517 	case KVM_CAP_IOEVENTFD:
518 	case KVM_CAP_DEVICE_CTRL:
519 	case KVM_CAP_S390_IRQCHIP:
520 	case KVM_CAP_VM_ATTRIBUTES:
521 	case KVM_CAP_MP_STATE:
522 	case KVM_CAP_IMMEDIATE_EXIT:
523 	case KVM_CAP_S390_INJECT_IRQ:
524 	case KVM_CAP_S390_USER_SIGP:
525 	case KVM_CAP_S390_USER_STSI:
526 	case KVM_CAP_S390_SKEYS:
527 	case KVM_CAP_S390_IRQ_STATE:
528 	case KVM_CAP_S390_USER_INSTR0:
529 	case KVM_CAP_S390_CMMA_MIGRATION:
530 	case KVM_CAP_S390_AIS:
531 	case KVM_CAP_S390_AIS_MIGRATION:
532 		r = 1;
533 		break;
534 	case KVM_CAP_S390_HPAGE_1M:
535 		r = 0;
536 		if (hpage && !kvm_is_ucontrol(kvm))
537 			r = 1;
538 		break;
539 	case KVM_CAP_S390_MEM_OP:
540 		r = MEM_OP_MAX_SIZE;
541 		break;
542 	case KVM_CAP_NR_VCPUS:
543 	case KVM_CAP_MAX_VCPUS:
544 	case KVM_CAP_MAX_VCPU_ID:
545 		r = KVM_S390_BSCA_CPU_SLOTS;
546 		if (!kvm_s390_use_sca_entries())
547 			r = KVM_MAX_VCPUS;
548 		else if (sclp.has_esca && sclp.has_64bscao)
549 			r = KVM_S390_ESCA_CPU_SLOTS;
550 		break;
551 	case KVM_CAP_S390_COW:
552 		r = MACHINE_HAS_ESOP;
553 		break;
554 	case KVM_CAP_S390_VECTOR_REGISTERS:
555 		r = MACHINE_HAS_VX;
556 		break;
557 	case KVM_CAP_S390_RI:
558 		r = test_facility(64);
559 		break;
560 	case KVM_CAP_S390_GS:
561 		r = test_facility(133);
562 		break;
563 	case KVM_CAP_S390_BPB:
564 		r = test_facility(82);
565 		break;
566 	default:
567 		r = 0;
568 	}
569 	return r;
570 }
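/*
 * Minimal userspace sketch (illustrative only; error handling omitted and
 * use_mem_op() is a hypothetical caller-side helper) of how the values
 * returned above are consumed.  vm_fd is assumed to come from KVM_CREATE_VM:
 *
 *	#include <linux/kvm.h>
 *	#include <sys/ioctl.h>
 *
 *	int max = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_MEM_OP);
 *	if (max > 0)
 *		use_mem_op(vm_fd, max);	// transfers of up to 'max' bytes
 *
 * A return value of 0 means the capability is not available; positive values
 * carry extra information such as the limit above or the vcpu count for
 * KVM_CAP_MAX_VCPUS.
 */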
571 
572 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
573 				    struct kvm_memory_slot *memslot)
574 {
575 	int i;
576 	gfn_t cur_gfn, last_gfn;
577 	unsigned long gaddr, vmaddr;
578 	struct gmap *gmap = kvm->arch.gmap;
579 	DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
580 
581 	/* Loop over all guest segments */
582 	cur_gfn = memslot->base_gfn;
583 	last_gfn = memslot->base_gfn + memslot->npages;
584 	for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
585 		gaddr = gfn_to_gpa(cur_gfn);
586 		vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
587 		if (kvm_is_error_hva(vmaddr))
588 			continue;
589 
590 		bitmap_zero(bitmap, _PAGE_ENTRIES);
591 		gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
592 		for (i = 0; i < _PAGE_ENTRIES; i++) {
593 			if (test_bit(i, bitmap))
594 				mark_page_dirty(kvm, cur_gfn + i);
595 		}
596 
597 		if (fatal_signal_pending(current))
598 			return;
599 		cond_resched();
600 	}
601 }
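/*
 * Note on granularity (an informal sketch of the walk above): the loop
 * advances in steps of _PAGE_ENTRIES pages, i.e. one page table / one 1 MB
 * guest segment per iteration, and gmap_sync_dirty_log_pmd() fills the local
 * bitmap with the dirty state of the 4 KB pages within that segment before
 * they are reported through mark_page_dirty().
 */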
602 
603 /* Section: vm related */
604 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
605 
606 /*
607  * Get (and clear) the dirty memory log for a memory slot.
608  */
609 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
610 			       struct kvm_dirty_log *log)
611 {
612 	int r;
613 	unsigned long n;
614 	struct kvm_memslots *slots;
615 	struct kvm_memory_slot *memslot;
616 	int is_dirty = 0;
617 
618 	if (kvm_is_ucontrol(kvm))
619 		return -EINVAL;
620 
621 	mutex_lock(&kvm->slots_lock);
622 
623 	r = -EINVAL;
624 	if (log->slot >= KVM_USER_MEM_SLOTS)
625 		goto out;
626 
627 	slots = kvm_memslots(kvm);
628 	memslot = id_to_memslot(slots, log->slot);
629 	r = -ENOENT;
630 	if (!memslot->dirty_bitmap)
631 		goto out;
632 
633 	kvm_s390_sync_dirty_log(kvm, memslot);
634 	r = kvm_get_dirty_log(kvm, log, &is_dirty);
635 	if (r)
636 		goto out;
637 
638 	/* Clear the dirty log */
639 	if (is_dirty) {
640 		n = kvm_dirty_bitmap_bytes(memslot);
641 		memset(memslot->dirty_bitmap, 0, n);
642 	}
643 	r = 0;
644 out:
645 	mutex_unlock(&kvm->slots_lock);
646 	return r;
647 }
648 
649 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
650 {
651 	unsigned int i;
652 	struct kvm_vcpu *vcpu;
653 
654 	kvm_for_each_vcpu(i, vcpu, kvm) {
655 		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
656 	}
657 }
658 
659 int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
660 {
661 	int r;
662 
663 	if (cap->flags)
664 		return -EINVAL;
665 
666 	switch (cap->cap) {
667 	case KVM_CAP_S390_IRQCHIP:
668 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
669 		kvm->arch.use_irqchip = 1;
670 		r = 0;
671 		break;
672 	case KVM_CAP_S390_USER_SIGP:
673 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
674 		kvm->arch.user_sigp = 1;
675 		r = 0;
676 		break;
677 	case KVM_CAP_S390_VECTOR_REGISTERS:
678 		mutex_lock(&kvm->lock);
679 		if (kvm->created_vcpus) {
680 			r = -EBUSY;
681 		} else if (MACHINE_HAS_VX) {
682 			set_kvm_facility(kvm->arch.model.fac_mask, 129);
683 			set_kvm_facility(kvm->arch.model.fac_list, 129);
684 			if (test_facility(134)) {
685 				set_kvm_facility(kvm->arch.model.fac_mask, 134);
686 				set_kvm_facility(kvm->arch.model.fac_list, 134);
687 			}
688 			if (test_facility(135)) {
689 				set_kvm_facility(kvm->arch.model.fac_mask, 135);
690 				set_kvm_facility(kvm->arch.model.fac_list, 135);
691 			}
692 			if (test_facility(148)) {
693 				set_kvm_facility(kvm->arch.model.fac_mask, 148);
694 				set_kvm_facility(kvm->arch.model.fac_list, 148);
695 			}
696 			if (test_facility(152)) {
697 				set_kvm_facility(kvm->arch.model.fac_mask, 152);
698 				set_kvm_facility(kvm->arch.model.fac_list, 152);
699 			}
700 			r = 0;
701 		} else
702 			r = -EINVAL;
703 		mutex_unlock(&kvm->lock);
704 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
705 			 r ? "(not available)" : "(success)");
706 		break;
707 	case KVM_CAP_S390_RI:
708 		r = -EINVAL;
709 		mutex_lock(&kvm->lock);
710 		if (kvm->created_vcpus) {
711 			r = -EBUSY;
712 		} else if (test_facility(64)) {
713 			set_kvm_facility(kvm->arch.model.fac_mask, 64);
714 			set_kvm_facility(kvm->arch.model.fac_list, 64);
715 			r = 0;
716 		}
717 		mutex_unlock(&kvm->lock);
718 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
719 			 r ? "(not available)" : "(success)");
720 		break;
721 	case KVM_CAP_S390_AIS:
722 		mutex_lock(&kvm->lock);
723 		if (kvm->created_vcpus) {
724 			r = -EBUSY;
725 		} else {
726 			set_kvm_facility(kvm->arch.model.fac_mask, 72);
727 			set_kvm_facility(kvm->arch.model.fac_list, 72);
728 			r = 0;
729 		}
730 		mutex_unlock(&kvm->lock);
731 		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
732 			 r ? "(not available)" : "(success)");
733 		break;
734 	case KVM_CAP_S390_GS:
735 		r = -EINVAL;
736 		mutex_lock(&kvm->lock);
737 		if (kvm->created_vcpus) {
738 			r = -EBUSY;
739 		} else if (test_facility(133)) {
740 			set_kvm_facility(kvm->arch.model.fac_mask, 133);
741 			set_kvm_facility(kvm->arch.model.fac_list, 133);
742 			r = 0;
743 		}
744 		mutex_unlock(&kvm->lock);
745 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
746 			 r ? "(not available)" : "(success)");
747 		break;
748 	case KVM_CAP_S390_HPAGE_1M:
749 		mutex_lock(&kvm->lock);
750 		if (kvm->created_vcpus)
751 			r = -EBUSY;
752 		else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
753 			r = -EINVAL;
754 		else {
755 			r = 0;
756 			down_write(&kvm->mm->mmap_sem);
757 			kvm->mm->context.allow_gmap_hpage_1m = 1;
758 			up_write(&kvm->mm->mmap_sem);
759 			/*
760 			 * We might have to create fake 4k page
761 			 * tables. To prevent the hardware from working on
762 			 * stale PGSTEs, we emulate the affected instructions.
763 			 */
764 			kvm->arch.use_skf = 0;
765 			kvm->arch.use_pfmfi = 0;
766 		}
767 		mutex_unlock(&kvm->lock);
768 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
769 			 r ? "(not available)" : "(success)");
770 		break;
771 	case KVM_CAP_S390_USER_STSI:
772 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
773 		kvm->arch.user_stsi = 1;
774 		r = 0;
775 		break;
776 	case KVM_CAP_S390_USER_INSTR0:
777 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
778 		kvm->arch.user_instr0 = 1;
779 		icpt_operexc_on_all_vcpus(kvm);
780 		r = 0;
781 		break;
782 	default:
783 		r = -EINVAL;
784 		break;
785 	}
786 	return r;
787 }
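/*
 * Userspace sketch for enabling one of the capabilities handled above
 * (illustrative only; handle_sigp_in_user is a hypothetical flag).
 * Capabilities are typically enabled on the VM fd right after VM creation:
 *
 *	struct kvm_enable_cap cap = {
 *		.cap = KVM_CAP_S390_USER_SIGP,
 *	};
 *
 *	if (!ioctl(vm_fd, KVM_ENABLE_CAP, &cap))
 *		handle_sigp_in_user = true;
 *
 * Capabilities that change the CPU model (vector registers, RI, GS, ...)
 * return -EBUSY once kvm->created_vcpus is non-zero, which is why the
 * ordering relative to KVM_CREATE_VCPU matters.
 */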
788 
789 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
790 {
791 	int ret;
792 
793 	switch (attr->attr) {
794 	case KVM_S390_VM_MEM_LIMIT_SIZE:
795 		ret = 0;
796 		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
797 			 kvm->arch.mem_limit);
798 		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
799 			ret = -EFAULT;
800 		break;
801 	default:
802 		ret = -ENXIO;
803 		break;
804 	}
805 	return ret;
806 }
807 
808 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
809 {
810 	int ret;
811 	unsigned int idx;
812 	switch (attr->attr) {
813 	case KVM_S390_VM_MEM_ENABLE_CMMA:
814 		ret = -ENXIO;
815 		if (!sclp.has_cmma)
816 			break;
817 
818 		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
819 		mutex_lock(&kvm->lock);
820 		if (kvm->created_vcpus)
821 			ret = -EBUSY;
822 		else if (kvm->mm->context.allow_gmap_hpage_1m)
823 			ret = -EINVAL;
824 		else {
825 			kvm->arch.use_cmma = 1;
826 			/* Not compatible with cmma. */
827 			kvm->arch.use_pfmfi = 0;
828 			ret = 0;
829 		}
830 		mutex_unlock(&kvm->lock);
831 		break;
832 	case KVM_S390_VM_MEM_CLR_CMMA:
833 		ret = -ENXIO;
834 		if (!sclp.has_cmma)
835 			break;
836 		ret = -EINVAL;
837 		if (!kvm->arch.use_cmma)
838 			break;
839 
840 		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
841 		mutex_lock(&kvm->lock);
842 		idx = srcu_read_lock(&kvm->srcu);
843 		s390_reset_cmma(kvm->arch.gmap->mm);
844 		srcu_read_unlock(&kvm->srcu, idx);
845 		mutex_unlock(&kvm->lock);
846 		ret = 0;
847 		break;
848 	case KVM_S390_VM_MEM_LIMIT_SIZE: {
849 		unsigned long new_limit;
850 
851 		if (kvm_is_ucontrol(kvm))
852 			return -EINVAL;
853 
854 		if (get_user(new_limit, (u64 __user *)attr->addr))
855 			return -EFAULT;
856 
857 		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
858 		    new_limit > kvm->arch.mem_limit)
859 			return -E2BIG;
860 
861 		if (!new_limit)
862 			return -EINVAL;
863 
864 		/* gmap_create takes last usable address */
865 		if (new_limit != KVM_S390_NO_MEM_LIMIT)
866 			new_limit -= 1;
867 
868 		ret = -EBUSY;
869 		mutex_lock(&kvm->lock);
870 		if (!kvm->created_vcpus) {
871 			/* gmap_create will round the limit up */
872 			struct gmap *new = gmap_create(current->mm, new_limit);
873 
874 			if (!new) {
875 				ret = -ENOMEM;
876 			} else {
877 				gmap_remove(kvm->arch.gmap);
878 				new->private = kvm;
879 				kvm->arch.gmap = new;
880 				ret = 0;
881 			}
882 		}
883 		mutex_unlock(&kvm->lock);
884 		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
885 		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
886 			 (void *) kvm->arch.gmap->asce);
887 		break;
888 	}
889 	default:
890 		ret = -ENXIO;
891 		break;
892 	}
893 	return ret;
894 }
895 
896 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
897 
898 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
899 {
900 	struct kvm_vcpu *vcpu;
901 	int i;
902 
903 	kvm_s390_vcpu_block_all(kvm);
904 
905 	kvm_for_each_vcpu(i, vcpu, kvm) {
906 		kvm_s390_vcpu_crypto_setup(vcpu);
907 		/* recreate the shadow crycb by leaving the VSIE handler */
908 		kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
909 	}
910 
911 	kvm_s390_vcpu_unblock_all(kvm);
912 }
913 
914 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
915 {
916 	mutex_lock(&kvm->lock);
917 	switch (attr->attr) {
918 	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
919 		if (!test_kvm_facility(kvm, 76)) {
920 			mutex_unlock(&kvm->lock);
921 			return -EINVAL;
922 		}
923 		get_random_bytes(
924 			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
925 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
926 		kvm->arch.crypto.aes_kw = 1;
927 		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
928 		break;
929 	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
930 		if (!test_kvm_facility(kvm, 76)) {
931 			mutex_unlock(&kvm->lock);
932 			return -EINVAL;
933 		}
934 		get_random_bytes(
935 			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
936 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
937 		kvm->arch.crypto.dea_kw = 1;
938 		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
939 		break;
940 	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
941 		if (!test_kvm_facility(kvm, 76)) {
942 			mutex_unlock(&kvm->lock);
943 			return -EINVAL;
944 		}
945 		kvm->arch.crypto.aes_kw = 0;
946 		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
947 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
948 		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
949 		break;
950 	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
951 		if (!test_kvm_facility(kvm, 76)) {
952 			mutex_unlock(&kvm->lock);
953 			return -EINVAL;
954 		}
955 		kvm->arch.crypto.dea_kw = 0;
956 		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
957 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
958 		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
959 		break;
960 	case KVM_S390_VM_CRYPTO_ENABLE_APIE:
961 		if (!ap_instructions_available()) {
962 			mutex_unlock(&kvm->lock);
963 			return -EOPNOTSUPP;
964 		}
965 		kvm->arch.crypto.apie = 1;
966 		break;
967 	case KVM_S390_VM_CRYPTO_DISABLE_APIE:
968 		if (!ap_instructions_available()) {
969 			mutex_unlock(&kvm->lock);
970 			return -EOPNOTSUPP;
971 		}
972 		kvm->arch.crypto.apie = 0;
973 		break;
974 	default:
975 		mutex_unlock(&kvm->lock);
976 		return -ENXIO;
977 	}
978 
979 	kvm_s390_vcpu_crypto_reset_all(kvm);
980 	mutex_unlock(&kvm->lock);
981 	return 0;
982 }
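/*
 * The crypto attributes above are driven from userspace through the generic
 * device-attribute interface on the VM fd.  A minimal sketch (illustrative
 * only; report_no_key_wrapping() is a hypothetical helper) that turns on AES
 * key wrapping:
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_CRYPTO,
 *		.attr  = KVM_S390_VM_CRYPTO_ENABLE_AES_KW,
 *	};
 *
 *	if (ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr))
 *		report_no_key_wrapping();	// e.g. facility 76 missing
 *
 * Changing an attribute also kicks all VCPUs out of SIE (see
 * kvm_s390_vcpu_crypto_reset_all()) so that the new settings are picked up.
 */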
983 
984 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
985 {
986 	int cx;
987 	struct kvm_vcpu *vcpu;
988 
989 	kvm_for_each_vcpu(cx, vcpu, kvm)
990 		kvm_s390_sync_request(req, vcpu);
991 }
992 
993 /*
994  * Must be called with kvm->srcu held to avoid races on memslots, and with
995  * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
996  */
997 static int kvm_s390_vm_start_migration(struct kvm *kvm)
998 {
999 	struct kvm_memory_slot *ms;
1000 	struct kvm_memslots *slots;
1001 	unsigned long ram_pages = 0;
1002 	int slotnr;
1003 
1004 	/* migration mode already enabled */
1005 	if (kvm->arch.migration_mode)
1006 		return 0;
1007 	slots = kvm_memslots(kvm);
1008 	if (!slots || !slots->used_slots)
1009 		return -EINVAL;
1010 
1011 	if (!kvm->arch.use_cmma) {
1012 		kvm->arch.migration_mode = 1;
1013 		return 0;
1014 	}
1015 	/* mark all the pages in active slots as dirty */
1016 	for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
1017 		ms = slots->memslots + slotnr;
1018 		if (!ms->dirty_bitmap)
1019 			return -EINVAL;
1020 		/*
1021 		 * The second half of the bitmap is only used on x86,
1022 		 * and would be wasted otherwise, so we put it to good
1023 		 * use here to keep track of the state of the storage
1024 		 * attributes.
1025 		 */
1026 		memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
1027 		ram_pages += ms->npages;
1028 	}
1029 	atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
1030 	kvm->arch.migration_mode = 1;
1031 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
1032 	return 0;
1033 }
1034 
1035 /*
1036  * Must be called with kvm->slots_lock to avoid races with ourselves and
1037  * kvm_s390_vm_start_migration.
1038  */
1039 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
1040 {
1041 	/* migration mode already disabled */
1042 	if (!kvm->arch.migration_mode)
1043 		return 0;
1044 	kvm->arch.migration_mode = 0;
1045 	if (kvm->arch.use_cmma)
1046 		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
1047 	return 0;
1048 }
1049 
1050 static int kvm_s390_vm_set_migration(struct kvm *kvm,
1051 				     struct kvm_device_attr *attr)
1052 {
1053 	int res = -ENXIO;
1054 
1055 	mutex_lock(&kvm->slots_lock);
1056 	switch (attr->attr) {
1057 	case KVM_S390_VM_MIGRATION_START:
1058 		res = kvm_s390_vm_start_migration(kvm);
1059 		break;
1060 	case KVM_S390_VM_MIGRATION_STOP:
1061 		res = kvm_s390_vm_stop_migration(kvm);
1062 		break;
1063 	default:
1064 		break;
1065 	}
1066 	mutex_unlock(&kvm->slots_lock);
1067 
1068 	return res;
1069 }
1070 
1071 static int kvm_s390_vm_get_migration(struct kvm *kvm,
1072 				     struct kvm_device_attr *attr)
1073 {
1074 	u64 mig = kvm->arch.migration_mode;
1075 
1076 	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1077 		return -ENXIO;
1078 
1079 	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
1080 		return -EFAULT;
1081 	return 0;
1082 }
1083 
1084 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1085 {
1086 	struct kvm_s390_vm_tod_clock gtod;
1087 
1088 	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1089 		return -EFAULT;
1090 
1091 	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1092 		return -EINVAL;
1093 	kvm_s390_set_tod_clock(kvm, &gtod);
1094 
1095 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1096 		gtod.epoch_idx, gtod.tod);
1097 
1098 	return 0;
1099 }
1100 
1101 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1102 {
1103 	u8 gtod_high;
1104 
1105 	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1106 					   sizeof(gtod_high)))
1107 		return -EFAULT;
1108 
1109 	if (gtod_high != 0)
1110 		return -EINVAL;
1111 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1112 
1113 	return 0;
1114 }
1115 
1116 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1117 {
1118 	struct kvm_s390_vm_tod_clock gtod = { 0 };
1119 
1120 	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1121 			   sizeof(gtod.tod)))
1122 		return -EFAULT;
1123 
1124 	kvm_s390_set_tod_clock(kvm, &gtod);
1125 	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1126 	return 0;
1127 }
1128 
1129 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1130 {
1131 	int ret;
1132 
1133 	if (attr->flags)
1134 		return -EINVAL;
1135 
1136 	switch (attr->attr) {
1137 	case KVM_S390_VM_TOD_EXT:
1138 		ret = kvm_s390_set_tod_ext(kvm, attr);
1139 		break;
1140 	case KVM_S390_VM_TOD_HIGH:
1141 		ret = kvm_s390_set_tod_high(kvm, attr);
1142 		break;
1143 	case KVM_S390_VM_TOD_LOW:
1144 		ret = kvm_s390_set_tod_low(kvm, attr);
1145 		break;
1146 	default:
1147 		ret = -ENXIO;
1148 		break;
1149 	}
1150 	return ret;
1151 }
1152 
1153 static void kvm_s390_get_tod_clock(struct kvm *kvm,
1154 				   struct kvm_s390_vm_tod_clock *gtod)
1155 {
1156 	struct kvm_s390_tod_clock_ext htod;
1157 
1158 	preempt_disable();
1159 
1160 	get_tod_clock_ext((char *)&htod);
1161 
1162 	gtod->tod = htod.tod + kvm->arch.epoch;
1163 	gtod->epoch_idx = 0;
1164 	if (test_kvm_facility(kvm, 139)) {
1165 		gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
1166 		if (gtod->tod < htod.tod)
1167 			gtod->epoch_idx += 1;
1168 	}
1169 
1170 	preempt_enable();
1171 }
1172 
1173 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1174 {
1175 	struct kvm_s390_vm_tod_clock gtod;
1176 
1177 	memset(&gtod, 0, sizeof(gtod));
1178 	kvm_s390_get_tod_clock(kvm, &gtod);
1179 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1180 		return -EFAULT;
1181 
1182 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1183 		gtod.epoch_idx, gtod.tod);
1184 	return 0;
1185 }
1186 
1187 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1188 {
1189 	u8 gtod_high = 0;
1190 
1191 	if (copy_to_user((void __user *)attr->addr, &gtod_high,
1192 					 sizeof(gtod_high)))
1193 		return -EFAULT;
1194 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1195 
1196 	return 0;
1197 }
1198 
1199 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1200 {
1201 	u64 gtod;
1202 
1203 	gtod = kvm_s390_get_tod_clock_fast(kvm);
1204 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1205 		return -EFAULT;
1206 	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1207 
1208 	return 0;
1209 }
1210 
1211 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1212 {
1213 	int ret;
1214 
1215 	if (attr->flags)
1216 		return -EINVAL;
1217 
1218 	switch (attr->attr) {
1219 	case KVM_S390_VM_TOD_EXT:
1220 		ret = kvm_s390_get_tod_ext(kvm, attr);
1221 		break;
1222 	case KVM_S390_VM_TOD_HIGH:
1223 		ret = kvm_s390_get_tod_high(kvm, attr);
1224 		break;
1225 	case KVM_S390_VM_TOD_LOW:
1226 		ret = kvm_s390_get_tod_low(kvm, attr);
1227 		break;
1228 	default:
1229 		ret = -ENXIO;
1230 		break;
1231 	}
1232 	return ret;
1233 }
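/*
 * Sketch of querying the extended guest TOD from userspace (illustrative
 * only; a meaningful epoch index requires facility 139):
 *
 *	struct kvm_s390_vm_tod_clock gtod;
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_TOD,
 *		.attr  = KVM_S390_VM_TOD_EXT,
 *		.addr  = (__u64)&gtod,
 *	};
 *
 *	if (!ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr))
 *		printf("epoch_idx=%u tod=%llx\n", gtod.epoch_idx, gtod.tod);
 *
 * KVM_S390_VM_TOD_LOW and KVM_S390_VM_TOD_HIGH remain available as the old
 * split interface; the "high" part is always zero there, and a non-zero
 * epoch index can only be set through KVM_S390_VM_TOD_EXT.
 */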
1234 
1235 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1236 {
1237 	struct kvm_s390_vm_cpu_processor *proc;
1238 	u16 lowest_ibc, unblocked_ibc;
1239 	int ret = 0;
1240 
1241 	mutex_lock(&kvm->lock);
1242 	if (kvm->created_vcpus) {
1243 		ret = -EBUSY;
1244 		goto out;
1245 	}
1246 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1247 	if (!proc) {
1248 		ret = -ENOMEM;
1249 		goto out;
1250 	}
1251 	if (!copy_from_user(proc, (void __user *)attr->addr,
1252 			    sizeof(*proc))) {
1253 		kvm->arch.model.cpuid = proc->cpuid;
1254 		lowest_ibc = sclp.ibc >> 16 & 0xfff;
1255 		unblocked_ibc = sclp.ibc & 0xfff;
1256 		if (lowest_ibc && proc->ibc) {
1257 			if (proc->ibc > unblocked_ibc)
1258 				kvm->arch.model.ibc = unblocked_ibc;
1259 			else if (proc->ibc < lowest_ibc)
1260 				kvm->arch.model.ibc = lowest_ibc;
1261 			else
1262 				kvm->arch.model.ibc = proc->ibc;
1263 		}
1264 		memcpy(kvm->arch.model.fac_list, proc->fac_list,
1265 		       S390_ARCH_FAC_LIST_SIZE_BYTE);
1266 		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1267 			 kvm->arch.model.ibc,
1268 			 kvm->arch.model.cpuid);
1269 		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1270 			 kvm->arch.model.fac_list[0],
1271 			 kvm->arch.model.fac_list[1],
1272 			 kvm->arch.model.fac_list[2]);
1273 	} else
1274 		ret = -EFAULT;
1275 	kfree(proc);
1276 out:
1277 	mutex_unlock(&kvm->lock);
1278 	return ret;
1279 }
1280 
1281 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1282 				       struct kvm_device_attr *attr)
1283 {
1284 	struct kvm_s390_vm_cpu_feat data;
1285 
1286 	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1287 		return -EFAULT;
1288 	if (!bitmap_subset((unsigned long *) data.feat,
1289 			   kvm_s390_available_cpu_feat,
1290 			   KVM_S390_VM_CPU_FEAT_NR_BITS))
1291 		return -EINVAL;
1292 
1293 	mutex_lock(&kvm->lock);
1294 	if (kvm->created_vcpus) {
1295 		mutex_unlock(&kvm->lock);
1296 		return -EBUSY;
1297 	}
1298 	bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1299 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1300 	mutex_unlock(&kvm->lock);
1301 	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1302 			 data.feat[0],
1303 			 data.feat[1],
1304 			 data.feat[2]);
1305 	return 0;
1306 }
1307 
1308 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1309 					  struct kvm_device_attr *attr)
1310 {
1311 	mutex_lock(&kvm->lock);
1312 	if (kvm->created_vcpus) {
1313 		mutex_unlock(&kvm->lock);
1314 		return -EBUSY;
1315 	}
1316 
1317 	if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1318 			   sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1319 		mutex_unlock(&kvm->lock);
1320 		return -EFAULT;
1321 	}
1322 	mutex_unlock(&kvm->lock);
1323 
1324 	VM_EVENT(kvm, 3, "SET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1325 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1326 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1327 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1328 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1329 	VM_EVENT(kvm, 3, "SET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1330 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1331 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1332 	VM_EVENT(kvm, 3, "SET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1333 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1334 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1335 	VM_EVENT(kvm, 3, "SET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1336 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1337 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1338 	VM_EVENT(kvm, 3, "SET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1339 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1340 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1341 	VM_EVENT(kvm, 3, "SET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1342 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1343 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1344 	VM_EVENT(kvm, 3, "SET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1345 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1346 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1347 	VM_EVENT(kvm, 3, "SET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1348 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1349 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1350 	VM_EVENT(kvm, 3, "SET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1351 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1352 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1353 	VM_EVENT(kvm, 3, "SET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1354 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1355 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1356 	VM_EVENT(kvm, 3, "SET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1357 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1358 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1359 	VM_EVENT(kvm, 3, "SET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1360 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1361 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1362 	VM_EVENT(kvm, 3, "SET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1363 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1364 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1365 	VM_EVENT(kvm, 3, "SET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1366 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1367 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1368 	VM_EVENT(kvm, 3, "SET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1369 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1370 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1371 	VM_EVENT(kvm, 3, "SET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1372 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1373 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1374 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1375 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1376 	VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1377 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1378 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1379 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1380 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1381 
1382 	return 0;
1383 }
1384 
1385 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1386 {
1387 	int ret = -ENXIO;
1388 
1389 	switch (attr->attr) {
1390 	case KVM_S390_VM_CPU_PROCESSOR:
1391 		ret = kvm_s390_set_processor(kvm, attr);
1392 		break;
1393 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1394 		ret = kvm_s390_set_processor_feat(kvm, attr);
1395 		break;
1396 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1397 		ret = kvm_s390_set_processor_subfunc(kvm, attr);
1398 		break;
1399 	}
1400 	return ret;
1401 }
1402 
1403 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1404 {
1405 	struct kvm_s390_vm_cpu_processor *proc;
1406 	int ret = 0;
1407 
1408 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1409 	if (!proc) {
1410 		ret = -ENOMEM;
1411 		goto out;
1412 	}
1413 	proc->cpuid = kvm->arch.model.cpuid;
1414 	proc->ibc = kvm->arch.model.ibc;
1415 	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1416 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1417 	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1418 		 kvm->arch.model.ibc,
1419 		 kvm->arch.model.cpuid);
1420 	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1421 		 kvm->arch.model.fac_list[0],
1422 		 kvm->arch.model.fac_list[1],
1423 		 kvm->arch.model.fac_list[2]);
1424 	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1425 		ret = -EFAULT;
1426 	kfree(proc);
1427 out:
1428 	return ret;
1429 }
1430 
1431 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1432 {
1433 	struct kvm_s390_vm_cpu_machine *mach;
1434 	int ret = 0;
1435 
1436 	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1437 	if (!mach) {
1438 		ret = -ENOMEM;
1439 		goto out;
1440 	}
1441 	get_cpu_id((struct cpuid *) &mach->cpuid);
1442 	mach->ibc = sclp.ibc;
1443 	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1444 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1445 	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1446 	       sizeof(S390_lowcore.stfle_fac_list));
1447 	VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1448 		 kvm->arch.model.ibc,
1449 		 kvm->arch.model.cpuid);
1450 	VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1451 		 mach->fac_mask[0],
1452 		 mach->fac_mask[1],
1453 		 mach->fac_mask[2]);
1454 	VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1455 		 mach->fac_list[0],
1456 		 mach->fac_list[1],
1457 		 mach->fac_list[2]);
1458 	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1459 		ret = -EFAULT;
1460 	kfree(mach);
1461 out:
1462 	return ret;
1463 }
1464 
1465 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1466 				       struct kvm_device_attr *attr)
1467 {
1468 	struct kvm_s390_vm_cpu_feat data;
1469 
1470 	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1471 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1472 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1473 		return -EFAULT;
1474 	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1475 			 data.feat[0],
1476 			 data.feat[1],
1477 			 data.feat[2]);
1478 	return 0;
1479 }
1480 
1481 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1482 				     struct kvm_device_attr *attr)
1483 {
1484 	struct kvm_s390_vm_cpu_feat data;
1485 
1486 	bitmap_copy((unsigned long *) data.feat,
1487 		    kvm_s390_available_cpu_feat,
1488 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1489 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1490 		return -EFAULT;
1491 	VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1492 			 data.feat[0],
1493 			 data.feat[1],
1494 			 data.feat[2]);
1495 	return 0;
1496 }
1497 
1498 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1499 					  struct kvm_device_attr *attr)
1500 {
1501 	if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1502 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1503 		return -EFAULT;
1504 
1505 	VM_EVENT(kvm, 3, "GET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1506 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1507 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1508 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1509 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1510 	VM_EVENT(kvm, 3, "GET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1511 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1512 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1513 	VM_EVENT(kvm, 3, "GET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1514 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1515 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1516 	VM_EVENT(kvm, 3, "GET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1517 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1518 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1519 	VM_EVENT(kvm, 3, "GET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1520 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1521 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1522 	VM_EVENT(kvm, 3, "GET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1523 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1524 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1525 	VM_EVENT(kvm, 3, "GET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1526 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1527 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1528 	VM_EVENT(kvm, 3, "GET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1529 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1530 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1531 	VM_EVENT(kvm, 3, "GET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1532 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1533 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1534 	VM_EVENT(kvm, 3, "GET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1535 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1536 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1537 	VM_EVENT(kvm, 3, "GET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1538 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1539 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1540 	VM_EVENT(kvm, 3, "GET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1541 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1542 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1543 	VM_EVENT(kvm, 3, "GET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1544 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1545 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1546 	VM_EVENT(kvm, 3, "GET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1547 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1548 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1549 	VM_EVENT(kvm, 3, "GET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1550 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1551 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1552 	VM_EVENT(kvm, 3, "GET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1553 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1554 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1555 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1556 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1557 	VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1558 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1559 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1560 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1561 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1562 
1563 	return 0;
1564 }
1565 
1566 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1567 					struct kvm_device_attr *attr)
1568 {
1569 	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1570 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1571 		return -EFAULT;
1572 
1573 	VM_EVENT(kvm, 3, "GET: host  PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1574 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1575 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1576 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1577 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1578 	VM_EVENT(kvm, 3, "GET: host  PTFF   subfunc 0x%16.16lx.%16.16lx",
1579 		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1580 		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1581 	VM_EVENT(kvm, 3, "GET: host  KMAC   subfunc 0x%16.16lx.%16.16lx",
1582 		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1583 		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1584 	VM_EVENT(kvm, 3, "GET: host  KMC    subfunc 0x%16.16lx.%16.16lx",
1585 		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1586 		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1587 	VM_EVENT(kvm, 3, "GET: host  KM     subfunc 0x%16.16lx.%16.16lx",
1588 		 ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1589 		 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1590 	VM_EVENT(kvm, 3, "GET: host  KIMD   subfunc 0x%16.16lx.%16.16lx",
1591 		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1592 		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1593 	VM_EVENT(kvm, 3, "GET: host  KLMD   subfunc 0x%16.16lx.%16.16lx",
1594 		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1595 		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1596 	VM_EVENT(kvm, 3, "GET: host  PCKMO  subfunc 0x%16.16lx.%16.16lx",
1597 		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1598 		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1599 	VM_EVENT(kvm, 3, "GET: host  KMCTR  subfunc 0x%16.16lx.%16.16lx",
1600 		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1601 		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1602 	VM_EVENT(kvm, 3, "GET: host  KMF    subfunc 0x%16.16lx.%16.16lx",
1603 		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1604 		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1605 	VM_EVENT(kvm, 3, "GET: host  KMO    subfunc 0x%16.16lx.%16.16lx",
1606 		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1607 		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1608 	VM_EVENT(kvm, 3, "GET: host  PCC    subfunc 0x%16.16lx.%16.16lx",
1609 		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1610 		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1611 	VM_EVENT(kvm, 3, "GET: host  PPNO   subfunc 0x%16.16lx.%16.16lx",
1612 		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1613 		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1614 	VM_EVENT(kvm, 3, "GET: host  KMA    subfunc 0x%16.16lx.%16.16lx",
1615 		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1616 		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1617 	VM_EVENT(kvm, 3, "GET: host  KDSA   subfunc 0x%16.16lx.%16.16lx",
1618 		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1619 		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1620 	VM_EVENT(kvm, 3, "GET: host  SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1621 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1622 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1623 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1624 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1625 	VM_EVENT(kvm, 3, "GET: host  DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1626 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1627 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1628 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1629 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1630 
1631 	return 0;
1632 }
1633 
1634 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1635 {
1636 	int ret = -ENXIO;
1637 
1638 	switch (attr->attr) {
1639 	case KVM_S390_VM_CPU_PROCESSOR:
1640 		ret = kvm_s390_get_processor(kvm, attr);
1641 		break;
1642 	case KVM_S390_VM_CPU_MACHINE:
1643 		ret = kvm_s390_get_machine(kvm, attr);
1644 		break;
1645 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1646 		ret = kvm_s390_get_processor_feat(kvm, attr);
1647 		break;
1648 	case KVM_S390_VM_CPU_MACHINE_FEAT:
1649 		ret = kvm_s390_get_machine_feat(kvm, attr);
1650 		break;
1651 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1652 		ret = kvm_s390_get_processor_subfunc(kvm, attr);
1653 		break;
1654 	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1655 		ret = kvm_s390_get_machine_subfunc(kvm, attr);
1656 		break;
1657 	}
1658 	return ret;
1659 }
1660 
1661 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1662 {
1663 	int ret;
1664 
1665 	switch (attr->group) {
1666 	case KVM_S390_VM_MEM_CTRL:
1667 		ret = kvm_s390_set_mem_control(kvm, attr);
1668 		break;
1669 	case KVM_S390_VM_TOD:
1670 		ret = kvm_s390_set_tod(kvm, attr);
1671 		break;
1672 	case KVM_S390_VM_CPU_MODEL:
1673 		ret = kvm_s390_set_cpu_model(kvm, attr);
1674 		break;
1675 	case KVM_S390_VM_CRYPTO:
1676 		ret = kvm_s390_vm_set_crypto(kvm, attr);
1677 		break;
1678 	case KVM_S390_VM_MIGRATION:
1679 		ret = kvm_s390_vm_set_migration(kvm, attr);
1680 		break;
1681 	default:
1682 		ret = -ENXIO;
1683 		break;
1684 	}
1685 
1686 	return ret;
1687 }
1688 
1689 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1690 {
1691 	int ret;
1692 
1693 	switch (attr->group) {
1694 	case KVM_S390_VM_MEM_CTRL:
1695 		ret = kvm_s390_get_mem_control(kvm, attr);
1696 		break;
1697 	case KVM_S390_VM_TOD:
1698 		ret = kvm_s390_get_tod(kvm, attr);
1699 		break;
1700 	case KVM_S390_VM_CPU_MODEL:
1701 		ret = kvm_s390_get_cpu_model(kvm, attr);
1702 		break;
1703 	case KVM_S390_VM_MIGRATION:
1704 		ret = kvm_s390_vm_get_migration(kvm, attr);
1705 		break;
1706 	default:
1707 		ret = -ENXIO;
1708 		break;
1709 	}
1710 
1711 	return ret;
1712 }
1713 
1714 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1715 {
1716 	int ret;
1717 
1718 	switch (attr->group) {
1719 	case KVM_S390_VM_MEM_CTRL:
1720 		switch (attr->attr) {
1721 		case KVM_S390_VM_MEM_ENABLE_CMMA:
1722 		case KVM_S390_VM_MEM_CLR_CMMA:
1723 			ret = sclp.has_cmma ? 0 : -ENXIO;
1724 			break;
1725 		case KVM_S390_VM_MEM_LIMIT_SIZE:
1726 			ret = 0;
1727 			break;
1728 		default:
1729 			ret = -ENXIO;
1730 			break;
1731 		}
1732 		break;
1733 	case KVM_S390_VM_TOD:
1734 		switch (attr->attr) {
1735 		case KVM_S390_VM_TOD_LOW:
1736 		case KVM_S390_VM_TOD_HIGH:
1737 			ret = 0;
1738 			break;
1739 		default:
1740 			ret = -ENXIO;
1741 			break;
1742 		}
1743 		break;
1744 	case KVM_S390_VM_CPU_MODEL:
1745 		switch (attr->attr) {
1746 		case KVM_S390_VM_CPU_PROCESSOR:
1747 		case KVM_S390_VM_CPU_MACHINE:
1748 		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1749 		case KVM_S390_VM_CPU_MACHINE_FEAT:
1750 		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1751 		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1752 			ret = 0;
1753 			break;
1754 		default:
1755 			ret = -ENXIO;
1756 			break;
1757 		}
1758 		break;
1759 	case KVM_S390_VM_CRYPTO:
1760 		switch (attr->attr) {
1761 		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1762 		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1763 		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1764 		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1765 			ret = 0;
1766 			break;
1767 		case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1768 		case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1769 			ret = ap_instructions_available() ? 0 : -ENXIO;
1770 			break;
1771 		default:
1772 			ret = -ENXIO;
1773 			break;
1774 		}
1775 		break;
1776 	case KVM_S390_VM_MIGRATION:
1777 		ret = 0;
1778 		break;
1779 	default:
1780 		ret = -ENXIO;
1781 		break;
1782 	}
1783 
1784 	return ret;
1785 }
1786 
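/*
 * Read the storage keys of a range of guest frames into a user buffer.
 * Returns KVM_S390_GET_SKEYS_NONE if the guest does not use storage keys.
 */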
1787 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1788 {
1789 	uint8_t *keys;
1790 	uint64_t hva;
1791 	int srcu_idx, i, r = 0;
1792 
1793 	if (args->flags != 0)
1794 		return -EINVAL;
1795 
1796 	/* Is this guest using storage keys? */
1797 	if (!mm_uses_skeys(current->mm))
1798 		return KVM_S390_GET_SKEYS_NONE;
1799 
1800 	/* Enforce sane limit on memory allocation */
1801 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1802 		return -EINVAL;
1803 
1804 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1805 	if (!keys)
1806 		return -ENOMEM;
1807 
1808 	down_read(&current->mm->mmap_sem);
1809 	srcu_idx = srcu_read_lock(&kvm->srcu);
1810 	for (i = 0; i < args->count; i++) {
1811 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1812 		if (kvm_is_error_hva(hva)) {
1813 			r = -EFAULT;
1814 			break;
1815 		}
1816 
1817 		r = get_guest_storage_key(current->mm, hva, &keys[i]);
1818 		if (r)
1819 			break;
1820 	}
1821 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1822 	up_read(&current->mm->mmap_sem);
1823 
1824 	if (!r) {
1825 		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1826 				 sizeof(uint8_t) * args->count);
1827 		if (r)
1828 			r = -EFAULT;
1829 	}
1830 
1831 	kvfree(keys);
1832 	return r;
1833 }
1834 
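/*
 * Copy storage keys from a user buffer and apply them to a range of guest
 * frames. Storage key handling is enabled for the guest first; host faults
 * are resolved with fixup_user_fault() and the key is set again.
 */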
1835 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1836 {
1837 	uint8_t *keys;
1838 	uint64_t hva;
1839 	int srcu_idx, i, r = 0;
1840 	bool unlocked;
1841 
1842 	if (args->flags != 0)
1843 		return -EINVAL;
1844 
1845 	/* Enforce sane limit on memory allocation */
1846 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1847 		return -EINVAL;
1848 
1849 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1850 	if (!keys)
1851 		return -ENOMEM;
1852 
1853 	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1854 			   sizeof(uint8_t) * args->count);
1855 	if (r) {
1856 		r = -EFAULT;
1857 		goto out;
1858 	}
1859 
1860 	/* Enable storage key handling for the guest */
1861 	r = s390_enable_skey();
1862 	if (r)
1863 		goto out;
1864 
1865 	i = 0;
1866 	down_read(&current->mm->mmap_sem);
1867 	srcu_idx = srcu_read_lock(&kvm->srcu);
1868 	while (i < args->count) {

1869 		unlocked = false;
1870 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1871 		if (kvm_is_error_hva(hva)) {
1872 			r = -EFAULT;
1873 			break;
1874 		}
1875 
1876 		/* Lowest order bit is reserved */
1877 		if (keys[i] & 0x01) {
1878 			r = -EINVAL;
1879 			break;
1880 		}
1881 
1882 		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1883 		if (r) {
1884 			r = fixup_user_fault(current, current->mm, hva,
1885 					     FAULT_FLAG_WRITE, &unlocked);
1886 			if (r)
1887 				break;
1888 		}
1889 		if (!r)
1890 			i++;
1891 	}
1892 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1893 	up_read(&current->mm->mmap_sem);
1894 out:
1895 	kvfree(keys);
1896 	return r;
1897 }
1898 
1899 /*
1900  * Base address and length must be sent at the start of each block, therefore
1901  * it's cheaper to send some clean data, as long as it's less than the size of
1902  * two longs.
1903  */
1904 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1905 /* for consistency */
1906 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1907 
1908 /*
1909  * Similar to gfn_to_memslot, but returns the index of a memslot also when the
1910  * address falls in a hole. In that case the index of one of the memslots
1911  * bordering the hole is returned.
1912  */
1913 static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1914 {
1915 	int start = 0, end = slots->used_slots;
1916 	int slot = atomic_read(&slots->lru_slot);
1917 	struct kvm_memory_slot *memslots = slots->memslots;
1918 
1919 	if (gfn >= memslots[slot].base_gfn &&
1920 	    gfn < memslots[slot].base_gfn + memslots[slot].npages)
1921 		return slot;
1922 
1923 	while (start < end) {
1924 		slot = start + (end - start) / 2;
1925 
1926 		if (gfn >= memslots[slot].base_gfn)
1927 			end = slot;
1928 		else
1929 			start = slot + 1;
1930 	}
1931 
1932 	if (gfn >= memslots[start].base_gfn &&
1933 	    gfn < memslots[start].base_gfn + memslots[start].npages) {
1934 		atomic_set(&slots->lru_slot, start);
1935 	}
1936 
1937 	return start;
1938 }
1939 
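/*
 * Read the CMMA page states (from the PGSTEs) of consecutive guest frames
 * into the result buffer without touching the CMMA dirty bitmap.
 */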
1940 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1941 			      u8 *res, unsigned long bufsize)
1942 {
1943 	unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1944 
1945 	args->count = 0;
1946 	while (args->count < bufsize) {
1947 		hva = gfn_to_hva(kvm, cur_gfn);
1948 		/*
1949 		 * We return an error if the first value was invalid, but we
1950 		 * return successfully if at least one value was copied.
1951 		 */
1952 		if (kvm_is_error_hva(hva))
1953 			return args->count ? 0 : -EFAULT;
1954 		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1955 			pgstev = 0;
1956 		res[args->count++] = (pgstev >> 24) & 0x43;
1957 		cur_gfn++;
1958 	}
1959 
1960 	return 0;
1961 }
1962 
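/*
 * Return the guest frame number of the next page that is marked dirty in
 * the CMMA dirty bitmap, starting the search at cur_gfn and wrapping
 * around to other memslots if necessary.
 */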
1963 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
1964 					      unsigned long cur_gfn)
1965 {
1966 	int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
1967 	struct kvm_memory_slot *ms = slots->memslots + slotidx;
1968 	unsigned long ofs = cur_gfn - ms->base_gfn;
1969 
1970 	if (ms->base_gfn + ms->npages <= cur_gfn) {
1971 		slotidx--;
1972 		/* If we are above the highest slot, wrap around */
1973 		if (slotidx < 0)
1974 			slotidx = slots->used_slots - 1;
1975 
1976 		ms = slots->memslots + slotidx;
1977 		ofs = 0;
1978 	}
1979 	ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
1980 	while ((slotidx > 0) && (ofs >= ms->npages)) {
1981 		slotidx--;
1982 		ms = slots->memslots + slotidx;
1983 		ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
1984 	}
1985 	return ms->base_gfn + ofs;
1986 }
1987 
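/*
 * Collect the CMMA page states of dirty pages into the result buffer,
 * clearing the corresponding dirty bits as they are consumed. Stops at the
 * end of the buffer, at the end of memory, or when the next dirty page is
 * more than KVM_S390_MAX_BIT_DISTANCE frames away.
 */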
1988 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1989 			     u8 *res, unsigned long bufsize)
1990 {
1991 	unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
1992 	struct kvm_memslots *slots = kvm_memslots(kvm);
1993 	struct kvm_memory_slot *ms;
1994 
1995 	cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
1996 	ms = gfn_to_memslot(kvm, cur_gfn);
1997 	args->count = 0;
1998 	args->start_gfn = cur_gfn;
1999 	if (!ms)
2000 		return 0;
2001 	next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2002 	mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
2003 
2004 	while (args->count < bufsize) {
2005 		hva = gfn_to_hva(kvm, cur_gfn);
2006 		if (kvm_is_error_hva(hva))
2007 			return 0;
2008 		/* Decrement only if we actually flipped the bit to 0 */
2009 		if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2010 			atomic64_dec(&kvm->arch.cmma_dirty_pages);
2011 		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2012 			pgstev = 0;
2013 		/* Save the value */
2014 		res[args->count++] = (pgstev >> 24) & 0x43;
2015 		/* If the next bit is too far away, stop. */
2016 		if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2017 			return 0;
2018 		/* If we reached the previous "next", find the next one */
2019 		if (cur_gfn == next_gfn)
2020 			next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2021 		/* Reached the end of memory or of the buffer, stop */
2022 		if ((next_gfn >= mem_end) ||
2023 		    (next_gfn - args->start_gfn >= bufsize))
2024 			return 0;
2025 		cur_gfn++;
2026 		/* Reached the end of the current memslot, take the next one. */
2027 		if (cur_gfn - ms->base_gfn >= ms->npages) {
2028 			ms = gfn_to_memslot(kvm, cur_gfn);
2029 			if (!ms)
2030 				return 0;
2031 		}
2032 	}
2033 	return 0;
2034 }
2035 
2036 /*
2037  * This function searches for the next page with dirty CMMA attributes, and
2038  * saves the attributes in the buffer up to either the end of the buffer or
2039  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2040  * no trailing clean bytes are saved.
2041  * In case no dirty bits were found, or if CMMA was not enabled or used, the
2042  * output buffer will indicate 0 as length.
2043  */
2044 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2045 				  struct kvm_s390_cmma_log *args)
2046 {
2047 	unsigned long bufsize;
2048 	int srcu_idx, peek, ret;
2049 	u8 *values;
2050 
2051 	if (!kvm->arch.use_cmma)
2052 		return -ENXIO;
2053 	/* Invalid/unsupported flags were specified */
2054 	if (args->flags & ~KVM_S390_CMMA_PEEK)
2055 		return -EINVAL;
2056 	/* Migration mode query, and we are not doing a migration */
2057 	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2058 	if (!peek && !kvm->arch.migration_mode)
2059 		return -EINVAL;
2060 	/* CMMA is disabled or was not used, or the buffer has length zero */
2061 	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2062 	if (!bufsize || !kvm->mm->context.uses_cmm) {
2063 		memset(args, 0, sizeof(*args));
2064 		return 0;
2065 	}
2066 	/* We are not peeking, and there are no dirty pages */
2067 	if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2068 		memset(args, 0, sizeof(*args));
2069 		return 0;
2070 	}
2071 
2072 	values = vmalloc(bufsize);
2073 	if (!values)
2074 		return -ENOMEM;
2075 
2076 	down_read(&kvm->mm->mmap_sem);
2077 	srcu_idx = srcu_read_lock(&kvm->srcu);
2078 	if (peek)
2079 		ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2080 	else
2081 		ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2082 	srcu_read_unlock(&kvm->srcu, srcu_idx);
2083 	up_read(&kvm->mm->mmap_sem);
2084 
2085 	if (kvm->arch.migration_mode)
2086 		args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2087 	else
2088 		args->remaining = 0;
2089 
2090 	if (copy_to_user((void __user *)args->values, values, args->count))
2091 		ret = -EFAULT;
2092 
2093 	vfree(values);
2094 	return ret;
2095 }
2096 
2097 /*
2098  * This function sets the CMMA attributes for the given pages. If the input
2099  * buffer has zero length, no action is taken, otherwise the attributes are
2100  * set and the mm->context.uses_cmm flag is set.
2101  */
2102 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2103 				  const struct kvm_s390_cmma_log *args)
2104 {
2105 	unsigned long hva, mask, pgstev, i;
2106 	uint8_t *bits;
2107 	int srcu_idx, r = 0;
2108 
2109 	mask = args->mask;
2110 
2111 	if (!kvm->arch.use_cmma)
2112 		return -ENXIO;
2113 	/* invalid/unsupported flags */
2114 	if (args->flags != 0)
2115 		return -EINVAL;
2116 	/* Enforce sane limit on memory allocation */
2117 	if (args->count > KVM_S390_CMMA_SIZE_MAX)
2118 		return -EINVAL;
2119 	/* Nothing to do */
2120 	if (args->count == 0)
2121 		return 0;
2122 
2123 	bits = vmalloc(array_size(sizeof(*bits), args->count));
2124 	if (!bits)
2125 		return -ENOMEM;
2126 
2127 	r = copy_from_user(bits, (void __user *)args->values, args->count);
2128 	if (r) {
2129 		r = -EFAULT;
2130 		goto out;
2131 	}
2132 
2133 	down_read(&kvm->mm->mmap_sem);
2134 	srcu_idx = srcu_read_lock(&kvm->srcu);
2135 	for (i = 0; i < args->count; i++) {
2136 		hva = gfn_to_hva(kvm, args->start_gfn + i);
2137 		if (kvm_is_error_hva(hva)) {
2138 			r = -EFAULT;
2139 			break;
2140 		}
2141 
2142 		pgstev = bits[i];
2143 		pgstev = pgstev << 24;
2144 		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2145 		set_pgste_bits(kvm->mm, hva, mask, pgstev);
2146 	}
2147 	srcu_read_unlock(&kvm->srcu, srcu_idx);
2148 	up_read(&kvm->mm->mmap_sem);
2149 
2150 	if (!kvm->mm->context.uses_cmm) {
2151 		down_write(&kvm->mm->mmap_sem);
2152 		kvm->mm->context.uses_cmm = 1;
2153 		up_write(&kvm->mm->mmap_sem);
2154 	}
2155 out:
2156 	vfree(bits);
2157 	return r;
2158 }
2159 
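/* Dispatch the ioctls issued on a VM file descriptor. */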
2160 long kvm_arch_vm_ioctl(struct file *filp,
2161 		       unsigned int ioctl, unsigned long arg)
2162 {
2163 	struct kvm *kvm = filp->private_data;
2164 	void __user *argp = (void __user *)arg;
2165 	struct kvm_device_attr attr;
2166 	int r;
2167 
2168 	switch (ioctl) {
2169 	case KVM_S390_INTERRUPT: {
2170 		struct kvm_s390_interrupt s390int;
2171 
2172 		r = -EFAULT;
2173 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
2174 			break;
2175 		r = kvm_s390_inject_vm(kvm, &s390int);
2176 		break;
2177 	}
2178 	case KVM_CREATE_IRQCHIP: {
2179 		struct kvm_irq_routing_entry routing;
2180 
2181 		r = -EINVAL;
2182 		if (kvm->arch.use_irqchip) {
2183 			/* Set up dummy routing. */
2184 			memset(&routing, 0, sizeof(routing));
2185 			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2186 		}
2187 		break;
2188 	}
2189 	case KVM_SET_DEVICE_ATTR: {
2190 		r = -EFAULT;
2191 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2192 			break;
2193 		r = kvm_s390_vm_set_attr(kvm, &attr);
2194 		break;
2195 	}
2196 	case KVM_GET_DEVICE_ATTR: {
2197 		r = -EFAULT;
2198 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2199 			break;
2200 		r = kvm_s390_vm_get_attr(kvm, &attr);
2201 		break;
2202 	}
2203 	case KVM_HAS_DEVICE_ATTR: {
2204 		r = -EFAULT;
2205 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2206 			break;
2207 		r = kvm_s390_vm_has_attr(kvm, &attr);
2208 		break;
2209 	}
2210 	case KVM_S390_GET_SKEYS: {
2211 		struct kvm_s390_skeys args;
2212 
2213 		r = -EFAULT;
2214 		if (copy_from_user(&args, argp,
2215 				   sizeof(struct kvm_s390_skeys)))
2216 			break;
2217 		r = kvm_s390_get_skeys(kvm, &args);
2218 		break;
2219 	}
2220 	case KVM_S390_SET_SKEYS: {
2221 		struct kvm_s390_skeys args;
2222 
2223 		r = -EFAULT;
2224 		if (copy_from_user(&args, argp,
2225 				   sizeof(struct kvm_s390_skeys)))
2226 			break;
2227 		r = kvm_s390_set_skeys(kvm, &args);
2228 		break;
2229 	}
2230 	case KVM_S390_GET_CMMA_BITS: {
2231 		struct kvm_s390_cmma_log args;
2232 
2233 		r = -EFAULT;
2234 		if (copy_from_user(&args, argp, sizeof(args)))
2235 			break;
2236 		mutex_lock(&kvm->slots_lock);
2237 		r = kvm_s390_get_cmma_bits(kvm, &args);
2238 		mutex_unlock(&kvm->slots_lock);
2239 		if (!r) {
2240 			r = copy_to_user(argp, &args, sizeof(args));
2241 			if (r)
2242 				r = -EFAULT;
2243 		}
2244 		break;
2245 	}
2246 	case KVM_S390_SET_CMMA_BITS: {
2247 		struct kvm_s390_cmma_log args;
2248 
2249 		r = -EFAULT;
2250 		if (copy_from_user(&args, argp, sizeof(args)))
2251 			break;
2252 		mutex_lock(&kvm->slots_lock);
2253 		r = kvm_s390_set_cmma_bits(kvm, &args);
2254 		mutex_unlock(&kvm->slots_lock);
2255 		break;
2256 	}
2257 	default:
2258 		r = -ENOTTY;
2259 	}
2260 
2261 	return r;
2262 }
2263 
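/* Query the AP configuration to find out whether APXA is installed. */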
2264 static int kvm_s390_apxa_installed(void)
2265 {
2266 	struct ap_config_info info;
2267 
2268 	if (ap_instructions_available()) {
2269 		if (ap_qci(&info) == 0)
2270 			return info.apxa;
2271 	}
2272 
2273 	return 0;
2274 }
2275 
2276 /*
2277  * The format of the crypto control block (CRYCB) is specified in the 3 low
2278  * order bits of the CRYCB designation (CRYCBD) field as follows:
2279  * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2280  *	     AP extended addressing (APXA) facility is installed.
2281  * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2282  * Format 2: Both the APXA and MSAX3 facilities are installed.
2283  */
2284 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2285 {
2286 	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2287 
2288 	/* Clear the CRYCB format bits - i.e., set format 0 by default */
2289 	kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2290 
2291 	/* Check whether MSAX3 is installed */
2292 	if (!test_kvm_facility(kvm, 76))
2293 		return;
2294 
2295 	if (kvm_s390_apxa_installed())
2296 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2297 	else
2298 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2299 }
2300 
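/*
 * Install the given AP matrix masks (apm, aqm, adm) in the CRYCB of a VM
 * and make all vcpus rebuild their shadow CRYCBs.
 */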
2301 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2302 			       unsigned long *aqm, unsigned long *adm)
2303 {
2304 	struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2305 
2306 	mutex_lock(&kvm->lock);
2307 	kvm_s390_vcpu_block_all(kvm);
2308 
2309 	switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2310 	case CRYCB_FORMAT2: /* APCB1 uses 256 bits */
2311 		memcpy(crycb->apcb1.apm, apm, 32);
2312 		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2313 			 apm[0], apm[1], apm[2], apm[3]);
2314 		memcpy(crycb->apcb1.aqm, aqm, 32);
2315 		VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2316 			 aqm[0], aqm[1], aqm[2], aqm[3]);
2317 		memcpy(crycb->apcb1.adm, adm, 32);
2318 		VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2319 			 adm[0], adm[1], adm[2], adm[3]);
2320 		break;
2321 	case CRYCB_FORMAT1:
2322 	case CRYCB_FORMAT0: /* Fall through - both use APCB0 */
2323 		memcpy(crycb->apcb0.apm, apm, 8);
2324 		memcpy(crycb->apcb0.aqm, aqm, 2);
2325 		memcpy(crycb->apcb0.adm, adm, 2);
2326 		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2327 			 apm[0], *((unsigned short *)aqm),
2328 			 *((unsigned short *)adm));
2329 		break;
2330 	default:	/* Cannot happen */
2331 		break;
2332 	}
2333 
2334 	/* recreate the shadow crycb for each vcpu */
2335 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2336 	kvm_s390_vcpu_unblock_all(kvm);
2337 	mutex_unlock(&kvm->lock);
2338 }
2339 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
2340 
2341 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2342 {
2343 	mutex_lock(&kvm->lock);
2344 	kvm_s390_vcpu_block_all(kvm);
2345 
2346 	memset(&kvm->arch.crypto.crycb->apcb0, 0,
2347 	       sizeof(kvm->arch.crypto.crycb->apcb0));
2348 	memset(&kvm->arch.crypto.crycb->apcb1, 0,
2349 	       sizeof(kvm->arch.crypto.crycb->apcb1));
2350 
2351 	VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2352 	/* recreate the shadow crycb for each vcpu */
2353 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2354 	kvm_s390_vcpu_unblock_all(kvm);
2355 	mutex_unlock(&kvm->lock);
2356 }
2357 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2358 
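/*
 * Derive the default guest CPUID from the host CPUID, with the version
 * byte forced to 0xff.
 */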
2359 static u64 kvm_s390_get_initial_cpuid(void)
2360 {
2361 	struct cpuid cpuid;
2362 
2363 	get_cpu_id(&cpuid);
2364 	cpuid.version = 0xff;
2365 	return *((u64 *) &cpuid);
2366 }
2367 
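/*
 * Set up the CRYCB of a new VM: select the CRYCB format and, if MSAX3 is
 * available, enable AES/DEA protected key support with freshly generated
 * wrapping key masks.
 */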
2368 static void kvm_s390_crypto_init(struct kvm *kvm)
2369 {
2370 	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2371 	kvm_s390_set_crycb_format(kvm);
2372 
2373 	if (!test_kvm_facility(kvm, 76))
2374 		return;
2375 
2376 	/* Enable AES/DEA protected key functions by default */
2377 	kvm->arch.crypto.aes_kw = 1;
2378 	kvm->arch.crypto.dea_kw = 1;
2379 	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2380 			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2381 	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2382 			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2383 }
2384 
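/* Free the (basic or extended) system control area (SCA) of a VM. */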
2385 static void sca_dispose(struct kvm *kvm)
2386 {
2387 	if (kvm->arch.use_esca)
2388 		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2389 	else
2390 		free_page((unsigned long)(kvm->arch.sca));
2391 	kvm->arch.sca = NULL;
2392 }
2393 
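/*
 * Create the architecture specific parts of a VM: SCA, debug feature,
 * facility lists, CPU model, crypto setup, floating interrupt state and,
 * unless this is a ucontrol VM, the guest address space (gmap).
 */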
2394 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2395 {
2396 	gfp_t alloc_flags = GFP_KERNEL;
2397 	int i, rc;
2398 	char debug_name[16];
2399 	static unsigned long sca_offset;
2400 
2401 	rc = -EINVAL;
2402 #ifdef CONFIG_KVM_S390_UCONTROL
2403 	if (type & ~KVM_VM_S390_UCONTROL)
2404 		goto out_err;
2405 	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2406 		goto out_err;
2407 #else
2408 	if (type)
2409 		goto out_err;
2410 #endif
2411 
2412 	rc = s390_enable_sie();
2413 	if (rc)
2414 		goto out_err;
2415 
2416 	rc = -ENOMEM;
2417 
2418 	if (!sclp.has_64bscao)
2419 		alloc_flags |= GFP_DMA;
2420 	rwlock_init(&kvm->arch.sca_lock);
2421 	/* start with basic SCA */
2422 	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2423 	if (!kvm->arch.sca)
2424 		goto out_err;
2425 	mutex_lock(&kvm_lock);
2426 	sca_offset += 16;
2427 	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2428 		sca_offset = 0;
2429 	kvm->arch.sca = (struct bsca_block *)
2430 			((char *) kvm->arch.sca + sca_offset);
2431 	mutex_unlock(&kvm_lock);
2432 
2433 	sprintf(debug_name, "kvm-%u", current->pid);
2434 
2435 	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2436 	if (!kvm->arch.dbf)
2437 		goto out_err;
2438 
2439 	BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2440 	kvm->arch.sie_page2 =
2441 	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
2442 	if (!kvm->arch.sie_page2)
2443 		goto out_err;
2444 
2445 	kvm->arch.sie_page2->kvm = kvm;
2446 	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2447 
2448 	for (i = 0; i < kvm_s390_fac_size(); i++) {
2449 		kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
2450 					      (kvm_s390_fac_base[i] |
2451 					       kvm_s390_fac_ext[i]);
2452 		kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
2453 					      kvm_s390_fac_base[i];
2454 	}
2455 	kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2456 
2457 	/* we are always in czam mode - even on pre z14 machines */
2458 	set_kvm_facility(kvm->arch.model.fac_mask, 138);
2459 	set_kvm_facility(kvm->arch.model.fac_list, 138);
2460 	/* we emulate STHYI in kvm */
2461 	set_kvm_facility(kvm->arch.model.fac_mask, 74);
2462 	set_kvm_facility(kvm->arch.model.fac_list, 74);
2463 	if (MACHINE_HAS_TLB_GUEST) {
2464 		set_kvm_facility(kvm->arch.model.fac_mask, 147);
2465 		set_kvm_facility(kvm->arch.model.fac_list, 147);
2466 	}
2467 
2468 	if (css_general_characteristics.aiv && test_facility(65))
2469 		set_kvm_facility(kvm->arch.model.fac_mask, 65);
2470 
2471 	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2472 	kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2473 
2474 	kvm_s390_crypto_init(kvm);
2475 
2476 	mutex_init(&kvm->arch.float_int.ais_lock);
2477 	spin_lock_init(&kvm->arch.float_int.lock);
2478 	for (i = 0; i < FIRQ_LIST_COUNT; i++)
2479 		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2480 	init_waitqueue_head(&kvm->arch.ipte_wq);
2481 	mutex_init(&kvm->arch.ipte_mutex);
2482 
2483 	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2484 	VM_EVENT(kvm, 3, "vm created with type %lu", type);
2485 
2486 	if (type & KVM_VM_S390_UCONTROL) {
2487 		kvm->arch.gmap = NULL;
2488 		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2489 	} else {
2490 		if (sclp.hamax == U64_MAX)
2491 			kvm->arch.mem_limit = TASK_SIZE_MAX;
2492 		else
2493 			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2494 						    sclp.hamax + 1);
2495 		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2496 		if (!kvm->arch.gmap)
2497 			goto out_err;
2498 		kvm->arch.gmap->private = kvm;
2499 		kvm->arch.gmap->pfault_enabled = 0;
2500 	}
2501 
2502 	kvm->arch.use_pfmfi = sclp.has_pfmfi;
2503 	kvm->arch.use_skf = sclp.has_skey;
2504 	spin_lock_init(&kvm->arch.start_stop_lock);
2505 	kvm_s390_vsie_init(kvm);
2506 	kvm_s390_gisa_init(kvm);
2507 	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2508 
2509 	return 0;
2510 out_err:
2511 	free_page((unsigned long)kvm->arch.sie_page2);
2512 	debug_unregister(kvm->arch.dbf);
2513 	sca_dispose(kvm);
2514 	KVM_EVENT(3, "creation of vm failed: %d", rc);
2515 	return rc;
2516 }
2517 
2518 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2519 {
2520 	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2521 	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2522 	kvm_s390_clear_local_irqs(vcpu);
2523 	kvm_clear_async_pf_completion_queue(vcpu);
2524 	if (!kvm_is_ucontrol(vcpu->kvm))
2525 		sca_del_vcpu(vcpu);
2526 
2527 	if (kvm_is_ucontrol(vcpu->kvm))
2528 		gmap_remove(vcpu->arch.gmap);
2529 
2530 	if (vcpu->kvm->arch.use_cmma)
2531 		kvm_s390_vcpu_unsetup_cmma(vcpu);
2532 	free_page((unsigned long)(vcpu->arch.sie_block));
2533 
2534 	kvm_vcpu_uninit(vcpu);
2535 	kmem_cache_free(kvm_vcpu_cache, vcpu);
2536 }
2537 
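/* Destroy all vcpus of a VM and clear the vcpu array. */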
2538 static void kvm_free_vcpus(struct kvm *kvm)
2539 {
2540 	unsigned int i;
2541 	struct kvm_vcpu *vcpu;
2542 
2543 	kvm_for_each_vcpu(i, vcpu, kvm)
2544 		kvm_arch_vcpu_destroy(vcpu);
2545 
2546 	mutex_lock(&kvm->lock);
2547 	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2548 		kvm->vcpus[i] = NULL;
2549 
2550 	atomic_set(&kvm->online_vcpus, 0);
2551 	mutex_unlock(&kvm->lock);
2552 }
2553 
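/* Tear down all architecture specific parts of a VM. */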
2554 void kvm_arch_destroy_vm(struct kvm *kvm)
2555 {
2556 	kvm_free_vcpus(kvm);
2557 	sca_dispose(kvm);
2558 	debug_unregister(kvm->arch.dbf);
2559 	kvm_s390_gisa_destroy(kvm);
2560 	free_page((unsigned long)kvm->arch.sie_page2);
2561 	if (!kvm_is_ucontrol(kvm))
2562 		gmap_remove(kvm->arch.gmap);
2563 	kvm_s390_destroy_adapters(kvm);
2564 	kvm_s390_clear_float_irqs(kvm);
2565 	kvm_s390_vsie_destroy(kvm);
2566 	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2567 }
2568 
2569 /* Section: vcpu related */
2570 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2571 {
2572 	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2573 	if (!vcpu->arch.gmap)
2574 		return -ENOMEM;
2575 	vcpu->arch.gmap->private = vcpu->kvm;
2576 
2577 	return 0;
2578 }
2579 
2580 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2581 {
2582 	if (!kvm_s390_use_sca_entries())
2583 		return;
2584 	read_lock(&vcpu->kvm->arch.sca_lock);
2585 	if (vcpu->kvm->arch.use_esca) {
2586 		struct esca_block *sca = vcpu->kvm->arch.sca;
2587 
2588 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2589 		sca->cpu[vcpu->vcpu_id].sda = 0;
2590 	} else {
2591 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2592 
2593 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2594 		sca->cpu[vcpu->vcpu_id].sda = 0;
2595 	}
2596 	read_unlock(&vcpu->kvm->arch.sca_lock);
2597 }
2598 
2599 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2600 {
2601 	if (!kvm_s390_use_sca_entries()) {
2602 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2603 
2604 		/* we still need the basic sca for the ipte control */
2605 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2606 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2607 		return;
2608 	}
2609 	read_lock(&vcpu->kvm->arch.sca_lock);
2610 	if (vcpu->kvm->arch.use_esca) {
2611 		struct esca_block *sca = vcpu->kvm->arch.sca;
2612 
2613 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2614 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2615 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2616 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2617 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2618 	} else {
2619 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2620 
2621 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2622 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2623 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2624 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2625 	}
2626 	read_unlock(&vcpu->kvm->arch.sca_lock);
2627 }
2628 
2629 /* Basic SCA to Extended SCA data copy routines */
2630 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2631 {
2632 	d->sda = s->sda;
2633 	d->sigp_ctrl.c = s->sigp_ctrl.c;
2634 	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2635 }
2636 
2637 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2638 {
2639 	int i;
2640 
2641 	d->ipte_control = s->ipte_control;
2642 	d->mcn[0] = s->mcn;
2643 	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2644 		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2645 }
2646 
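/*
 * Replace the basic SCA by an extended SCA, rewiring all existing vcpus to
 * the new block while they are blocked from entering SIE.
 */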
2647 static int sca_switch_to_extended(struct kvm *kvm)
2648 {
2649 	struct bsca_block *old_sca = kvm->arch.sca;
2650 	struct esca_block *new_sca;
2651 	struct kvm_vcpu *vcpu;
2652 	unsigned int vcpu_idx;
2653 	u32 scaol, scaoh;
2654 
2655 	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2656 	if (!new_sca)
2657 		return -ENOMEM;
2658 
2659 	scaoh = (u32)((u64)(new_sca) >> 32);
2660 	scaol = (u32)(u64)(new_sca) & ~0x3fU;
2661 
2662 	kvm_s390_vcpu_block_all(kvm);
2663 	write_lock(&kvm->arch.sca_lock);
2664 
2665 	sca_copy_b_to_e(new_sca, old_sca);
2666 
2667 	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2668 		vcpu->arch.sie_block->scaoh = scaoh;
2669 		vcpu->arch.sie_block->scaol = scaol;
2670 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2671 	}
2672 	kvm->arch.sca = new_sca;
2673 	kvm->arch.use_esca = 1;
2674 
2675 	write_unlock(&kvm->arch.sca_lock);
2676 	kvm_s390_vcpu_unblock_all(kvm);
2677 
2678 	free_page((unsigned long)old_sca);
2679 
2680 	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2681 		 old_sca, kvm->arch.sca);
2682 	return 0;
2683 }
2684 
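/*
 * Check whether a vcpu with the given id fits into the SCA, switching from
 * the basic to the extended SCA if necessary and possible.
 */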
2685 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2686 {
2687 	int rc;
2688 
2689 	if (!kvm_s390_use_sca_entries()) {
2690 		if (id < KVM_MAX_VCPUS)
2691 			return true;
2692 		return false;
2693 	}
2694 	if (id < KVM_S390_BSCA_CPU_SLOTS)
2695 		return true;
2696 	if (!sclp.has_esca || !sclp.has_64bscao)
2697 		return false;
2698 
2699 	mutex_lock(&kvm->lock);
2700 	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2701 	mutex_unlock(&kvm->lock);
2702 
2703 	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2704 }
2705 
2706 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2707 {
2708 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2709 	kvm_clear_async_pf_completion_queue(vcpu);
2710 	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2711 				    KVM_SYNC_GPRS |
2712 				    KVM_SYNC_ACRS |
2713 				    KVM_SYNC_CRS |
2714 				    KVM_SYNC_ARCH0 |
2715 				    KVM_SYNC_PFAULT;
2716 	kvm_s390_set_prefix(vcpu, 0);
2717 	if (test_kvm_facility(vcpu->kvm, 64))
2718 		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2719 	if (test_kvm_facility(vcpu->kvm, 82))
2720 		vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
2721 	if (test_kvm_facility(vcpu->kvm, 133))
2722 		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2723 	if (test_kvm_facility(vcpu->kvm, 156))
2724 		vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
2725 	/* fprs can be synchronized via vrs, even if the guest has no vx. With
2726 	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2727 	 */
2728 	if (MACHINE_HAS_VX)
2729 		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2730 	else
2731 		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2732 
2733 	if (kvm_is_ucontrol(vcpu->kvm))
2734 		return __kvm_ucontrol_vcpu_init(vcpu);
2735 
2736 	return 0;
2737 }
2738 
2739 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2740 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2741 {
2742 	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2743 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2744 	vcpu->arch.cputm_start = get_tod_clock_fast();
2745 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2746 }
2747 
2748 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2749 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2750 {
2751 	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2752 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2753 	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2754 	vcpu->arch.cputm_start = 0;
2755 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2756 }
2757 
2758 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2759 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2760 {
2761 	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2762 	vcpu->arch.cputm_enabled = true;
2763 	__start_cpu_timer_accounting(vcpu);
2764 }
2765 
2766 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2767 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2768 {
2769 	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2770 	__stop_cpu_timer_accounting(vcpu);
2771 	vcpu->arch.cputm_enabled = false;
2772 }
2773 
2774 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2775 {
2776 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2777 	__enable_cpu_timer_accounting(vcpu);
2778 	preempt_enable();
2779 }
2780 
2781 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2782 {
2783 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2784 	__disable_cpu_timer_accounting(vcpu);
2785 	preempt_enable();
2786 }
2787 
2788 /* set the cpu timer - may only be called from the VCPU thread itself */
2789 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2790 {
2791 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2792 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2793 	if (vcpu->arch.cputm_enabled)
2794 		vcpu->arch.cputm_start = get_tod_clock_fast();
2795 	vcpu->arch.sie_block->cputm = cputm;
2796 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2797 	preempt_enable();
2798 }
2799 
2800 /* update and get the cpu timer - can also be called from other VCPU threads */
2801 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2802 {
2803 	unsigned int seq;
2804 	__u64 value;
2805 
2806 	if (unlikely(!vcpu->arch.cputm_enabled))
2807 		return vcpu->arch.sie_block->cputm;
2808 
2809 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2810 	do {
2811 		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2812 		/*
2813 		 * If the writer would ever execute a read in the critical
2814 		 * section, e.g. in irq context, we have a deadlock.
2815 		 */
2816 		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2817 		value = vcpu->arch.sie_block->cputm;
2818 		/* if cputm_start is 0, accounting is being started/stopped */
2819 		if (likely(vcpu->arch.cputm_start))
2820 			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2821 	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2822 	preempt_enable();
2823 	return value;
2824 }
2825 
2826 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2827 {
2829 	gmap_enable(vcpu->arch.enabled_gmap);
2830 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
2831 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2832 		__start_cpu_timer_accounting(vcpu);
2833 	vcpu->cpu = cpu;
2834 }
2835 
2836 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2837 {
2838 	vcpu->cpu = -1;
2839 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2840 		__stop_cpu_timer_accounting(vcpu);
2841 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
2842 	vcpu->arch.enabled_gmap = gmap_get_enabled();
2843 	gmap_disable(vcpu->arch.enabled_gmap);
2845 }
2846 
2847 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2848 {
2849 	/* this equals initial cpu reset in pop, but we don't switch to ESA */
2850 	vcpu->arch.sie_block->gpsw.mask = 0UL;
2851 	vcpu->arch.sie_block->gpsw.addr = 0UL;
2852 	kvm_s390_set_prefix(vcpu, 0);
2853 	kvm_s390_set_cpu_timer(vcpu, 0);
2854 	vcpu->arch.sie_block->ckc       = 0UL;
2855 	vcpu->arch.sie_block->todpr     = 0;
2856 	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2857 	vcpu->arch.sie_block->gcr[0]  = CR0_UNUSED_56 |
2858 					CR0_INTERRUPT_KEY_SUBMASK |
2859 					CR0_MEASUREMENT_ALERT_SUBMASK;
2860 	vcpu->arch.sie_block->gcr[14] = CR14_UNUSED_32 |
2861 					CR14_UNUSED_33 |
2862 					CR14_EXTERNAL_DAMAGE_SUBMASK;
2863 	/* make sure the new fpc will be lazily loaded */
2864 	save_fpu_regs();
2865 	current->thread.fpu.fpc = 0;
2866 	vcpu->arch.sie_block->gbea = 1;
2867 	vcpu->arch.sie_block->pp = 0;
2868 	vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
2869 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2870 	kvm_clear_async_pf_completion_queue(vcpu);
2871 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2872 		kvm_s390_vcpu_stop(vcpu);
2873 	kvm_s390_clear_local_irqs(vcpu);
2874 }
2875 
2876 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2877 {
2878 	mutex_lock(&vcpu->kvm->lock);
2879 	preempt_disable();
2880 	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2881 	vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
2882 	preempt_enable();
2883 	mutex_unlock(&vcpu->kvm->lock);
2884 	if (!kvm_is_ucontrol(vcpu->kvm)) {
2885 		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2886 		sca_add_vcpu(vcpu);
2887 	}
2888 	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2889 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2890 	/* make vcpu_load load the right gmap on the first trigger */
2891 	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2892 }
2893 
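/*
 * Check whether a PCKMO subfunction is available in both the guest CPU
 * model and on the host.
 */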
2894 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
2895 {
2896 	if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
2897 	    test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
2898 		return true;
2899 	return false;
2900 }
2901 
2902 static bool kvm_has_pckmo_ecc(struct kvm *kvm)
2903 {
2904 	/* At least one ECC subfunction must be present */
2905 	return kvm_has_pckmo_subfunc(kvm, 32) ||
2906 	       kvm_has_pckmo_subfunc(kvm, 33) ||
2907 	       kvm_has_pckmo_subfunc(kvm, 34) ||
2908 	       kvm_has_pckmo_subfunc(kvm, 40) ||
2909 	       kvm_has_pckmo_subfunc(kvm, 41);
2911 }
2912 
2913 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2914 {
2915 	/*
2916 	 * If the AP instructions are not being interpreted and the MSAX3
2917 	 * facility is not configured for the guest, there is nothing to set up.
2918 	 */
2919 	if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
2920 		return;
2921 
2922 	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2923 	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2924 	vcpu->arch.sie_block->eca &= ~ECA_APIE;
2925 	vcpu->arch.sie_block->ecd &= ~ECD_ECC;
2926 
2927 	if (vcpu->kvm->arch.crypto.apie)
2928 		vcpu->arch.sie_block->eca |= ECA_APIE;
2929 
2930 	/* Set up protected key support */
2931 	if (vcpu->kvm->arch.crypto.aes_kw) {
2932 		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2933 		/* ecc is also wrapped with AES key */
2934 		if (kvm_has_pckmo_ecc(vcpu->kvm))
2935 			vcpu->arch.sie_block->ecd |= ECD_ECC;
2936 	}
2937 
2938 	if (vcpu->kvm->arch.crypto.dea_kw)
2939 		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2940 }
2941 
2942 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2943 {
2944 	free_page(vcpu->arch.sie_block->cbrlo);
2945 	vcpu->arch.sie_block->cbrlo = 0;
2946 }
2947 
2948 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2949 {
2950 	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2951 	if (!vcpu->arch.sie_block->cbrlo)
2952 		return -ENOMEM;
2953 	return 0;
2954 }
2955 
2956 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2957 {
2958 	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2959 
2960 	vcpu->arch.sie_block->ibc = model->ibc;
2961 	if (test_kvm_facility(vcpu->kvm, 7))
2962 		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2963 }
2964 
2965 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2966 {
2967 	int rc = 0;
2968 
2969 	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2970 						    CPUSTAT_SM |
2971 						    CPUSTAT_STOPPED);
2972 
2973 	if (test_kvm_facility(vcpu->kvm, 78))
2974 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
2975 	else if (test_kvm_facility(vcpu->kvm, 8))
2976 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
2977 
2978 	kvm_s390_vcpu_setup_model(vcpu);
2979 
2980 	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2981 	if (MACHINE_HAS_ESOP)
2982 		vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2983 	if (test_kvm_facility(vcpu->kvm, 9))
2984 		vcpu->arch.sie_block->ecb |= ECB_SRSI;
2985 	if (test_kvm_facility(vcpu->kvm, 73))
2986 		vcpu->arch.sie_block->ecb |= ECB_TE;
2987 
2988 	if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
2989 		vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2990 	if (test_kvm_facility(vcpu->kvm, 130))
2991 		vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2992 	vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2993 	if (sclp.has_cei)
2994 		vcpu->arch.sie_block->eca |= ECA_CEI;
2995 	if (sclp.has_ib)
2996 		vcpu->arch.sie_block->eca |= ECA_IB;
2997 	if (sclp.has_siif)
2998 		vcpu->arch.sie_block->eca |= ECA_SII;
2999 	if (sclp.has_sigpif)
3000 		vcpu->arch.sie_block->eca |= ECA_SIGPI;
3001 	if (test_kvm_facility(vcpu->kvm, 129)) {
3002 		vcpu->arch.sie_block->eca |= ECA_VX;
3003 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3004 	}
3005 	if (test_kvm_facility(vcpu->kvm, 139))
3006 		vcpu->arch.sie_block->ecd |= ECD_MEF;
3007 	if (test_kvm_facility(vcpu->kvm, 156))
3008 		vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3009 	if (vcpu->arch.sie_block->gd) {
3010 		vcpu->arch.sie_block->eca |= ECA_AIV;
3011 		VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3012 			   vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3013 	}
3014 	vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
3015 					| SDNXC;
3016 	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
3017 
3018 	if (sclp.has_kss)
3019 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3020 	else
3021 		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3022 
3023 	if (vcpu->kvm->arch.use_cmma) {
3024 		rc = kvm_s390_vcpu_setup_cmma(vcpu);
3025 		if (rc)
3026 			return rc;
3027 	}
3028 	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3029 	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3030 
3031 	vcpu->arch.sie_block->hpid = HPID_KVM;
3032 
3033 	kvm_s390_vcpu_crypto_setup(vcpu);
3034 
3035 	return rc;
3036 }
3037 
3038 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
3039 				      unsigned int id)
3040 {
3041 	struct kvm_vcpu *vcpu;
3042 	struct sie_page *sie_page;
3043 	int rc = -EINVAL;
3044 
3045 	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3046 		goto out;
3047 
3048 	rc = -ENOMEM;
3049 
3050 	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
3051 	if (!vcpu)
3052 		goto out;
3053 
3054 	BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3055 	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
3056 	if (!sie_page)
3057 		goto out_free_cpu;
3058 
3059 	vcpu->arch.sie_block = &sie_page->sie_block;
3060 	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
3061 
3062 	/* the real guest size will always be smaller than msl */
3063 	vcpu->arch.sie_block->mso = 0;
3064 	vcpu->arch.sie_block->msl = sclp.hamax;
3065 
3066 	vcpu->arch.sie_block->icpua = id;
3067 	spin_lock_init(&vcpu->arch.local_int.lock);
3068 	vcpu->arch.sie_block->gd = (u32)(u64)kvm->arch.gisa_int.origin;
3069 	if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
3070 		vcpu->arch.sie_block->gd |= GISA_FORMAT1;
3071 	seqcount_init(&vcpu->arch.cputm_seqcount);
3072 
3073 	rc = kvm_vcpu_init(vcpu, kvm, id);
3074 	if (rc)
3075 		goto out_free_sie_block;
3076 	VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
3077 		 vcpu->arch.sie_block);
3078 	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
3079 
3080 	return vcpu;
3081 out_free_sie_block:
3082 	free_page((unsigned long)(vcpu->arch.sie_block));
3083 out_free_cpu:
3084 	kmem_cache_free(kvm_vcpu_cache, vcpu);
3085 out:
3086 	return ERR_PTR(rc);
3087 }
3088 
3089 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3090 {
3091 	return kvm_s390_vcpu_has_irq(vcpu, 0);
3092 }
3093 
3094 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3095 {
3096 	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3097 }
3098 
3099 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3100 {
3101 	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3102 	exit_sie(vcpu);
3103 }
3104 
3105 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3106 {
3107 	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3108 }
3109 
3110 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3111 {
3112 	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3113 	exit_sie(vcpu);
3114 }
3115 
3116 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3117 {
3118 	return atomic_read(&vcpu->arch.sie_block->prog20) &
3119 	       (PROG_BLOCK_SIE | PROG_REQUEST);
3120 }
3121 
3122 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3123 {
3124 	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3125 }
3126 
3127 /*
3128  * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3129  * If the CPU is not running (e.g. waiting as idle) the function will
3130  * return immediately.
 */
3131 void exit_sie(struct kvm_vcpu *vcpu)
3132 {
3133 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3134 	kvm_s390_vsie_kick(vcpu);
3135 	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3136 		cpu_relax();
3137 }
3138 
3139 /* Kick a guest cpu out of SIE to process a request synchronously */
3140 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
3141 {
3142 	kvm_make_request(req, vcpu);
3143 	kvm_s390_vcpu_request(vcpu);
3144 }
3145 
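/*
 * gmap invalidation notifier: if the invalidated range overlaps one of the
 * two prefix pages of a vcpu, request an MMU reload for that vcpu so the
 * prefix notification gets re-armed.
 */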
3146 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3147 			      unsigned long end)
3148 {
3149 	struct kvm *kvm = gmap->private;
3150 	struct kvm_vcpu *vcpu;
3151 	unsigned long prefix;
3152 	int i;
3153 
3154 	if (gmap_is_shadow(gmap))
3155 		return;
3156 	if (start >= 1UL << 31)
3157 		/* We are only interested in prefix pages */
3158 		return;
3159 	kvm_for_each_vcpu(i, vcpu, kvm) {
3160 		/* match against both prefix pages */
3161 		prefix = kvm_s390_get_prefix(vcpu);
3162 		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3163 			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3164 				   start, end);
3165 			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
3166 		}
3167 	}
3168 }
3169 
3170 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3171 {
3172 	/* do not poll with more than halt_poll_max_steal percent of steal time */
3173 	if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3174 	    halt_poll_max_steal) {
3175 		vcpu->stat.halt_no_poll_steal++;
3176 		return true;
3177 	}
3178 	return false;
3179 }
3180 
3181 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
3182 {
3183 	/* kvm common code refers to this, but never calls it */
3184 	BUG();
3185 	return 0;
3186 }
3187 
3188 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3189 					   struct kvm_one_reg *reg)
3190 {
3191 	int r = -EINVAL;
3192 
3193 	switch (reg->id) {
3194 	case KVM_REG_S390_TODPR:
3195 		r = put_user(vcpu->arch.sie_block->todpr,
3196 			     (u32 __user *)reg->addr);
3197 		break;
3198 	case KVM_REG_S390_EPOCHDIFF:
3199 		r = put_user(vcpu->arch.sie_block->epoch,
3200 			     (u64 __user *)reg->addr);
3201 		break;
3202 	case KVM_REG_S390_CPU_TIMER:
3203 		r = put_user(kvm_s390_get_cpu_timer(vcpu),
3204 			     (u64 __user *)reg->addr);
3205 		break;
3206 	case KVM_REG_S390_CLOCK_COMP:
3207 		r = put_user(vcpu->arch.sie_block->ckc,
3208 			     (u64 __user *)reg->addr);
3209 		break;
3210 	case KVM_REG_S390_PFTOKEN:
3211 		r = put_user(vcpu->arch.pfault_token,
3212 			     (u64 __user *)reg->addr);
3213 		break;
3214 	case KVM_REG_S390_PFCOMPARE:
3215 		r = put_user(vcpu->arch.pfault_compare,
3216 			     (u64 __user *)reg->addr);
3217 		break;
3218 	case KVM_REG_S390_PFSELECT:
3219 		r = put_user(vcpu->arch.pfault_select,
3220 			     (u64 __user *)reg->addr);
3221 		break;
3222 	case KVM_REG_S390_PP:
3223 		r = put_user(vcpu->arch.sie_block->pp,
3224 			     (u64 __user *)reg->addr);
3225 		break;
3226 	case KVM_REG_S390_GBEA:
3227 		r = put_user(vcpu->arch.sie_block->gbea,
3228 			     (u64 __user *)reg->addr);
3229 		break;
3230 	default:
3231 		break;
3232 	}
3233 
3234 	return r;
3235 }
3236 
3237 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3238 					   struct kvm_one_reg *reg)
3239 {
3240 	int r = -EINVAL;
3241 	__u64 val;
3242 
3243 	switch (reg->id) {
3244 	case KVM_REG_S390_TODPR:
3245 		r = get_user(vcpu->arch.sie_block->todpr,
3246 			     (u32 __user *)reg->addr);
3247 		break;
3248 	case KVM_REG_S390_EPOCHDIFF:
3249 		r = get_user(vcpu->arch.sie_block->epoch,
3250 			     (u64 __user *)reg->addr);
3251 		break;
3252 	case KVM_REG_S390_CPU_TIMER:
3253 		r = get_user(val, (u64 __user *)reg->addr);
3254 		if (!r)
3255 			kvm_s390_set_cpu_timer(vcpu, val);
3256 		break;
3257 	case KVM_REG_S390_CLOCK_COMP:
3258 		r = get_user(vcpu->arch.sie_block->ckc,
3259 			     (u64 __user *)reg->addr);
3260 		break;
3261 	case KVM_REG_S390_PFTOKEN:
3262 		r = get_user(vcpu->arch.pfault_token,
3263 			     (u64 __user *)reg->addr);
3264 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3265 			kvm_clear_async_pf_completion_queue(vcpu);
3266 		break;
3267 	case KVM_REG_S390_PFCOMPARE:
3268 		r = get_user(vcpu->arch.pfault_compare,
3269 			     (u64 __user *)reg->addr);
3270 		break;
3271 	case KVM_REG_S390_PFSELECT:
3272 		r = get_user(vcpu->arch.pfault_select,
3273 			     (u64 __user *)reg->addr);
3274 		break;
3275 	case KVM_REG_S390_PP:
3276 		r = get_user(vcpu->arch.sie_block->pp,
3277 			     (u64 __user *)reg->addr);
3278 		break;
3279 	case KVM_REG_S390_GBEA:
3280 		r = get_user(vcpu->arch.sie_block->gbea,
3281 			     (u64 __user *)reg->addr);
3282 		break;
3283 	default:
3284 		break;
3285 	}
3286 
3287 	return r;
3288 }
3289 
3290 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3291 {
3292 	kvm_s390_vcpu_initial_reset(vcpu);
3293 	return 0;
3294 }
3295 
3296 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3297 {
3298 	vcpu_load(vcpu);
3299 	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3300 	vcpu_put(vcpu);
3301 	return 0;
3302 }
3303 
3304 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3305 {
3306 	vcpu_load(vcpu);
3307 	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3308 	vcpu_put(vcpu);
3309 	return 0;
3310 }
3311 
3312 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3313 				  struct kvm_sregs *sregs)
3314 {
3315 	vcpu_load(vcpu);
3316 
3317 	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3318 	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3319 
3320 	vcpu_put(vcpu);
3321 	return 0;
3322 }
3323 
3324 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3325 				  struct kvm_sregs *sregs)
3326 {
3327 	vcpu_load(vcpu);
3328 
3329 	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3330 	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3331 
3332 	vcpu_put(vcpu);
3333 	return 0;
3334 }
3335 
3336 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3337 {
3338 	int ret = 0;
3339 
3340 	vcpu_load(vcpu);
3341 
3342 	if (test_fp_ctl(fpu->fpc)) {
3343 		ret = -EINVAL;
3344 		goto out;
3345 	}
3346 	vcpu->run->s.regs.fpc = fpu->fpc;
3347 	if (MACHINE_HAS_VX)
3348 		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3349 				 (freg_t *) fpu->fprs);
3350 	else
3351 		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3352 
3353 out:
3354 	vcpu_put(vcpu);
3355 	return ret;
3356 }
3357 
3358 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3359 {
3360 	vcpu_load(vcpu);
3361 
3362 	/* make sure we have the latest values */
3363 	save_fpu_regs();
3364 	if (MACHINE_HAS_VX)
3365 		convert_vx_to_fp((freg_t *) fpu->fprs,
3366 				 (__vector128 *) vcpu->run->s.regs.vrs);
3367 	else
3368 		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3369 	fpu->fpc = vcpu->run->s.regs.fpc;
3370 
3371 	vcpu_put(vcpu);
3372 	return 0;
3373 }
3374 
3375 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3376 {
3377 	int rc = 0;
3378 
3379 	if (!is_vcpu_stopped(vcpu))
3380 		rc = -EBUSY;
3381 	else {
3382 		vcpu->run->psw_mask = psw.mask;
3383 		vcpu->run->psw_addr = psw.addr;
3384 	}
3385 	return rc;
3386 }
3387 
3388 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3389 				  struct kvm_translation *tr)
3390 {
3391 	return -EINVAL; /* not implemented yet */
3392 }
3393 
3394 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3395 			      KVM_GUESTDBG_USE_HW_BP | \
3396 			      KVM_GUESTDBG_ENABLE)
3397 
3398 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3399 					struct kvm_guest_debug *dbg)
3400 {
3401 	int rc = 0;
3402 
3403 	vcpu_load(vcpu);
3404 
3405 	vcpu->guest_debug = 0;
3406 	kvm_s390_clear_bp_data(vcpu);
3407 
3408 	if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3409 		rc = -EINVAL;
3410 		goto out;
3411 	}
3412 	if (!sclp.has_gpere) {
3413 		rc = -EINVAL;
3414 		goto out;
3415 	}
3416 
3417 	if (dbg->control & KVM_GUESTDBG_ENABLE) {
3418 		vcpu->guest_debug = dbg->control;
3419 		/* enforce guest PER */
3420 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3421 
3422 		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3423 			rc = kvm_s390_import_bp_data(vcpu, dbg);
3424 	} else {
3425 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3426 		vcpu->arch.guestdbg.last_bp = 0;
3427 	}
3428 
3429 	if (rc) {
3430 		vcpu->guest_debug = 0;
3431 		kvm_s390_clear_bp_data(vcpu);
3432 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3433 	}
3434 
3435 out:
3436 	vcpu_put(vcpu);
3437 	return rc;
3438 }
3439 
3440 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3441 				    struct kvm_mp_state *mp_state)
3442 {
3443 	int ret;
3444 
3445 	vcpu_load(vcpu);
3446 
3447 	/* CHECK_STOP and LOAD are not supported yet */
3448 	ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3449 				      KVM_MP_STATE_OPERATING;
3450 
3451 	vcpu_put(vcpu);
3452 	return ret;
3453 }
3454 
3455 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3456 				    struct kvm_mp_state *mp_state)
3457 {
3458 	int rc = 0;
3459 
3460 	vcpu_load(vcpu);
3461 
3462 	/* user space knows about this interface - let it control the state */
3463 	vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3464 
3465 	switch (mp_state->mp_state) {
3466 	case KVM_MP_STATE_STOPPED:
3467 		kvm_s390_vcpu_stop(vcpu);
3468 		break;
3469 	case KVM_MP_STATE_OPERATING:
3470 		kvm_s390_vcpu_start(vcpu);
3471 		break;
3472 	case KVM_MP_STATE_LOAD:
3473 	case KVM_MP_STATE_CHECK_STOP:
3474 		/* fall through - CHECK_STOP and LOAD are not supported yet */
3475 	default:
3476 		rc = -ENXIO;
3477 	}
3478 
3479 	vcpu_put(vcpu);
3480 	return rc;
3481 }
3482 
3483 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3484 {
3485 	return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3486 }
3487 
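/*
 * Process all vcpu requests that are pending before (re)entering SIE.
 * Each handled request restarts the loop via "goto retry" so that
 * requests raised while handling another one are not missed.  Returns
 * 0 when no requests remain, or a negative error to abort the run loop.
 */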
3488 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3489 {
3490 retry:
3491 	kvm_s390_vcpu_request_handled(vcpu);
3492 	if (!kvm_request_pending(vcpu))
3493 		return 0;
3494 	/*
3495 	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
3496 	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3497 	 * This ensures that the ipte instruction for this request has
3498 	 * already finished. We might race against a second unmapper that
3499 	 * wants to set the blocking bit. Let's just retry the request loop.
3500 	 */
3501 	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3502 		int rc;
3503 		rc = gmap_mprotect_notify(vcpu->arch.gmap,
3504 					  kvm_s390_get_prefix(vcpu),
3505 					  PAGE_SIZE * 2, PROT_WRITE);
3506 		if (rc) {
3507 			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3508 			return rc;
3509 		}
3510 		goto retry;
3511 	}
3512 
3513 	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3514 		vcpu->arch.sie_block->ihcpu = 0xffff;
3515 		goto retry;
3516 	}
3517 
3518 	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3519 		if (!ibs_enabled(vcpu)) {
3520 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3521 			kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3522 		}
3523 		goto retry;
3524 	}
3525 
3526 	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3527 		if (ibs_enabled(vcpu)) {
3528 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3529 			kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3530 		}
3531 		goto retry;
3532 	}
3533 
3534 	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3535 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3536 		goto retry;
3537 	}
3538 
3539 	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3540 		/*
3541 		 * Disable CMM virtualization; we will emulate the ESSA
3542 		 * instruction manually, in order to provide additional
3543 		 * functionality needed for live migration.
3544 		 */
3545 		vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3546 		goto retry;
3547 	}
3548 
3549 	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3550 		/*
3551 		 * Re-enable CMM virtualization if CMMA is available and
3552 		 * CMM has been used.
3553 		 */
3554 		if ((vcpu->kvm->arch.use_cmma) &&
3555 		    (vcpu->kvm->mm->context.uses_cmm))
3556 			vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3557 		goto retry;
3558 	}
3559 
3560 	/* nothing to do, just clear the request */
3561 	kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3562 	/* we left the vsie handler, nothing to do, just clear the request */
3563 	kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
3564 
3565 	return 0;
3566 }
3567 
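/*
 * Set the guest TOD epoch (and, with the multiple-epoch facility 139,
 * the epoch index) relative to the current host TOD clock and propagate
 * it to all vcpus while they are blocked, so every SIE block sees the
 * same epoch.  Runs under kvm->lock with preemption disabled so the
 * host TOD snapshot stays consistent.
 */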
3568 void kvm_s390_set_tod_clock(struct kvm *kvm,
3569 			    const struct kvm_s390_vm_tod_clock *gtod)
3570 {
3571 	struct kvm_vcpu *vcpu;
3572 	struct kvm_s390_tod_clock_ext htod;
3573 	int i;
3574 
3575 	mutex_lock(&kvm->lock);
3576 	preempt_disable();
3577 
3578 	get_tod_clock_ext((char *)&htod);
3579 
3580 	kvm->arch.epoch = gtod->tod - htod.tod;
3581 	kvm->arch.epdx = 0;
3582 	if (test_kvm_facility(kvm, 139)) {
3583 		kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
3584 		if (kvm->arch.epoch > gtod->tod)
3585 			kvm->arch.epdx -= 1;
3586 	}
3587 
3588 	kvm_s390_vcpu_block_all(kvm);
3589 	kvm_for_each_vcpu(i, vcpu, kvm) {
3590 		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3591 		vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
3592 	}
3593 
3594 	kvm_s390_vcpu_unblock_all(kvm);
3595 	preempt_enable();
3596 	mutex_unlock(&kvm->lock);
3597 }
3598 
3599 /**
3600  * kvm_arch_fault_in_page - fault-in guest page if necessary
3601  * @vcpu: The corresponding virtual cpu
3602  * @gpa: Guest physical address
3603  * @writable: Whether the page should be writable or not
3604  *
3605  * Make sure that a guest page has been faulted-in on the host.
3606  *
3607  * Return: Zero on success, negative error code otherwise.
3608  */
3609 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3610 {
3611 	return gmap_fault(vcpu->arch.gmap, gpa,
3612 			  writable ? FAULT_FLAG_WRITE : 0);
3613 }
3614 
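/*
 * Inject the pfault handshake tokens: PFAULT_INIT is delivered as a
 * vcpu-local interrupt when async handling of a fault starts,
 * PFAULT_DONE as a floating interrupt once the page is available.
 */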
3615 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3616 				      unsigned long token)
3617 {
3618 	struct kvm_s390_interrupt inti;
3619 	struct kvm_s390_irq irq;
3620 
3621 	if (start_token) {
3622 		irq.u.ext.ext_params2 = token;
3623 		irq.type = KVM_S390_INT_PFAULT_INIT;
3624 		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3625 	} else {
3626 		inti.type = KVM_S390_INT_PFAULT_DONE;
3627 		inti.parm64 = token;
3628 		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3629 	}
3630 }
3631 
3632 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3633 				     struct kvm_async_pf *work)
3634 {
3635 	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3636 	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3637 }
3638 
3639 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3640 				 struct kvm_async_pf *work)
3641 {
3642 	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3643 	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3644 }
3645 
3646 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3647 			       struct kvm_async_pf *work)
3648 {
3649 	/* s390 will always inject the page directly */
3650 }
3651 
3652 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3653 {
3654 	/*
3655 	 * s390 will always inject the page directly,
3656 	 * but we still want check_async_completion to clean up
3657 	 */
3658 	return true;
3659 }
3660 
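/*
 * Decide whether the current host fault may be handled asynchronously
 * via the pfault handshake.  Returns 0 (handle synchronously) unless a
 * pfault token is set up, the PSW matches the pfault select/compare
 * mask, external interrupts and the service-signal subclass are
 * enabled, no interrupt is already pending and pfault is enabled for
 * the gmap.
 */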
3661 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3662 {
3663 	hva_t hva;
3664 	struct kvm_arch_async_pf arch;
3665 	int rc;
3666 
3667 	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3668 		return 0;
3669 	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3670 	    vcpu->arch.pfault_compare)
3671 		return 0;
3672 	if (psw_extint_disabled(vcpu))
3673 		return 0;
3674 	if (kvm_s390_vcpu_has_irq(vcpu, 0))
3675 		return 0;
3676 	if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
3677 		return 0;
3678 	if (!vcpu->arch.gmap->pfault_enabled)
3679 		return 0;
3680 
3681 	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3682 	hva += current->thread.gmap_addr & ~PAGE_MASK;
3683 	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3684 		return 0;
3685 
3686 	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3687 	return rc;
3688 }
3689 
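/*
 * Per-iteration preparation before entering SIE: complete async pfault
 * housekeeping, propagate gprs 14/15 into the SIE block, give the
 * scheduler and machine-check handler a chance to run, deliver pending
 * interrupts (non-ucontrol guests only), process vcpu requests and set
 * up PER handling for guest debugging.
 */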
3690 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3691 {
3692 	int rc, cpuflags;
3693 
3694 	/*
3695 	 * On s390 notifications for arriving pages will be delivered directly
3696 	 * to the guest but the housekeeping for completed pfaults is
3697 	 * handled outside the worker.
3698 	 */
3699 	kvm_check_async_pf_completion(vcpu);
3700 
3701 	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3702 	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3703 
3704 	if (need_resched())
3705 		schedule();
3706 
3707 	if (test_cpu_flag(CIF_MCCK_PENDING))
3708 		s390_handle_mcck();
3709 
3710 	if (!kvm_is_ucontrol(vcpu->kvm)) {
3711 		rc = kvm_s390_deliver_pending_interrupts(vcpu);
3712 		if (rc)
3713 			return rc;
3714 	}
3715 
3716 	rc = kvm_s390_handle_requests(vcpu);
3717 	if (rc)
3718 		return rc;
3719 
3720 	if (guestdbg_enabled(vcpu)) {
3721 		kvm_s390_backup_guest_per_regs(vcpu);
3722 		kvm_s390_patch_guest_per_regs(vcpu);
3723 	}
3724 
3725 	clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.gisa_int.kicked_mask);
3726 
3727 	vcpu->arch.sie_block->icptcode = 0;
3728 	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3729 	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3730 	trace_kvm_s390_sie_enter(vcpu, cpuflags);
3731 
3732 	return 0;
3733 }
3734 
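/*
 * Last resort for a fault taken inside SIE that could not be resolved
 * otherwise: inject an addressing exception into the guest.  See the
 * comment in the function body for why the PSW has to be forwarded by
 * hand first.
 */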
3735 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3736 {
3737 	struct kvm_s390_pgm_info pgm_info = {
3738 		.code = PGM_ADDRESSING,
3739 	};
3740 	u8 opcode, ilen;
3741 	int rc;
3742 
3743 	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3744 	trace_kvm_s390_sie_fault(vcpu);
3745 
3746 	/*
3747 	 * We want to inject an addressing exception, which is defined as a
3748 	 * suppressing or terminating exception. However, since we came here
3749 	 * by a DAT access exception, the PSW still points to the faulting
3750 	 * instruction since DAT exceptions are nullifying. So we've got
3751 	 * to look up the current opcode to get the length of the instruction
3752 	 * to be able to forward the PSW.
3753 	 */
3754 	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3755 	ilen = insn_length(opcode);
3756 	if (rc < 0) {
3757 		return rc;
3758 	} else if (rc) {
3759 		/* Instruction-Fetching Exceptions - we can't detect the ilen.
3760 		 * Forward by arbitrary ilc, injection will take care of
3761 		 * nullification if necessary.
3762 		 */
3763 		pgm_info = vcpu->arch.pgm;
3764 		ilen = 4;
3765 	}
3766 	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3767 	kvm_s390_forward_psw(vcpu, ilen);
3768 	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3769 }
3770 
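/*
 * Post-SIE processing: restore PER and gpr 14/15 state, reinject host
 * machine checks signalled via -EINTR, handle intercepts, and either
 * return to the run loop (0), report an exit to userspace (-EREMOTE) or
 * resolve the guest page fault that kicked us out of SIE.
 */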
3771 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3772 {
3773 	struct mcck_volatile_info *mcck_info;
3774 	struct sie_page *sie_page;
3775 
3776 	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3777 		   vcpu->arch.sie_block->icptcode);
3778 	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3779 
3780 	if (guestdbg_enabled(vcpu))
3781 		kvm_s390_restore_guest_per_regs(vcpu);
3782 
3783 	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3784 	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3785 
3786 	if (exit_reason == -EINTR) {
3787 		VCPU_EVENT(vcpu, 3, "%s", "machine check");
3788 		sie_page = container_of(vcpu->arch.sie_block,
3789 					struct sie_page, sie_block);
3790 		mcck_info = &sie_page->mcck_info;
3791 		kvm_s390_reinject_machine_check(vcpu, mcck_info);
3792 		return 0;
3793 	}
3794 
3795 	if (vcpu->arch.sie_block->icptcode > 0) {
3796 		int rc = kvm_handle_sie_intercept(vcpu);
3797 
3798 		if (rc != -EOPNOTSUPP)
3799 			return rc;
3800 		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3801 		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3802 		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3803 		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3804 		return -EREMOTE;
3805 	} else if (exit_reason != -EFAULT) {
3806 		vcpu->stat.exit_null++;
3807 		return 0;
3808 	} else if (kvm_is_ucontrol(vcpu->kvm)) {
3809 		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3810 		vcpu->run->s390_ucontrol.trans_exc_code =
3811 						current->thread.gmap_addr;
3812 		vcpu->run->s390_ucontrol.pgm_code = 0x10;
3813 		return -EREMOTE;
3814 	} else if (current->thread.gmap_pfault) {
3815 		trace_kvm_s390_major_guest_pfault(vcpu);
3816 		current->thread.gmap_pfault = 0;
3817 		if (kvm_arch_setup_async_pf(vcpu))
3818 			return 0;
3819 		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3820 	}
3821 	return vcpu_post_run_fault_in_sie(vcpu);
3822 }
3823 
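/*
 * The inner run loop: vcpu_pre_run, drop kvm->srcu, enter SIE with
 * guest context/time accounting, re-acquire kvm->srcu and post-process
 * the exit.  Loops until vcpu_pre_run/vcpu_post_run report an error, a
 * signal is pending or a guest-debug exit has been requested.
 */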
3824 static int __vcpu_run(struct kvm_vcpu *vcpu)
3825 {
3826 	int rc, exit_reason;
3827 
3828 	/*
3829 	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
3830 	 * ning the guest), so that memslots (and other stuff) are protected
3831 	 */
3832 	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3833 
3834 	do {
3835 		rc = vcpu_pre_run(vcpu);
3836 		if (rc)
3837 			break;
3838 
3839 		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3840 		/*
3841 		 * As PF_VCPU will be used in the fault handler, there must be
3842 		 * no uaccess between guest_enter and guest_exit.
3843 		 */
3844 		local_irq_disable();
3845 		guest_enter_irqoff();
3846 		__disable_cpu_timer_accounting(vcpu);
3847 		local_irq_enable();
3848 		exit_reason = sie64a(vcpu->arch.sie_block,
3849 				     vcpu->run->s.regs.gprs);
3850 		local_irq_disable();
3851 		__enable_cpu_timer_accounting(vcpu);
3852 		guest_exit_irqoff();
3853 		local_irq_enable();
3854 		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3855 
3856 		rc = vcpu_post_run(vcpu, exit_reason);
3857 	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3858 
3859 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3860 	return rc;
3861 }
3862 
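/*
 * Transfer the register state that userspace marked dirty in kvm_run
 * into the vcpu/SIE block, perform lazy enablement of the runtime
 * instrumentation and guarded-storage facilities when userspace hands
 * in valid control blocks, and switch the FPU/vector, access-register
 * and guarded-storage context from host to guest.
 */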
3863 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3864 {
3865 	struct runtime_instr_cb *riccb;
3866 	struct gs_cb *gscb;
3867 
3868 	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3869 	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3870 	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3871 	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3872 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3873 		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3874 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3875 		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3876 		/* some control register changes require a tlb flush */
3877 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3878 	}
3879 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3880 		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3881 		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3882 		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3883 		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3884 		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3885 	}
3886 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3887 		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3888 		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3889 		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3890 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3891 			kvm_clear_async_pf_completion_queue(vcpu);
3892 	}
3893 	/*
3894 	 * If userspace sets the riccb (e.g. after migration) to a valid state,
3895 	 * we should enable RI here instead of doing the lazy enablement.
3896 	 */
3897 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3898 	    test_kvm_facility(vcpu->kvm, 64) &&
3899 	    riccb->v &&
3900 	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3901 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3902 		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3903 	}
3904 	/*
3905 	 * If userspace sets the gscb (e.g. after migration) to non-zero,
3906 	 * we should enable GS here instead of doing the lazy enablement.
3907 	 */
3908 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3909 	    test_kvm_facility(vcpu->kvm, 133) &&
3910 	    gscb->gssm &&
3911 	    !vcpu->arch.gs_enabled) {
3912 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3913 		vcpu->arch.sie_block->ecb |= ECB_GS;
3914 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3915 		vcpu->arch.gs_enabled = 1;
3916 	}
3917 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
3918 	    test_kvm_facility(vcpu->kvm, 82)) {
3919 		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3920 		vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
3921 	}
3922 	save_access_regs(vcpu->arch.host_acrs);
3923 	restore_access_regs(vcpu->run->s.regs.acrs);
3924 	/* save host (userspace) fprs/vrs */
3925 	save_fpu_regs();
3926 	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3927 	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
3928 	if (MACHINE_HAS_VX)
3929 		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3930 	else
3931 		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3932 	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3933 	if (test_fp_ctl(current->thread.fpu.fpc))
3934 		/* User space provided an invalid FPC, let's clear it */
3935 		current->thread.fpu.fpc = 0;
3936 	if (MACHINE_HAS_GS) {
3937 		preempt_disable();
3938 		__ctl_set_bit(2, 4);
3939 		if (current->thread.gs_cb) {
3940 			vcpu->arch.host_gscb = current->thread.gs_cb;
3941 			save_gs_cb(vcpu->arch.host_gscb);
3942 		}
3943 		if (vcpu->arch.gs_enabled) {
3944 			current->thread.gs_cb = (struct gs_cb *)
3945 						&vcpu->run->s.regs.gscb;
3946 			restore_gs_cb(current->thread.gs_cb);
3947 		}
3948 		preempt_enable();
3949 	}
3950 	/* SIE will load etoken directly from SDNX and therefore kvm_run */
3951 
3952 	kvm_run->kvm_dirty_regs = 0;
3953 }
3954 
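/*
 * Counterpart of sync_regs(): copy the guest register state back into
 * kvm_run for userspace and restore the host FPU, access-register and
 * guarded-storage context.
 */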
3955 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3956 {
3957 	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3958 	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3959 	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3960 	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3961 	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3962 	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3963 	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3964 	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3965 	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3966 	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3967 	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3968 	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3969 	kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
3970 	save_access_regs(vcpu->run->s.regs.acrs);
3971 	restore_access_regs(vcpu->arch.host_acrs);
3972 	/* Save guest register state */
3973 	save_fpu_regs();
3974 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3975 	/* Restore will be done lazily at return */
3976 	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3977 	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3978 	if (MACHINE_HAS_GS) {
3979 		__ctl_set_bit(2, 4);
3980 		if (vcpu->arch.gs_enabled)
3981 			save_gs_cb(current->thread.gs_cb);
3982 		preempt_disable();
3983 		current->thread.gs_cb = vcpu->arch.host_gscb;
3984 		restore_gs_cb(vcpu->arch.host_gscb);
3985 		preempt_enable();
3986 		if (!vcpu->arch.host_gscb)
3987 			__ctl_clear_bit(2, 4);
3988 		vcpu->arch.host_gscb = NULL;
3989 	}
3990 	/* SIE will save etoken directly into SDNX and therefore kvm_run */
3991 }
3992 
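/*
 * Entry point for the KVM_RUN ioctl: validate the sync-reg masks,
 * auto-start the vcpu unless userspace controls the cpu state itself,
 * sync registers in, run the inner loop and translate the result into
 * an exit reason (or -EINTR) for userspace.
 */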
3993 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3994 {
3995 	int rc;
3996 
3997 	if (kvm_run->immediate_exit)
3998 		return -EINTR;
3999 
4000 	if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
4001 	    kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
4002 		return -EINVAL;
4003 
4004 	vcpu_load(vcpu);
4005 
4006 	if (guestdbg_exit_pending(vcpu)) {
4007 		kvm_s390_prepare_debug_exit(vcpu);
4008 		rc = 0;
4009 		goto out;
4010 	}
4011 
4012 	kvm_sigset_activate(vcpu);
4013 
4014 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
4015 		kvm_s390_vcpu_start(vcpu);
4016 	} else if (is_vcpu_stopped(vcpu)) {
4017 		pr_err_ratelimited("can't run stopped vcpu %d\n",
4018 				   vcpu->vcpu_id);
4019 		rc = -EINVAL;
4020 		goto out;
4021 	}
4022 
4023 	sync_regs(vcpu, kvm_run);
4024 	enable_cpu_timer_accounting(vcpu);
4025 
4026 	might_fault();
4027 	rc = __vcpu_run(vcpu);
4028 
4029 	if (signal_pending(current) && !rc) {
4030 		kvm_run->exit_reason = KVM_EXIT_INTR;
4031 		rc = -EINTR;
4032 	}
4033 
4034 	if (guestdbg_exit_pending(vcpu) && !rc)  {
4035 		kvm_s390_prepare_debug_exit(vcpu);
4036 		rc = 0;
4037 	}
4038 
4039 	if (rc == -EREMOTE) {
4040 		/* userspace support is needed, kvm_run has been prepared */
4041 		rc = 0;
4042 	}
4043 
4044 	disable_cpu_timer_accounting(vcpu);
4045 	store_regs(vcpu, kvm_run);
4046 
4047 	kvm_sigset_deactivate(vcpu);
4048 
4049 	vcpu->stat.exit_userspace++;
4050 out:
4051 	vcpu_put(vcpu);
4052 	return rc;
4053 }
4054 
4055 /*
4056  * store status at address
4057  * we have two special cases:
4058  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
4059  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
4060  */
4061 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4062 {
4063 	unsigned char archmode = 1;
4064 	freg_t fprs[NUM_FPRS];
4065 	unsigned int px;
4066 	u64 clkcomp, cputm;
4067 	int rc;
4068 
4069 	px = kvm_s390_get_prefix(vcpu);
4070 	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
4071 		if (write_guest_abs(vcpu, 163, &archmode, 1))
4072 			return -EFAULT;
4073 		gpa = 0;
4074 	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
4075 		if (write_guest_real(vcpu, 163, &archmode, 1))
4076 			return -EFAULT;
4077 		gpa = px;
4078 	} else
4079 		gpa -= __LC_FPREGS_SAVE_AREA;
4080 
4081 	/* manually convert vector registers if necessary */
4082 	if (MACHINE_HAS_VX) {
4083 		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
4084 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4085 				     fprs, 128);
4086 	} else {
4087 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4088 				     vcpu->run->s.regs.fprs, 128);
4089 	}
4090 	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
4091 			      vcpu->run->s.regs.gprs, 128);
4092 	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
4093 			      &vcpu->arch.sie_block->gpsw, 16);
4094 	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
4095 			      &px, 4);
4096 	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
4097 			      &vcpu->run->s.regs.fpc, 4);
4098 	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
4099 			      &vcpu->arch.sie_block->todpr, 4);
4100 	cputm = kvm_s390_get_cpu_timer(vcpu);
4101 	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
4102 			      &cputm, 8);
4103 	clkcomp = vcpu->arch.sie_block->ckc >> 8;
4104 	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
4105 			      &clkcomp, 8);
4106 	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
4107 			      &vcpu->run->s.regs.acrs, 64);
4108 	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
4109 			      &vcpu->arch.sie_block->gcr, 128);
4110 	return rc ? -EFAULT : 0;
4111 }
4112 
4113 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
4114 {
4115 	/*
4116 	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
4117 	 * switch in the run ioctl. Let's update our copies before we save
4118 	 * them into the save area
4119 	 */
4120 	save_fpu_regs();
4121 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4122 	save_access_regs(vcpu->run->s.regs.acrs);
4123 
4124 	return kvm_s390_store_status_unloaded(vcpu, addr);
4125 }
4126 
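/*
 * The IBS facility only helps while a single vcpu is running.  The
 * helpers below clear a possibly pending opposite request and then
 * queue and kick the new request so the target vcpu picks it up in
 * kvm_s390_handle_requests().
 */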
4127 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4128 {
4129 	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
4130 	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
4131 }
4132 
4133 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
4134 {
4135 	unsigned int i;
4136 	struct kvm_vcpu *vcpu;
4137 
4138 	kvm_for_each_vcpu(i, vcpu, kvm) {
4139 		__disable_ibs_on_vcpu(vcpu);
4140 	}
4141 }
4142 
4143 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4144 {
4145 	if (!sclp.has_ibs)
4146 		return;
4147 	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
4148 	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
4149 }
4150 
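/*
 * Move a vcpu out of the STOPPED state.  Serialized by
 * arch.start_stop_lock; enables IBS if this becomes the only running
 * vcpu, or disables it on all vcpus as soon as a second one starts.
 */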
4151 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
4152 {
4153 	int i, online_vcpus, started_vcpus = 0;
4154 
4155 	if (!is_vcpu_stopped(vcpu))
4156 		return;
4157 
4158 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
4159 	/* Only one cpu at a time may enter/leave the STOPPED state. */
4160 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
4161 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4162 
4163 	for (i = 0; i < online_vcpus; i++) {
4164 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
4165 			started_vcpus++;
4166 	}
4167 
4168 	if (started_vcpus == 0) {
4169 		/* we're the only active VCPU -> speed it up */
4170 		__enable_ibs_on_vcpu(vcpu);
4171 	} else if (started_vcpus == 1) {
4172 		/*
4173 		 * As we are starting a second VCPU, we have to disable
4174 		 * the IBS facility on all VCPUs to remove potentially
4175 		 * outstanding ENABLE requests.
4176 		 */
4177 		__disable_ibs_on_all_vcpus(vcpu->kvm);
4178 	}
4179 
4180 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
4181 	/*
4182 	 * Another VCPU might have used IBS while we were offline.
4183 	 * Let's play safe and flush the VCPU at startup.
4184 	 */
4185 	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4186 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4187 	return;
4188 }
4189 
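/*
 * Move a vcpu into the STOPPED state.  Serialized by
 * arch.start_stop_lock; clears any pending stop interrupt, disables IBS
 * for this vcpu and re-enables it on the last remaining running vcpu.
 */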
4190 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
4191 {
4192 	int i, online_vcpus, started_vcpus = 0;
4193 	struct kvm_vcpu *started_vcpu = NULL;
4194 
4195 	if (is_vcpu_stopped(vcpu))
4196 		return;
4197 
4198 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
4199 	/* Only one cpu at a time may enter/leave the STOPPED state. */
4200 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
4201 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4202 
4203 	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
4204 	kvm_s390_clear_stop_irq(vcpu);
4205 
4206 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
4207 	__disable_ibs_on_vcpu(vcpu);
4208 
4209 	for (i = 0; i < online_vcpus; i++) {
4210 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
4211 			started_vcpus++;
4212 			started_vcpu = vcpu->kvm->vcpus[i];
4213 		}
4214 	}
4215 
4216 	if (started_vcpus == 1) {
4217 		/*
4218 		 * As we only have one VCPU left, we want to enable the
4219 		 * IBS facility for that VCPU to speed it up.
4220 		 */
4221 		__enable_ibs_on_vcpu(started_vcpu);
4222 	}
4223 
4224 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4225 	return;
4226 }
4227 
4228 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4229 				     struct kvm_enable_cap *cap)
4230 {
4231 	int r;
4232 
4233 	if (cap->flags)
4234 		return -EINVAL;
4235 
4236 	switch (cap->cap) {
4237 	case KVM_CAP_S390_CSS_SUPPORT:
4238 		if (!vcpu->kvm->arch.css_support) {
4239 			vcpu->kvm->arch.css_support = 1;
4240 			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
4241 			trace_kvm_s390_enable_css(vcpu->kvm);
4242 		}
4243 		r = 0;
4244 		break;
4245 	default:
4246 		r = -EINVAL;
4247 		break;
4248 	}
4249 	return r;
4250 }
4251 
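/*
 * Backend of the KVM_S390_MEM_OP vcpu ioctl: read or write guest
 * logical memory through access register mop->ar, bouncing the data
 * through a temporary kernel buffer.  With KVM_S390_MEMOP_F_CHECK_ONLY
 * only the access is verified; with KVM_S390_MEMOP_F_INJECT_EXCEPTION a
 * failing access injects the resulting program interrupt into the guest.
 *
 * Minimal userspace sketch (not part of this file; assumes vcpu_fd was
 * obtained via KVM_CREATE_VCPU and buf points to at least len bytes):
 *
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = guest_addr,
 *		.size  = len,
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf   = (__u64)(unsigned long)buf,
 *		.ar    = 0,
 *	};
 *	if (ioctl(vcpu_fd, KVM_S390_MEM_OP, &op) < 0)
 *		perror("KVM_S390_MEM_OP");
 */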
4252 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
4253 				  struct kvm_s390_mem_op *mop)
4254 {
4255 	void __user *uaddr = (void __user *)mop->buf;
4256 	void *tmpbuf = NULL;
4257 	int r, srcu_idx;
4258 	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
4259 				    | KVM_S390_MEMOP_F_CHECK_ONLY;
4260 
4261 	if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
4262 		return -EINVAL;
4263 
4264 	if (mop->size > MEM_OP_MAX_SIZE)
4265 		return -E2BIG;
4266 
4267 	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
4268 		tmpbuf = vmalloc(mop->size);
4269 		if (!tmpbuf)
4270 			return -ENOMEM;
4271 	}
4272 
4273 	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4274 
4275 	switch (mop->op) {
4276 	case KVM_S390_MEMOP_LOGICAL_READ:
4277 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4278 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4279 					    mop->size, GACC_FETCH);
4280 			break;
4281 		}
4282 		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4283 		if (r == 0) {
4284 			if (copy_to_user(uaddr, tmpbuf, mop->size))
4285 				r = -EFAULT;
4286 		}
4287 		break;
4288 	case KVM_S390_MEMOP_LOGICAL_WRITE:
4289 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4290 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4291 					    mop->size, GACC_STORE);
4292 			break;
4293 		}
4294 		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4295 			r = -EFAULT;
4296 			break;
4297 		}
4298 		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4299 		break;
4300 	default:
4301 		r = -EINVAL;
4302 	}
4303 
4304 	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
4305 
4306 	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4307 		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
4308 
4309 	vfree(tmpbuf);
4310 	return r;
4311 }
4312 
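/*
 * Interrupt-injection ioctls that are handled on the "async" path, i.e.
 * before the vcpu mutex is taken: KVM_S390_IRQ uses struct kvm_s390_irq
 * directly, KVM_S390_INTERRUPT carries the older struct
 * kvm_s390_interrupt and is converted via s390int_to_s390irq() first.
 */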
4313 long kvm_arch_vcpu_async_ioctl(struct file *filp,
4314 			       unsigned int ioctl, unsigned long arg)
4315 {
4316 	struct kvm_vcpu *vcpu = filp->private_data;
4317 	void __user *argp = (void __user *)arg;
4318 
4319 	switch (ioctl) {
4320 	case KVM_S390_IRQ: {
4321 		struct kvm_s390_irq s390irq;
4322 
4323 		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4324 			return -EFAULT;
4325 		return kvm_s390_inject_vcpu(vcpu, &s390irq);
4326 	}
4327 	case KVM_S390_INTERRUPT: {
4328 		struct kvm_s390_interrupt s390int;
4329 		struct kvm_s390_irq s390irq = {};
4330 
4331 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
4332 			return -EFAULT;
4333 		if (s390int_to_s390irq(&s390int, &s390irq))
4334 			return -EINVAL;
4335 		return kvm_s390_inject_vcpu(vcpu, &s390irq);
4336 	}
4337 	}
4338 	return -ENOIOCTLCMD;
4339 }
4340 
4341 long kvm_arch_vcpu_ioctl(struct file *filp,
4342 			 unsigned int ioctl, unsigned long arg)
4343 {
4344 	struct kvm_vcpu *vcpu = filp->private_data;
4345 	void __user *argp = (void __user *)arg;
4346 	int idx;
4347 	long r;
4348 
4349 	vcpu_load(vcpu);
4350 
4351 	switch (ioctl) {
4352 	case KVM_S390_STORE_STATUS:
4353 		idx = srcu_read_lock(&vcpu->kvm->srcu);
4354 		r = kvm_s390_vcpu_store_status(vcpu, arg);
4355 		srcu_read_unlock(&vcpu->kvm->srcu, idx);
4356 		break;
4357 	case KVM_S390_SET_INITIAL_PSW: {
4358 		psw_t psw;
4359 
4360 		r = -EFAULT;
4361 		if (copy_from_user(&psw, argp, sizeof(psw)))
4362 			break;
4363 		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4364 		break;
4365 	}
4366 	case KVM_S390_INITIAL_RESET:
4367 		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4368 		break;
4369 	case KVM_SET_ONE_REG:
4370 	case KVM_GET_ONE_REG: {
4371 		struct kvm_one_reg reg;
4372 		r = -EFAULT;
4373 		if (copy_from_user(&reg, argp, sizeof(reg)))
4374 			break;
4375 		if (ioctl == KVM_SET_ONE_REG)
4376 			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4377 		else
4378 			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4379 		break;
4380 	}
4381 #ifdef CONFIG_KVM_S390_UCONTROL
4382 	case KVM_S390_UCAS_MAP: {
4383 		struct kvm_s390_ucas_mapping ucasmap;
4384 
4385 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4386 			r = -EFAULT;
4387 			break;
4388 		}
4389 
4390 		if (!kvm_is_ucontrol(vcpu->kvm)) {
4391 			r = -EINVAL;
4392 			break;
4393 		}
4394 
4395 		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4396 				     ucasmap.vcpu_addr, ucasmap.length);
4397 		break;
4398 	}
4399 	case KVM_S390_UCAS_UNMAP: {
4400 		struct kvm_s390_ucas_mapping ucasmap;
4401 
4402 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4403 			r = -EFAULT;
4404 			break;
4405 		}
4406 
4407 		if (!kvm_is_ucontrol(vcpu->kvm)) {
4408 			r = -EINVAL;
4409 			break;
4410 		}
4411 
4412 		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
4413 			ucasmap.length);
4414 		break;
4415 	}
4416 #endif
4417 	case KVM_S390_VCPU_FAULT: {
4418 		r = gmap_fault(vcpu->arch.gmap, arg, 0);
4419 		break;
4420 	}
4421 	case KVM_ENABLE_CAP:
4422 	{
4423 		struct kvm_enable_cap cap;
4424 		r = -EFAULT;
4425 		if (copy_from_user(&cap, argp, sizeof(cap)))
4426 			break;
4427 		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4428 		break;
4429 	}
4430 	case KVM_S390_MEM_OP: {
4431 		struct kvm_s390_mem_op mem_op;
4432 
4433 		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4434 			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
4435 		else
4436 			r = -EFAULT;
4437 		break;
4438 	}
4439 	case KVM_S390_SET_IRQ_STATE: {
4440 		struct kvm_s390_irq_state irq_state;
4441 
4442 		r = -EFAULT;
4443 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4444 			break;
4445 		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4446 		    irq_state.len == 0 ||
4447 		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4448 			r = -EINVAL;
4449 			break;
4450 		}
4451 		/* do not use irq_state.flags, it will break old QEMUs */
4452 		r = kvm_s390_set_irq_state(vcpu,
4453 					   (void __user *) irq_state.buf,
4454 					   irq_state.len);
4455 		break;
4456 	}
4457 	case KVM_S390_GET_IRQ_STATE: {
4458 		struct kvm_s390_irq_state irq_state;
4459 
4460 		r = -EFAULT;
4461 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4462 			break;
4463 		if (irq_state.len == 0) {
4464 			r = -EINVAL;
4465 			break;
4466 		}
4467 		/* do not use irq_state.flags, it will break old QEMUs */
4468 		r = kvm_s390_get_irq_state(vcpu,
4469 					   (__u8 __user *)  irq_state.buf,
4470 					   irq_state.len);
4471 		break;
4472 	}
4473 	default:
4474 		r = -ENOTTY;
4475 	}
4476 
4477 	vcpu_put(vcpu);
4478 	return r;
4479 }
4480 
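/*
 * mmap fault handler for the vcpu fd: only user-controlled (ucontrol)
 * VMs may map the SIE control block at KVM_S390_SIE_PAGE_OFFSET;
 * everything else gets SIGBUS.
 */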
4481 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
4482 {
4483 #ifdef CONFIG_KVM_S390_UCONTROL
4484 	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
4485 		 && (kvm_is_ucontrol(vcpu->kvm))) {
4486 		vmf->page = virt_to_page(vcpu->arch.sie_block);
4487 		get_page(vmf->page);
4488 		return 0;
4489 	}
4490 #endif
4491 	return VM_FAULT_SIGBUS;
4492 }
4493 
4494 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
4495 			    unsigned long npages)
4496 {
4497 	return 0;
4498 }
4499 
4500 /* Section: memory related */
4501 int kvm_arch_prepare_memory_region(struct kvm *kvm,
4502 				   struct kvm_memory_slot *memslot,
4503 				   const struct kvm_userspace_memory_region *mem,
4504 				   enum kvm_mr_change change)
4505 {
4506 	/* A few sanity checks. Memory slots have to start and end on a
4507 	   segment boundary (1 MB). The memory in userland may be fragmented
4508 	   across different vmas. It is okay to mmap() and munmap() memory in
4509 	   this slot at any time after this call. */
4510 
4511 	if (mem->userspace_addr & 0xffffful)
4512 		return -EINVAL;
4513 
4514 	if (mem->memory_size & 0xffffful)
4515 		return -EINVAL;
4516 
4517 	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
4518 		return -EINVAL;
4519 
4520 	return 0;
4521 }
4522 
4523 void kvm_arch_commit_memory_region(struct kvm *kvm,
4524 				const struct kvm_userspace_memory_region *mem,
4525 				const struct kvm_memory_slot *old,
4526 				const struct kvm_memory_slot *new,
4527 				enum kvm_mr_change change)
4528 {
4529 	int rc = 0;
4530 
4531 	switch (change) {
4532 	case KVM_MR_DELETE:
4533 		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
4534 					old->npages * PAGE_SIZE);
4535 		break;
4536 	case KVM_MR_MOVE:
4537 		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
4538 					old->npages * PAGE_SIZE);
4539 		if (rc)
4540 			break;
4541 		/* FALLTHROUGH */
4542 	case KVM_MR_CREATE:
4543 		rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
4544 				      mem->guest_phys_addr, mem->memory_size);
4545 		break;
4546 	case KVM_MR_FLAGS_ONLY:
4547 		break;
4548 	default:
4549 		WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
4550 	}
4551 	if (rc)
4552 		pr_warn("failed to commit memory region\n");
4553 	return;
4554 }
4555 
4556 static inline unsigned long nonhyp_mask(int i)
4557 {
4558 	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
4559 
4560 	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
4561 }
4562 
4563 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
4564 {
4565 	vcpu->valid_wakeup = false;
4566 }
4567 
4568 static int __init kvm_s390_init(void)
4569 {
4570 	int i;
4571 
4572 	if (!sclp.has_sief2) {
4573 		pr_info("SIE is not available\n");
4574 		return -ENODEV;
4575 	}
4576 
4577 	if (nested && hpage) {
4578 		pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
4579 		return -EINVAL;
4580 	}
4581 
4582 	for (i = 0; i < 16; i++)
4583 		kvm_s390_fac_base[i] |=
4584 			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
4585 
4586 	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
4587 }
4588 
4589 static void __exit kvm_s390_exit(void)
4590 {
4591 	kvm_exit();
4592 }
4593 
4594 module_init(kvm_s390_init);
4595 module_exit(kvm_s390_exit);
4596 
4597 /*
4598  * Enable autoloading of the kvm module.
4599  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
4600  * since x86 takes a different approach.
4601  */
4602 #include <linux/miscdevice.h>
4603 MODULE_ALIAS_MISCDEV(KVM_MINOR);
4604 MODULE_ALIAS("devname:kvm");
4605